{ "best_metric": null, "best_model_checkpoint": null, "epoch": 14.97991967871486, "eval_steps": 500, "global_step": 5595, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 2.3809523809523811e-07, "loss": 12.5594, "step": 2 }, { "epoch": 0.01, "learning_rate": 4.7619047619047623e-07, "loss": 13.0638, "step": 4 }, { "epoch": 0.02, "learning_rate": 7.142857142857143e-07, "loss": 12.4793, "step": 6 }, { "epoch": 0.02, "learning_rate": 9.523809523809525e-07, "loss": 11.3959, "step": 8 }, { "epoch": 0.03, "learning_rate": 1.1904761904761906e-06, "loss": 12.0418, "step": 10 }, { "epoch": 0.03, "learning_rate": 1.4285714285714286e-06, "loss": 11.322, "step": 12 }, { "epoch": 0.04, "learning_rate": 1.6666666666666667e-06, "loss": 11.2914, "step": 14 }, { "epoch": 0.04, "learning_rate": 1.904761904761905e-06, "loss": 9.875, "step": 16 }, { "epoch": 0.05, "learning_rate": 2.1428571428571427e-06, "loss": 8.7854, "step": 18 }, { "epoch": 0.05, "learning_rate": 2.380952380952381e-06, "loss": 5.9327, "step": 20 }, { "epoch": 0.06, "learning_rate": 2.6190476190476192e-06, "loss": 3.6544, "step": 22 }, { "epoch": 0.06, "learning_rate": 2.8571428571428573e-06, "loss": 2.7828, "step": 24 }, { "epoch": 0.07, "learning_rate": 3.0952380952380957e-06, "loss": 2.148, "step": 26 }, { "epoch": 0.07, "learning_rate": 3.3333333333333333e-06, "loss": 1.9546, "step": 28 }, { "epoch": 0.08, "learning_rate": 3.5714285714285718e-06, "loss": 1.5922, "step": 30 }, { "epoch": 0.09, "learning_rate": 3.80952380952381e-06, "loss": 1.4238, "step": 32 }, { "epoch": 0.09, "learning_rate": 4.047619047619048e-06, "loss": 1.2005, "step": 34 }, { "epoch": 0.1, "learning_rate": 4.2857142857142855e-06, "loss": 1.1876, "step": 36 }, { "epoch": 0.1, "learning_rate": 4.523809523809524e-06, "loss": 0.9505, "step": 38 }, { "epoch": 0.11, "learning_rate": 4.761904761904762e-06, "loss": 1.1628, "step": 40 }, { "epoch": 0.11, "learning_rate": 5e-06, "loss": 1.3191, "step": 42 }, { "epoch": 0.12, "learning_rate": 5.2380952380952384e-06, "loss": 0.7919, "step": 44 }, { "epoch": 0.12, "learning_rate": 5.476190476190477e-06, "loss": 0.7624, "step": 46 }, { "epoch": 0.13, "learning_rate": 5.7142857142857145e-06, "loss": 0.3865, "step": 48 }, { "epoch": 0.13, "learning_rate": 5.9523809523809525e-06, "loss": 0.9239, "step": 50 }, { "epoch": 0.14, "learning_rate": 6.1904761904761914e-06, "loss": 0.6573, "step": 52 }, { "epoch": 0.14, "learning_rate": 6.4285714285714295e-06, "loss": 0.7432, "step": 54 }, { "epoch": 0.15, "learning_rate": 6.666666666666667e-06, "loss": 0.389, "step": 56 }, { "epoch": 0.16, "learning_rate": 6.9047619047619055e-06, "loss": 0.3553, "step": 58 }, { "epoch": 0.16, "learning_rate": 7.1428571428571436e-06, "loss": 0.8575, "step": 60 }, { "epoch": 0.17, "learning_rate": 7.380952380952382e-06, "loss": 0.5041, "step": 62 }, { "epoch": 0.17, "learning_rate": 7.61904761904762e-06, "loss": 0.5238, "step": 64 }, { "epoch": 0.18, "learning_rate": 7.857142857142858e-06, "loss": 0.6583, "step": 66 }, { "epoch": 0.18, "learning_rate": 8.095238095238097e-06, "loss": 0.4538, "step": 68 }, { "epoch": 0.19, "learning_rate": 8.333333333333334e-06, "loss": 0.3631, "step": 70 }, { "epoch": 0.19, "learning_rate": 8.571428571428571e-06, "loss": 0.5219, "step": 72 }, { "epoch": 0.2, "learning_rate": 8.80952380952381e-06, "loss": 0.5786, "step": 74 }, { "epoch": 0.2, "learning_rate": 9.047619047619049e-06, "loss": 0.3789, "step": 76 }, { "epoch": 0.21, "learning_rate": 9.285714285714288e-06, "loss": 0.5004, "step": 78 }, { "epoch": 0.21, "learning_rate": 9.523809523809525e-06, "loss": 0.3671, "step": 80 }, { "epoch": 0.22, "learning_rate": 9.761904761904762e-06, "loss": 0.526, "step": 82 }, { "epoch": 0.22, "learning_rate": 1e-05, "loss": 0.5381, "step": 84 }, { "epoch": 0.23, "learning_rate": 1.0238095238095238e-05, "loss": 0.4922, "step": 86 }, { "epoch": 0.24, "learning_rate": 1.0476190476190477e-05, "loss": 0.3988, "step": 88 }, { "epoch": 0.24, "learning_rate": 1.0714285714285714e-05, "loss": 0.239, "step": 90 }, { "epoch": 0.25, "learning_rate": 1.0952380952380955e-05, "loss": 0.2102, "step": 92 }, { "epoch": 0.25, "learning_rate": 1.1190476190476192e-05, "loss": 0.4542, "step": 94 }, { "epoch": 0.26, "learning_rate": 1.1428571428571429e-05, "loss": 0.5112, "step": 96 }, { "epoch": 0.26, "learning_rate": 1.1666666666666668e-05, "loss": 0.602, "step": 98 }, { "epoch": 0.27, "learning_rate": 1.1904761904761905e-05, "loss": 0.5682, "step": 100 }, { "epoch": 0.27, "learning_rate": 1.2142857142857142e-05, "loss": 0.5206, "step": 102 }, { "epoch": 0.28, "learning_rate": 1.2380952380952383e-05, "loss": 0.3205, "step": 104 }, { "epoch": 0.28, "learning_rate": 1.261904761904762e-05, "loss": 0.6555, "step": 106 }, { "epoch": 0.29, "learning_rate": 1.2857142857142859e-05, "loss": 0.2197, "step": 108 }, { "epoch": 0.29, "learning_rate": 1.3095238095238096e-05, "loss": 0.2883, "step": 110 }, { "epoch": 0.3, "learning_rate": 1.3333333333333333e-05, "loss": 0.4768, "step": 112 }, { "epoch": 0.31, "learning_rate": 1.3571428571428574e-05, "loss": 0.3124, "step": 114 }, { "epoch": 0.31, "learning_rate": 1.3809523809523811e-05, "loss": 0.2498, "step": 116 }, { "epoch": 0.32, "learning_rate": 1.4047619047619048e-05, "loss": 0.3259, "step": 118 }, { "epoch": 0.32, "learning_rate": 1.4285714285714287e-05, "loss": 0.3764, "step": 120 }, { "epoch": 0.33, "learning_rate": 1.4523809523809524e-05, "loss": 0.3794, "step": 122 }, { "epoch": 0.33, "learning_rate": 1.4761904761904763e-05, "loss": 0.2509, "step": 124 }, { "epoch": 0.34, "learning_rate": 1.5000000000000002e-05, "loss": 0.4569, "step": 126 }, { "epoch": 0.34, "learning_rate": 1.523809523809524e-05, "loss": 0.5856, "step": 128 }, { "epoch": 0.35, "learning_rate": 1.5476190476190476e-05, "loss": 0.2465, "step": 130 }, { "epoch": 0.35, "learning_rate": 1.5714285714285715e-05, "loss": 0.0759, "step": 132 }, { "epoch": 0.36, "learning_rate": 1.5952380952380954e-05, "loss": 0.3887, "step": 134 }, { "epoch": 0.36, "learning_rate": 1.6190476190476193e-05, "loss": 0.3218, "step": 136 }, { "epoch": 0.37, "learning_rate": 1.642857142857143e-05, "loss": 0.5441, "step": 138 }, { "epoch": 0.37, "learning_rate": 1.6666666666666667e-05, "loss": 0.4805, "step": 140 }, { "epoch": 0.38, "learning_rate": 1.6904761904761906e-05, "loss": 0.3455, "step": 142 }, { "epoch": 0.39, "learning_rate": 1.7142857142857142e-05, "loss": 0.6335, "step": 144 }, { "epoch": 0.39, "learning_rate": 1.7380952380952384e-05, "loss": 0.2465, "step": 146 }, { "epoch": 0.4, "learning_rate": 1.761904761904762e-05, "loss": 0.1856, "step": 148 }, { "epoch": 0.4, "learning_rate": 1.785714285714286e-05, "loss": 0.5019, "step": 150 }, { "epoch": 0.41, "learning_rate": 1.8095238095238097e-05, "loss": 0.5929, "step": 152 }, { "epoch": 0.41, "learning_rate": 1.8333333333333333e-05, "loss": 0.1597, "step": 154 }, { "epoch": 0.42, "learning_rate": 1.8571428571428575e-05, "loss": 0.3712, "step": 156 }, { "epoch": 0.42, "learning_rate": 1.880952380952381e-05, "loss": 0.2877, "step": 158 }, { "epoch": 0.43, "learning_rate": 1.904761904761905e-05, "loss": 0.3374, "step": 160 }, { "epoch": 0.43, "learning_rate": 1.928571428571429e-05, "loss": 0.1534, "step": 162 }, { "epoch": 0.44, "learning_rate": 1.9523809523809524e-05, "loss": 0.4258, "step": 164 }, { "epoch": 0.44, "learning_rate": 1.9761904761904763e-05, "loss": 0.5418, "step": 166 }, { "epoch": 0.45, "learning_rate": 2e-05, "loss": 0.2858, "step": 168 }, { "epoch": 0.46, "learning_rate": 1.9999993297913182e-05, "loss": 0.3358, "step": 170 }, { "epoch": 0.46, "learning_rate": 1.999997319166171e-05, "loss": 0.5259, "step": 172 }, { "epoch": 0.47, "learning_rate": 1.9999939681272535e-05, "loss": 0.4373, "step": 174 }, { "epoch": 0.47, "learning_rate": 1.9999892766790575e-05, "loss": 0.4796, "step": 176 }, { "epoch": 0.48, "learning_rate": 1.9999832448278715e-05, "loss": 0.4495, "step": 178 }, { "epoch": 0.48, "learning_rate": 1.9999758725817802e-05, "loss": 0.3336, "step": 180 }, { "epoch": 0.49, "learning_rate": 1.9999671599506662e-05, "loss": 0.6372, "step": 182 }, { "epoch": 0.49, "learning_rate": 1.999957106946208e-05, "loss": 0.3545, "step": 184 }, { "epoch": 0.5, "learning_rate": 1.999945713581881e-05, "loss": 0.3317, "step": 186 }, { "epoch": 0.5, "learning_rate": 1.9999329798729558e-05, "loss": 0.4832, "step": 188 }, { "epoch": 0.51, "learning_rate": 1.9999189058365022e-05, "loss": 0.762, "step": 190 }, { "epoch": 0.51, "learning_rate": 1.9999034914913848e-05, "loss": 0.3316, "step": 192 }, { "epoch": 0.52, "learning_rate": 1.999886736858266e-05, "loss": 0.404, "step": 194 }, { "epoch": 0.52, "learning_rate": 1.9998686419596023e-05, "loss": 0.4049, "step": 196 }, { "epoch": 0.53, "learning_rate": 1.99984920681965e-05, "loss": 0.4246, "step": 198 }, { "epoch": 0.54, "learning_rate": 1.99982843146446e-05, "loss": 0.3114, "step": 200 }, { "epoch": 0.54, "learning_rate": 1.999806315921879e-05, "loss": 0.5133, "step": 202 }, { "epoch": 0.55, "learning_rate": 1.999782860221552e-05, "loss": 0.4473, "step": 204 }, { "epoch": 0.55, "learning_rate": 1.9997580643949194e-05, "loss": 0.2796, "step": 206 }, { "epoch": 0.56, "learning_rate": 1.9997319284752176e-05, "loss": 0.4623, "step": 208 }, { "epoch": 0.56, "learning_rate": 1.9997044524974797e-05, "loss": 0.3898, "step": 210 }, { "epoch": 0.57, "learning_rate": 1.9996756364985353e-05, "loss": 0.2763, "step": 212 }, { "epoch": 0.57, "learning_rate": 1.9996454805170094e-05, "loss": 0.3855, "step": 214 }, { "epoch": 0.58, "learning_rate": 1.999613984593324e-05, "loss": 0.2388, "step": 216 }, { "epoch": 0.58, "learning_rate": 1.9995811487696962e-05, "loss": 0.1727, "step": 218 }, { "epoch": 0.59, "learning_rate": 1.9995469730901407e-05, "loss": 0.3933, "step": 220 }, { "epoch": 0.59, "learning_rate": 1.999511457600466e-05, "loss": 0.6963, "step": 222 }, { "epoch": 0.6, "learning_rate": 1.999474602348279e-05, "loss": 0.2378, "step": 224 }, { "epoch": 0.61, "learning_rate": 1.9994364073829795e-05, "loss": 0.2076, "step": 226 }, { "epoch": 0.61, "learning_rate": 1.999396872755766e-05, "loss": 0.39, "step": 228 }, { "epoch": 0.62, "learning_rate": 1.999355998519631e-05, "loss": 0.2078, "step": 230 }, { "epoch": 0.62, "learning_rate": 1.9993137847293628e-05, "loss": 0.258, "step": 232 }, { "epoch": 0.63, "learning_rate": 1.9992702314415463e-05, "loss": 0.3686, "step": 234 }, { "epoch": 0.63, "learning_rate": 1.9992253387145597e-05, "loss": 0.4398, "step": 236 }, { "epoch": 0.64, "learning_rate": 1.9991791066085794e-05, "loss": 0.3618, "step": 238 }, { "epoch": 0.64, "learning_rate": 1.999131535185575e-05, "loss": 0.3896, "step": 240 }, { "epoch": 0.65, "learning_rate": 1.9990826245093116e-05, "loss": 0.2657, "step": 242 }, { "epoch": 0.65, "learning_rate": 1.999032374645351e-05, "loss": 0.361, "step": 244 }, { "epoch": 0.66, "learning_rate": 1.9989807856610483e-05, "loss": 0.1554, "step": 246 }, { "epoch": 0.66, "learning_rate": 1.998927857625555e-05, "loss": 0.3155, "step": 248 }, { "epoch": 0.67, "learning_rate": 1.9988735906098157e-05, "loss": 0.1541, "step": 250 }, { "epoch": 0.67, "learning_rate": 1.998817984686571e-05, "loss": 0.2779, "step": 252 }, { "epoch": 0.68, "learning_rate": 1.9987610399303568e-05, "loss": 0.378, "step": 254 }, { "epoch": 0.69, "learning_rate": 1.9987027564175027e-05, "loss": 0.3571, "step": 256 }, { "epoch": 0.69, "learning_rate": 1.998643134226132e-05, "loss": 0.5658, "step": 258 }, { "epoch": 0.7, "learning_rate": 1.9985821734361646e-05, "loss": 0.1391, "step": 260 }, { "epoch": 0.7, "learning_rate": 1.9985198741293126e-05, "loss": 0.2903, "step": 262 }, { "epoch": 0.71, "learning_rate": 1.9984562363890834e-05, "loss": 0.1965, "step": 264 }, { "epoch": 0.71, "learning_rate": 1.998391260300778e-05, "loss": 0.3378, "step": 266 }, { "epoch": 0.72, "learning_rate": 1.998324945951491e-05, "loss": 0.2984, "step": 268 }, { "epoch": 0.72, "learning_rate": 1.998257293430112e-05, "loss": 0.2129, "step": 270 }, { "epoch": 0.73, "learning_rate": 1.9981883028273237e-05, "loss": 0.1894, "step": 272 }, { "epoch": 0.73, "learning_rate": 1.998117974235602e-05, "loss": 0.2298, "step": 274 }, { "epoch": 0.74, "learning_rate": 1.998046307749216e-05, "loss": 0.535, "step": 276 }, { "epoch": 0.74, "learning_rate": 1.9979733034642297e-05, "loss": 0.4103, "step": 278 }, { "epoch": 0.75, "learning_rate": 1.997898961478499e-05, "loss": 0.187, "step": 280 }, { "epoch": 0.76, "learning_rate": 1.9978232818916727e-05, "loss": 0.2796, "step": 282 }, { "epoch": 0.76, "learning_rate": 1.9977462648051934e-05, "loss": 0.2729, "step": 284 }, { "epoch": 0.77, "learning_rate": 1.9976679103222963e-05, "loss": 0.4135, "step": 286 }, { "epoch": 0.77, "learning_rate": 1.997588218548009e-05, "loss": 0.229, "step": 288 }, { "epoch": 0.78, "learning_rate": 1.9975071895891517e-05, "loss": 0.2805, "step": 290 }, { "epoch": 0.78, "learning_rate": 1.997424823554337e-05, "loss": 0.287, "step": 292 }, { "epoch": 0.79, "learning_rate": 1.9973411205539695e-05, "loss": 0.2239, "step": 294 }, { "epoch": 0.79, "learning_rate": 1.9972560807002465e-05, "loss": 0.1155, "step": 296 }, { "epoch": 0.8, "learning_rate": 1.9971697041071573e-05, "loss": 0.2644, "step": 298 }, { "epoch": 0.8, "learning_rate": 1.9970819908904815e-05, "loss": 0.3994, "step": 300 }, { "epoch": 0.81, "learning_rate": 1.996992941167792e-05, "loss": 0.2231, "step": 302 }, { "epoch": 0.81, "learning_rate": 1.996902555058453e-05, "loss": 0.3249, "step": 304 }, { "epoch": 0.82, "learning_rate": 1.9968108326836192e-05, "loss": 0.1634, "step": 306 }, { "epoch": 0.82, "learning_rate": 1.996717774166237e-05, "loss": 0.1715, "step": 308 }, { "epoch": 0.83, "learning_rate": 1.996623379631043e-05, "loss": 0.3497, "step": 310 }, { "epoch": 0.84, "learning_rate": 1.9965276492045662e-05, "loss": 0.192, "step": 312 }, { "epoch": 0.84, "learning_rate": 1.996430583015125e-05, "loss": 0.2062, "step": 314 }, { "epoch": 0.85, "learning_rate": 1.9963321811928287e-05, "loss": 0.5813, "step": 316 }, { "epoch": 0.85, "learning_rate": 1.9962324438695764e-05, "loss": 0.4921, "step": 318 }, { "epoch": 0.86, "learning_rate": 1.9961313711790578e-05, "loss": 0.3229, "step": 320 }, { "epoch": 0.86, "learning_rate": 1.996028963256753e-05, "loss": 0.2858, "step": 322 }, { "epoch": 0.87, "learning_rate": 1.995925220239931e-05, "loss": 0.1917, "step": 324 }, { "epoch": 0.87, "learning_rate": 1.995820142267651e-05, "loss": 0.1787, "step": 326 }, { "epoch": 0.88, "learning_rate": 1.995713729480761e-05, "loss": 0.3714, "step": 328 }, { "epoch": 0.88, "learning_rate": 1.9956059820218982e-05, "loss": 0.3405, "step": 330 }, { "epoch": 0.89, "learning_rate": 1.99549690003549e-05, "loss": 0.1545, "step": 332 }, { "epoch": 0.89, "learning_rate": 1.9953864836677516e-05, "loss": 0.1519, "step": 334 }, { "epoch": 0.9, "learning_rate": 1.9952747330666865e-05, "loss": 0.1179, "step": 336 }, { "epoch": 0.9, "learning_rate": 1.9951616483820877e-05, "loss": 0.376, "step": 338 }, { "epoch": 0.91, "learning_rate": 1.995047229765536e-05, "loss": 0.2752, "step": 340 }, { "epoch": 0.92, "learning_rate": 1.994931477370399e-05, "loss": 0.2004, "step": 342 }, { "epoch": 0.92, "learning_rate": 1.9948143913518346e-05, "loss": 0.4564, "step": 344 }, { "epoch": 0.93, "learning_rate": 1.994695971866786e-05, "loss": 0.3796, "step": 346 }, { "epoch": 0.93, "learning_rate": 1.9945762190739854e-05, "loss": 0.0914, "step": 348 }, { "epoch": 0.94, "learning_rate": 1.994455133133951e-05, "loss": 0.2289, "step": 350 }, { "epoch": 0.94, "learning_rate": 1.9943327142089885e-05, "loss": 0.1411, "step": 352 }, { "epoch": 0.95, "learning_rate": 1.9942089624631907e-05, "loss": 0.1362, "step": 354 }, { "epoch": 0.95, "learning_rate": 1.9940838780624366e-05, "loss": 0.2948, "step": 356 }, { "epoch": 0.96, "learning_rate": 1.993957461174391e-05, "loss": 0.096, "step": 358 }, { "epoch": 0.96, "learning_rate": 1.9938297119685054e-05, "loss": 0.2465, "step": 360 }, { "epoch": 0.97, "learning_rate": 1.993700630616018e-05, "loss": 0.2679, "step": 362 }, { "epoch": 0.97, "learning_rate": 1.99357021728995e-05, "loss": 0.1359, "step": 364 }, { "epoch": 0.98, "learning_rate": 1.9934384721651115e-05, "loss": 0.2159, "step": 366 }, { "epoch": 0.99, "learning_rate": 1.9933053954180947e-05, "loss": 0.2175, "step": 368 }, { "epoch": 0.99, "learning_rate": 1.9931709872272783e-05, "loss": 0.2823, "step": 370 }, { "epoch": 1.0, "learning_rate": 1.993035247772826e-05, "loss": 0.248, "step": 372 }, { "epoch": 1.0, "learning_rate": 1.992898177236684e-05, "loss": 0.2528, "step": 374 }, { "epoch": 1.01, "learning_rate": 1.992759775802585e-05, "loss": 0.0823, "step": 376 }, { "epoch": 1.01, "learning_rate": 1.9926200436560448e-05, "loss": 0.2583, "step": 378 }, { "epoch": 1.02, "learning_rate": 1.992478980984362e-05, "loss": 0.1116, "step": 380 }, { "epoch": 1.02, "learning_rate": 1.9923365879766204e-05, "loss": 0.1395, "step": 382 }, { "epoch": 1.03, "learning_rate": 1.9921928648236855e-05, "loss": 0.1708, "step": 384 }, { "epoch": 1.03, "learning_rate": 1.992047811718206e-05, "loss": 0.1675, "step": 386 }, { "epoch": 1.04, "learning_rate": 1.9919014288546146e-05, "loss": 0.1754, "step": 388 }, { "epoch": 1.04, "learning_rate": 1.9917537164291244e-05, "loss": 0.1902, "step": 390 }, { "epoch": 1.05, "learning_rate": 1.9916046746397325e-05, "loss": 0.1031, "step": 392 }, { "epoch": 1.05, "learning_rate": 1.9914543036862164e-05, "loss": 0.1257, "step": 394 }, { "epoch": 1.06, "learning_rate": 1.9913026037701364e-05, "loss": 0.0986, "step": 396 }, { "epoch": 1.07, "learning_rate": 1.9911495750948335e-05, "loss": 0.0686, "step": 398 }, { "epoch": 1.07, "learning_rate": 1.99099521786543e-05, "loss": 0.0577, "step": 400 }, { "epoch": 1.08, "learning_rate": 1.9908395322888296e-05, "loss": 0.1589, "step": 402 }, { "epoch": 1.08, "learning_rate": 1.9906825185737146e-05, "loss": 0.1065, "step": 404 }, { "epoch": 1.09, "learning_rate": 1.99052417693055e-05, "loss": 0.1492, "step": 406 }, { "epoch": 1.09, "learning_rate": 1.9903645075715798e-05, "loss": 0.1965, "step": 408 }, { "epoch": 1.1, "learning_rate": 1.9902035107108268e-05, "loss": 0.1856, "step": 410 }, { "epoch": 1.1, "learning_rate": 1.9900411865640947e-05, "loss": 0.0286, "step": 412 }, { "epoch": 1.11, "learning_rate": 1.9898775353489653e-05, "loss": 0.3616, "step": 414 }, { "epoch": 1.11, "learning_rate": 1.9897125572847994e-05, "loss": 0.2518, "step": 416 }, { "epoch": 1.12, "learning_rate": 1.9895462525927364e-05, "loss": 0.4248, "step": 418 }, { "epoch": 1.12, "learning_rate": 1.9893786214956946e-05, "loss": 0.1179, "step": 420 }, { "epoch": 1.13, "learning_rate": 1.989209664218369e-05, "loss": 0.1806, "step": 422 }, { "epoch": 1.14, "learning_rate": 1.989039380987233e-05, "loss": 0.065, "step": 424 }, { "epoch": 1.14, "learning_rate": 1.9888677720305375e-05, "loss": 0.0844, "step": 426 }, { "epoch": 1.15, "learning_rate": 1.9886948375783096e-05, "loss": 0.1577, "step": 428 }, { "epoch": 1.15, "learning_rate": 1.9885205778623545e-05, "loss": 0.082, "step": 430 }, { "epoch": 1.16, "learning_rate": 1.9883449931162517e-05, "loss": 0.2219, "step": 432 }, { "epoch": 1.16, "learning_rate": 1.988168083575359e-05, "loss": 0.19, "step": 434 }, { "epoch": 1.17, "learning_rate": 1.9879898494768093e-05, "loss": 0.246, "step": 436 }, { "epoch": 1.17, "learning_rate": 1.9878102910595097e-05, "loss": 0.1687, "step": 438 }, { "epoch": 1.18, "learning_rate": 1.9876294085641436e-05, "loss": 0.0853, "step": 440 }, { "epoch": 1.18, "learning_rate": 1.98744720223317e-05, "loss": 0.2049, "step": 442 }, { "epoch": 1.19, "learning_rate": 1.98726367231082e-05, "loss": 0.1125, "step": 444 }, { "epoch": 1.19, "learning_rate": 1.9870788190431015e-05, "loss": 0.1744, "step": 446 }, { "epoch": 1.2, "learning_rate": 1.9868926426777943e-05, "loss": 0.0956, "step": 448 }, { "epoch": 1.2, "learning_rate": 1.9867051434644532e-05, "loss": 0.1981, "step": 450 }, { "epoch": 1.21, "learning_rate": 1.9865163216544044e-05, "loss": 0.0386, "step": 452 }, { "epoch": 1.22, "learning_rate": 1.9863261775007486e-05, "loss": 0.1104, "step": 454 }, { "epoch": 1.22, "learning_rate": 1.9861347112583578e-05, "loss": 0.1795, "step": 456 }, { "epoch": 1.23, "learning_rate": 1.9859419231838776e-05, "loss": 0.1433, "step": 458 }, { "epoch": 1.23, "learning_rate": 1.9857478135357234e-05, "loss": 0.204, "step": 460 }, { "epoch": 1.24, "learning_rate": 1.9855523825740842e-05, "loss": 0.1753, "step": 462 }, { "epoch": 1.24, "learning_rate": 1.985355630560918e-05, "loss": 0.1125, "step": 464 }, { "epoch": 1.25, "learning_rate": 1.9851575577599552e-05, "loss": 0.1494, "step": 466 }, { "epoch": 1.25, "learning_rate": 1.9849581644366964e-05, "loss": 0.0661, "step": 468 }, { "epoch": 1.26, "learning_rate": 1.984757450858411e-05, "loss": 0.1408, "step": 470 }, { "epoch": 1.26, "learning_rate": 1.9845554172941396e-05, "loss": 0.3131, "step": 472 }, { "epoch": 1.27, "learning_rate": 1.984352064014691e-05, "loss": 0.0754, "step": 474 }, { "epoch": 1.27, "learning_rate": 1.984147391292644e-05, "loss": 0.0223, "step": 476 }, { "epoch": 1.28, "learning_rate": 1.9839413994023454e-05, "loss": 0.1337, "step": 478 }, { "epoch": 1.29, "learning_rate": 1.9837340886199097e-05, "loss": 0.0402, "step": 480 }, { "epoch": 1.29, "learning_rate": 1.9835254592232203e-05, "loss": 0.1794, "step": 482 }, { "epoch": 1.3, "learning_rate": 1.983315511491928e-05, "loss": 0.0849, "step": 484 }, { "epoch": 1.3, "learning_rate": 1.9831042457074498e-05, "loss": 0.1642, "step": 486 }, { "epoch": 1.31, "learning_rate": 1.9828916621529703e-05, "loss": 0.1141, "step": 488 }, { "epoch": 1.31, "learning_rate": 1.9826777611134402e-05, "loss": 0.16, "step": 490 }, { "epoch": 1.32, "learning_rate": 1.982462542875576e-05, "loss": 0.157, "step": 492 }, { "epoch": 1.32, "learning_rate": 1.9822460077278606e-05, "loss": 0.0762, "step": 494 }, { "epoch": 1.33, "learning_rate": 1.9820281559605407e-05, "loss": 0.1537, "step": 496 }, { "epoch": 1.33, "learning_rate": 1.9818089878656284e-05, "loss": 0.1486, "step": 498 }, { "epoch": 1.34, "learning_rate": 1.9815885037369017e-05, "loss": 0.1609, "step": 500 }, { "epoch": 1.34, "learning_rate": 1.9813667038698997e-05, "loss": 0.092, "step": 502 }, { "epoch": 1.35, "learning_rate": 1.981143588561928e-05, "loss": 0.1063, "step": 504 }, { "epoch": 1.35, "learning_rate": 1.980919158112054e-05, "loss": 0.1448, "step": 506 }, { "epoch": 1.36, "learning_rate": 1.9806934128211076e-05, "loss": 0.2281, "step": 508 }, { "epoch": 1.37, "learning_rate": 1.9804663529916825e-05, "loss": 0.2334, "step": 510 }, { "epoch": 1.37, "learning_rate": 1.9802379789281334e-05, "loss": 0.1601, "step": 512 }, { "epoch": 1.38, "learning_rate": 1.9800082909365764e-05, "loss": 0.1317, "step": 514 }, { "epoch": 1.38, "learning_rate": 1.97977728932489e-05, "loss": 0.061, "step": 516 }, { "epoch": 1.39, "learning_rate": 1.979544974402712e-05, "loss": 0.0921, "step": 518 }, { "epoch": 1.39, "learning_rate": 1.9793113464814416e-05, "loss": 0.1101, "step": 520 }, { "epoch": 1.4, "learning_rate": 1.9790764058742384e-05, "loss": 0.1167, "step": 522 }, { "epoch": 1.4, "learning_rate": 1.9788401528960198e-05, "loss": 0.2337, "step": 524 }, { "epoch": 1.41, "learning_rate": 1.9786025878634644e-05, "loss": 0.1965, "step": 526 }, { "epoch": 1.41, "learning_rate": 1.9783637110950077e-05, "loss": 0.1706, "step": 528 }, { "epoch": 1.42, "learning_rate": 1.9781235229108445e-05, "loss": 0.0369, "step": 530 }, { "epoch": 1.42, "learning_rate": 1.9778820236329276e-05, "loss": 0.1566, "step": 532 }, { "epoch": 1.43, "learning_rate": 1.9776392135849662e-05, "loss": 0.0689, "step": 534 }, { "epoch": 1.44, "learning_rate": 1.9773950930924277e-05, "loss": 0.0962, "step": 536 }, { "epoch": 1.44, "learning_rate": 1.977149662482535e-05, "loss": 0.3116, "step": 538 }, { "epoch": 1.45, "learning_rate": 1.9769029220842678e-05, "loss": 0.0526, "step": 540 }, { "epoch": 1.45, "learning_rate": 1.976654872228361e-05, "loss": 0.1136, "step": 542 }, { "epoch": 1.46, "learning_rate": 1.9764055132473056e-05, "loss": 0.1787, "step": 544 }, { "epoch": 1.46, "learning_rate": 1.9761548454753455e-05, "loss": 0.0761, "step": 546 }, { "epoch": 1.47, "learning_rate": 1.9759028692484808e-05, "loss": 0.1467, "step": 548 }, { "epoch": 1.47, "learning_rate": 1.9756495849044653e-05, "loss": 0.0559, "step": 550 }, { "epoch": 1.48, "learning_rate": 1.975394992782805e-05, "loss": 0.3224, "step": 552 }, { "epoch": 1.48, "learning_rate": 1.97513909322476e-05, "loss": 0.0993, "step": 554 }, { "epoch": 1.49, "learning_rate": 1.9748818865733424e-05, "loss": 0.1348, "step": 556 }, { "epoch": 1.49, "learning_rate": 1.9746233731733163e-05, "loss": 0.2154, "step": 558 }, { "epoch": 1.5, "learning_rate": 1.9743635533711978e-05, "loss": 0.2136, "step": 560 }, { "epoch": 1.5, "learning_rate": 1.9741024275152532e-05, "loss": 0.0422, "step": 562 }, { "epoch": 1.51, "learning_rate": 1.973839995955501e-05, "loss": 0.0495, "step": 564 }, { "epoch": 1.52, "learning_rate": 1.9735762590437094e-05, "loss": 0.3029, "step": 566 }, { "epoch": 1.52, "learning_rate": 1.9733112171333944e-05, "loss": 0.2605, "step": 568 }, { "epoch": 1.53, "learning_rate": 1.973044870579824e-05, "loss": 0.1267, "step": 570 }, { "epoch": 1.53, "learning_rate": 1.972777219740013e-05, "loss": 0.1616, "step": 572 }, { "epoch": 1.54, "learning_rate": 1.972508264972726e-05, "loss": 0.3601, "step": 574 }, { "epoch": 1.54, "learning_rate": 1.972238006638474e-05, "loss": 0.2432, "step": 576 }, { "epoch": 1.55, "learning_rate": 1.9719664450995166e-05, "loss": 0.1421, "step": 578 }, { "epoch": 1.55, "learning_rate": 1.9716935807198587e-05, "loss": 0.2793, "step": 580 }, { "epoch": 1.56, "learning_rate": 1.9714194138652533e-05, "loss": 0.207, "step": 582 }, { "epoch": 1.56, "learning_rate": 1.9711439449031978e-05, "loss": 0.1182, "step": 584 }, { "epoch": 1.57, "learning_rate": 1.970867174202936e-05, "loss": 0.3376, "step": 586 }, { "epoch": 1.57, "learning_rate": 1.970589102135456e-05, "loss": 0.2572, "step": 588 }, { "epoch": 1.58, "learning_rate": 1.9703097290734904e-05, "loss": 0.1036, "step": 590 }, { "epoch": 1.59, "learning_rate": 1.9700290553915157e-05, "loss": 0.2767, "step": 592 }, { "epoch": 1.59, "learning_rate": 1.969747081465752e-05, "loss": 0.0798, "step": 594 }, { "epoch": 1.6, "learning_rate": 1.9694638076741615e-05, "loss": 0.104, "step": 596 }, { "epoch": 1.6, "learning_rate": 1.96917923439645e-05, "loss": 0.135, "step": 598 }, { "epoch": 1.61, "learning_rate": 1.9688933620140638e-05, "loss": 0.1968, "step": 600 }, { "epoch": 1.61, "learning_rate": 1.9686061909101917e-05, "loss": 0.2878, "step": 602 }, { "epoch": 1.62, "learning_rate": 1.968317721469763e-05, "loss": 0.2443, "step": 604 }, { "epoch": 1.62, "learning_rate": 1.9680279540794465e-05, "loss": 0.1559, "step": 606 }, { "epoch": 1.63, "learning_rate": 1.9677368891276515e-05, "loss": 0.1438, "step": 608 }, { "epoch": 1.63, "learning_rate": 1.967444527004527e-05, "loss": 0.1196, "step": 610 }, { "epoch": 1.64, "learning_rate": 1.96715086810196e-05, "loss": 0.2756, "step": 612 }, { "epoch": 1.64, "learning_rate": 1.966855912813576e-05, "loss": 0.1375, "step": 614 }, { "epoch": 1.65, "learning_rate": 1.9665596615347385e-05, "loss": 0.1833, "step": 616 }, { "epoch": 1.65, "learning_rate": 1.9662621146625474e-05, "loss": 0.1568, "step": 618 }, { "epoch": 1.66, "learning_rate": 1.9659632725958398e-05, "loss": 0.1185, "step": 620 }, { "epoch": 1.67, "learning_rate": 1.965663135735189e-05, "loss": 0.1368, "step": 622 }, { "epoch": 1.67, "learning_rate": 1.9653617044829033e-05, "loss": 0.1812, "step": 624 }, { "epoch": 1.68, "learning_rate": 1.9650589792430267e-05, "loss": 0.1291, "step": 626 }, { "epoch": 1.68, "learning_rate": 1.9647549604213374e-05, "loss": 0.3009, "step": 628 }, { "epoch": 1.69, "learning_rate": 1.9644496484253473e-05, "loss": 0.1277, "step": 630 }, { "epoch": 1.69, "learning_rate": 1.964143043664302e-05, "loss": 0.2304, "step": 632 }, { "epoch": 1.7, "learning_rate": 1.96383514654918e-05, "loss": 0.0814, "step": 634 }, { "epoch": 1.7, "learning_rate": 1.9635259574926912e-05, "loss": 0.1647, "step": 636 }, { "epoch": 1.71, "learning_rate": 1.9632154769092793e-05, "loss": 0.2173, "step": 638 }, { "epoch": 1.71, "learning_rate": 1.9629037052151164e-05, "loss": 0.3396, "step": 640 }, { "epoch": 1.72, "learning_rate": 1.962590642828108e-05, "loss": 0.0854, "step": 642 }, { "epoch": 1.72, "learning_rate": 1.9622762901678875e-05, "loss": 0.3178, "step": 644 }, { "epoch": 1.73, "learning_rate": 1.961960647655819e-05, "loss": 0.1166, "step": 646 }, { "epoch": 1.73, "learning_rate": 1.961643715714995e-05, "loss": 0.2133, "step": 648 }, { "epoch": 1.74, "learning_rate": 1.961325494770237e-05, "loss": 0.1042, "step": 650 }, { "epoch": 1.75, "learning_rate": 1.9610059852480933e-05, "loss": 0.1527, "step": 652 }, { "epoch": 1.75, "learning_rate": 1.9606851875768404e-05, "loss": 0.206, "step": 654 }, { "epoch": 1.76, "learning_rate": 1.960363102186481e-05, "loss": 0.0966, "step": 656 }, { "epoch": 1.76, "learning_rate": 1.960039729508744e-05, "loss": 0.0875, "step": 658 }, { "epoch": 1.77, "learning_rate": 1.9597150699770834e-05, "loss": 0.1208, "step": 660 }, { "epoch": 1.77, "learning_rate": 1.959389124026679e-05, "loss": 0.1676, "step": 662 }, { "epoch": 1.78, "learning_rate": 1.959061892094434e-05, "loss": 0.1284, "step": 664 }, { "epoch": 1.78, "learning_rate": 1.958733374618976e-05, "loss": 0.1303, "step": 666 }, { "epoch": 1.79, "learning_rate": 1.9584035720406556e-05, "loss": 0.1734, "step": 668 }, { "epoch": 1.79, "learning_rate": 1.9580724848015454e-05, "loss": 0.0779, "step": 670 }, { "epoch": 1.8, "learning_rate": 1.957740113345441e-05, "loss": 0.0817, "step": 672 }, { "epoch": 1.8, "learning_rate": 1.957406458117859e-05, "loss": 0.0998, "step": 674 }, { "epoch": 1.81, "learning_rate": 1.9570715195660362e-05, "loss": 0.0991, "step": 676 }, { "epoch": 1.82, "learning_rate": 1.9567352981389297e-05, "loss": 0.1175, "step": 678 }, { "epoch": 1.82, "learning_rate": 1.956397794287218e-05, "loss": 0.1142, "step": 680 }, { "epoch": 1.83, "learning_rate": 1.9560590084632953e-05, "loss": 0.2734, "step": 682 }, { "epoch": 1.83, "learning_rate": 1.9557189411212774e-05, "loss": 0.2176, "step": 684 }, { "epoch": 1.84, "learning_rate": 1.955377592716996e-05, "loss": 0.1363, "step": 686 }, { "epoch": 1.84, "learning_rate": 1.955034963708e-05, "loss": 0.0634, "step": 688 }, { "epoch": 1.85, "learning_rate": 1.954691054553556e-05, "loss": 0.1658, "step": 690 }, { "epoch": 1.85, "learning_rate": 1.954345865714645e-05, "loss": 0.1092, "step": 692 }, { "epoch": 1.86, "learning_rate": 1.953999397653965e-05, "loss": 0.0811, "step": 694 }, { "epoch": 1.86, "learning_rate": 1.9536516508359273e-05, "loss": 0.1518, "step": 696 }, { "epoch": 1.87, "learning_rate": 1.9533026257266578e-05, "loss": 0.05, "step": 698 }, { "epoch": 1.87, "learning_rate": 1.952952322793996e-05, "loss": 0.1077, "step": 700 }, { "epoch": 1.88, "learning_rate": 1.9526007425074938e-05, "loss": 0.1132, "step": 702 }, { "epoch": 1.88, "learning_rate": 1.9522478853384154e-05, "loss": 0.1445, "step": 704 }, { "epoch": 1.89, "learning_rate": 1.951893751759737e-05, "loss": 0.1514, "step": 706 }, { "epoch": 1.9, "learning_rate": 1.9515383422461457e-05, "loss": 0.0937, "step": 708 }, { "epoch": 1.9, "learning_rate": 1.9511816572740378e-05, "loss": 0.1516, "step": 710 }, { "epoch": 1.91, "learning_rate": 1.9508236973215204e-05, "loss": 0.0749, "step": 712 }, { "epoch": 1.91, "learning_rate": 1.9504644628684093e-05, "loss": 0.0994, "step": 714 }, { "epoch": 1.92, "learning_rate": 1.9501039543962287e-05, "loss": 0.0958, "step": 716 }, { "epoch": 1.92, "learning_rate": 1.94974217238821e-05, "loss": 0.1638, "step": 718 }, { "epoch": 1.93, "learning_rate": 1.9493791173292924e-05, "loss": 0.1721, "step": 720 }, { "epoch": 1.93, "learning_rate": 1.949014789706121e-05, "loss": 0.1029, "step": 722 }, { "epoch": 1.94, "learning_rate": 1.9486491900070474e-05, "loss": 0.0275, "step": 724 }, { "epoch": 1.94, "learning_rate": 1.948282318722127e-05, "loss": 0.1131, "step": 726 }, { "epoch": 1.95, "learning_rate": 1.9479141763431212e-05, "loss": 0.2477, "step": 728 }, { "epoch": 1.95, "learning_rate": 1.947544763363494e-05, "loss": 0.0653, "step": 730 }, { "epoch": 1.96, "learning_rate": 1.947174080278413e-05, "loss": 0.2583, "step": 732 }, { "epoch": 1.97, "learning_rate": 1.9468021275847482e-05, "loss": 0.1673, "step": 734 }, { "epoch": 1.97, "learning_rate": 1.9464289057810716e-05, "loss": 0.1714, "step": 736 }, { "epoch": 1.98, "learning_rate": 1.9460544153676562e-05, "loss": 0.1566, "step": 738 }, { "epoch": 1.98, "learning_rate": 1.9456786568464754e-05, "loss": 0.0762, "step": 740 }, { "epoch": 1.99, "learning_rate": 1.9453016307212025e-05, "loss": 0.1156, "step": 742 }, { "epoch": 1.99, "learning_rate": 1.9449233374972098e-05, "loss": 0.0828, "step": 744 }, { "epoch": 2.0, "learning_rate": 1.9445437776815677e-05, "loss": 0.3062, "step": 746 }, { "epoch": 2.0, "learning_rate": 1.9441629517830458e-05, "loss": 0.1578, "step": 748 }, { "epoch": 2.01, "learning_rate": 1.9437808603121086e-05, "loss": 0.0462, "step": 750 }, { "epoch": 2.01, "learning_rate": 1.9433975037809188e-05, "loss": 0.1186, "step": 752 }, { "epoch": 2.02, "learning_rate": 1.943012882703334e-05, "loss": 0.2722, "step": 754 }, { "epoch": 2.02, "learning_rate": 1.9426269975949074e-05, "loss": 0.0921, "step": 756 }, { "epoch": 2.03, "learning_rate": 1.942239848972885e-05, "loss": 0.1172, "step": 758 }, { "epoch": 2.03, "learning_rate": 1.9418514373562084e-05, "loss": 0.1313, "step": 760 }, { "epoch": 2.04, "learning_rate": 1.9414617632655114e-05, "loss": 0.0992, "step": 762 }, { "epoch": 2.05, "learning_rate": 1.9410708272231194e-05, "loss": 0.1611, "step": 764 }, { "epoch": 2.05, "learning_rate": 1.94067862975305e-05, "loss": 0.1634, "step": 766 }, { "epoch": 2.06, "learning_rate": 1.940285171381012e-05, "loss": 0.0829, "step": 768 }, { "epoch": 2.06, "learning_rate": 1.939890452634403e-05, "loss": 0.204, "step": 770 }, { "epoch": 2.07, "learning_rate": 1.939494474042311e-05, "loss": 0.04, "step": 772 }, { "epoch": 2.07, "learning_rate": 1.9390972361355132e-05, "loss": 0.1016, "step": 774 }, { "epoch": 2.08, "learning_rate": 1.938698739446474e-05, "loss": 0.0964, "step": 776 }, { "epoch": 2.08, "learning_rate": 1.938298984509345e-05, "loss": 0.1185, "step": 778 }, { "epoch": 2.09, "learning_rate": 1.9378979718599647e-05, "loss": 0.075, "step": 780 }, { "epoch": 2.09, "learning_rate": 1.9374957020358575e-05, "loss": 0.2464, "step": 782 }, { "epoch": 2.1, "learning_rate": 1.937092175576233e-05, "loss": 0.0909, "step": 784 }, { "epoch": 2.1, "learning_rate": 1.936687393021985e-05, "loss": 0.1513, "step": 786 }, { "epoch": 2.11, "learning_rate": 1.9362813549156904e-05, "loss": 0.023, "step": 788 }, { "epoch": 2.12, "learning_rate": 1.9358740618016107e-05, "loss": 0.1411, "step": 790 }, { "epoch": 2.12, "learning_rate": 1.9354655142256883e-05, "loss": 0.0815, "step": 792 }, { "epoch": 2.13, "learning_rate": 1.9350557127355472e-05, "loss": 0.1032, "step": 794 }, { "epoch": 2.13, "learning_rate": 1.934644657880493e-05, "loss": 0.1151, "step": 796 }, { "epoch": 2.14, "learning_rate": 1.9342323502115103e-05, "loss": 0.0447, "step": 798 }, { "epoch": 2.14, "learning_rate": 1.9338187902812637e-05, "loss": 0.0726, "step": 800 }, { "epoch": 2.15, "learning_rate": 1.9334039786440955e-05, "loss": 0.1474, "step": 802 }, { "epoch": 2.15, "learning_rate": 1.9329879158560274e-05, "loss": 0.0885, "step": 804 }, { "epoch": 2.16, "learning_rate": 1.9325706024747565e-05, "loss": 0.065, "step": 806 }, { "epoch": 2.16, "learning_rate": 1.9321520390596575e-05, "loss": 0.0769, "step": 808 }, { "epoch": 2.17, "learning_rate": 1.9317322261717794e-05, "loss": 0.0403, "step": 810 }, { "epoch": 2.17, "learning_rate": 1.931311164373847e-05, "loss": 0.0973, "step": 812 }, { "epoch": 2.18, "learning_rate": 1.930888854230259e-05, "loss": 0.1384, "step": 814 }, { "epoch": 2.18, "learning_rate": 1.9304652963070868e-05, "loss": 0.073, "step": 816 }, { "epoch": 2.19, "learning_rate": 1.9300404911720752e-05, "loss": 0.1108, "step": 818 }, { "epoch": 2.2, "learning_rate": 1.92961443939464e-05, "loss": 0.1656, "step": 820 }, { "epoch": 2.2, "learning_rate": 1.9291871415458688e-05, "loss": 0.076, "step": 822 }, { "epoch": 2.21, "learning_rate": 1.928758598198519e-05, "loss": 0.0695, "step": 824 }, { "epoch": 2.21, "learning_rate": 1.9283288099270174e-05, "loss": 0.0686, "step": 826 }, { "epoch": 2.22, "learning_rate": 1.9278977773074597e-05, "loss": 0.1595, "step": 828 }, { "epoch": 2.22, "learning_rate": 1.9274655009176095e-05, "loss": 0.1474, "step": 830 }, { "epoch": 2.23, "learning_rate": 1.9270319813368977e-05, "loss": 0.108, "step": 832 }, { "epoch": 2.23, "learning_rate": 1.9265972191464213e-05, "loss": 0.1321, "step": 834 }, { "epoch": 2.24, "learning_rate": 1.926161214928943e-05, "loss": 0.0832, "step": 836 }, { "epoch": 2.24, "learning_rate": 1.9257239692688907e-05, "loss": 0.1419, "step": 838 }, { "epoch": 2.25, "learning_rate": 1.9252854827523557e-05, "loss": 0.2261, "step": 840 }, { "epoch": 2.25, "learning_rate": 1.9248457559670934e-05, "loss": 0.0724, "step": 842 }, { "epoch": 2.26, "learning_rate": 1.924404789502521e-05, "loss": 0.0696, "step": 844 }, { "epoch": 2.27, "learning_rate": 1.9239625839497176e-05, "loss": 0.0545, "step": 846 }, { "epoch": 2.27, "learning_rate": 1.9235191399014233e-05, "loss": 0.0425, "step": 848 }, { "epoch": 2.28, "learning_rate": 1.923074457952038e-05, "loss": 0.1309, "step": 850 }, { "epoch": 2.28, "learning_rate": 1.9226285386976212e-05, "loss": 0.0911, "step": 852 }, { "epoch": 2.29, "learning_rate": 1.922181382735891e-05, "loss": 0.0671, "step": 854 }, { "epoch": 2.29, "learning_rate": 1.921732990666223e-05, "loss": 0.0446, "step": 856 }, { "epoch": 2.3, "learning_rate": 1.921283363089649e-05, "loss": 0.0592, "step": 858 }, { "epoch": 2.3, "learning_rate": 1.9208325006088587e-05, "loss": 0.0862, "step": 860 }, { "epoch": 2.31, "learning_rate": 1.9203804038281954e-05, "loss": 0.0569, "step": 862 }, { "epoch": 2.31, "learning_rate": 1.9199270733536572e-05, "loss": 0.031, "step": 864 }, { "epoch": 2.32, "learning_rate": 1.9194725097928968e-05, "loss": 0.0716, "step": 866 }, { "epoch": 2.32, "learning_rate": 1.919016713755219e-05, "loss": 0.1314, "step": 868 }, { "epoch": 2.33, "learning_rate": 1.9185596858515797e-05, "loss": 0.0642, "step": 870 }, { "epoch": 2.33, "learning_rate": 1.918101426694589e-05, "loss": 0.0922, "step": 872 }, { "epoch": 2.34, "learning_rate": 1.917641936898503e-05, "loss": 0.0883, "step": 874 }, { "epoch": 2.35, "learning_rate": 1.917181217079232e-05, "loss": 0.0783, "step": 876 }, { "epoch": 2.35, "learning_rate": 1.9167192678543315e-05, "loss": 0.083, "step": 878 }, { "epoch": 2.36, "learning_rate": 1.916256089843007e-05, "loss": 0.0505, "step": 880 }, { "epoch": 2.36, "learning_rate": 1.9157916836661095e-05, "loss": 0.1175, "step": 882 }, { "epoch": 2.37, "learning_rate": 1.9153260499461382e-05, "loss": 0.0244, "step": 884 }, { "epoch": 2.37, "learning_rate": 1.9148591893072356e-05, "loss": 0.0529, "step": 886 }, { "epoch": 2.38, "learning_rate": 1.9143911023751907e-05, "loss": 0.0733, "step": 888 }, { "epoch": 2.38, "learning_rate": 1.9139217897774345e-05, "loss": 0.1076, "step": 890 }, { "epoch": 2.39, "learning_rate": 1.9134512521430424e-05, "loss": 0.1062, "step": 892 }, { "epoch": 2.39, "learning_rate": 1.912979490102731e-05, "loss": 0.0233, "step": 894 }, { "epoch": 2.4, "learning_rate": 1.9125065042888583e-05, "loss": 0.0755, "step": 896 }, { "epoch": 2.4, "learning_rate": 1.9120322953354228e-05, "loss": 0.0443, "step": 898 }, { "epoch": 2.41, "learning_rate": 1.911556863878062e-05, "loss": 0.0764, "step": 900 }, { "epoch": 2.41, "learning_rate": 1.911080210554053e-05, "loss": 0.0843, "step": 902 }, { "epoch": 2.42, "learning_rate": 1.91060233600231e-05, "loss": 0.07, "step": 904 }, { "epoch": 2.43, "learning_rate": 1.9101232408633844e-05, "loss": 0.0677, "step": 906 }, { "epoch": 2.43, "learning_rate": 1.909642925779464e-05, "loss": 0.1277, "step": 908 }, { "epoch": 2.44, "learning_rate": 1.9091613913943706e-05, "loss": 0.0606, "step": 910 }, { "epoch": 2.44, "learning_rate": 1.9086786383535614e-05, "loss": 0.0722, "step": 912 }, { "epoch": 2.45, "learning_rate": 1.9081946673041277e-05, "loss": 0.0396, "step": 914 }, { "epoch": 2.45, "learning_rate": 1.907709478894792e-05, "loss": 0.236, "step": 916 }, { "epoch": 2.46, "learning_rate": 1.9072230737759103e-05, "loss": 0.022, "step": 918 }, { "epoch": 2.46, "learning_rate": 1.9067354525994667e-05, "loss": 0.1932, "step": 920 }, { "epoch": 2.47, "learning_rate": 1.906246616019079e-05, "loss": 0.2579, "step": 922 }, { "epoch": 2.47, "learning_rate": 1.905756564689991e-05, "loss": 0.1123, "step": 924 }, { "epoch": 2.48, "learning_rate": 1.905265299269076e-05, "loss": 0.0479, "step": 926 }, { "epoch": 2.48, "learning_rate": 1.904772820414835e-05, "loss": 0.0828, "step": 928 }, { "epoch": 2.49, "learning_rate": 1.9042791287873958e-05, "loss": 0.0615, "step": 930 }, { "epoch": 2.5, "learning_rate": 1.9037842250485106e-05, "loss": 0.0761, "step": 932 }, { "epoch": 2.5, "learning_rate": 1.903288109861557e-05, "loss": 0.0392, "step": 934 }, { "epoch": 2.51, "learning_rate": 1.9027907838915363e-05, "loss": 0.1698, "step": 936 }, { "epoch": 2.51, "learning_rate": 1.9022922478050732e-05, "loss": 0.1444, "step": 938 }, { "epoch": 2.52, "learning_rate": 1.901792502270414e-05, "loss": 0.1097, "step": 940 }, { "epoch": 2.52, "learning_rate": 1.9012915479574263e-05, "loss": 0.0573, "step": 942 }, { "epoch": 2.53, "learning_rate": 1.9007893855375977e-05, "loss": 0.1112, "step": 944 }, { "epoch": 2.53, "learning_rate": 1.9002860156840355e-05, "loss": 0.082, "step": 946 }, { "epoch": 2.54, "learning_rate": 1.899781439071466e-05, "loss": 0.0248, "step": 948 }, { "epoch": 2.54, "learning_rate": 1.8992756563762316e-05, "loss": 0.1502, "step": 950 }, { "epoch": 2.55, "learning_rate": 1.8987686682762928e-05, "loss": 0.0899, "step": 952 }, { "epoch": 2.55, "learning_rate": 1.898260475451225e-05, "loss": 0.1252, "step": 954 }, { "epoch": 2.56, "learning_rate": 1.897751078582219e-05, "loss": 0.2221, "step": 956 }, { "epoch": 2.56, "learning_rate": 1.8972404783520786e-05, "loss": 0.0808, "step": 958 }, { "epoch": 2.57, "learning_rate": 1.8967286754452214e-05, "loss": 0.1635, "step": 960 }, { "epoch": 2.58, "learning_rate": 1.8962156705476773e-05, "loss": 0.2068, "step": 962 }, { "epoch": 2.58, "learning_rate": 1.895701464347087e-05, "loss": 0.2006, "step": 964 }, { "epoch": 2.59, "learning_rate": 1.895186057532701e-05, "loss": 0.2157, "step": 966 }, { "epoch": 2.59, "learning_rate": 1.8946694507953793e-05, "loss": 0.1479, "step": 968 }, { "epoch": 2.6, "learning_rate": 1.894151644827591e-05, "loss": 0.2523, "step": 970 }, { "epoch": 2.6, "learning_rate": 1.8936326403234125e-05, "loss": 0.2886, "step": 972 }, { "epoch": 2.61, "learning_rate": 1.893112437978526e-05, "loss": 0.1352, "step": 974 }, { "epoch": 2.61, "learning_rate": 1.8925910384902195e-05, "loss": 0.1098, "step": 976 }, { "epoch": 2.62, "learning_rate": 1.8920684425573865e-05, "loss": 0.1046, "step": 978 }, { "epoch": 2.62, "learning_rate": 1.8915446508805235e-05, "loss": 0.115, "step": 980 }, { "epoch": 2.63, "learning_rate": 1.89101966416173e-05, "loss": 0.1473, "step": 982 }, { "epoch": 2.63, "learning_rate": 1.8904934831047073e-05, "loss": 0.1107, "step": 984 }, { "epoch": 2.64, "learning_rate": 1.889966108414757e-05, "loss": 0.1637, "step": 986 }, { "epoch": 2.65, "learning_rate": 1.8894375407987824e-05, "loss": 0.1841, "step": 988 }, { "epoch": 2.65, "learning_rate": 1.8889077809652837e-05, "loss": 0.0548, "step": 990 }, { "epoch": 2.66, "learning_rate": 1.888376829624361e-05, "loss": 0.047, "step": 992 }, { "epoch": 2.66, "learning_rate": 1.8878446874877103e-05, "loss": 0.1024, "step": 994 }, { "epoch": 2.67, "learning_rate": 1.887311355268624e-05, "loss": 0.0568, "step": 996 }, { "epoch": 2.67, "learning_rate": 1.8867768336819902e-05, "loss": 0.0564, "step": 998 }, { "epoch": 2.68, "learning_rate": 1.8862411234442905e-05, "loss": 0.1072, "step": 1000 }, { "epoch": 2.68, "learning_rate": 1.8857042252736004e-05, "loss": 0.2154, "step": 1002 }, { "epoch": 2.69, "learning_rate": 1.885166139889588e-05, "loss": 0.1573, "step": 1004 }, { "epoch": 2.69, "learning_rate": 1.8846268680135115e-05, "loss": 0.066, "step": 1006 }, { "epoch": 2.7, "learning_rate": 1.884086410368221e-05, "loss": 0.1834, "step": 1008 }, { "epoch": 2.7, "learning_rate": 1.8835447676781545e-05, "loss": 0.1386, "step": 1010 }, { "epoch": 2.71, "learning_rate": 1.8830019406693403e-05, "loss": 0.0911, "step": 1012 }, { "epoch": 2.71, "learning_rate": 1.8824579300693923e-05, "loss": 0.0647, "step": 1014 }, { "epoch": 2.72, "learning_rate": 1.8819127366075122e-05, "loss": 0.129, "step": 1016 }, { "epoch": 2.73, "learning_rate": 1.8813663610144867e-05, "loss": 0.0405, "step": 1018 }, { "epoch": 2.73, "learning_rate": 1.880818804022687e-05, "loss": 0.1358, "step": 1020 }, { "epoch": 2.74, "learning_rate": 1.8802700663660677e-05, "loss": 0.0919, "step": 1022 }, { "epoch": 2.74, "learning_rate": 1.879720148780167e-05, "loss": 0.0831, "step": 1024 }, { "epoch": 2.75, "learning_rate": 1.879169052002104e-05, "loss": 0.1327, "step": 1026 }, { "epoch": 2.75, "learning_rate": 1.8786167767705774e-05, "loss": 0.0611, "step": 1028 }, { "epoch": 2.76, "learning_rate": 1.8780633238258674e-05, "loss": 0.1422, "step": 1030 }, { "epoch": 2.76, "learning_rate": 1.877508693909831e-05, "loss": 0.0587, "step": 1032 }, { "epoch": 2.77, "learning_rate": 1.8769528877659057e-05, "loss": 0.0787, "step": 1034 }, { "epoch": 2.77, "learning_rate": 1.8763959061391017e-05, "loss": 0.1229, "step": 1036 }, { "epoch": 2.78, "learning_rate": 1.8758377497760074e-05, "loss": 0.1729, "step": 1038 }, { "epoch": 2.78, "learning_rate": 1.875278419424786e-05, "loss": 0.0327, "step": 1040 }, { "epoch": 2.79, "learning_rate": 1.8747179158351727e-05, "loss": 0.0853, "step": 1042 }, { "epoch": 2.8, "learning_rate": 1.874156239758477e-05, "loss": 0.1016, "step": 1044 }, { "epoch": 2.8, "learning_rate": 1.8735933919475784e-05, "loss": 0.188, "step": 1046 }, { "epoch": 2.81, "learning_rate": 1.8730293731569285e-05, "loss": 0.0866, "step": 1048 }, { "epoch": 2.81, "learning_rate": 1.872464184142548e-05, "loss": 0.1136, "step": 1050 }, { "epoch": 2.82, "learning_rate": 1.8718978256620257e-05, "loss": 0.1952, "step": 1052 }, { "epoch": 2.82, "learning_rate": 1.8713302984745185e-05, "loss": 0.0245, "step": 1054 }, { "epoch": 2.83, "learning_rate": 1.87076160334075e-05, "loss": 0.0684, "step": 1056 }, { "epoch": 2.83, "learning_rate": 1.8701917410230082e-05, "loss": 0.0476, "step": 1058 }, { "epoch": 2.84, "learning_rate": 1.8696207122851468e-05, "loss": 0.1612, "step": 1060 }, { "epoch": 2.84, "learning_rate": 1.869048517892583e-05, "loss": 0.1529, "step": 1062 }, { "epoch": 2.85, "learning_rate": 1.868475158612296e-05, "loss": 0.0972, "step": 1064 }, { "epoch": 2.85, "learning_rate": 1.8679006352128264e-05, "loss": 0.1108, "step": 1066 }, { "epoch": 2.86, "learning_rate": 1.8673249484642752e-05, "loss": 0.0894, "step": 1068 }, { "epoch": 2.86, "learning_rate": 1.8667480991383034e-05, "loss": 0.2866, "step": 1070 }, { "epoch": 2.87, "learning_rate": 1.866170088008129e-05, "loss": 0.0613, "step": 1072 }, { "epoch": 2.88, "learning_rate": 1.865590915848529e-05, "loss": 0.1467, "step": 1074 }, { "epoch": 2.88, "learning_rate": 1.8650105834358353e-05, "loss": 0.0867, "step": 1076 }, { "epoch": 2.89, "learning_rate": 1.864429091547936e-05, "loss": 0.2163, "step": 1078 }, { "epoch": 2.89, "learning_rate": 1.8638464409642724e-05, "loss": 0.1838, "step": 1080 }, { "epoch": 2.9, "learning_rate": 1.8632626324658397e-05, "loss": 0.035, "step": 1082 }, { "epoch": 2.9, "learning_rate": 1.8626776668351852e-05, "loss": 0.035, "step": 1084 }, { "epoch": 2.91, "learning_rate": 1.862091544856407e-05, "loss": 0.1265, "step": 1086 }, { "epoch": 2.91, "learning_rate": 1.8615042673151522e-05, "loss": 0.077, "step": 1088 }, { "epoch": 2.92, "learning_rate": 1.860915834998619e-05, "loss": 0.0633, "step": 1090 }, { "epoch": 2.92, "learning_rate": 1.8603262486955516e-05, "loss": 0.0445, "step": 1092 }, { "epoch": 2.93, "learning_rate": 1.859735509196242e-05, "loss": 0.0858, "step": 1094 }, { "epoch": 2.93, "learning_rate": 1.8591436172925278e-05, "loss": 0.0913, "step": 1096 }, { "epoch": 2.94, "learning_rate": 1.858550573777791e-05, "loss": 0.1103, "step": 1098 }, { "epoch": 2.95, "learning_rate": 1.8579563794469575e-05, "loss": 0.0815, "step": 1100 }, { "epoch": 2.95, "learning_rate": 1.8573610350964956e-05, "loss": 0.0471, "step": 1102 }, { "epoch": 2.96, "learning_rate": 1.856764541524415e-05, "loss": 0.0484, "step": 1104 }, { "epoch": 2.96, "learning_rate": 1.8561668995302668e-05, "loss": 0.1788, "step": 1106 }, { "epoch": 2.97, "learning_rate": 1.8555681099151397e-05, "loss": 0.1511, "step": 1108 }, { "epoch": 2.97, "learning_rate": 1.8549681734816624e-05, "loss": 0.0559, "step": 1110 }, { "epoch": 2.98, "learning_rate": 1.8543670910339998e-05, "loss": 0.037, "step": 1112 }, { "epoch": 2.98, "learning_rate": 1.8537648633778537e-05, "loss": 0.1399, "step": 1114 }, { "epoch": 2.99, "learning_rate": 1.85316149132046e-05, "loss": 0.1177, "step": 1116 }, { "epoch": 2.99, "learning_rate": 1.852556975670589e-05, "loss": 0.0615, "step": 1118 }, { "epoch": 3.0, "learning_rate": 1.8519513172385445e-05, "loss": 0.0485, "step": 1120 }, { "epoch": 3.0, "learning_rate": 1.8513445168361613e-05, "loss": 0.0729, "step": 1122 }, { "epoch": 3.01, "learning_rate": 1.850736575276805e-05, "loss": 0.1278, "step": 1124 }, { "epoch": 3.01, "learning_rate": 1.8501274933753713e-05, "loss": 0.131, "step": 1126 }, { "epoch": 3.02, "learning_rate": 1.8495172719482842e-05, "loss": 0.1208, "step": 1128 }, { "epoch": 3.03, "learning_rate": 1.848905911813495e-05, "loss": 0.0434, "step": 1130 }, { "epoch": 3.03, "learning_rate": 1.8482934137904813e-05, "loss": 0.0925, "step": 1132 }, { "epoch": 3.04, "learning_rate": 1.847679778700246e-05, "loss": 0.0534, "step": 1134 }, { "epoch": 3.04, "learning_rate": 1.8470650073653166e-05, "loss": 0.0469, "step": 1136 }, { "epoch": 3.05, "learning_rate": 1.8464491006097432e-05, "loss": 0.0925, "step": 1138 }, { "epoch": 3.05, "learning_rate": 1.8458320592590976e-05, "loss": 0.072, "step": 1140 }, { "epoch": 3.06, "learning_rate": 1.845213884140473e-05, "loss": 0.0889, "step": 1142 }, { "epoch": 3.06, "learning_rate": 1.8445945760824823e-05, "loss": 0.1202, "step": 1144 }, { "epoch": 3.07, "learning_rate": 1.843974135915256e-05, "loss": 0.0961, "step": 1146 }, { "epoch": 3.07, "learning_rate": 1.8433525644704438e-05, "loss": 0.0561, "step": 1148 }, { "epoch": 3.08, "learning_rate": 1.84272986258121e-05, "loss": 0.1229, "step": 1150 }, { "epoch": 3.08, "learning_rate": 1.8421060310822358e-05, "loss": 0.0848, "step": 1152 }, { "epoch": 3.09, "learning_rate": 1.8414810708097154e-05, "loss": 0.1847, "step": 1154 }, { "epoch": 3.1, "learning_rate": 1.8408549826013563e-05, "loss": 0.1095, "step": 1156 }, { "epoch": 3.1, "learning_rate": 1.8402277672963782e-05, "loss": 0.1686, "step": 1158 }, { "epoch": 3.11, "learning_rate": 1.8395994257355113e-05, "loss": 0.0163, "step": 1160 }, { "epoch": 3.11, "learning_rate": 1.8389699587609952e-05, "loss": 0.0555, "step": 1162 }, { "epoch": 3.12, "learning_rate": 1.8383393672165792e-05, "loss": 0.1175, "step": 1164 }, { "epoch": 3.12, "learning_rate": 1.8377076519475184e-05, "loss": 0.0771, "step": 1166 }, { "epoch": 3.13, "learning_rate": 1.8370748138005755e-05, "loss": 0.1123, "step": 1168 }, { "epoch": 3.13, "learning_rate": 1.836440853624017e-05, "loss": 0.1189, "step": 1170 }, { "epoch": 3.14, "learning_rate": 1.8358057722676146e-05, "loss": 0.044, "step": 1172 }, { "epoch": 3.14, "learning_rate": 1.8351695705826428e-05, "loss": 0.0426, "step": 1174 }, { "epoch": 3.15, "learning_rate": 1.8345322494218763e-05, "loss": 0.0463, "step": 1176 }, { "epoch": 3.15, "learning_rate": 1.8338938096395923e-05, "loss": 0.0266, "step": 1178 }, { "epoch": 3.16, "learning_rate": 1.8332542520915667e-05, "loss": 0.091, "step": 1180 }, { "epoch": 3.16, "learning_rate": 1.8326135776350727e-05, "loss": 0.0504, "step": 1182 }, { "epoch": 3.17, "learning_rate": 1.831971787128882e-05, "loss": 0.1485, "step": 1184 }, { "epoch": 3.18, "learning_rate": 1.8313288814332617e-05, "loss": 0.0809, "step": 1186 }, { "epoch": 3.18, "learning_rate": 1.830684861409974e-05, "loss": 0.0136, "step": 1188 }, { "epoch": 3.19, "learning_rate": 1.8300397279222738e-05, "loss": 0.0378, "step": 1190 }, { "epoch": 3.19, "learning_rate": 1.82939348183491e-05, "loss": 0.0719, "step": 1192 }, { "epoch": 3.2, "learning_rate": 1.8287461240141217e-05, "loss": 0.047, "step": 1194 }, { "epoch": 3.2, "learning_rate": 1.8280976553276386e-05, "loss": 0.0994, "step": 1196 }, { "epoch": 3.21, "learning_rate": 1.8274480766446796e-05, "loss": 0.0794, "step": 1198 }, { "epoch": 3.21, "learning_rate": 1.826797388835951e-05, "loss": 0.0197, "step": 1200 }, { "epoch": 3.22, "learning_rate": 1.826145592773646e-05, "loss": 0.1017, "step": 1202 }, { "epoch": 3.22, "learning_rate": 1.8254926893314436e-05, "loss": 0.0404, "step": 1204 }, { "epoch": 3.23, "learning_rate": 1.8248386793845063e-05, "loss": 0.0704, "step": 1206 }, { "epoch": 3.23, "learning_rate": 1.8241835638094813e-05, "loss": 0.0804, "step": 1208 }, { "epoch": 3.24, "learning_rate": 1.8235273434844964e-05, "loss": 0.176, "step": 1210 }, { "epoch": 3.24, "learning_rate": 1.8228700192891605e-05, "loss": 0.0694, "step": 1212 }, { "epoch": 3.25, "learning_rate": 1.822211592104563e-05, "loss": 0.019, "step": 1214 }, { "epoch": 3.26, "learning_rate": 1.8215520628132708e-05, "loss": 0.0394, "step": 1216 }, { "epoch": 3.26, "learning_rate": 1.820891432299328e-05, "loss": 0.0928, "step": 1218 }, { "epoch": 3.27, "learning_rate": 1.8202297014482558e-05, "loss": 0.0592, "step": 1220 }, { "epoch": 3.27, "learning_rate": 1.8195668711470496e-05, "loss": 0.067, "step": 1222 }, { "epoch": 3.28, "learning_rate": 1.8189029422841786e-05, "loss": 0.0275, "step": 1224 }, { "epoch": 3.28, "learning_rate": 1.8182379157495842e-05, "loss": 0.0976, "step": 1226 }, { "epoch": 3.29, "learning_rate": 1.81757179243468e-05, "loss": 0.1629, "step": 1228 }, { "epoch": 3.29, "learning_rate": 1.8169045732323495e-05, "loss": 0.0132, "step": 1230 }, { "epoch": 3.3, "learning_rate": 1.816236259036944e-05, "loss": 0.0918, "step": 1232 }, { "epoch": 3.3, "learning_rate": 1.815566850744284e-05, "loss": 0.044, "step": 1234 }, { "epoch": 3.31, "learning_rate": 1.814896349251656e-05, "loss": 0.1429, "step": 1236 }, { "epoch": 3.31, "learning_rate": 1.814224755457812e-05, "loss": 0.0263, "step": 1238 }, { "epoch": 3.32, "learning_rate": 1.8135520702629677e-05, "loss": 0.0431, "step": 1240 }, { "epoch": 3.33, "learning_rate": 1.8128782945688022e-05, "loss": 0.0449, "step": 1242 }, { "epoch": 3.33, "learning_rate": 1.8122034292784558e-05, "loss": 0.0439, "step": 1244 }, { "epoch": 3.34, "learning_rate": 1.81152747529653e-05, "loss": 0.0652, "step": 1246 }, { "epoch": 3.34, "learning_rate": 1.8108504335290852e-05, "loss": 0.0329, "step": 1248 }, { "epoch": 3.35, "learning_rate": 1.81017230488364e-05, "loss": 0.1611, "step": 1250 }, { "epoch": 3.35, "learning_rate": 1.8094930902691693e-05, "loss": 0.0335, "step": 1252 }, { "epoch": 3.36, "learning_rate": 1.8088127905961047e-05, "loss": 0.0159, "step": 1254 }, { "epoch": 3.36, "learning_rate": 1.8081314067763318e-05, "loss": 0.0628, "step": 1256 }, { "epoch": 3.37, "learning_rate": 1.807448939723189e-05, "loss": 0.0439, "step": 1258 }, { "epoch": 3.37, "learning_rate": 1.8067653903514674e-05, "loss": 0.0797, "step": 1260 }, { "epoch": 3.38, "learning_rate": 1.8060807595774075e-05, "loss": 0.1888, "step": 1262 }, { "epoch": 3.38, "learning_rate": 1.805395048318701e-05, "loss": 0.1524, "step": 1264 }, { "epoch": 3.39, "learning_rate": 1.804708257494487e-05, "loss": 0.0476, "step": 1266 }, { "epoch": 3.39, "learning_rate": 1.804020388025352e-05, "loss": 0.0281, "step": 1268 }, { "epoch": 3.4, "learning_rate": 1.8033314408333283e-05, "loss": 0.0778, "step": 1270 }, { "epoch": 3.41, "learning_rate": 1.802641416841892e-05, "loss": 0.0491, "step": 1272 }, { "epoch": 3.41, "learning_rate": 1.801950316975964e-05, "loss": 0.0373, "step": 1274 }, { "epoch": 3.42, "learning_rate": 1.8012581421619065e-05, "loss": 0.154, "step": 1276 }, { "epoch": 3.42, "learning_rate": 1.800564893327522e-05, "loss": 0.0848, "step": 1278 }, { "epoch": 3.43, "learning_rate": 1.7998705714020536e-05, "loss": 0.2885, "step": 1280 }, { "epoch": 3.43, "learning_rate": 1.7991751773161827e-05, "loss": 0.182, "step": 1282 }, { "epoch": 3.44, "learning_rate": 1.7984787120020275e-05, "loss": 0.0598, "step": 1284 }, { "epoch": 3.44, "learning_rate": 1.797781176393142e-05, "loss": 0.0453, "step": 1286 }, { "epoch": 3.45, "learning_rate": 1.7970825714245153e-05, "loss": 0.1592, "step": 1288 }, { "epoch": 3.45, "learning_rate": 1.7963828980325696e-05, "loss": 0.0631, "step": 1290 }, { "epoch": 3.46, "learning_rate": 1.7956821571551593e-05, "loss": 0.1016, "step": 1292 }, { "epoch": 3.46, "learning_rate": 1.7949803497315693e-05, "loss": 0.0887, "step": 1294 }, { "epoch": 3.47, "learning_rate": 1.7942774767025145e-05, "loss": 0.1221, "step": 1296 }, { "epoch": 3.48, "learning_rate": 1.7935735390101387e-05, "loss": 0.0674, "step": 1298 }, { "epoch": 3.48, "learning_rate": 1.7928685375980116e-05, "loss": 0.0857, "step": 1300 }, { "epoch": 3.49, "learning_rate": 1.7921624734111292e-05, "loss": 0.1339, "step": 1302 }, { "epoch": 3.49, "learning_rate": 1.791455347395913e-05, "loss": 0.1081, "step": 1304 }, { "epoch": 3.5, "learning_rate": 1.7907471605002066e-05, "loss": 0.0197, "step": 1306 }, { "epoch": 3.5, "learning_rate": 1.7900379136732756e-05, "loss": 0.0604, "step": 1308 }, { "epoch": 3.51, "learning_rate": 1.7893276078658073e-05, "loss": 0.1054, "step": 1310 }, { "epoch": 3.51, "learning_rate": 1.7886162440299074e-05, "loss": 0.0498, "step": 1312 }, { "epoch": 3.52, "learning_rate": 1.787903823119101e-05, "loss": 0.1311, "step": 1314 }, { "epoch": 3.52, "learning_rate": 1.787190346088329e-05, "loss": 0.0491, "step": 1316 }, { "epoch": 3.53, "learning_rate": 1.7864758138939486e-05, "loss": 0.0391, "step": 1318 }, { "epoch": 3.53, "learning_rate": 1.785760227493731e-05, "loss": 0.0538, "step": 1320 }, { "epoch": 3.54, "learning_rate": 1.7850435878468607e-05, "loss": 0.1496, "step": 1322 }, { "epoch": 3.54, "learning_rate": 1.7843258959139338e-05, "loss": 0.0539, "step": 1324 }, { "epoch": 3.55, "learning_rate": 1.7836071526569573e-05, "loss": 0.0358, "step": 1326 }, { "epoch": 3.56, "learning_rate": 1.782887359039347e-05, "loss": 0.0614, "step": 1328 }, { "epoch": 3.56, "learning_rate": 1.782166516025927e-05, "loss": 0.1167, "step": 1330 }, { "epoch": 3.57, "learning_rate": 1.7814446245829272e-05, "loss": 0.1015, "step": 1332 }, { "epoch": 3.57, "learning_rate": 1.7807216856779842e-05, "loss": 0.1844, "step": 1334 }, { "epoch": 3.58, "learning_rate": 1.7799977002801375e-05, "loss": 0.1855, "step": 1336 }, { "epoch": 3.58, "learning_rate": 1.7792726693598293e-05, "loss": 0.06, "step": 1338 }, { "epoch": 3.59, "learning_rate": 1.778546593888904e-05, "loss": 0.0283, "step": 1340 }, { "epoch": 3.59, "learning_rate": 1.7778194748406063e-05, "loss": 0.1074, "step": 1342 }, { "epoch": 3.6, "learning_rate": 1.7770913131895778e-05, "loss": 0.0332, "step": 1344 }, { "epoch": 3.6, "learning_rate": 1.7763621099118604e-05, "loss": 0.1431, "step": 1346 }, { "epoch": 3.61, "learning_rate": 1.7756318659848902e-05, "loss": 0.0818, "step": 1348 }, { "epoch": 3.61, "learning_rate": 1.774900582387499e-05, "loss": 0.1438, "step": 1350 }, { "epoch": 3.62, "learning_rate": 1.774168260099912e-05, "loss": 0.0837, "step": 1352 }, { "epoch": 3.63, "learning_rate": 1.7734349001037468e-05, "loss": 0.0746, "step": 1354 }, { "epoch": 3.63, "learning_rate": 1.7727005033820117e-05, "loss": 0.0327, "step": 1356 }, { "epoch": 3.64, "learning_rate": 1.7719650709191046e-05, "loss": 0.2153, "step": 1358 }, { "epoch": 3.64, "learning_rate": 1.7712286037008125e-05, "loss": 0.1408, "step": 1360 }, { "epoch": 3.65, "learning_rate": 1.7704911027143087e-05, "loss": 0.124, "step": 1362 }, { "epoch": 3.65, "learning_rate": 1.769752568948152e-05, "loss": 0.1465, "step": 1364 }, { "epoch": 3.66, "learning_rate": 1.769013003392286e-05, "loss": 0.0773, "step": 1366 }, { "epoch": 3.66, "learning_rate": 1.7682724070380376e-05, "loss": 0.0492, "step": 1368 }, { "epoch": 3.67, "learning_rate": 1.7675307808781145e-05, "loss": 0.0718, "step": 1370 }, { "epoch": 3.67, "learning_rate": 1.7667881259066056e-05, "loss": 0.2018, "step": 1372 }, { "epoch": 3.68, "learning_rate": 1.766044443118978e-05, "loss": 0.0891, "step": 1374 }, { "epoch": 3.68, "learning_rate": 1.765299733512078e-05, "loss": 0.0649, "step": 1376 }, { "epoch": 3.69, "learning_rate": 1.7645539980841265e-05, "loss": 0.1352, "step": 1378 }, { "epoch": 3.69, "learning_rate": 1.7638072378347205e-05, "loss": 0.1002, "step": 1380 }, { "epoch": 3.7, "learning_rate": 1.7630594537648302e-05, "loss": 0.0737, "step": 1382 }, { "epoch": 3.71, "learning_rate": 1.7623106468767986e-05, "loss": 0.1771, "step": 1384 }, { "epoch": 3.71, "learning_rate": 1.7615608181743394e-05, "loss": 0.0215, "step": 1386 }, { "epoch": 3.72, "learning_rate": 1.760809968662536e-05, "loss": 0.0768, "step": 1388 }, { "epoch": 3.72, "learning_rate": 1.76005809934784e-05, "loss": 0.0856, "step": 1390 }, { "epoch": 3.73, "learning_rate": 1.75930521123807e-05, "loss": 0.0623, "step": 1392 }, { "epoch": 3.73, "learning_rate": 1.7585513053424108e-05, "loss": 0.0477, "step": 1394 }, { "epoch": 3.74, "learning_rate": 1.7577963826714103e-05, "loss": 0.0471, "step": 1396 }, { "epoch": 3.74, "learning_rate": 1.7570404442369805e-05, "loss": 0.1482, "step": 1398 }, { "epoch": 3.75, "learning_rate": 1.7562834910523942e-05, "loss": 0.0465, "step": 1400 }, { "epoch": 3.75, "learning_rate": 1.7555255241322842e-05, "loss": 0.0325, "step": 1402 }, { "epoch": 3.76, "learning_rate": 1.7547665444926437e-05, "loss": 0.0574, "step": 1404 }, { "epoch": 3.76, "learning_rate": 1.7540065531508207e-05, "loss": 0.1029, "step": 1406 }, { "epoch": 3.77, "learning_rate": 1.7532455511255215e-05, "loss": 0.0255, "step": 1408 }, { "epoch": 3.78, "learning_rate": 1.752483539436807e-05, "loss": 0.0301, "step": 1410 }, { "epoch": 3.78, "learning_rate": 1.7517205191060895e-05, "loss": 0.116, "step": 1412 }, { "epoch": 3.79, "learning_rate": 1.7509564911561358e-05, "loss": 0.0903, "step": 1414 }, { "epoch": 3.79, "learning_rate": 1.750191456611062e-05, "loss": 0.0291, "step": 1416 }, { "epoch": 3.8, "learning_rate": 1.749425416496334e-05, "loss": 0.0682, "step": 1418 }, { "epoch": 3.8, "learning_rate": 1.7486583718387643e-05, "loss": 0.092, "step": 1420 }, { "epoch": 3.81, "learning_rate": 1.7478903236665137e-05, "loss": 0.0884, "step": 1422 }, { "epoch": 3.81, "learning_rate": 1.747121273009087e-05, "loss": 0.0334, "step": 1424 }, { "epoch": 3.82, "learning_rate": 1.746351220897333e-05, "loss": 0.0243, "step": 1426 }, { "epoch": 3.82, "learning_rate": 1.7455801683634433e-05, "loss": 0.1882, "step": 1428 }, { "epoch": 3.83, "learning_rate": 1.74480811644095e-05, "loss": 0.0316, "step": 1430 }, { "epoch": 3.83, "learning_rate": 1.7440350661647244e-05, "loss": 0.109, "step": 1432 }, { "epoch": 3.84, "learning_rate": 1.743261018570977e-05, "loss": 0.088, "step": 1434 }, { "epoch": 3.84, "learning_rate": 1.7424859746972543e-05, "loss": 0.0791, "step": 1436 }, { "epoch": 3.85, "learning_rate": 1.7417099355824393e-05, "loss": 0.0958, "step": 1438 }, { "epoch": 3.86, "learning_rate": 1.740932902266747e-05, "loss": 0.0772, "step": 1440 }, { "epoch": 3.86, "learning_rate": 1.740154875791728e-05, "loss": 0.0482, "step": 1442 }, { "epoch": 3.87, "learning_rate": 1.739375857200261e-05, "loss": 0.0536, "step": 1444 }, { "epoch": 3.87, "learning_rate": 1.738595847536557e-05, "loss": 0.0931, "step": 1446 }, { "epoch": 3.88, "learning_rate": 1.737814847846154e-05, "loss": 0.0627, "step": 1448 }, { "epoch": 3.88, "learning_rate": 1.7370328591759177e-05, "loss": 0.1294, "step": 1450 }, { "epoch": 3.89, "learning_rate": 1.7362498825740395e-05, "loss": 0.1131, "step": 1452 }, { "epoch": 3.89, "learning_rate": 1.7354659190900345e-05, "loss": 0.0494, "step": 1454 }, { "epoch": 3.9, "learning_rate": 1.734680969774741e-05, "loss": 0.1112, "step": 1456 }, { "epoch": 3.9, "learning_rate": 1.733895035680319e-05, "loss": 0.1257, "step": 1458 }, { "epoch": 3.91, "learning_rate": 1.733108117860248e-05, "loss": 0.0781, "step": 1460 }, { "epoch": 3.91, "learning_rate": 1.7323202173693263e-05, "loss": 0.0521, "step": 1462 }, { "epoch": 3.92, "learning_rate": 1.731531335263669e-05, "loss": 0.2669, "step": 1464 }, { "epoch": 3.93, "learning_rate": 1.7307414726007082e-05, "loss": 0.0814, "step": 1466 }, { "epoch": 3.93, "learning_rate": 1.729950630439189e-05, "loss": 0.0594, "step": 1468 }, { "epoch": 3.94, "learning_rate": 1.72915880983917e-05, "loss": 0.0881, "step": 1470 }, { "epoch": 3.94, "learning_rate": 1.7283660118620214e-05, "loss": 0.1478, "step": 1472 }, { "epoch": 3.95, "learning_rate": 1.7275722375704237e-05, "loss": 0.1106, "step": 1474 }, { "epoch": 3.95, "learning_rate": 1.726777488028365e-05, "loss": 0.1008, "step": 1476 }, { "epoch": 3.96, "learning_rate": 1.7259817643011418e-05, "loss": 0.0837, "step": 1478 }, { "epoch": 3.96, "learning_rate": 1.725185067455356e-05, "loss": 0.1195, "step": 1480 }, { "epoch": 3.97, "learning_rate": 1.7243873985589135e-05, "loss": 0.144, "step": 1482 }, { "epoch": 3.97, "learning_rate": 1.7235887586810246e-05, "loss": 0.047, "step": 1484 }, { "epoch": 3.98, "learning_rate": 1.722789148892199e-05, "loss": 0.0704, "step": 1486 }, { "epoch": 3.98, "learning_rate": 1.721988570264248e-05, "loss": 0.0973, "step": 1488 }, { "epoch": 3.99, "learning_rate": 1.7211870238702807e-05, "loss": 0.0283, "step": 1490 }, { "epoch": 3.99, "learning_rate": 1.7203845107847043e-05, "loss": 0.0951, "step": 1492 }, { "epoch": 4.0, "learning_rate": 1.719581032083221e-05, "loss": 0.1429, "step": 1494 }, { "epoch": 4.01, "learning_rate": 1.7187765888428277e-05, "loss": 0.052, "step": 1496 }, { "epoch": 4.01, "learning_rate": 1.717971182141814e-05, "loss": 0.1517, "step": 1498 }, { "epoch": 4.02, "learning_rate": 1.717164813059761e-05, "loss": 0.0139, "step": 1500 }, { "epoch": 4.02, "learning_rate": 1.71635748267754e-05, "loss": 0.0209, "step": 1502 }, { "epoch": 4.03, "learning_rate": 1.715549192077311e-05, "loss": 0.0251, "step": 1504 }, { "epoch": 4.03, "learning_rate": 1.7147399423425196e-05, "loss": 0.0708, "step": 1506 }, { "epoch": 4.04, "learning_rate": 1.7139297345578992e-05, "loss": 0.0193, "step": 1508 }, { "epoch": 4.04, "learning_rate": 1.713118569809466e-05, "loss": 0.0504, "step": 1510 }, { "epoch": 4.05, "learning_rate": 1.7123064491845195e-05, "loss": 0.0565, "step": 1512 }, { "epoch": 4.05, "learning_rate": 1.7114933737716404e-05, "loss": 0.1954, "step": 1514 }, { "epoch": 4.06, "learning_rate": 1.7106793446606888e-05, "loss": 0.0191, "step": 1516 }, { "epoch": 4.06, "learning_rate": 1.7098643629428035e-05, "loss": 0.1474, "step": 1518 }, { "epoch": 4.07, "learning_rate": 1.7090484297104e-05, "loss": 0.1289, "step": 1520 }, { "epoch": 4.07, "learning_rate": 1.70823154605717e-05, "loss": 0.0607, "step": 1522 }, { "epoch": 4.08, "learning_rate": 1.707413713078078e-05, "loss": 0.0417, "step": 1524 }, { "epoch": 4.09, "learning_rate": 1.706594931869361e-05, "loss": 0.0449, "step": 1526 }, { "epoch": 4.09, "learning_rate": 1.705775203528529e-05, "loss": 0.072, "step": 1528 }, { "epoch": 4.1, "learning_rate": 1.704954529154359e-05, "loss": 0.0213, "step": 1530 }, { "epoch": 4.1, "learning_rate": 1.7041329098468974e-05, "loss": 0.0388, "step": 1532 }, { "epoch": 4.11, "learning_rate": 1.703310346707457e-05, "loss": 0.0444, "step": 1534 }, { "epoch": 4.11, "learning_rate": 1.702486840838616e-05, "loss": 0.0934, "step": 1536 }, { "epoch": 4.12, "learning_rate": 1.7016623933442155e-05, "loss": 0.0324, "step": 1538 }, { "epoch": 4.12, "learning_rate": 1.7008370053293598e-05, "loss": 0.0901, "step": 1540 }, { "epoch": 4.13, "learning_rate": 1.7000106779004126e-05, "loss": 0.1236, "step": 1542 }, { "epoch": 4.13, "learning_rate": 1.699183412164998e-05, "loss": 0.1974, "step": 1544 }, { "epoch": 4.14, "learning_rate": 1.6983552092319974e-05, "loss": 0.065, "step": 1546 }, { "epoch": 4.14, "learning_rate": 1.697526070211548e-05, "loss": 0.0943, "step": 1548 }, { "epoch": 4.15, "learning_rate": 1.696695996215043e-05, "loss": 0.0088, "step": 1550 }, { "epoch": 4.16, "learning_rate": 1.695864988355127e-05, "loss": 0.0114, "step": 1552 }, { "epoch": 4.16, "learning_rate": 1.6950330477456976e-05, "loss": 0.0445, "step": 1554 }, { "epoch": 4.17, "learning_rate": 1.6942001755019034e-05, "loss": 0.1166, "step": 1556 }, { "epoch": 4.17, "learning_rate": 1.6933663727401394e-05, "loss": 0.027, "step": 1558 }, { "epoch": 4.18, "learning_rate": 1.69253164057805e-05, "loss": 0.1758, "step": 1560 }, { "epoch": 4.18, "learning_rate": 1.6916959801345246e-05, "loss": 0.1335, "step": 1562 }, { "epoch": 4.19, "learning_rate": 1.690859392529697e-05, "loss": 0.2288, "step": 1564 }, { "epoch": 4.19, "learning_rate": 1.6900218788849438e-05, "loss": 0.023, "step": 1566 }, { "epoch": 4.2, "learning_rate": 1.689183440322883e-05, "loss": 0.023, "step": 1568 }, { "epoch": 4.2, "learning_rate": 1.6883440779673717e-05, "loss": 0.084, "step": 1570 }, { "epoch": 4.21, "learning_rate": 1.687503792943506e-05, "loss": 0.1126, "step": 1572 }, { "epoch": 4.21, "learning_rate": 1.6866625863776187e-05, "loss": 0.051, "step": 1574 }, { "epoch": 4.22, "learning_rate": 1.685820459397278e-05, "loss": 0.0492, "step": 1576 }, { "epoch": 4.22, "learning_rate": 1.6849774131312843e-05, "loss": 0.0717, "step": 1578 }, { "epoch": 4.23, "learning_rate": 1.684133448709673e-05, "loss": 0.0031, "step": 1580 }, { "epoch": 4.24, "learning_rate": 1.6832885672637075e-05, "loss": 0.0529, "step": 1582 }, { "epoch": 4.24, "learning_rate": 1.682442769925882e-05, "loss": 0.2156, "step": 1584 }, { "epoch": 4.25, "learning_rate": 1.681596057829918e-05, "loss": 0.1627, "step": 1586 }, { "epoch": 4.25, "learning_rate": 1.680748432110763e-05, "loss": 0.1399, "step": 1588 }, { "epoch": 4.26, "learning_rate": 1.6798998939045893e-05, "loss": 0.1596, "step": 1590 }, { "epoch": 4.26, "learning_rate": 1.6790504443487917e-05, "loss": 0.0501, "step": 1592 }, { "epoch": 4.27, "learning_rate": 1.6782000845819884e-05, "loss": 0.0534, "step": 1594 }, { "epoch": 4.27, "learning_rate": 1.6773488157440154e-05, "loss": 0.0883, "step": 1596 }, { "epoch": 4.28, "learning_rate": 1.6764966389759283e-05, "loss": 0.0409, "step": 1598 }, { "epoch": 4.28, "learning_rate": 1.67564355542e-05, "loss": 0.0719, "step": 1600 }, { "epoch": 4.29, "learning_rate": 1.674789566219718e-05, "loss": 0.0962, "step": 1602 }, { "epoch": 4.29, "learning_rate": 1.673934672519785e-05, "loss": 0.0092, "step": 1604 }, { "epoch": 4.3, "learning_rate": 1.6730788754661144e-05, "loss": 0.0832, "step": 1606 }, { "epoch": 4.31, "learning_rate": 1.6722221762058322e-05, "loss": 0.0506, "step": 1608 }, { "epoch": 4.31, "learning_rate": 1.6713645758872727e-05, "loss": 0.0351, "step": 1610 }, { "epoch": 4.32, "learning_rate": 1.6705060756599784e-05, "loss": 0.1045, "step": 1612 }, { "epoch": 4.32, "learning_rate": 1.6696466766746976e-05, "loss": 0.0872, "step": 1614 }, { "epoch": 4.33, "learning_rate": 1.6687863800833838e-05, "loss": 0.0737, "step": 1616 }, { "epoch": 4.33, "learning_rate": 1.6679251870391938e-05, "loss": 0.0399, "step": 1618 }, { "epoch": 4.34, "learning_rate": 1.667063098696485e-05, "loss": 0.0185, "step": 1620 }, { "epoch": 4.34, "learning_rate": 1.6662001162108164e-05, "loss": 0.0786, "step": 1622 }, { "epoch": 4.35, "learning_rate": 1.665336240738944e-05, "loss": 0.0312, "step": 1624 }, { "epoch": 4.35, "learning_rate": 1.664471473438822e-05, "loss": 0.3046, "step": 1626 }, { "epoch": 4.36, "learning_rate": 1.6636058154695992e-05, "loss": 0.0247, "step": 1628 }, { "epoch": 4.36, "learning_rate": 1.6627392679916187e-05, "loss": 0.0188, "step": 1630 }, { "epoch": 4.37, "learning_rate": 1.6618718321664153e-05, "loss": 0.0786, "step": 1632 }, { "epoch": 4.37, "learning_rate": 1.661003509156716e-05, "loss": 0.1084, "step": 1634 }, { "epoch": 4.38, "learning_rate": 1.6601343001264353e-05, "loss": 0.0424, "step": 1636 }, { "epoch": 4.39, "learning_rate": 1.6592642062406764e-05, "loss": 0.07, "step": 1638 }, { "epoch": 4.39, "learning_rate": 1.658393228665728e-05, "loss": 0.0894, "step": 1640 }, { "epoch": 4.4, "learning_rate": 1.657521368569064e-05, "loss": 0.0225, "step": 1642 }, { "epoch": 4.4, "learning_rate": 1.6566486271193403e-05, "loss": 0.0254, "step": 1644 }, { "epoch": 4.41, "learning_rate": 1.655775005486395e-05, "loss": 0.0136, "step": 1646 }, { "epoch": 4.41, "learning_rate": 1.6549005048412453e-05, "loss": 0.0496, "step": 1648 }, { "epoch": 4.42, "learning_rate": 1.654025126356088e-05, "loss": 0.0914, "step": 1650 }, { "epoch": 4.42, "learning_rate": 1.6531488712042946e-05, "loss": 0.0743, "step": 1652 }, { "epoch": 4.43, "learning_rate": 1.6522717405604132e-05, "loss": 0.0155, "step": 1654 }, { "epoch": 4.43, "learning_rate": 1.651393735600165e-05, "loss": 0.0433, "step": 1656 }, { "epoch": 4.44, "learning_rate": 1.6505148575004427e-05, "loss": 0.0677, "step": 1658 }, { "epoch": 4.44, "learning_rate": 1.64963510743931e-05, "loss": 0.1185, "step": 1660 }, { "epoch": 4.45, "learning_rate": 1.6487544865959995e-05, "loss": 0.0083, "step": 1662 }, { "epoch": 4.46, "learning_rate": 1.6478729961509102e-05, "loss": 0.042, "step": 1664 }, { "epoch": 4.46, "learning_rate": 1.6469906372856073e-05, "loss": 0.1567, "step": 1666 }, { "epoch": 4.47, "learning_rate": 1.6461074111828203e-05, "loss": 0.0655, "step": 1668 }, { "epoch": 4.47, "learning_rate": 1.64522331902644e-05, "loss": 0.0421, "step": 1670 }, { "epoch": 4.48, "learning_rate": 1.6443383620015198e-05, "loss": 0.0664, "step": 1672 }, { "epoch": 4.48, "learning_rate": 1.643452541294271e-05, "loss": 0.0354, "step": 1674 }, { "epoch": 4.49, "learning_rate": 1.6425658580920626e-05, "loss": 0.0548, "step": 1676 }, { "epoch": 4.49, "learning_rate": 1.6416783135834214e-05, "loss": 0.022, "step": 1678 }, { "epoch": 4.5, "learning_rate": 1.6407899089580263e-05, "loss": 0.017, "step": 1680 }, { "epoch": 4.5, "learning_rate": 1.639900645406711e-05, "loss": 0.0711, "step": 1682 }, { "epoch": 4.51, "learning_rate": 1.639010524121459e-05, "loss": 0.1306, "step": 1684 }, { "epoch": 4.51, "learning_rate": 1.638119546295405e-05, "loss": 0.1103, "step": 1686 }, { "epoch": 4.52, "learning_rate": 1.6372277131228314e-05, "loss": 0.0627, "step": 1688 }, { "epoch": 4.52, "learning_rate": 1.636335025799166e-05, "loss": 0.0622, "step": 1690 }, { "epoch": 4.53, "learning_rate": 1.6354414855209833e-05, "loss": 0.0832, "step": 1692 }, { "epoch": 4.54, "learning_rate": 1.6345470934859994e-05, "loss": 0.1933, "step": 1694 }, { "epoch": 4.54, "learning_rate": 1.6336518508930735e-05, "loss": 0.1601, "step": 1696 }, { "epoch": 4.55, "learning_rate": 1.6327557589422037e-05, "loss": 0.0528, "step": 1698 }, { "epoch": 4.55, "learning_rate": 1.631858818834528e-05, "loss": 0.0534, "step": 1700 }, { "epoch": 4.56, "learning_rate": 1.63096103177232e-05, "loss": 0.0544, "step": 1702 }, { "epoch": 4.56, "learning_rate": 1.630062398958989e-05, "loss": 0.1001, "step": 1704 }, { "epoch": 4.57, "learning_rate": 1.6291629215990784e-05, "loss": 0.1037, "step": 1706 }, { "epoch": 4.57, "learning_rate": 1.6282626008982632e-05, "loss": 0.1739, "step": 1708 }, { "epoch": 4.58, "learning_rate": 1.6273614380633484e-05, "loss": 0.0346, "step": 1710 }, { "epoch": 4.58, "learning_rate": 1.626459434302269e-05, "loss": 0.0744, "step": 1712 }, { "epoch": 4.59, "learning_rate": 1.6255565908240857e-05, "loss": 0.1025, "step": 1714 }, { "epoch": 4.59, "learning_rate": 1.6246529088389867e-05, "loss": 0.1304, "step": 1716 }, { "epoch": 4.6, "learning_rate": 1.6237483895582818e-05, "loss": 0.0544, "step": 1718 }, { "epoch": 4.61, "learning_rate": 1.6228430341944054e-05, "loss": 0.1363, "step": 1720 }, { "epoch": 4.61, "learning_rate": 1.6219368439609105e-05, "loss": 0.0346, "step": 1722 }, { "epoch": 4.62, "learning_rate": 1.621029820072471e-05, "loss": 0.2124, "step": 1724 }, { "epoch": 4.62, "learning_rate": 1.6201219637448775e-05, "loss": 0.0603, "step": 1726 }, { "epoch": 4.63, "learning_rate": 1.6192132761950363e-05, "loss": 0.1255, "step": 1728 }, { "epoch": 4.63, "learning_rate": 1.618303758640967e-05, "loss": 0.0646, "step": 1730 }, { "epoch": 4.64, "learning_rate": 1.617393412301804e-05, "loss": 0.1226, "step": 1732 }, { "epoch": 4.64, "learning_rate": 1.6164822383977912e-05, "loss": 0.1429, "step": 1734 }, { "epoch": 4.65, "learning_rate": 1.6155702381502813e-05, "loss": 0.0319, "step": 1736 }, { "epoch": 4.65, "learning_rate": 1.6146574127817358e-05, "loss": 0.0251, "step": 1738 }, { "epoch": 4.66, "learning_rate": 1.6137437635157214e-05, "loss": 0.0747, "step": 1740 }, { "epoch": 4.66, "learning_rate": 1.6128292915769096e-05, "loss": 0.0462, "step": 1742 }, { "epoch": 4.67, "learning_rate": 1.611913998191074e-05, "loss": 0.0371, "step": 1744 }, { "epoch": 4.67, "learning_rate": 1.6109978845850902e-05, "loss": 0.0585, "step": 1746 }, { "epoch": 4.68, "learning_rate": 1.6100809519869326e-05, "loss": 0.0568, "step": 1748 }, { "epoch": 4.69, "learning_rate": 1.609163201625674e-05, "loss": 0.0688, "step": 1750 }, { "epoch": 4.69, "learning_rate": 1.608244634731482e-05, "loss": 0.0785, "step": 1752 }, { "epoch": 4.7, "learning_rate": 1.607325252535621e-05, "loss": 0.0668, "step": 1754 }, { "epoch": 4.7, "learning_rate": 1.6064050562704455e-05, "loss": 0.0162, "step": 1756 }, { "epoch": 4.71, "learning_rate": 1.6054840471694034e-05, "loss": 0.0331, "step": 1758 }, { "epoch": 4.71, "learning_rate": 1.6045622264670306e-05, "loss": 0.0943, "step": 1760 }, { "epoch": 4.72, "learning_rate": 1.6036395953989523e-05, "loss": 0.0703, "step": 1762 }, { "epoch": 4.72, "learning_rate": 1.602716155201879e-05, "loss": 0.0385, "step": 1764 }, { "epoch": 4.73, "learning_rate": 1.6017919071136053e-05, "loss": 0.0784, "step": 1766 }, { "epoch": 4.73, "learning_rate": 1.6008668523730104e-05, "loss": 0.0454, "step": 1768 }, { "epoch": 4.74, "learning_rate": 1.5999409922200534e-05, "loss": 0.045, "step": 1770 }, { "epoch": 4.74, "learning_rate": 1.5990143278957727e-05, "loss": 0.0153, "step": 1772 }, { "epoch": 4.75, "learning_rate": 1.598086860642286e-05, "loss": 0.0489, "step": 1774 }, { "epoch": 4.76, "learning_rate": 1.5971585917027864e-05, "loss": 0.0721, "step": 1776 }, { "epoch": 4.76, "learning_rate": 1.5962295223215415e-05, "loss": 0.1324, "step": 1778 }, { "epoch": 4.77, "learning_rate": 1.595299653743892e-05, "loss": 0.0632, "step": 1780 }, { "epoch": 4.77, "learning_rate": 1.59436898721625e-05, "loss": 0.0754, "step": 1782 }, { "epoch": 4.78, "learning_rate": 1.593437523986097e-05, "loss": 0.0355, "step": 1784 }, { "epoch": 4.78, "learning_rate": 1.5925052653019827e-05, "loss": 0.06, "step": 1786 }, { "epoch": 4.79, "learning_rate": 1.5915722124135227e-05, "loss": 0.178, "step": 1788 }, { "epoch": 4.79, "learning_rate": 1.590638366571397e-05, "loss": 0.0584, "step": 1790 }, { "epoch": 4.8, "learning_rate": 1.589703729027349e-05, "loss": 0.0099, "step": 1792 }, { "epoch": 4.8, "learning_rate": 1.5887683010341836e-05, "loss": 0.0253, "step": 1794 }, { "epoch": 4.81, "learning_rate": 1.5878320838457638e-05, "loss": 0.021, "step": 1796 }, { "epoch": 4.81, "learning_rate": 1.5868950787170124e-05, "loss": 0.1108, "step": 1798 }, { "epoch": 4.82, "learning_rate": 1.5859572869039063e-05, "loss": 0.0519, "step": 1800 }, { "epoch": 4.82, "learning_rate": 1.5850187096634784e-05, "loss": 0.1191, "step": 1802 }, { "epoch": 4.83, "learning_rate": 1.5840793482538143e-05, "loss": 0.043, "step": 1804 }, { "epoch": 4.84, "learning_rate": 1.58313920393405e-05, "loss": 0.0969, "step": 1806 }, { "epoch": 4.84, "learning_rate": 1.5821982779643707e-05, "loss": 0.0445, "step": 1808 }, { "epoch": 4.85, "learning_rate": 1.581256571606011e-05, "loss": 0.0379, "step": 1810 }, { "epoch": 4.85, "learning_rate": 1.58031408612125e-05, "loss": 0.0288, "step": 1812 }, { "epoch": 4.86, "learning_rate": 1.579370822773411e-05, "loss": 0.0188, "step": 1814 }, { "epoch": 4.86, "learning_rate": 1.5784267828268617e-05, "loss": 0.0436, "step": 1816 }, { "epoch": 4.87, "learning_rate": 1.5774819675470085e-05, "loss": 0.1483, "step": 1818 }, { "epoch": 4.87, "learning_rate": 1.576536378200299e-05, "loss": 0.2103, "step": 1820 }, { "epoch": 4.88, "learning_rate": 1.5755900160542176e-05, "loss": 0.1061, "step": 1822 }, { "epoch": 4.88, "learning_rate": 1.574642882377284e-05, "loss": 0.0977, "step": 1824 }, { "epoch": 4.89, "learning_rate": 1.5736949784390526e-05, "loss": 0.0437, "step": 1826 }, { "epoch": 4.89, "learning_rate": 1.572746305510111e-05, "loss": 0.0388, "step": 1828 }, { "epoch": 4.9, "learning_rate": 1.571796864862076e-05, "loss": 0.0146, "step": 1830 }, { "epoch": 4.9, "learning_rate": 1.5708466577675956e-05, "loss": 0.0631, "step": 1832 }, { "epoch": 4.91, "learning_rate": 1.5698956855003425e-05, "loss": 0.0966, "step": 1834 }, { "epoch": 4.92, "learning_rate": 1.5689439493350175e-05, "loss": 0.0108, "step": 1836 }, { "epoch": 4.92, "learning_rate": 1.5679914505473434e-05, "loss": 0.0177, "step": 1838 }, { "epoch": 4.93, "learning_rate": 1.5670381904140667e-05, "loss": 0.0781, "step": 1840 }, { "epoch": 4.93, "learning_rate": 1.5660841702129533e-05, "loss": 0.0477, "step": 1842 }, { "epoch": 4.94, "learning_rate": 1.5651293912227892e-05, "loss": 0.053, "step": 1844 }, { "epoch": 4.94, "learning_rate": 1.564173854723376e-05, "loss": 0.0546, "step": 1846 }, { "epoch": 4.95, "learning_rate": 1.563217561995532e-05, "loss": 0.0742, "step": 1848 }, { "epoch": 4.95, "learning_rate": 1.562260514321088e-05, "loss": 0.124, "step": 1850 }, { "epoch": 4.96, "learning_rate": 1.5613027129828885e-05, "loss": 0.0642, "step": 1852 }, { "epoch": 4.96, "learning_rate": 1.5603441592647858e-05, "loss": 0.0269, "step": 1854 }, { "epoch": 4.97, "learning_rate": 1.5593848544516415e-05, "loss": 0.0711, "step": 1856 }, { "epoch": 4.97, "learning_rate": 1.558424799829326e-05, "loss": 0.1162, "step": 1858 }, { "epoch": 4.98, "learning_rate": 1.5574639966847128e-05, "loss": 0.0459, "step": 1860 }, { "epoch": 4.99, "learning_rate": 1.5565024463056785e-05, "loss": 0.0385, "step": 1862 }, { "epoch": 4.99, "learning_rate": 1.5555401499811018e-05, "loss": 0.0637, "step": 1864 }, { "epoch": 5.0, "learning_rate": 1.5545771090008625e-05, "loss": 0.0475, "step": 1866 }, { "epoch": 5.0, "learning_rate": 1.553613324655836e-05, "loss": 0.0988, "step": 1868 }, { "epoch": 5.01, "learning_rate": 1.552648798237897e-05, "loss": 0.0049, "step": 1870 }, { "epoch": 5.01, "learning_rate": 1.5516835310399123e-05, "loss": 0.0258, "step": 1872 }, { "epoch": 5.02, "learning_rate": 1.5507175243557434e-05, "loss": 0.0327, "step": 1874 }, { "epoch": 5.02, "learning_rate": 1.5497507794802423e-05, "loss": 0.0499, "step": 1876 }, { "epoch": 5.03, "learning_rate": 1.548783297709251e-05, "loss": 0.0521, "step": 1878 }, { "epoch": 5.03, "learning_rate": 1.5478150803395982e-05, "loss": 0.0491, "step": 1880 }, { "epoch": 5.04, "learning_rate": 1.5468461286690996e-05, "loss": 0.0103, "step": 1882 }, { "epoch": 5.04, "learning_rate": 1.545876443996555e-05, "loss": 0.1328, "step": 1884 }, { "epoch": 5.05, "learning_rate": 1.544906027621746e-05, "loss": 0.0688, "step": 1886 }, { "epoch": 5.05, "learning_rate": 1.5439348808454365e-05, "loss": 0.0355, "step": 1888 }, { "epoch": 5.06, "learning_rate": 1.5429630049693676e-05, "loss": 0.0594, "step": 1890 }, { "epoch": 5.07, "learning_rate": 1.541990401296259e-05, "loss": 0.0863, "step": 1892 }, { "epoch": 5.07, "learning_rate": 1.5410170711298057e-05, "loss": 0.03, "step": 1894 }, { "epoch": 5.08, "learning_rate": 1.540043015774676e-05, "loss": 0.0173, "step": 1896 }, { "epoch": 5.08, "learning_rate": 1.539068236536511e-05, "loss": 0.0701, "step": 1898 }, { "epoch": 5.09, "learning_rate": 1.538092734721921e-05, "loss": 0.0437, "step": 1900 }, { "epoch": 5.09, "learning_rate": 1.5371165116384865e-05, "loss": 0.0171, "step": 1902 }, { "epoch": 5.1, "learning_rate": 1.5361395685947533e-05, "loss": 0.0149, "step": 1904 }, { "epoch": 5.1, "learning_rate": 1.5351619069002332e-05, "loss": 0.0061, "step": 1906 }, { "epoch": 5.11, "learning_rate": 1.5341835278654004e-05, "loss": 0.0089, "step": 1908 }, { "epoch": 5.11, "learning_rate": 1.5332044328016916e-05, "loss": 0.1108, "step": 1910 }, { "epoch": 5.12, "learning_rate": 1.5322246230215023e-05, "loss": 0.0352, "step": 1912 }, { "epoch": 5.12, "learning_rate": 1.5312440998381874e-05, "loss": 0.0225, "step": 1914 }, { "epoch": 5.13, "learning_rate": 1.5302628645660562e-05, "loss": 0.138, "step": 1916 }, { "epoch": 5.14, "learning_rate": 1.5292809185203742e-05, "loss": 0.0362, "step": 1918 }, { "epoch": 5.14, "learning_rate": 1.5282982630173587e-05, "loss": 0.0133, "step": 1920 }, { "epoch": 5.15, "learning_rate": 1.527314899374178e-05, "loss": 0.0571, "step": 1922 }, { "epoch": 5.15, "learning_rate": 1.52633082890895e-05, "loss": 0.0178, "step": 1924 }, { "epoch": 5.16, "learning_rate": 1.5253460529407404e-05, "loss": 0.0383, "step": 1926 }, { "epoch": 5.16, "learning_rate": 1.5243605727895591e-05, "loss": 0.0289, "step": 1928 }, { "epoch": 5.17, "learning_rate": 1.5233743897763611e-05, "loss": 0.0366, "step": 1930 }, { "epoch": 5.17, "learning_rate": 1.5223875052230437e-05, "loss": 0.018, "step": 1932 }, { "epoch": 5.18, "learning_rate": 1.5213999204524434e-05, "loss": 0.1079, "step": 1934 }, { "epoch": 5.18, "learning_rate": 1.5204116367883361e-05, "loss": 0.0087, "step": 1936 }, { "epoch": 5.19, "learning_rate": 1.519422655555435e-05, "loss": 0.1141, "step": 1938 }, { "epoch": 5.19, "learning_rate": 1.5184329780793867e-05, "loss": 0.0379, "step": 1940 }, { "epoch": 5.2, "learning_rate": 1.5174426056867728e-05, "loss": 0.085, "step": 1942 }, { "epoch": 5.2, "learning_rate": 1.5164515397051058e-05, "loss": 0.0036, "step": 1944 }, { "epoch": 5.21, "learning_rate": 1.5154597814628273e-05, "loss": 0.0232, "step": 1946 }, { "epoch": 5.22, "learning_rate": 1.5144673322893075e-05, "loss": 0.0372, "step": 1948 }, { "epoch": 5.22, "learning_rate": 1.513474193514842e-05, "loss": 0.0754, "step": 1950 }, { "epoch": 5.23, "learning_rate": 1.512480366470652e-05, "loss": 0.0275, "step": 1952 }, { "epoch": 5.23, "learning_rate": 1.5114858524888802e-05, "loss": 0.0176, "step": 1954 }, { "epoch": 5.24, "learning_rate": 1.5104906529025904e-05, "loss": 0.0499, "step": 1956 }, { "epoch": 5.24, "learning_rate": 1.5094947690457655e-05, "loss": 0.0087, "step": 1958 }, { "epoch": 5.25, "learning_rate": 1.5084982022533053e-05, "loss": 0.0175, "step": 1960 }, { "epoch": 5.25, "learning_rate": 1.5075009538610252e-05, "loss": 0.0275, "step": 1962 }, { "epoch": 5.26, "learning_rate": 1.5065030252056545e-05, "loss": 0.0131, "step": 1964 }, { "epoch": 5.26, "learning_rate": 1.5055044176248343e-05, "loss": 0.0563, "step": 1966 }, { "epoch": 5.27, "learning_rate": 1.5045051324571152e-05, "loss": 0.0231, "step": 1968 }, { "epoch": 5.27, "learning_rate": 1.5035051710419564e-05, "loss": 0.0227, "step": 1970 }, { "epoch": 5.28, "learning_rate": 1.5025045347197234e-05, "loss": 0.0799, "step": 1972 }, { "epoch": 5.29, "learning_rate": 1.5015032248316867e-05, "loss": 0.0518, "step": 1974 }, { "epoch": 5.29, "learning_rate": 1.5005012427200197e-05, "loss": 0.0204, "step": 1976 }, { "epoch": 5.3, "learning_rate": 1.4994985897277963e-05, "loss": 0.0258, "step": 1978 }, { "epoch": 5.3, "learning_rate": 1.49849526719899e-05, "loss": 0.1055, "step": 1980 }, { "epoch": 5.31, "learning_rate": 1.4974912764784717e-05, "loss": 0.0475, "step": 1982 }, { "epoch": 5.31, "learning_rate": 1.4964866189120082e-05, "loss": 0.0723, "step": 1984 }, { "epoch": 5.32, "learning_rate": 1.4954812958462599e-05, "loss": 0.0057, "step": 1986 }, { "epoch": 5.32, "learning_rate": 1.4944753086287793e-05, "loss": 0.0073, "step": 1988 }, { "epoch": 5.33, "learning_rate": 1.4934686586080087e-05, "loss": 0.0608, "step": 1990 }, { "epoch": 5.33, "learning_rate": 1.4924613471332801e-05, "loss": 0.058, "step": 1992 }, { "epoch": 5.34, "learning_rate": 1.4914533755548105e-05, "loss": 0.0214, "step": 1994 }, { "epoch": 5.34, "learning_rate": 1.4904447452237028e-05, "loss": 0.0331, "step": 1996 }, { "epoch": 5.35, "learning_rate": 1.489435457491943e-05, "loss": 0.0072, "step": 1998 }, { "epoch": 5.35, "learning_rate": 1.4884255137123971e-05, "loss": 0.0266, "step": 2000 }, { "epoch": 5.36, "learning_rate": 1.4874149152388122e-05, "loss": 0.0412, "step": 2002 }, { "epoch": 5.37, "learning_rate": 1.4864036634258112e-05, "loss": 0.0237, "step": 2004 }, { "epoch": 5.37, "learning_rate": 1.485391759628894e-05, "loss": 0.2433, "step": 2006 }, { "epoch": 5.38, "learning_rate": 1.4843792052044342e-05, "loss": 0.0833, "step": 2008 }, { "epoch": 5.38, "learning_rate": 1.4833660015096767e-05, "loss": 0.0204, "step": 2010 }, { "epoch": 5.39, "learning_rate": 1.4823521499027381e-05, "loss": 0.0117, "step": 2012 }, { "epoch": 5.39, "learning_rate": 1.4813376517426021e-05, "loss": 0.0842, "step": 2014 }, { "epoch": 5.4, "learning_rate": 1.4803225083891201e-05, "loss": 0.1426, "step": 2016 }, { "epoch": 5.4, "learning_rate": 1.4793067212030074e-05, "loss": 0.1112, "step": 2018 }, { "epoch": 5.41, "learning_rate": 1.478290291545843e-05, "loss": 0.0085, "step": 2020 }, { "epoch": 5.41, "learning_rate": 1.4772732207800673e-05, "loss": 0.0604, "step": 2022 }, { "epoch": 5.42, "learning_rate": 1.4762555102689789e-05, "loss": 0.0138, "step": 2024 }, { "epoch": 5.42, "learning_rate": 1.4752371613767352e-05, "loss": 0.0357, "step": 2026 }, { "epoch": 5.43, "learning_rate": 1.474218175468348e-05, "loss": 0.0073, "step": 2028 }, { "epoch": 5.44, "learning_rate": 1.4731985539096845e-05, "loss": 0.114, "step": 2030 }, { "epoch": 5.44, "learning_rate": 1.4721782980674629e-05, "loss": 0.0204, "step": 2032 }, { "epoch": 5.45, "learning_rate": 1.4711574093092518e-05, "loss": 0.0014, "step": 2034 }, { "epoch": 5.45, "learning_rate": 1.4701358890034682e-05, "loss": 0.032, "step": 2036 }, { "epoch": 5.46, "learning_rate": 1.4691137385193757e-05, "loss": 0.0327, "step": 2038 }, { "epoch": 5.46, "learning_rate": 1.468090959227082e-05, "loss": 0.0285, "step": 2040 }, { "epoch": 5.47, "learning_rate": 1.4670675524975393e-05, "loss": 0.0471, "step": 2042 }, { "epoch": 5.47, "learning_rate": 1.4660435197025391e-05, "loss": 0.0732, "step": 2044 }, { "epoch": 5.48, "learning_rate": 1.4650188622147127e-05, "loss": 0.068, "step": 2046 }, { "epoch": 5.48, "learning_rate": 1.463993581407529e-05, "loss": 0.1664, "step": 2048 }, { "epoch": 5.49, "learning_rate": 1.462967678655292e-05, "loss": 0.0185, "step": 2050 }, { "epoch": 5.49, "learning_rate": 1.4619411553331397e-05, "loss": 0.0187, "step": 2052 }, { "epoch": 5.5, "learning_rate": 1.4609140128170418e-05, "loss": 0.0246, "step": 2054 }, { "epoch": 5.5, "learning_rate": 1.4598862524837978e-05, "loss": 0.0186, "step": 2056 }, { "epoch": 5.51, "learning_rate": 1.4588578757110359e-05, "loss": 0.0726, "step": 2058 }, { "epoch": 5.52, "learning_rate": 1.4578288838772097e-05, "loss": 0.1599, "step": 2060 }, { "epoch": 5.52, "learning_rate": 1.456799278361598e-05, "loss": 0.1011, "step": 2062 }, { "epoch": 5.53, "learning_rate": 1.4557690605443015e-05, "loss": 0.0734, "step": 2064 }, { "epoch": 5.53, "learning_rate": 1.4547382318062428e-05, "loss": 0.0683, "step": 2066 }, { "epoch": 5.54, "learning_rate": 1.4537067935291622e-05, "loss": 0.0321, "step": 2068 }, { "epoch": 5.54, "learning_rate": 1.4526747470956175e-05, "loss": 0.1505, "step": 2070 }, { "epoch": 5.55, "learning_rate": 1.4516420938889817e-05, "loss": 0.0259, "step": 2072 }, { "epoch": 5.55, "learning_rate": 1.4506088352934406e-05, "loss": 0.0702, "step": 2074 }, { "epoch": 5.56, "learning_rate": 1.4495749726939928e-05, "loss": 0.0124, "step": 2076 }, { "epoch": 5.56, "learning_rate": 1.4485405074764453e-05, "loss": 0.0187, "step": 2078 }, { "epoch": 5.57, "learning_rate": 1.4475054410274132e-05, "loss": 0.0444, "step": 2080 }, { "epoch": 5.57, "learning_rate": 1.4464697747343176e-05, "loss": 0.0218, "step": 2082 }, { "epoch": 5.58, "learning_rate": 1.4454335099853833e-05, "loss": 0.0245, "step": 2084 }, { "epoch": 5.59, "learning_rate": 1.4443966481696381e-05, "loss": 0.0667, "step": 2086 }, { "epoch": 5.59, "learning_rate": 1.4433591906769091e-05, "loss": 0.1295, "step": 2088 }, { "epoch": 5.6, "learning_rate": 1.4423211388978229e-05, "loss": 0.0355, "step": 2090 }, { "epoch": 5.6, "learning_rate": 1.4412824942238013e-05, "loss": 0.019, "step": 2092 }, { "epoch": 5.61, "learning_rate": 1.4402432580470624e-05, "loss": 0.0807, "step": 2094 }, { "epoch": 5.61, "learning_rate": 1.439203431760616e-05, "loss": 0.088, "step": 2096 }, { "epoch": 5.62, "learning_rate": 1.4381630167582636e-05, "loss": 0.0432, "step": 2098 }, { "epoch": 5.62, "learning_rate": 1.4371220144345954e-05, "loss": 0.046, "step": 2100 }, { "epoch": 5.63, "learning_rate": 1.4360804261849887e-05, "loss": 0.0919, "step": 2102 }, { "epoch": 5.63, "learning_rate": 1.435038253405607e-05, "loss": 0.0127, "step": 2104 }, { "epoch": 5.64, "learning_rate": 1.4339954974933963e-05, "loss": 0.0337, "step": 2106 }, { "epoch": 5.64, "learning_rate": 1.4329521598460852e-05, "loss": 0.0313, "step": 2108 }, { "epoch": 5.65, "learning_rate": 1.4319082418621815e-05, "loss": 0.0261, "step": 2110 }, { "epoch": 5.65, "learning_rate": 1.4308637449409705e-05, "loss": 0.0628, "step": 2112 }, { "epoch": 5.66, "learning_rate": 1.4298186704825146e-05, "loss": 0.0322, "step": 2114 }, { "epoch": 5.67, "learning_rate": 1.4287730198876495e-05, "loss": 0.0155, "step": 2116 }, { "epoch": 5.67, "learning_rate": 1.4277267945579833e-05, "loss": 0.0434, "step": 2118 }, { "epoch": 5.68, "learning_rate": 1.4266799958958948e-05, "loss": 0.0203, "step": 2120 }, { "epoch": 5.68, "learning_rate": 1.425632625304531e-05, "loss": 0.0146, "step": 2122 }, { "epoch": 5.69, "learning_rate": 1.4245846841878056e-05, "loss": 0.1479, "step": 2124 }, { "epoch": 5.69, "learning_rate": 1.4235361739503972e-05, "loss": 0.0699, "step": 2126 }, { "epoch": 5.7, "learning_rate": 1.4224870959977472e-05, "loss": 0.0894, "step": 2128 }, { "epoch": 5.7, "learning_rate": 1.4214374517360576e-05, "loss": 0.0694, "step": 2130 }, { "epoch": 5.71, "learning_rate": 1.4203872425722903e-05, "loss": 0.0423, "step": 2132 }, { "epoch": 5.71, "learning_rate": 1.4193364699141632e-05, "loss": 0.01, "step": 2134 }, { "epoch": 5.72, "learning_rate": 1.4182851351701505e-05, "loss": 0.0292, "step": 2136 }, { "epoch": 5.72, "learning_rate": 1.4172332397494799e-05, "loss": 0.0577, "step": 2138 }, { "epoch": 5.73, "learning_rate": 1.4161807850621299e-05, "loss": 0.02, "step": 2140 }, { "epoch": 5.73, "learning_rate": 1.4151277725188291e-05, "loss": 0.1476, "step": 2142 }, { "epoch": 5.74, "learning_rate": 1.4140742035310537e-05, "loss": 0.042, "step": 2144 }, { "epoch": 5.75, "learning_rate": 1.4130200795110262e-05, "loss": 0.037, "step": 2146 }, { "epoch": 5.75, "learning_rate": 1.4119654018717126e-05, "loss": 0.1112, "step": 2148 }, { "epoch": 5.76, "learning_rate": 1.410910172026821e-05, "loss": 0.0757, "step": 2150 }, { "epoch": 5.76, "learning_rate": 1.4098543913907996e-05, "loss": 0.087, "step": 2152 }, { "epoch": 5.77, "learning_rate": 1.408798061378836e-05, "loss": 0.0416, "step": 2154 }, { "epoch": 5.77, "learning_rate": 1.4077411834068522e-05, "loss": 0.0624, "step": 2156 }, { "epoch": 5.78, "learning_rate": 1.4066837588915063e-05, "loss": 0.0644, "step": 2158 }, { "epoch": 5.78, "learning_rate": 1.4056257892501886e-05, "loss": 0.1617, "step": 2160 }, { "epoch": 5.79, "learning_rate": 1.4045672759010197e-05, "loss": 0.1127, "step": 2162 }, { "epoch": 5.79, "learning_rate": 1.403508220262849e-05, "loss": 0.0959, "step": 2164 }, { "epoch": 5.8, "learning_rate": 1.402448623755254e-05, "loss": 0.0304, "step": 2166 }, { "epoch": 5.8, "learning_rate": 1.4013884877985353e-05, "loss": 0.0221, "step": 2168 }, { "epoch": 5.81, "learning_rate": 1.4003278138137182e-05, "loss": 0.0117, "step": 2170 }, { "epoch": 5.82, "learning_rate": 1.3992666032225481e-05, "loss": 0.0149, "step": 2172 }, { "epoch": 5.82, "learning_rate": 1.3982048574474903e-05, "loss": 0.2039, "step": 2174 }, { "epoch": 5.83, "learning_rate": 1.3971425779117273e-05, "loss": 0.0398, "step": 2176 }, { "epoch": 5.83, "learning_rate": 1.396079766039157e-05, "loss": 0.0098, "step": 2178 }, { "epoch": 5.84, "learning_rate": 1.3950164232543908e-05, "loss": 0.0165, "step": 2180 }, { "epoch": 5.84, "learning_rate": 1.3939525509827521e-05, "loss": 0.0345, "step": 2182 }, { "epoch": 5.85, "learning_rate": 1.3928881506502734e-05, "loss": 0.1108, "step": 2184 }, { "epoch": 5.85, "learning_rate": 1.3918232236836955e-05, "loss": 0.0449, "step": 2186 }, { "epoch": 5.86, "learning_rate": 1.390757771510465e-05, "loss": 0.0566, "step": 2188 }, { "epoch": 5.86, "learning_rate": 1.3896917955587328e-05, "loss": 0.0745, "step": 2190 }, { "epoch": 5.87, "learning_rate": 1.3886252972573511e-05, "loss": 0.044, "step": 2192 }, { "epoch": 5.87, "learning_rate": 1.3875582780358732e-05, "loss": 0.0041, "step": 2194 }, { "epoch": 5.88, "learning_rate": 1.3864907393245499e-05, "loss": 0.0082, "step": 2196 }, { "epoch": 5.88, "learning_rate": 1.3854226825543284e-05, "loss": 0.0108, "step": 2198 }, { "epoch": 5.89, "learning_rate": 1.3843541091568508e-05, "loss": 0.0178, "step": 2200 }, { "epoch": 5.9, "learning_rate": 1.383285020564452e-05, "loss": 0.018, "step": 2202 }, { "epoch": 5.9, "learning_rate": 1.3822154182101559e-05, "loss": 0.0311, "step": 2204 }, { "epoch": 5.91, "learning_rate": 1.3811453035276767e-05, "loss": 0.0251, "step": 2206 }, { "epoch": 5.91, "learning_rate": 1.3800746779514144e-05, "loss": 0.0602, "step": 2208 }, { "epoch": 5.92, "learning_rate": 1.3790035429164546e-05, "loss": 0.0302, "step": 2210 }, { "epoch": 5.92, "learning_rate": 1.3779318998585647e-05, "loss": 0.0126, "step": 2212 }, { "epoch": 5.93, "learning_rate": 1.3768597502141938e-05, "loss": 0.0081, "step": 2214 }, { "epoch": 5.93, "learning_rate": 1.3757870954204701e-05, "loss": 0.0968, "step": 2216 }, { "epoch": 5.94, "learning_rate": 1.3747139369151989e-05, "loss": 0.1079, "step": 2218 }, { "epoch": 5.94, "learning_rate": 1.3736402761368597e-05, "loss": 0.0367, "step": 2220 }, { "epoch": 5.95, "learning_rate": 1.372566114524607e-05, "loss": 0.0366, "step": 2222 }, { "epoch": 5.95, "learning_rate": 1.3714914535182651e-05, "loss": 0.0902, "step": 2224 }, { "epoch": 5.96, "learning_rate": 1.3704162945583285e-05, "loss": 0.0719, "step": 2226 }, { "epoch": 5.97, "learning_rate": 1.3693406390859587e-05, "loss": 0.0039, "step": 2228 }, { "epoch": 5.97, "learning_rate": 1.3682644885429832e-05, "loss": 0.0927, "step": 2230 }, { "epoch": 5.98, "learning_rate": 1.367187844371893e-05, "loss": 0.0812, "step": 2232 }, { "epoch": 5.98, "learning_rate": 1.3661107080158402e-05, "loss": 0.0478, "step": 2234 }, { "epoch": 5.99, "learning_rate": 1.3650330809186374e-05, "loss": 0.0323, "step": 2236 }, { "epoch": 5.99, "learning_rate": 1.3639549645247545e-05, "loss": 0.0223, "step": 2238 }, { "epoch": 6.0, "learning_rate": 1.3628763602793175e-05, "loss": 0.0052, "step": 2240 }, { "epoch": 6.0, "learning_rate": 1.3617972696281063e-05, "loss": 0.1227, "step": 2242 }, { "epoch": 6.01, "learning_rate": 1.3607176940175531e-05, "loss": 0.0522, "step": 2244 }, { "epoch": 6.01, "learning_rate": 1.3596376348947391e-05, "loss": 0.0082, "step": 2246 }, { "epoch": 6.02, "learning_rate": 1.3585570937073946e-05, "loss": 0.0351, "step": 2248 }, { "epoch": 6.02, "learning_rate": 1.3574760719038959e-05, "loss": 0.0495, "step": 2250 }, { "epoch": 6.03, "learning_rate": 1.3563945709332631e-05, "loss": 0.0508, "step": 2252 }, { "epoch": 6.03, "learning_rate": 1.3553125922451591e-05, "loss": 0.0406, "step": 2254 }, { "epoch": 6.04, "learning_rate": 1.3542301372898872e-05, "loss": 0.0081, "step": 2256 }, { "epoch": 6.05, "learning_rate": 1.3531472075183882e-05, "loss": 0.0518, "step": 2258 }, { "epoch": 6.05, "learning_rate": 1.3520638043822404e-05, "loss": 0.0101, "step": 2260 }, { "epoch": 6.06, "learning_rate": 1.3509799293336562e-05, "loss": 0.0505, "step": 2262 }, { "epoch": 6.06, "learning_rate": 1.3498955838254804e-05, "loss": 0.0476, "step": 2264 }, { "epoch": 6.07, "learning_rate": 1.3488107693111883e-05, "loss": 0.1041, "step": 2266 }, { "epoch": 6.07, "learning_rate": 1.3477254872448843e-05, "loss": 0.0083, "step": 2268 }, { "epoch": 6.08, "learning_rate": 1.3466397390812998e-05, "loss": 0.0187, "step": 2270 }, { "epoch": 6.08, "learning_rate": 1.3455535262757897e-05, "loss": 0.1576, "step": 2272 }, { "epoch": 6.09, "learning_rate": 1.344466850284333e-05, "loss": 0.0183, "step": 2274 }, { "epoch": 6.09, "learning_rate": 1.3433797125635288e-05, "loss": 0.0097, "step": 2276 }, { "epoch": 6.1, "learning_rate": 1.3422921145705958e-05, "loss": 0.0182, "step": 2278 }, { "epoch": 6.1, "learning_rate": 1.3412040577633687e-05, "loss": 0.0297, "step": 2280 }, { "epoch": 6.11, "learning_rate": 1.3401155436002981e-05, "loss": 0.0117, "step": 2282 }, { "epoch": 6.12, "learning_rate": 1.3390265735404473e-05, "loss": 0.0172, "step": 2284 }, { "epoch": 6.12, "learning_rate": 1.3379371490434902e-05, "loss": 0.0878, "step": 2286 }, { "epoch": 6.13, "learning_rate": 1.336847271569711e-05, "loss": 0.0608, "step": 2288 }, { "epoch": 6.13, "learning_rate": 1.3357569425799999e-05, "loss": 0.0486, "step": 2290 }, { "epoch": 6.14, "learning_rate": 1.3346661635358532e-05, "loss": 0.0224, "step": 2292 }, { "epoch": 6.14, "learning_rate": 1.3335749358993697e-05, "loss": 0.0785, "step": 2294 }, { "epoch": 6.15, "learning_rate": 1.3324832611332498e-05, "loss": 0.0186, "step": 2296 }, { "epoch": 6.15, "learning_rate": 1.3313911407007935e-05, "loss": 0.0052, "step": 2298 }, { "epoch": 6.16, "learning_rate": 1.3302985760658982e-05, "loss": 0.0213, "step": 2300 }, { "epoch": 6.16, "learning_rate": 1.3292055686930565e-05, "loss": 0.0015, "step": 2302 }, { "epoch": 6.17, "learning_rate": 1.3281121200473542e-05, "loss": 0.011, "step": 2304 }, { "epoch": 6.17, "learning_rate": 1.3270182315944686e-05, "loss": 0.0279, "step": 2306 }, { "epoch": 6.18, "learning_rate": 1.3259239048006673e-05, "loss": 0.0045, "step": 2308 }, { "epoch": 6.18, "learning_rate": 1.3248291411328048e-05, "loss": 0.0768, "step": 2310 }, { "epoch": 6.19, "learning_rate": 1.3237339420583213e-05, "loss": 0.0453, "step": 2312 }, { "epoch": 6.2, "learning_rate": 1.3226383090452407e-05, "loss": 0.0133, "step": 2314 }, { "epoch": 6.2, "learning_rate": 1.3215422435621681e-05, "loss": 0.0418, "step": 2316 }, { "epoch": 6.21, "learning_rate": 1.3204457470782894e-05, "loss": 0.0495, "step": 2318 }, { "epoch": 6.21, "learning_rate": 1.3193488210633669e-05, "loss": 0.0664, "step": 2320 }, { "epoch": 6.22, "learning_rate": 1.3182514669877393e-05, "loss": 0.0525, "step": 2322 }, { "epoch": 6.22, "learning_rate": 1.3171536863223197e-05, "loss": 0.027, "step": 2324 }, { "epoch": 6.23, "learning_rate": 1.3160554805385921e-05, "loss": 0.0389, "step": 2326 }, { "epoch": 6.23, "learning_rate": 1.3149568511086104e-05, "loss": 0.0419, "step": 2328 }, { "epoch": 6.24, "learning_rate": 1.3138577995049966e-05, "loss": 0.0031, "step": 2330 }, { "epoch": 6.24, "learning_rate": 1.3127583272009386e-05, "loss": 0.0041, "step": 2332 }, { "epoch": 6.25, "learning_rate": 1.311658435670188e-05, "loss": 0.0073, "step": 2334 }, { "epoch": 6.25, "learning_rate": 1.310558126387059e-05, "loss": 0.0196, "step": 2336 }, { "epoch": 6.26, "learning_rate": 1.3094574008264247e-05, "loss": 0.0284, "step": 2338 }, { "epoch": 6.27, "learning_rate": 1.308356260463717e-05, "loss": 0.0099, "step": 2340 }, { "epoch": 6.27, "learning_rate": 1.3072547067749237e-05, "loss": 0.0206, "step": 2342 }, { "epoch": 6.28, "learning_rate": 1.306152741236586e-05, "loss": 0.0239, "step": 2344 }, { "epoch": 6.28, "learning_rate": 1.3050503653257982e-05, "loss": 0.0523, "step": 2346 }, { "epoch": 6.29, "learning_rate": 1.303947580520204e-05, "loss": 0.0574, "step": 2348 }, { "epoch": 6.29, "learning_rate": 1.3028443882979951e-05, "loss": 0.0354, "step": 2350 }, { "epoch": 6.3, "learning_rate": 1.3017407901379097e-05, "loss": 0.0023, "step": 2352 }, { "epoch": 6.3, "learning_rate": 1.3006367875192296e-05, "loss": 0.0236, "step": 2354 }, { "epoch": 6.31, "learning_rate": 1.2995323819217795e-05, "loss": 0.0287, "step": 2356 }, { "epoch": 6.31, "learning_rate": 1.2984275748259238e-05, "loss": 0.0515, "step": 2358 }, { "epoch": 6.32, "learning_rate": 1.2973223677125649e-05, "loss": 0.0025, "step": 2360 }, { "epoch": 6.32, "learning_rate": 1.2962167620631416e-05, "loss": 0.0571, "step": 2362 }, { "epoch": 6.33, "learning_rate": 1.295110759359627e-05, "loss": 0.0192, "step": 2364 }, { "epoch": 6.33, "learning_rate": 1.2940043610845263e-05, "loss": 0.0203, "step": 2366 }, { "epoch": 6.34, "learning_rate": 1.2928975687208751e-05, "loss": 0.0116, "step": 2368 }, { "epoch": 6.35, "learning_rate": 1.291790383752237e-05, "loss": 0.0129, "step": 2370 }, { "epoch": 6.35, "learning_rate": 1.2906828076627018e-05, "loss": 0.0886, "step": 2372 }, { "epoch": 6.36, "learning_rate": 1.289574841936884e-05, "loss": 0.03, "step": 2374 }, { "epoch": 6.36, "learning_rate": 1.2884664880599198e-05, "loss": 0.0377, "step": 2376 }, { "epoch": 6.37, "learning_rate": 1.2873577475174664e-05, "loss": 0.0076, "step": 2378 }, { "epoch": 6.37, "learning_rate": 1.2862486217956982e-05, "loss": 0.064, "step": 2380 }, { "epoch": 6.38, "learning_rate": 1.2851391123813075e-05, "loss": 0.043, "step": 2382 }, { "epoch": 6.38, "learning_rate": 1.2840292207614993e-05, "loss": 0.0137, "step": 2384 }, { "epoch": 6.39, "learning_rate": 1.2829189484239914e-05, "loss": 0.0018, "step": 2386 }, { "epoch": 6.39, "learning_rate": 1.281808296857013e-05, "loss": 0.037, "step": 2388 }, { "epoch": 6.4, "learning_rate": 1.2806972675492997e-05, "loss": 0.0369, "step": 2390 }, { "epoch": 6.4, "learning_rate": 1.2795858619900953e-05, "loss": 0.0892, "step": 2392 }, { "epoch": 6.41, "learning_rate": 1.278474081669147e-05, "loss": 0.0069, "step": 2394 }, { "epoch": 6.41, "learning_rate": 1.2773619280767042e-05, "loss": 0.0631, "step": 2396 }, { "epoch": 6.42, "learning_rate": 1.2762494027035171e-05, "loss": 0.0425, "step": 2398 }, { "epoch": 6.43, "learning_rate": 1.2751365070408335e-05, "loss": 0.0085, "step": 2400 }, { "epoch": 6.43, "learning_rate": 1.2740232425803987e-05, "loss": 0.0238, "step": 2402 }, { "epoch": 6.44, "learning_rate": 1.2729096108144514e-05, "loss": 0.0023, "step": 2404 }, { "epoch": 6.44, "learning_rate": 1.2717956132357232e-05, "loss": 0.0165, "step": 2406 }, { "epoch": 6.45, "learning_rate": 1.2706812513374356e-05, "loss": 0.0026, "step": 2408 }, { "epoch": 6.45, "learning_rate": 1.2695665266132984e-05, "loss": 0.031, "step": 2410 }, { "epoch": 6.46, "learning_rate": 1.2684514405575083e-05, "loss": 0.007, "step": 2412 }, { "epoch": 6.46, "learning_rate": 1.2673359946647463e-05, "loss": 0.0384, "step": 2414 }, { "epoch": 6.47, "learning_rate": 1.266220190430175e-05, "loss": 0.0069, "step": 2416 }, { "epoch": 6.47, "learning_rate": 1.2651040293494376e-05, "loss": 0.026, "step": 2418 }, { "epoch": 6.48, "learning_rate": 1.2639875129186562e-05, "loss": 0.0049, "step": 2420 }, { "epoch": 6.48, "learning_rate": 1.2628706426344284e-05, "loss": 0.0155, "step": 2422 }, { "epoch": 6.49, "learning_rate": 1.2617534199938274e-05, "loss": 0.0256, "step": 2424 }, { "epoch": 6.5, "learning_rate": 1.2606358464943966e-05, "loss": 0.1169, "step": 2426 }, { "epoch": 6.5, "learning_rate": 1.259517923634152e-05, "loss": 0.0865, "step": 2428 }, { "epoch": 6.51, "learning_rate": 1.2583996529115762e-05, "loss": 0.0199, "step": 2430 }, { "epoch": 6.51, "learning_rate": 1.2572810358256187e-05, "loss": 0.0118, "step": 2432 }, { "epoch": 6.52, "learning_rate": 1.2561620738756934e-05, "loss": 0.046, "step": 2434 }, { "epoch": 6.52, "learning_rate": 1.2550427685616767e-05, "loss": 0.0099, "step": 2436 }, { "epoch": 6.53, "learning_rate": 1.2539231213839043e-05, "loss": 0.0419, "step": 2438 }, { "epoch": 6.53, "learning_rate": 1.252803133843171e-05, "loss": 0.0276, "step": 2440 }, { "epoch": 6.54, "learning_rate": 1.2516828074407274e-05, "loss": 0.0618, "step": 2442 }, { "epoch": 6.54, "learning_rate": 1.2505621436782782e-05, "loss": 0.0582, "step": 2444 }, { "epoch": 6.55, "learning_rate": 1.2494411440579814e-05, "loss": 0.0043, "step": 2446 }, { "epoch": 6.55, "learning_rate": 1.2483198100824438e-05, "loss": 0.0039, "step": 2448 }, { "epoch": 6.56, "learning_rate": 1.2471981432547212e-05, "loss": 0.0793, "step": 2450 }, { "epoch": 6.56, "learning_rate": 1.2460761450783145e-05, "loss": 0.0512, "step": 2452 }, { "epoch": 6.57, "learning_rate": 1.2449538170571706e-05, "loss": 0.0589, "step": 2454 }, { "epoch": 6.58, "learning_rate": 1.2438311606956765e-05, "loss": 0.0375, "step": 2456 }, { "epoch": 6.58, "learning_rate": 1.2427081774986613e-05, "loss": 0.0028, "step": 2458 }, { "epoch": 6.59, "learning_rate": 1.2415848689713904e-05, "loss": 0.0318, "step": 2460 }, { "epoch": 6.59, "learning_rate": 1.2404612366195668e-05, "loss": 0.0201, "step": 2462 }, { "epoch": 6.6, "learning_rate": 1.2393372819493259e-05, "loss": 0.0607, "step": 2464 }, { "epoch": 6.6, "learning_rate": 1.2382130064672366e-05, "loss": 0.047, "step": 2466 }, { "epoch": 6.61, "learning_rate": 1.2370884116802969e-05, "loss": 0.0108, "step": 2468 }, { "epoch": 6.61, "learning_rate": 1.2359634990959335e-05, "loss": 0.0137, "step": 2470 }, { "epoch": 6.62, "learning_rate": 1.234838270221999e-05, "loss": 0.0128, "step": 2472 }, { "epoch": 6.62, "learning_rate": 1.2337127265667692e-05, "loss": 0.0218, "step": 2474 }, { "epoch": 6.63, "learning_rate": 1.2325868696389428e-05, "loss": 0.0194, "step": 2476 }, { "epoch": 6.63, "learning_rate": 1.2314607009476374e-05, "loss": 0.0147, "step": 2478 }, { "epoch": 6.64, "learning_rate": 1.2303342220023895e-05, "loss": 0.0069, "step": 2480 }, { "epoch": 6.65, "learning_rate": 1.2292074343131513e-05, "loss": 0.0285, "step": 2482 }, { "epoch": 6.65, "learning_rate": 1.228080339390288e-05, "loss": 0.0281, "step": 2484 }, { "epoch": 6.66, "learning_rate": 1.2269529387445775e-05, "loss": 0.0064, "step": 2486 }, { "epoch": 6.66, "learning_rate": 1.2258252338872073e-05, "loss": 0.0855, "step": 2488 }, { "epoch": 6.67, "learning_rate": 1.2246972263297718e-05, "loss": 0.0521, "step": 2490 }, { "epoch": 6.67, "learning_rate": 1.223568917584273e-05, "loss": 0.0442, "step": 2492 }, { "epoch": 6.68, "learning_rate": 1.2224403091631151e-05, "loss": 0.0041, "step": 2494 }, { "epoch": 6.68, "learning_rate": 1.2213114025791044e-05, "loss": 0.0085, "step": 2496 }, { "epoch": 6.69, "learning_rate": 1.220182199345447e-05, "loss": 0.0631, "step": 2498 }, { "epoch": 6.69, "learning_rate": 1.219052700975746e-05, "loss": 0.0149, "step": 2500 }, { "epoch": 6.7, "learning_rate": 1.2179229089840014e-05, "loss": 0.0697, "step": 2502 }, { "epoch": 6.7, "learning_rate": 1.2167928248846055e-05, "loss": 0.0152, "step": 2504 }, { "epoch": 6.71, "learning_rate": 1.2156624501923429e-05, "loss": 0.0151, "step": 2506 }, { "epoch": 6.71, "learning_rate": 1.2145317864223874e-05, "loss": 0.0217, "step": 2508 }, { "epoch": 6.72, "learning_rate": 1.2134008350903e-05, "loss": 0.0122, "step": 2510 }, { "epoch": 6.73, "learning_rate": 1.212269597712028e-05, "loss": 0.0486, "step": 2512 }, { "epoch": 6.73, "learning_rate": 1.2111380758039013e-05, "loss": 0.0602, "step": 2514 }, { "epoch": 6.74, "learning_rate": 1.2100062708826319e-05, "loss": 0.0315, "step": 2516 }, { "epoch": 6.74, "learning_rate": 1.2088741844653102e-05, "loss": 0.0059, "step": 2518 }, { "epoch": 6.75, "learning_rate": 1.2077418180694049e-05, "loss": 0.1065, "step": 2520 }, { "epoch": 6.75, "learning_rate": 1.2066091732127597e-05, "loss": 0.0247, "step": 2522 }, { "epoch": 6.76, "learning_rate": 1.2054762514135908e-05, "loss": 0.1028, "step": 2524 }, { "epoch": 6.76, "learning_rate": 1.204343054190487e-05, "loss": 0.0285, "step": 2526 }, { "epoch": 6.77, "learning_rate": 1.2032095830624051e-05, "loss": 0.0154, "step": 2528 }, { "epoch": 6.77, "learning_rate": 1.2020758395486698e-05, "loss": 0.0031, "step": 2530 }, { "epoch": 6.78, "learning_rate": 1.2009418251689702e-05, "loss": 0.0446, "step": 2532 }, { "epoch": 6.78, "learning_rate": 1.1998075414433588e-05, "loss": 0.018, "step": 2534 }, { "epoch": 6.79, "learning_rate": 1.1986729898922498e-05, "loss": 0.0414, "step": 2536 }, { "epoch": 6.8, "learning_rate": 1.1975381720364153e-05, "loss": 0.0998, "step": 2538 }, { "epoch": 6.8, "learning_rate": 1.196403089396985e-05, "loss": 0.0476, "step": 2540 }, { "epoch": 6.81, "learning_rate": 1.1952677434954434e-05, "loss": 0.0144, "step": 2542 }, { "epoch": 6.81, "learning_rate": 1.1941321358536278e-05, "loss": 0.0438, "step": 2544 }, { "epoch": 6.82, "learning_rate": 1.1929962679937264e-05, "loss": 0.0206, "step": 2546 }, { "epoch": 6.82, "learning_rate": 1.1918601414382762e-05, "loss": 0.0284, "step": 2548 }, { "epoch": 6.83, "learning_rate": 1.1907237577101612e-05, "loss": 0.0068, "step": 2550 }, { "epoch": 6.83, "learning_rate": 1.1895871183326095e-05, "loss": 0.0694, "step": 2552 }, { "epoch": 6.84, "learning_rate": 1.1884502248291927e-05, "loss": 0.0026, "step": 2554 }, { "epoch": 6.84, "learning_rate": 1.187313078723822e-05, "loss": 0.0016, "step": 2556 }, { "epoch": 6.85, "learning_rate": 1.1861756815407483e-05, "loss": 0.0081, "step": 2558 }, { "epoch": 6.85, "learning_rate": 1.1850380348045585e-05, "loss": 0.0042, "step": 2560 }, { "epoch": 6.86, "learning_rate": 1.1839001400401738e-05, "loss": 0.0112, "step": 2562 }, { "epoch": 6.86, "learning_rate": 1.182761998772848e-05, "loss": 0.0022, "step": 2564 }, { "epoch": 6.87, "learning_rate": 1.1816236125281657e-05, "loss": 0.0997, "step": 2566 }, { "epoch": 6.88, "learning_rate": 1.1804849828320395e-05, "loss": 0.0197, "step": 2568 }, { "epoch": 6.88, "learning_rate": 1.1793461112107086e-05, "loss": 0.0802, "step": 2570 }, { "epoch": 6.89, "learning_rate": 1.1782069991907359e-05, "loss": 0.0265, "step": 2572 }, { "epoch": 6.89, "learning_rate": 1.177067648299007e-05, "loss": 0.0054, "step": 2574 }, { "epoch": 6.9, "learning_rate": 1.1759280600627283e-05, "loss": 0.0165, "step": 2576 }, { "epoch": 6.9, "learning_rate": 1.1747882360094225e-05, "loss": 0.0145, "step": 2578 }, { "epoch": 6.91, "learning_rate": 1.1736481776669307e-05, "loss": 0.1349, "step": 2580 }, { "epoch": 6.91, "learning_rate": 1.172507886563406e-05, "loss": 0.0136, "step": 2582 }, { "epoch": 6.92, "learning_rate": 1.1713673642273146e-05, "loss": 0.0746, "step": 2584 }, { "epoch": 6.92, "learning_rate": 1.1702266121874331e-05, "loss": 0.1014, "step": 2586 }, { "epoch": 6.93, "learning_rate": 1.1690856319728447e-05, "loss": 0.0107, "step": 2588 }, { "epoch": 6.93, "learning_rate": 1.1679444251129393e-05, "loss": 0.0113, "step": 2590 }, { "epoch": 6.94, "learning_rate": 1.1668029931374098e-05, "loss": 0.0225, "step": 2592 }, { "epoch": 6.95, "learning_rate": 1.1656613375762526e-05, "loss": 0.0218, "step": 2594 }, { "epoch": 6.95, "learning_rate": 1.1645194599597618e-05, "loss": 0.0338, "step": 2596 }, { "epoch": 6.96, "learning_rate": 1.1633773618185302e-05, "loss": 0.0334, "step": 2598 }, { "epoch": 6.96, "learning_rate": 1.1622350446834461e-05, "loss": 0.0079, "step": 2600 }, { "epoch": 6.97, "learning_rate": 1.1610925100856909e-05, "loss": 0.0702, "step": 2602 }, { "epoch": 6.97, "learning_rate": 1.1599497595567381e-05, "loss": 0.0325, "step": 2604 }, { "epoch": 6.98, "learning_rate": 1.1588067946283504e-05, "loss": 0.088, "step": 2606 }, { "epoch": 6.98, "learning_rate": 1.1576636168325774e-05, "loss": 0.0113, "step": 2608 }, { "epoch": 6.99, "learning_rate": 1.1565202277017551e-05, "loss": 0.0025, "step": 2610 }, { "epoch": 6.99, "learning_rate": 1.1553766287685015e-05, "loss": 0.008, "step": 2612 }, { "epoch": 7.0, "learning_rate": 1.1542328215657168e-05, "loss": 0.0631, "step": 2614 }, { "epoch": 7.0, "learning_rate": 1.1530888076265802e-05, "loss": 0.0138, "step": 2616 }, { "epoch": 7.01, "learning_rate": 1.1519445884845477e-05, "loss": 0.0343, "step": 2618 }, { "epoch": 7.01, "learning_rate": 1.1508001656733502e-05, "loss": 0.0064, "step": 2620 }, { "epoch": 7.02, "learning_rate": 1.1496555407269926e-05, "loss": 0.003, "step": 2622 }, { "epoch": 7.03, "learning_rate": 1.148510715179749e-05, "loss": 0.0375, "step": 2624 }, { "epoch": 7.03, "learning_rate": 1.1473656905661645e-05, "loss": 0.0315, "step": 2626 }, { "epoch": 7.04, "learning_rate": 1.1462204684210497e-05, "loss": 0.0024, "step": 2628 }, { "epoch": 7.04, "learning_rate": 1.1450750502794796e-05, "loss": 0.0344, "step": 2630 }, { "epoch": 7.05, "learning_rate": 1.1439294376767935e-05, "loss": 0.0155, "step": 2632 }, { "epoch": 7.05, "learning_rate": 1.1427836321485896e-05, "loss": 0.0727, "step": 2634 }, { "epoch": 7.06, "learning_rate": 1.1416376352307261e-05, "loss": 0.0603, "step": 2636 }, { "epoch": 7.06, "learning_rate": 1.140491448459317e-05, "loss": 0.017, "step": 2638 }, { "epoch": 7.07, "learning_rate": 1.139345073370731e-05, "loss": 0.0026, "step": 2640 }, { "epoch": 7.07, "learning_rate": 1.138198511501589e-05, "loss": 0.0312, "step": 2642 }, { "epoch": 7.08, "learning_rate": 1.1370517643887627e-05, "loss": 0.0062, "step": 2644 }, { "epoch": 7.08, "learning_rate": 1.1359048335693711e-05, "loss": 0.005, "step": 2646 }, { "epoch": 7.09, "learning_rate": 1.1347577205807813e-05, "loss": 0.005, "step": 2648 }, { "epoch": 7.1, "learning_rate": 1.1336104269606027e-05, "loss": 0.0032, "step": 2650 }, { "epoch": 7.1, "learning_rate": 1.132462954246688e-05, "loss": 0.0131, "step": 2652 }, { "epoch": 7.11, "learning_rate": 1.1313153039771294e-05, "loss": 0.0417, "step": 2654 }, { "epoch": 7.11, "learning_rate": 1.1301674776902571e-05, "loss": 0.0155, "step": 2656 }, { "epoch": 7.12, "learning_rate": 1.1290194769246374e-05, "loss": 0.0025, "step": 2658 }, { "epoch": 7.12, "learning_rate": 1.127871303219071e-05, "loss": 0.0133, "step": 2660 }, { "epoch": 7.13, "learning_rate": 1.1267229581125892e-05, "loss": 0.0089, "step": 2662 }, { "epoch": 7.13, "learning_rate": 1.1255744431444538e-05, "loss": 0.0016, "step": 2664 }, { "epoch": 7.14, "learning_rate": 1.1244257598541545e-05, "loss": 0.004, "step": 2666 }, { "epoch": 7.14, "learning_rate": 1.1232769097814062e-05, "loss": 0.0026, "step": 2668 }, { "epoch": 7.15, "learning_rate": 1.1221278944661474e-05, "loss": 0.0026, "step": 2670 }, { "epoch": 7.15, "learning_rate": 1.1209787154485382e-05, "loss": 0.0091, "step": 2672 }, { "epoch": 7.16, "learning_rate": 1.1198293742689583e-05, "loss": 0.1874, "step": 2674 }, { "epoch": 7.16, "learning_rate": 1.1186798724680047e-05, "loss": 0.1031, "step": 2676 }, { "epoch": 7.17, "learning_rate": 1.117530211586489e-05, "loss": 0.0028, "step": 2678 }, { "epoch": 7.18, "learning_rate": 1.1163803931654367e-05, "loss": 0.0144, "step": 2680 }, { "epoch": 7.18, "learning_rate": 1.115230418746085e-05, "loss": 0.057, "step": 2682 }, { "epoch": 7.19, "learning_rate": 1.114080289869879e-05, "loss": 0.0252, "step": 2684 }, { "epoch": 7.19, "learning_rate": 1.1129300080784717e-05, "loss": 0.0057, "step": 2686 }, { "epoch": 7.2, "learning_rate": 1.1117795749137208e-05, "loss": 0.0667, "step": 2688 }, { "epoch": 7.2, "learning_rate": 1.1106289919176867e-05, "loss": 0.0025, "step": 2690 }, { "epoch": 7.21, "learning_rate": 1.1094782606326304e-05, "loss": 0.0246, "step": 2692 }, { "epoch": 7.21, "learning_rate": 1.1083273826010132e-05, "loss": 0.0598, "step": 2694 }, { "epoch": 7.22, "learning_rate": 1.1071763593654911e-05, "loss": 0.0301, "step": 2696 }, { "epoch": 7.22, "learning_rate": 1.106025192468916e-05, "loss": 0.0018, "step": 2698 }, { "epoch": 7.23, "learning_rate": 1.104873883454332e-05, "loss": 0.0106, "step": 2700 }, { "epoch": 7.23, "learning_rate": 1.1037224338649734e-05, "loss": 0.014, "step": 2702 }, { "epoch": 7.24, "learning_rate": 1.1025708452442636e-05, "loss": 0.013, "step": 2704 }, { "epoch": 7.24, "learning_rate": 1.1014191191358118e-05, "loss": 0.0028, "step": 2706 }, { "epoch": 7.25, "learning_rate": 1.1002672570834118e-05, "loss": 0.0057, "step": 2708 }, { "epoch": 7.26, "learning_rate": 1.0991152606310393e-05, "loss": 0.0545, "step": 2710 }, { "epoch": 7.26, "learning_rate": 1.0979631313228502e-05, "loss": 0.0005, "step": 2712 }, { "epoch": 7.27, "learning_rate": 1.0968108707031792e-05, "loss": 0.0037, "step": 2714 }, { "epoch": 7.27, "learning_rate": 1.0956584803165362e-05, "loss": 0.0095, "step": 2716 }, { "epoch": 7.28, "learning_rate": 1.0945059617076046e-05, "loss": 0.0016, "step": 2718 }, { "epoch": 7.28, "learning_rate": 1.0933533164212416e-05, "loss": 0.0131, "step": 2720 }, { "epoch": 7.29, "learning_rate": 1.092200546002472e-05, "loss": 0.0117, "step": 2722 }, { "epoch": 7.29, "learning_rate": 1.0910476519964896e-05, "loss": 0.0029, "step": 2724 }, { "epoch": 7.3, "learning_rate": 1.0898946359486535e-05, "loss": 0.0081, "step": 2726 }, { "epoch": 7.3, "learning_rate": 1.0887414994044865e-05, "loss": 0.0146, "step": 2728 }, { "epoch": 7.31, "learning_rate": 1.087588243909673e-05, "loss": 0.0069, "step": 2730 }, { "epoch": 7.31, "learning_rate": 1.0864348710100562e-05, "loss": 0.003, "step": 2732 }, { "epoch": 7.32, "learning_rate": 1.0852813822516378e-05, "loss": 0.0051, "step": 2734 }, { "epoch": 7.33, "learning_rate": 1.0841277791805733e-05, "loss": 0.0487, "step": 2736 }, { "epoch": 7.33, "learning_rate": 1.0829740633431733e-05, "loss": 0.0016, "step": 2738 }, { "epoch": 7.34, "learning_rate": 1.081820236285898e-05, "loss": 0.0133, "step": 2740 }, { "epoch": 7.34, "learning_rate": 1.0806662995553572e-05, "loss": 0.0042, "step": 2742 }, { "epoch": 7.35, "learning_rate": 1.0795122546983079e-05, "loss": 0.0409, "step": 2744 }, { "epoch": 7.35, "learning_rate": 1.0783581032616517e-05, "loss": 0.0043, "step": 2746 }, { "epoch": 7.36, "learning_rate": 1.0772038467924331e-05, "loss": 0.0029, "step": 2748 }, { "epoch": 7.36, "learning_rate": 1.0760494868378377e-05, "loss": 0.0065, "step": 2750 }, { "epoch": 7.37, "learning_rate": 1.0748950249451899e-05, "loss": 0.0061, "step": 2752 }, { "epoch": 7.37, "learning_rate": 1.0737404626619503e-05, "loss": 0.0199, "step": 2754 }, { "epoch": 7.38, "learning_rate": 1.072585801535714e-05, "loss": 0.0022, "step": 2756 }, { "epoch": 7.38, "learning_rate": 1.0714310431142085e-05, "loss": 0.0217, "step": 2758 }, { "epoch": 7.39, "learning_rate": 1.070276188945293e-05, "loss": 0.0106, "step": 2760 }, { "epoch": 7.39, "learning_rate": 1.0691212405769536e-05, "loss": 0.0057, "step": 2762 }, { "epoch": 7.4, "learning_rate": 1.067966199557303e-05, "loss": 0.0054, "step": 2764 }, { "epoch": 7.41, "learning_rate": 1.0668110674345783e-05, "loss": 0.0019, "step": 2766 }, { "epoch": 7.41, "learning_rate": 1.0656558457571392e-05, "loss": 0.0045, "step": 2768 }, { "epoch": 7.42, "learning_rate": 1.0645005360734636e-05, "loss": 0.0026, "step": 2770 }, { "epoch": 7.42, "learning_rate": 1.06334513993215e-05, "loss": 0.0143, "step": 2772 }, { "epoch": 7.43, "learning_rate": 1.0621896588819109e-05, "loss": 0.0047, "step": 2774 }, { "epoch": 7.43, "learning_rate": 1.0610340944715731e-05, "loss": 0.0028, "step": 2776 }, { "epoch": 7.44, "learning_rate": 1.0598784482500752e-05, "loss": 0.0064, "step": 2778 }, { "epoch": 7.44, "learning_rate": 1.0587227217664655e-05, "loss": 0.0153, "step": 2780 }, { "epoch": 7.45, "learning_rate": 1.0575669165699e-05, "loss": 0.0023, "step": 2782 }, { "epoch": 7.45, "learning_rate": 1.05641103420964e-05, "loss": 0.0038, "step": 2784 }, { "epoch": 7.46, "learning_rate": 1.0552550762350503e-05, "loss": 0.0132, "step": 2786 }, { "epoch": 7.46, "learning_rate": 1.054099044195597e-05, "loss": 0.0006, "step": 2788 }, { "epoch": 7.47, "learning_rate": 1.0529429396408452e-05, "loss": 0.018, "step": 2790 }, { "epoch": 7.48, "learning_rate": 1.0517867641204583e-05, "loss": 0.0315, "step": 2792 }, { "epoch": 7.48, "learning_rate": 1.050630519184193e-05, "loss": 0.0728, "step": 2794 }, { "epoch": 7.49, "learning_rate": 1.0494742063819011e-05, "loss": 0.0562, "step": 2796 }, { "epoch": 7.49, "learning_rate": 1.0483178272635236e-05, "loss": 0.0142, "step": 2798 }, { "epoch": 7.5, "learning_rate": 1.0471613833790917e-05, "loss": 0.0434, "step": 2800 }, { "epoch": 7.5, "learning_rate": 1.0460048762787224e-05, "loss": 0.0088, "step": 2802 }, { "epoch": 7.51, "learning_rate": 1.0448483075126183e-05, "loss": 0.0027, "step": 2804 }, { "epoch": 7.51, "learning_rate": 1.043691678631064e-05, "loss": 0.0856, "step": 2806 }, { "epoch": 7.52, "learning_rate": 1.0425349911844252e-05, "loss": 0.013, "step": 2808 }, { "epoch": 7.52, "learning_rate": 1.0413782467231455e-05, "loss": 0.0055, "step": 2810 }, { "epoch": 7.53, "learning_rate": 1.0402214467977453e-05, "loss": 0.1374, "step": 2812 }, { "epoch": 7.53, "learning_rate": 1.0390645929588197e-05, "loss": 0.0053, "step": 2814 }, { "epoch": 7.54, "learning_rate": 1.0379076867570348e-05, "loss": 0.0039, "step": 2816 }, { "epoch": 7.54, "learning_rate": 1.0367507297431289e-05, "loss": 0.009, "step": 2818 }, { "epoch": 7.55, "learning_rate": 1.0355937234679065e-05, "loss": 0.0538, "step": 2820 }, { "epoch": 7.56, "learning_rate": 1.0344366694822395e-05, "loss": 0.0125, "step": 2822 }, { "epoch": 7.56, "learning_rate": 1.0332795693370623e-05, "loss": 0.0088, "step": 2824 }, { "epoch": 7.57, "learning_rate": 1.0321224245833725e-05, "loss": 0.0035, "step": 2826 }, { "epoch": 7.57, "learning_rate": 1.0309652367722272e-05, "loss": 0.0119, "step": 2828 }, { "epoch": 7.58, "learning_rate": 1.029808007454741e-05, "loss": 0.0081, "step": 2830 }, { "epoch": 7.58, "learning_rate": 1.0286507381820838e-05, "loss": 0.0155, "step": 2832 }, { "epoch": 7.59, "learning_rate": 1.0274934305054798e-05, "loss": 0.0028, "step": 2834 }, { "epoch": 7.59, "learning_rate": 1.026336085976204e-05, "loss": 0.0681, "step": 2836 }, { "epoch": 7.6, "learning_rate": 1.0251787061455813e-05, "loss": 0.0964, "step": 2838 }, { "epoch": 7.6, "learning_rate": 1.0240212925649838e-05, "loss": 0.0345, "step": 2840 }, { "epoch": 7.61, "learning_rate": 1.0228638467858287e-05, "loss": 0.0301, "step": 2842 }, { "epoch": 7.61, "learning_rate": 1.0217063703595762e-05, "loss": 0.0471, "step": 2844 }, { "epoch": 7.62, "learning_rate": 1.0205488648377281e-05, "loss": 0.0273, "step": 2846 }, { "epoch": 7.63, "learning_rate": 1.0193913317718245e-05, "loss": 0.0145, "step": 2848 }, { "epoch": 7.63, "learning_rate": 1.0182337727134431e-05, "loss": 0.0262, "step": 2850 }, { "epoch": 7.64, "learning_rate": 1.0170761892141966e-05, "loss": 0.1083, "step": 2852 }, { "epoch": 7.64, "learning_rate": 1.0159185828257291e-05, "loss": 0.0108, "step": 2854 }, { "epoch": 7.65, "learning_rate": 1.014760955099717e-05, "loss": 0.0861, "step": 2856 }, { "epoch": 7.65, "learning_rate": 1.0136033075878644e-05, "loss": 0.08, "step": 2858 }, { "epoch": 7.66, "learning_rate": 1.012445641841902e-05, "loss": 0.062, "step": 2860 }, { "epoch": 7.66, "learning_rate": 1.0112879594135852e-05, "loss": 0.1869, "step": 2862 }, { "epoch": 7.67, "learning_rate": 1.0101302618546916e-05, "loss": 0.0951, "step": 2864 }, { "epoch": 7.67, "learning_rate": 1.0089725507170192e-05, "loss": 0.0207, "step": 2866 }, { "epoch": 7.68, "learning_rate": 1.007814827552384e-05, "loss": 0.0057, "step": 2868 }, { "epoch": 7.68, "learning_rate": 1.0066570939126183e-05, "loss": 0.0054, "step": 2870 }, { "epoch": 7.69, "learning_rate": 1.0054993513495681e-05, "loss": 0.0141, "step": 2872 }, { "epoch": 7.69, "learning_rate": 1.004341601415092e-05, "loss": 0.0345, "step": 2874 }, { "epoch": 7.7, "learning_rate": 1.003183845661058e-05, "loss": 0.0157, "step": 2876 }, { "epoch": 7.71, "learning_rate": 1.002026085639342e-05, "loss": 0.0092, "step": 2878 }, { "epoch": 7.71, "learning_rate": 1.0008683229018257e-05, "loss": 0.0391, "step": 2880 }, { "epoch": 7.72, "learning_rate": 9.99710559000394e-06, "loss": 0.0064, "step": 2882 }, { "epoch": 7.72, "learning_rate": 9.98552795486934e-06, "loss": 0.0047, "step": 2884 }, { "epoch": 7.73, "learning_rate": 9.973950339133323e-06, "loss": 0.0322, "step": 2886 }, { "epoch": 7.73, "learning_rate": 9.962372758314725e-06, "loss": 0.0201, "step": 2888 }, { "epoch": 7.74, "learning_rate": 9.950795227932334e-06, "loss": 0.0078, "step": 2890 }, { "epoch": 7.74, "learning_rate": 9.939217763504873e-06, "loss": 0.009, "step": 2892 }, { "epoch": 7.75, "learning_rate": 9.927640380550979e-06, "loss": 0.0097, "step": 2894 }, { "epoch": 7.75, "learning_rate": 9.916063094589174e-06, "loss": 0.0092, "step": 2896 }, { "epoch": 7.76, "learning_rate": 9.904485921137854e-06, "loss": 0.0327, "step": 2898 }, { "epoch": 7.76, "learning_rate": 9.892908875715265e-06, "loss": 0.0025, "step": 2900 }, { "epoch": 7.77, "learning_rate": 9.881331973839478e-06, "loss": 0.0581, "step": 2902 }, { "epoch": 7.78, "learning_rate": 9.869755231028375e-06, "loss": 0.0331, "step": 2904 }, { "epoch": 7.78, "learning_rate": 9.858178662799618e-06, "loss": 0.0119, "step": 2906 }, { "epoch": 7.79, "learning_rate": 9.846602284670647e-06, "loss": 0.0419, "step": 2908 }, { "epoch": 7.79, "learning_rate": 9.835026112158637e-06, "loss": 0.0445, "step": 2910 }, { "epoch": 7.8, "learning_rate": 9.823450160780492e-06, "loss": 0.0257, "step": 2912 }, { "epoch": 7.8, "learning_rate": 9.811874446052816e-06, "loss": 0.0114, "step": 2914 }, { "epoch": 7.81, "learning_rate": 9.8002989834919e-06, "loss": 0.06, "step": 2916 }, { "epoch": 7.81, "learning_rate": 9.78872378861369e-06, "loss": 0.0807, "step": 2918 }, { "epoch": 7.82, "learning_rate": 9.777148876933786e-06, "loss": 0.0202, "step": 2920 }, { "epoch": 7.82, "learning_rate": 9.765574263967397e-06, "loss": 0.0442, "step": 2922 }, { "epoch": 7.83, "learning_rate": 9.753999965229333e-06, "loss": 0.0947, "step": 2924 }, { "epoch": 7.83, "learning_rate": 9.74242599623399e-06, "loss": 0.0402, "step": 2926 }, { "epoch": 7.84, "learning_rate": 9.73085237249531e-06, "loss": 0.0209, "step": 2928 }, { "epoch": 7.84, "learning_rate": 9.719279109526786e-06, "loss": 0.0048, "step": 2930 }, { "epoch": 7.85, "learning_rate": 9.707706222841417e-06, "loss": 0.0194, "step": 2932 }, { "epoch": 7.86, "learning_rate": 9.696133727951704e-06, "loss": 0.0051, "step": 2934 }, { "epoch": 7.86, "learning_rate": 9.684561640369617e-06, "loss": 0.0232, "step": 2936 }, { "epoch": 7.87, "learning_rate": 9.672989975606586e-06, "loss": 0.0208, "step": 2938 }, { "epoch": 7.87, "learning_rate": 9.661418749173467e-06, "loss": 0.0034, "step": 2940 }, { "epoch": 7.88, "learning_rate": 9.649847976580537e-06, "loss": 0.0048, "step": 2942 }, { "epoch": 7.88, "learning_rate": 9.63827767333746e-06, "loss": 0.0245, "step": 2944 }, { "epoch": 7.89, "learning_rate": 9.62670785495327e-06, "loss": 0.015, "step": 2946 }, { "epoch": 7.89, "learning_rate": 9.615138536936353e-06, "loss": 0.0147, "step": 2948 }, { "epoch": 7.9, "learning_rate": 9.603569734794422e-06, "loss": 0.0027, "step": 2950 }, { "epoch": 7.9, "learning_rate": 9.592001464034503e-06, "loss": 0.0065, "step": 2952 }, { "epoch": 7.91, "learning_rate": 9.580433740162906e-06, "loss": 0.006, "step": 2954 }, { "epoch": 7.91, "learning_rate": 9.568866578685211e-06, "loss": 0.0485, "step": 2956 }, { "epoch": 7.92, "learning_rate": 9.557299995106239e-06, "loss": 0.009, "step": 2958 }, { "epoch": 7.93, "learning_rate": 9.545734004930037e-06, "loss": 0.0122, "step": 2960 }, { "epoch": 7.93, "learning_rate": 9.534168623659866e-06, "loss": 0.0035, "step": 2962 }, { "epoch": 7.94, "learning_rate": 9.522603866798162e-06, "loss": 0.0058, "step": 2964 }, { "epoch": 7.94, "learning_rate": 9.51103974984652e-06, "loss": 0.1357, "step": 2966 }, { "epoch": 7.95, "learning_rate": 9.49947628830569e-06, "loss": 0.0043, "step": 2968 }, { "epoch": 7.95, "learning_rate": 9.487913497675536e-06, "loss": 0.0011, "step": 2970 }, { "epoch": 7.96, "learning_rate": 9.476351393455017e-06, "loss": 0.0028, "step": 2972 }, { "epoch": 7.96, "learning_rate": 9.464789991142184e-06, "loss": 0.0267, "step": 2974 }, { "epoch": 7.97, "learning_rate": 9.453229306234143e-06, "loss": 0.0068, "step": 2976 }, { "epoch": 7.97, "learning_rate": 9.441669354227032e-06, "loss": 0.0506, "step": 2978 }, { "epoch": 7.98, "learning_rate": 9.430110150616014e-06, "loss": 0.0056, "step": 2980 }, { "epoch": 7.98, "learning_rate": 9.418551710895243e-06, "loss": 0.0023, "step": 2982 }, { "epoch": 7.99, "learning_rate": 9.406994050557857e-06, "loss": 0.0167, "step": 2984 }, { "epoch": 7.99, "learning_rate": 9.395437185095943e-06, "loss": 0.005, "step": 2986 }, { "epoch": 8.0, "learning_rate": 9.383881130000523e-06, "loss": 0.0428, "step": 2988 }, { "epoch": 8.01, "learning_rate": 9.372325900761535e-06, "loss": 0.002, "step": 2990 }, { "epoch": 8.01, "learning_rate": 9.360771512867808e-06, "loss": 0.0388, "step": 2992 }, { "epoch": 8.02, "learning_rate": 9.349217981807041e-06, "loss": 0.0063, "step": 2994 }, { "epoch": 8.02, "learning_rate": 9.337665323065796e-06, "loss": 0.0015, "step": 2996 }, { "epoch": 8.03, "learning_rate": 9.32611355212945e-06, "loss": 0.0263, "step": 2998 }, { "epoch": 8.03, "learning_rate": 9.314562684482202e-06, "loss": 0.0486, "step": 3000 }, { "epoch": 8.04, "learning_rate": 9.303012735607034e-06, "loss": 0.0014, "step": 3002 }, { "epoch": 8.04, "learning_rate": 9.291463720985693e-06, "loss": 0.0198, "step": 3004 }, { "epoch": 8.05, "learning_rate": 9.279915656098685e-06, "loss": 0.0081, "step": 3006 }, { "epoch": 8.05, "learning_rate": 9.268368556425238e-06, "loss": 0.0029, "step": 3008 }, { "epoch": 8.06, "learning_rate": 9.25682243744328e-06, "loss": 0.003, "step": 3010 }, { "epoch": 8.06, "learning_rate": 9.245277314629431e-06, "loss": 0.0179, "step": 3012 }, { "epoch": 8.07, "learning_rate": 9.23373320345897e-06, "loss": 0.0105, "step": 3014 }, { "epoch": 8.07, "learning_rate": 9.222190119405834e-06, "loss": 0.0091, "step": 3016 }, { "epoch": 8.08, "learning_rate": 9.210648077942567e-06, "loss": 0.0029, "step": 3018 }, { "epoch": 8.09, "learning_rate": 9.19910709454032e-06, "loss": 0.0037, "step": 3020 }, { "epoch": 8.09, "learning_rate": 9.187567184668832e-06, "loss": 0.0165, "step": 3022 }, { "epoch": 8.1, "learning_rate": 9.176028363796397e-06, "loss": 0.0158, "step": 3024 }, { "epoch": 8.1, "learning_rate": 9.16449064738985e-06, "loss": 0.004, "step": 3026 }, { "epoch": 8.11, "learning_rate": 9.152954050914547e-06, "loss": 0.0022, "step": 3028 }, { "epoch": 8.11, "learning_rate": 9.14141858983434e-06, "loss": 0.0208, "step": 3030 }, { "epoch": 8.12, "learning_rate": 9.129884279611564e-06, "loss": 0.0004, "step": 3032 }, { "epoch": 8.12, "learning_rate": 9.118351135707011e-06, "loss": 0.0179, "step": 3034 }, { "epoch": 8.13, "learning_rate": 9.106819173579901e-06, "loss": 0.0011, "step": 3036 }, { "epoch": 8.13, "learning_rate": 9.095288408687884e-06, "loss": 0.0009, "step": 3038 }, { "epoch": 8.14, "learning_rate": 9.08375885648699e-06, "loss": 0.0498, "step": 3040 }, { "epoch": 8.14, "learning_rate": 9.072230532431632e-06, "loss": 0.0023, "step": 3042 }, { "epoch": 8.15, "learning_rate": 9.060703451974581e-06, "loss": 0.008, "step": 3044 }, { "epoch": 8.16, "learning_rate": 9.049177630566932e-06, "loss": 0.0279, "step": 3046 }, { "epoch": 8.16, "learning_rate": 9.037653083658098e-06, "loss": 0.0931, "step": 3048 }, { "epoch": 8.17, "learning_rate": 9.02612982669578e-06, "loss": 0.0831, "step": 3050 }, { "epoch": 8.17, "learning_rate": 9.01460787512595e-06, "loss": 0.0022, "step": 3052 }, { "epoch": 8.18, "learning_rate": 9.003087244392837e-06, "loss": 0.0011, "step": 3054 }, { "epoch": 8.18, "learning_rate": 8.99156794993889e-06, "loss": 0.0003, "step": 3056 }, { "epoch": 8.19, "learning_rate": 8.980050007204776e-06, "loss": 0.0061, "step": 3058 }, { "epoch": 8.19, "learning_rate": 8.96853343162934e-06, "loss": 0.0246, "step": 3060 }, { "epoch": 8.2, "learning_rate": 8.957018238649602e-06, "loss": 0.0754, "step": 3062 }, { "epoch": 8.2, "learning_rate": 8.945504443700727e-06, "loss": 0.0504, "step": 3064 }, { "epoch": 8.21, "learning_rate": 8.933992062216008e-06, "loss": 0.14, "step": 3066 }, { "epoch": 8.21, "learning_rate": 8.922481109626837e-06, "loss": 0.013, "step": 3068 }, { "epoch": 8.22, "learning_rate": 8.910971601362697e-06, "loss": 0.0013, "step": 3070 }, { "epoch": 8.22, "learning_rate": 8.899463552851133e-06, "loss": 0.2511, "step": 3072 }, { "epoch": 8.23, "learning_rate": 8.88795697951773e-06, "loss": 0.0102, "step": 3074 }, { "epoch": 8.24, "learning_rate": 8.876451896786102e-06, "loss": 0.0055, "step": 3076 }, { "epoch": 8.24, "learning_rate": 8.864948320077859e-06, "loss": 0.081, "step": 3078 }, { "epoch": 8.25, "learning_rate": 8.8534462648126e-06, "loss": 0.3125, "step": 3080 }, { "epoch": 8.25, "learning_rate": 8.841945746407873e-06, "loss": 0.0197, "step": 3082 }, { "epoch": 8.26, "learning_rate": 8.830446780279175e-06, "loss": 0.0105, "step": 3084 }, { "epoch": 8.26, "learning_rate": 8.818949381839924e-06, "loss": 0.0073, "step": 3086 }, { "epoch": 8.27, "learning_rate": 8.807453566501427e-06, "loss": 0.0586, "step": 3088 }, { "epoch": 8.27, "learning_rate": 8.795959349672878e-06, "loss": 0.005, "step": 3090 }, { "epoch": 8.28, "learning_rate": 8.784466746761321e-06, "loss": 0.0059, "step": 3092 }, { "epoch": 8.28, "learning_rate": 8.772975773171646e-06, "loss": 0.0399, "step": 3094 }, { "epoch": 8.29, "learning_rate": 8.761486444306547e-06, "loss": 0.0762, "step": 3096 }, { "epoch": 8.29, "learning_rate": 8.749998775566525e-06, "loss": 0.0095, "step": 3098 }, { "epoch": 8.3, "learning_rate": 8.73851278234985e-06, "loss": 0.0937, "step": 3100 }, { "epoch": 8.31, "learning_rate": 8.727028480052544e-06, "loss": 0.0865, "step": 3102 }, { "epoch": 8.31, "learning_rate": 8.715545884068368e-06, "loss": 0.0091, "step": 3104 }, { "epoch": 8.32, "learning_rate": 8.70406500978879e-06, "loss": 0.0051, "step": 3106 }, { "epoch": 8.32, "learning_rate": 8.692585872602977e-06, "loss": 0.0335, "step": 3108 }, { "epoch": 8.33, "learning_rate": 8.681108487897762e-06, "loss": 0.0181, "step": 3110 }, { "epoch": 8.33, "learning_rate": 8.669632871057629e-06, "loss": 0.0165, "step": 3112 }, { "epoch": 8.34, "learning_rate": 8.658159037464698e-06, "loss": 0.0036, "step": 3114 }, { "epoch": 8.34, "learning_rate": 8.646687002498692e-06, "loss": 0.0128, "step": 3116 }, { "epoch": 8.35, "learning_rate": 8.635216781536925e-06, "loss": 0.0289, "step": 3118 }, { "epoch": 8.35, "learning_rate": 8.623748389954284e-06, "loss": 0.0053, "step": 3120 }, { "epoch": 8.36, "learning_rate": 8.612281843123196e-06, "loss": 0.0665, "step": 3122 }, { "epoch": 8.36, "learning_rate": 8.600817156413623e-06, "loss": 0.0054, "step": 3124 }, { "epoch": 8.37, "learning_rate": 8.589354345193028e-06, "loss": 0.0125, "step": 3126 }, { "epoch": 8.37, "learning_rate": 8.577893424826361e-06, "loss": 0.0196, "step": 3128 }, { "epoch": 8.38, "learning_rate": 8.566434410676041e-06, "loss": 0.0059, "step": 3130 }, { "epoch": 8.39, "learning_rate": 8.554977318101933e-06, "loss": 0.0045, "step": 3132 }, { "epoch": 8.39, "learning_rate": 8.543522162461315e-06, "loss": 0.0021, "step": 3134 }, { "epoch": 8.4, "learning_rate": 8.532068959108882e-06, "loss": 0.007, "step": 3136 }, { "epoch": 8.4, "learning_rate": 8.520617723396702e-06, "loss": 0.0162, "step": 3138 }, { "epoch": 8.41, "learning_rate": 8.509168470674215e-06, "loss": 0.0225, "step": 3140 }, { "epoch": 8.41, "learning_rate": 8.4977212162882e-06, "loss": 0.0409, "step": 3142 }, { "epoch": 8.42, "learning_rate": 8.486275975582749e-06, "loss": 0.0027, "step": 3144 }, { "epoch": 8.42, "learning_rate": 8.474832763899264e-06, "loss": 0.0273, "step": 3146 }, { "epoch": 8.43, "learning_rate": 8.463391596576421e-06, "loss": 0.0057, "step": 3148 }, { "epoch": 8.43, "learning_rate": 8.451952488950167e-06, "loss": 0.0047, "step": 3150 }, { "epoch": 8.44, "learning_rate": 8.440515456353677e-06, "loss": 0.0253, "step": 3152 }, { "epoch": 8.44, "learning_rate": 8.429080514117348e-06, "loss": 0.0063, "step": 3154 }, { "epoch": 8.45, "learning_rate": 8.417647677568774e-06, "loss": 0.0054, "step": 3156 }, { "epoch": 8.46, "learning_rate": 8.40621696203273e-06, "loss": 0.017, "step": 3158 }, { "epoch": 8.46, "learning_rate": 8.394788382831144e-06, "loss": 0.0006, "step": 3160 }, { "epoch": 8.47, "learning_rate": 8.38336195528308e-06, "loss": 0.005, "step": 3162 }, { "epoch": 8.47, "learning_rate": 8.371937694704725e-06, "loss": 0.097, "step": 3164 }, { "epoch": 8.48, "learning_rate": 8.36051561640935e-06, "loss": 0.0074, "step": 3166 }, { "epoch": 8.48, "learning_rate": 8.349095735707313e-06, "loss": 0.0114, "step": 3168 }, { "epoch": 8.49, "learning_rate": 8.337678067906016e-06, "loss": 0.005, "step": 3170 }, { "epoch": 8.49, "learning_rate": 8.326262628309902e-06, "loss": 0.0081, "step": 3172 }, { "epoch": 8.5, "learning_rate": 8.314849432220424e-06, "loss": 0.0031, "step": 3174 }, { "epoch": 8.5, "learning_rate": 8.303438494936027e-06, "loss": 0.0074, "step": 3176 }, { "epoch": 8.51, "learning_rate": 8.292029831752128e-06, "loss": 0.0044, "step": 3178 }, { "epoch": 8.51, "learning_rate": 8.280623457961102e-06, "loss": 0.0089, "step": 3180 }, { "epoch": 8.52, "learning_rate": 8.269219388852247e-06, "loss": 0.0324, "step": 3182 }, { "epoch": 8.52, "learning_rate": 8.257817639711776e-06, "loss": 0.0033, "step": 3184 }, { "epoch": 8.53, "learning_rate": 8.24641822582279e-06, "loss": 0.0038, "step": 3186 }, { "epoch": 8.54, "learning_rate": 8.235021162465267e-06, "loss": 0.0007, "step": 3188 }, { "epoch": 8.54, "learning_rate": 8.223626464916022e-06, "loss": 0.0112, "step": 3190 }, { "epoch": 8.55, "learning_rate": 8.212234148448708e-06, "loss": 0.0087, "step": 3192 }, { "epoch": 8.55, "learning_rate": 8.200844228333784e-06, "loss": 0.0023, "step": 3194 }, { "epoch": 8.56, "learning_rate": 8.189456719838495e-06, "loss": 0.0122, "step": 3196 }, { "epoch": 8.56, "learning_rate": 8.178071638226857e-06, "loss": 0.0107, "step": 3198 }, { "epoch": 8.57, "learning_rate": 8.166688998759631e-06, "loss": 0.0131, "step": 3200 }, { "epoch": 8.57, "learning_rate": 8.155308816694306e-06, "loss": 0.0026, "step": 3202 }, { "epoch": 8.58, "learning_rate": 8.143931107285073e-06, "loss": 0.0128, "step": 3204 }, { "epoch": 8.58, "learning_rate": 8.132555885782812e-06, "loss": 0.0375, "step": 3206 }, { "epoch": 8.59, "learning_rate": 8.121183167435064e-06, "loss": 0.0272, "step": 3208 }, { "epoch": 8.59, "learning_rate": 8.109812967486024e-06, "loss": 0.0324, "step": 3210 }, { "epoch": 8.6, "learning_rate": 8.098445301176505e-06, "loss": 0.0212, "step": 3212 }, { "epoch": 8.61, "learning_rate": 8.087080183743921e-06, "loss": 0.053, "step": 3214 }, { "epoch": 8.61, "learning_rate": 8.075717630422275e-06, "loss": 0.0075, "step": 3216 }, { "epoch": 8.62, "learning_rate": 8.06435765644213e-06, "loss": 0.0015, "step": 3218 }, { "epoch": 8.62, "learning_rate": 8.05300027703059e-06, "loss": 0.003, "step": 3220 }, { "epoch": 8.63, "learning_rate": 8.041645507411288e-06, "loss": 0.0026, "step": 3222 }, { "epoch": 8.63, "learning_rate": 8.030293362804353e-06, "loss": 0.0015, "step": 3224 }, { "epoch": 8.64, "learning_rate": 8.018943858426394e-06, "loss": 0.0105, "step": 3226 }, { "epoch": 8.64, "learning_rate": 8.007597009490487e-06, "loss": 0.0058, "step": 3228 }, { "epoch": 8.65, "learning_rate": 7.996252831206142e-06, "loss": 0.0968, "step": 3230 }, { "epoch": 8.65, "learning_rate": 7.984911338779296e-06, "loss": 0.0039, "step": 3232 }, { "epoch": 8.66, "learning_rate": 7.973572547412282e-06, "loss": 0.0692, "step": 3234 }, { "epoch": 8.66, "learning_rate": 7.962236472303811e-06, "loss": 0.009, "step": 3236 }, { "epoch": 8.67, "learning_rate": 7.950903128648957e-06, "loss": 0.0194, "step": 3238 }, { "epoch": 8.67, "learning_rate": 7.939572531639128e-06, "loss": 0.0034, "step": 3240 }, { "epoch": 8.68, "learning_rate": 7.928244696462053e-06, "loss": 0.0183, "step": 3242 }, { "epoch": 8.69, "learning_rate": 7.916919638301763e-06, "loss": 0.01, "step": 3244 }, { "epoch": 8.69, "learning_rate": 7.905597372338558e-06, "loss": 0.0031, "step": 3246 }, { "epoch": 8.7, "learning_rate": 7.894277913749003e-06, "loss": 0.0041, "step": 3248 }, { "epoch": 8.7, "learning_rate": 7.882961277705897e-06, "loss": 0.0076, "step": 3250 }, { "epoch": 8.71, "learning_rate": 7.871647479378249e-06, "loss": 0.0059, "step": 3252 }, { "epoch": 8.71, "learning_rate": 7.860336533931279e-06, "loss": 0.0055, "step": 3254 }, { "epoch": 8.72, "learning_rate": 7.849028456526373e-06, "loss": 0.0054, "step": 3256 }, { "epoch": 8.72, "learning_rate": 7.837723262321071e-06, "loss": 0.0026, "step": 3258 }, { "epoch": 8.73, "learning_rate": 7.826420966469055e-06, "loss": 0.0266, "step": 3260 }, { "epoch": 8.73, "learning_rate": 7.815121584120114e-06, "loss": 0.0272, "step": 3262 }, { "epoch": 8.74, "learning_rate": 7.803825130420141e-06, "loss": 0.0046, "step": 3264 }, { "epoch": 8.74, "learning_rate": 7.792531620511097e-06, "loss": 0.0867, "step": 3266 }, { "epoch": 8.75, "learning_rate": 7.781241069531e-06, "loss": 0.0019, "step": 3268 }, { "epoch": 8.76, "learning_rate": 7.7699534926139e-06, "loss": 0.0013, "step": 3270 }, { "epoch": 8.76, "learning_rate": 7.75866890488986e-06, "loss": 0.0011, "step": 3272 }, { "epoch": 8.77, "learning_rate": 7.747387321484934e-06, "loss": 0.0093, "step": 3274 }, { "epoch": 8.77, "learning_rate": 7.736108757521159e-06, "loss": 0.0134, "step": 3276 }, { "epoch": 8.78, "learning_rate": 7.724833228116514e-06, "loss": 0.0171, "step": 3278 }, { "epoch": 8.78, "learning_rate": 7.713560748384914e-06, "loss": 0.1394, "step": 3280 }, { "epoch": 8.79, "learning_rate": 7.702291333436191e-06, "loss": 0.0015, "step": 3282 }, { "epoch": 8.79, "learning_rate": 7.69102499837606e-06, "loss": 0.0027, "step": 3284 }, { "epoch": 8.8, "learning_rate": 7.679761758306114e-06, "loss": 0.0022, "step": 3286 }, { "epoch": 8.8, "learning_rate": 7.668501628323796e-06, "loss": 0.0047, "step": 3288 }, { "epoch": 8.81, "learning_rate": 7.657244623522378e-06, "loss": 0.0054, "step": 3290 }, { "epoch": 8.81, "learning_rate": 7.645990758990947e-06, "loss": 0.0134, "step": 3292 }, { "epoch": 8.82, "learning_rate": 7.634740049814376e-06, "loss": 0.0216, "step": 3294 }, { "epoch": 8.82, "learning_rate": 7.623492511073314e-06, "loss": 0.0874, "step": 3296 }, { "epoch": 8.83, "learning_rate": 7.612248157844153e-06, "loss": 0.0037, "step": 3298 }, { "epoch": 8.84, "learning_rate": 7.601007005199022e-06, "loss": 0.003, "step": 3300 }, { "epoch": 8.84, "learning_rate": 7.589769068205757e-06, "loss": 0.0029, "step": 3302 }, { "epoch": 8.85, "learning_rate": 7.578534361927883e-06, "loss": 0.0178, "step": 3304 }, { "epoch": 8.85, "learning_rate": 7.567302901424598e-06, "loss": 0.0107, "step": 3306 }, { "epoch": 8.86, "learning_rate": 7.556074701750743e-06, "loss": 0.0031, "step": 3308 }, { "epoch": 8.86, "learning_rate": 7.544849777956792e-06, "loss": 0.0121, "step": 3310 }, { "epoch": 8.87, "learning_rate": 7.533628145088832e-06, "loss": 0.0151, "step": 3312 }, { "epoch": 8.87, "learning_rate": 7.522409818188531e-06, "loss": 0.0098, "step": 3314 }, { "epoch": 8.88, "learning_rate": 7.51119481229313e-06, "loss": 0.0457, "step": 3316 }, { "epoch": 8.88, "learning_rate": 7.499983142435418e-06, "loss": 0.0018, "step": 3318 }, { "epoch": 8.89, "learning_rate": 7.488774823643712e-06, "loss": 0.0685, "step": 3320 }, { "epoch": 8.89, "learning_rate": 7.477569870941833e-06, "loss": 0.0045, "step": 3322 }, { "epoch": 8.9, "learning_rate": 7.4663682993491e-06, "loss": 0.0042, "step": 3324 }, { "epoch": 8.9, "learning_rate": 7.455170123880293e-06, "loss": 0.0017, "step": 3326 }, { "epoch": 8.91, "learning_rate": 7.443975359545641e-06, "loss": 0.0228, "step": 3328 }, { "epoch": 8.92, "learning_rate": 7.432784021350796e-06, "loss": 0.0384, "step": 3330 }, { "epoch": 8.92, "learning_rate": 7.421596124296826e-06, "loss": 0.0172, "step": 3332 }, { "epoch": 8.93, "learning_rate": 7.4104116833801835e-06, "loss": 0.0597, "step": 3334 }, { "epoch": 8.93, "learning_rate": 7.399230713592684e-06, "loss": 0.006, "step": 3336 }, { "epoch": 8.94, "learning_rate": 7.3880532299214966e-06, "loss": 0.0014, "step": 3338 }, { "epoch": 8.94, "learning_rate": 7.376879247349112e-06, "loss": 0.0268, "step": 3340 }, { "epoch": 8.95, "learning_rate": 7.365708780853331e-06, "loss": 0.0301, "step": 3342 }, { "epoch": 8.95, "learning_rate": 7.354541845407241e-06, "loss": 0.0243, "step": 3344 }, { "epoch": 8.96, "learning_rate": 7.343378455979197e-06, "loss": 0.0147, "step": 3346 }, { "epoch": 8.96, "learning_rate": 7.332218627532801e-06, "loss": 0.0902, "step": 3348 }, { "epoch": 8.97, "learning_rate": 7.3210623750268786e-06, "loss": 0.0336, "step": 3350 }, { "epoch": 8.97, "learning_rate": 7.309909713415465e-06, "loss": 0.0053, "step": 3352 }, { "epoch": 8.98, "learning_rate": 7.298760657647779e-06, "loss": 0.0361, "step": 3354 }, { "epoch": 8.99, "learning_rate": 7.287615222668214e-06, "loss": 0.0327, "step": 3356 }, { "epoch": 8.99, "learning_rate": 7.276473423416302e-06, "loss": 0.0089, "step": 3358 }, { "epoch": 9.0, "learning_rate": 7.265335274826704e-06, "loss": 0.0023, "step": 3360 }, { "epoch": 9.0, "learning_rate": 7.254200791829187e-06, "loss": 0.0498, "step": 3362 }, { "epoch": 9.01, "learning_rate": 7.243069989348604e-06, "loss": 0.0021, "step": 3364 }, { "epoch": 9.01, "learning_rate": 7.231942882304879e-06, "loss": 0.0004, "step": 3366 }, { "epoch": 9.02, "learning_rate": 7.22081948561298e-06, "loss": 0.0017, "step": 3368 }, { "epoch": 9.02, "learning_rate": 7.209699814182899e-06, "loss": 0.0013, "step": 3370 }, { "epoch": 9.03, "learning_rate": 7.198583882919636e-06, "loss": 0.0059, "step": 3372 }, { "epoch": 9.03, "learning_rate": 7.187471706723181e-06, "loss": 0.0013, "step": 3374 }, { "epoch": 9.04, "learning_rate": 7.176363300488485e-06, "loss": 0.0015, "step": 3376 }, { "epoch": 9.04, "learning_rate": 7.16525867910545e-06, "loss": 0.0057, "step": 3378 }, { "epoch": 9.05, "learning_rate": 7.154157857458905e-06, "loss": 0.0007, "step": 3380 }, { "epoch": 9.05, "learning_rate": 7.14306085042858e-06, "loss": 0.0075, "step": 3382 }, { "epoch": 9.06, "learning_rate": 7.131967672889101e-06, "loss": 0.0013, "step": 3384 }, { "epoch": 9.07, "learning_rate": 7.120878339709949e-06, "loss": 0.0022, "step": 3386 }, { "epoch": 9.07, "learning_rate": 7.109792865755466e-06, "loss": 0.0034, "step": 3388 }, { "epoch": 9.08, "learning_rate": 7.09871126588481e-06, "loss": 0.0024, "step": 3390 }, { "epoch": 9.08, "learning_rate": 7.087633554951952e-06, "loss": 0.0034, "step": 3392 }, { "epoch": 9.09, "learning_rate": 7.076559747805644e-06, "loss": 0.0049, "step": 3394 }, { "epoch": 9.09, "learning_rate": 7.0654898592894135e-06, "loss": 0.059, "step": 3396 }, { "epoch": 9.1, "learning_rate": 7.054423904241527e-06, "loss": 0.0013, "step": 3398 }, { "epoch": 9.1, "learning_rate": 7.043361897494988e-06, "loss": 0.017, "step": 3400 }, { "epoch": 9.11, "learning_rate": 7.0323038538775e-06, "loss": 0.012, "step": 3402 }, { "epoch": 9.11, "learning_rate": 7.021249788211456e-06, "loss": 0.0009, "step": 3404 }, { "epoch": 9.12, "learning_rate": 7.010199715313919e-06, "loss": 0.024, "step": 3406 }, { "epoch": 9.12, "learning_rate": 6.999153649996595e-06, "loss": 0.0046, "step": 3408 }, { "epoch": 9.13, "learning_rate": 6.988111607065826e-06, "loss": 0.0047, "step": 3410 }, { "epoch": 9.14, "learning_rate": 6.9770736013225535e-06, "loss": 0.0224, "step": 3412 }, { "epoch": 9.14, "learning_rate": 6.966039647562319e-06, "loss": 0.0009, "step": 3414 }, { "epoch": 9.15, "learning_rate": 6.955009760575222e-06, "loss": 0.0125, "step": 3416 }, { "epoch": 9.15, "learning_rate": 6.943983955145914e-06, "loss": 0.0016, "step": 3418 }, { "epoch": 9.16, "learning_rate": 6.932962246053577e-06, "loss": 0.0016, "step": 3420 }, { "epoch": 9.16, "learning_rate": 6.921944648071901e-06, "loss": 0.0369, "step": 3422 }, { "epoch": 9.17, "learning_rate": 6.910931175969062e-06, "loss": 0.0143, "step": 3424 }, { "epoch": 9.17, "learning_rate": 6.899921844507716e-06, "loss": 0.0025, "step": 3426 }, { "epoch": 9.18, "learning_rate": 6.888916668444958e-06, "loss": 0.0435, "step": 3428 }, { "epoch": 9.18, "learning_rate": 6.877915662532317e-06, "loss": 0.0029, "step": 3430 }, { "epoch": 9.19, "learning_rate": 6.8669188415157324e-06, "loss": 0.0086, "step": 3432 }, { "epoch": 9.19, "learning_rate": 6.855926220135535e-06, "loss": 0.0018, "step": 3434 }, { "epoch": 9.2, "learning_rate": 6.844937813126427e-06, "loss": 0.0074, "step": 3436 }, { "epoch": 9.2, "learning_rate": 6.833953635217456e-06, "loss": 0.0614, "step": 3438 }, { "epoch": 9.21, "learning_rate": 6.8229737011320075e-06, "loss": 0.0006, "step": 3440 }, { "epoch": 9.22, "learning_rate": 6.811998025587776e-06, "loss": 0.0227, "step": 3442 }, { "epoch": 9.22, "learning_rate": 6.801026623296745e-06, "loss": 0.0259, "step": 3444 }, { "epoch": 9.23, "learning_rate": 6.790059508965174e-06, "loss": 0.0134, "step": 3446 }, { "epoch": 9.23, "learning_rate": 6.779096697293574e-06, "loss": 0.0119, "step": 3448 }, { "epoch": 9.24, "learning_rate": 6.768138202976691e-06, "loss": 0.001, "step": 3450 }, { "epoch": 9.24, "learning_rate": 6.757184040703475e-06, "loss": 0.0053, "step": 3452 }, { "epoch": 9.25, "learning_rate": 6.74623422515708e-06, "loss": 0.0038, "step": 3454 }, { "epoch": 9.25, "learning_rate": 6.735288771014824e-06, "loss": 0.0043, "step": 3456 }, { "epoch": 9.26, "learning_rate": 6.72434769294819e-06, "loss": 0.0091, "step": 3458 }, { "epoch": 9.26, "learning_rate": 6.713411005622785e-06, "loss": 0.0017, "step": 3460 }, { "epoch": 9.27, "learning_rate": 6.702478723698336e-06, "loss": 0.0026, "step": 3462 }, { "epoch": 9.27, "learning_rate": 6.6915508618286625e-06, "loss": 0.0022, "step": 3464 }, { "epoch": 9.28, "learning_rate": 6.680627434661662e-06, "loss": 0.0342, "step": 3466 }, { "epoch": 9.29, "learning_rate": 6.669708456839283e-06, "loss": 0.0019, "step": 3468 }, { "epoch": 9.29, "learning_rate": 6.658793942997516e-06, "loss": 0.0041, "step": 3470 }, { "epoch": 9.3, "learning_rate": 6.647883907766364e-06, "loss": 0.0084, "step": 3472 }, { "epoch": 9.3, "learning_rate": 6.636978365769826e-06, "loss": 0.0015, "step": 3474 }, { "epoch": 9.31, "learning_rate": 6.626077331625883e-06, "loss": 0.0611, "step": 3476 }, { "epoch": 9.31, "learning_rate": 6.615180819946466e-06, "loss": 0.0349, "step": 3478 }, { "epoch": 9.32, "learning_rate": 6.604288845337453e-06, "loss": 0.051, "step": 3480 }, { "epoch": 9.32, "learning_rate": 6.593401422398635e-06, "loss": 0.0006, "step": 3482 }, { "epoch": 9.33, "learning_rate": 6.582518565723701e-06, "loss": 0.016, "step": 3484 }, { "epoch": 9.33, "learning_rate": 6.5716402899002205e-06, "loss": 0.0204, "step": 3486 }, { "epoch": 9.34, "learning_rate": 6.560766609509624e-06, "loss": 0.0022, "step": 3488 }, { "epoch": 9.34, "learning_rate": 6.549897539127185e-06, "loss": 0.0422, "step": 3490 }, { "epoch": 9.35, "learning_rate": 6.5390330933219895e-06, "loss": 0.0071, "step": 3492 }, { "epoch": 9.35, "learning_rate": 6.528173286656931e-06, "loss": 0.013, "step": 3494 }, { "epoch": 9.36, "learning_rate": 6.517318133688685e-06, "loss": 0.0012, "step": 3496 }, { "epoch": 9.37, "learning_rate": 6.506467648967683e-06, "loss": 0.0043, "step": 3498 }, { "epoch": 9.37, "learning_rate": 6.495621847038105e-06, "loss": 0.0012, "step": 3500 }, { "epoch": 9.38, "learning_rate": 6.4847807424378525e-06, "loss": 0.0297, "step": 3502 }, { "epoch": 9.38, "learning_rate": 6.473944349698531e-06, "loss": 0.0101, "step": 3504 }, { "epoch": 9.39, "learning_rate": 6.46311268334543e-06, "loss": 0.0021, "step": 3506 }, { "epoch": 9.39, "learning_rate": 6.452285757897501e-06, "loss": 0.0002, "step": 3508 }, { "epoch": 9.4, "learning_rate": 6.441463587867341e-06, "loss": 0.0798, "step": 3510 }, { "epoch": 9.4, "learning_rate": 6.43064618776118e-06, "loss": 0.0062, "step": 3512 }, { "epoch": 9.41, "learning_rate": 6.419833572078846e-06, "loss": 0.0005, "step": 3514 }, { "epoch": 9.41, "learning_rate": 6.4090257553137566e-06, "loss": 0.0111, "step": 3516 }, { "epoch": 9.42, "learning_rate": 6.3982227519528986e-06, "loss": 0.0063, "step": 3518 }, { "epoch": 9.42, "learning_rate": 6.3874245764768036e-06, "loss": 0.0353, "step": 3520 }, { "epoch": 9.43, "learning_rate": 6.3766312433595325e-06, "loss": 0.0326, "step": 3522 }, { "epoch": 9.44, "learning_rate": 6.3658427670686596e-06, "loss": 0.0582, "step": 3524 }, { "epoch": 9.44, "learning_rate": 6.355059162065245e-06, "loss": 0.001, "step": 3526 }, { "epoch": 9.45, "learning_rate": 6.344280442803819e-06, "loss": 0.0031, "step": 3528 }, { "epoch": 9.45, "learning_rate": 6.3335066237323654e-06, "loss": 0.0015, "step": 3530 }, { "epoch": 9.46, "learning_rate": 6.322737719292295e-06, "loss": 0.0102, "step": 3532 }, { "epoch": 9.46, "learning_rate": 6.311973743918438e-06, "loss": 0.0035, "step": 3534 }, { "epoch": 9.47, "learning_rate": 6.3012147120390126e-06, "loss": 0.1321, "step": 3536 }, { "epoch": 9.47, "learning_rate": 6.290460638075613e-06, "loss": 0.0056, "step": 3538 }, { "epoch": 9.48, "learning_rate": 6.279711536443185e-06, "loss": 0.0311, "step": 3540 }, { "epoch": 9.48, "learning_rate": 6.2689674215500114e-06, "loss": 0.0126, "step": 3542 }, { "epoch": 9.49, "learning_rate": 6.2582283077976864e-06, "loss": 0.0045, "step": 3544 }, { "epoch": 9.49, "learning_rate": 6.247494209581114e-06, "loss": 0.0083, "step": 3546 }, { "epoch": 9.5, "learning_rate": 6.2367651412884586e-06, "loss": 0.0023, "step": 3548 }, { "epoch": 9.5, "learning_rate": 6.226041117301153e-06, "loss": 0.1556, "step": 3550 }, { "epoch": 9.51, "learning_rate": 6.2153221519938644e-06, "loss": 0.0004, "step": 3552 }, { "epoch": 9.52, "learning_rate": 6.2046082597344795e-06, "loss": 0.0095, "step": 3554 }, { "epoch": 9.52, "learning_rate": 6.193899454884085e-06, "loss": 0.0014, "step": 3556 }, { "epoch": 9.53, "learning_rate": 6.183195751796947e-06, "loss": 0.0117, "step": 3558 }, { "epoch": 9.53, "learning_rate": 6.1724971648205e-06, "loss": 0.0004, "step": 3560 }, { "epoch": 9.54, "learning_rate": 6.161803708295313e-06, "loss": 0.0018, "step": 3562 }, { "epoch": 9.54, "learning_rate": 6.151115396555081e-06, "loss": 0.002, "step": 3564 }, { "epoch": 9.55, "learning_rate": 6.140432243926601e-06, "loss": 0.0041, "step": 3566 }, { "epoch": 9.55, "learning_rate": 6.1297542647297615e-06, "loss": 0.003, "step": 3568 }, { "epoch": 9.56, "learning_rate": 6.119081473277502e-06, "loss": 0.0181, "step": 3570 }, { "epoch": 9.56, "learning_rate": 6.1084138838758265e-06, "loss": 0.0012, "step": 3572 }, { "epoch": 9.57, "learning_rate": 6.097751510823753e-06, "loss": 0.0002, "step": 3574 }, { "epoch": 9.57, "learning_rate": 6.0870943684133136e-06, "loss": 0.0019, "step": 3576 }, { "epoch": 9.58, "learning_rate": 6.076442470929525e-06, "loss": 0.0122, "step": 3578 }, { "epoch": 9.59, "learning_rate": 6.065795832650372e-06, "loss": 0.0263, "step": 3580 }, { "epoch": 9.59, "learning_rate": 6.055154467846801e-06, "loss": 0.0016, "step": 3582 }, { "epoch": 9.6, "learning_rate": 6.044518390782679e-06, "loss": 0.0072, "step": 3584 }, { "epoch": 9.6, "learning_rate": 6.033887615714786e-06, "loss": 0.0039, "step": 3586 }, { "epoch": 9.61, "learning_rate": 6.0232621568928015e-06, "loss": 0.0025, "step": 3588 }, { "epoch": 9.61, "learning_rate": 6.012642028559272e-06, "loss": 0.0225, "step": 3590 }, { "epoch": 9.62, "learning_rate": 6.002027244949602e-06, "loss": 0.0045, "step": 3592 }, { "epoch": 9.62, "learning_rate": 5.991417820292035e-06, "loss": 0.001, "step": 3594 }, { "epoch": 9.63, "learning_rate": 5.980813768807625e-06, "loss": 0.0139, "step": 3596 }, { "epoch": 9.63, "learning_rate": 5.970215104710227e-06, "loss": 0.0127, "step": 3598 }, { "epoch": 9.64, "learning_rate": 5.959621842206474e-06, "loss": 0.0005, "step": 3600 }, { "epoch": 9.64, "learning_rate": 5.949033995495759e-06, "loss": 0.0079, "step": 3602 }, { "epoch": 9.65, "learning_rate": 5.938451578770219e-06, "loss": 0.0068, "step": 3604 }, { "epoch": 9.65, "learning_rate": 5.927874606214705e-06, "loss": 0.0311, "step": 3606 }, { "epoch": 9.66, "learning_rate": 5.917303092006778e-06, "loss": 0.0141, "step": 3608 }, { "epoch": 9.67, "learning_rate": 5.906737050316677e-06, "loss": 0.0011, "step": 3610 }, { "epoch": 9.67, "learning_rate": 5.896176495307304e-06, "loss": 0.0207, "step": 3612 }, { "epoch": 9.68, "learning_rate": 5.885621441134219e-06, "loss": 0.0058, "step": 3614 }, { "epoch": 9.68, "learning_rate": 5.875071901945595e-06, "loss": 0.0008, "step": 3616 }, { "epoch": 9.69, "learning_rate": 5.864527891882217e-06, "loss": 0.0204, "step": 3618 }, { "epoch": 9.69, "learning_rate": 5.85398942507746e-06, "loss": 0.0009, "step": 3620 }, { "epoch": 9.7, "learning_rate": 5.843456515657269e-06, "loss": 0.0086, "step": 3622 }, { "epoch": 9.7, "learning_rate": 5.832929177740134e-06, "loss": 0.0011, "step": 3624 }, { "epoch": 9.71, "learning_rate": 5.822407425437089e-06, "loss": 0.0027, "step": 3626 }, { "epoch": 9.71, "learning_rate": 5.811891272851669e-06, "loss": 0.0032, "step": 3628 }, { "epoch": 9.72, "learning_rate": 5.801380734079906e-06, "loss": 0.0022, "step": 3630 }, { "epoch": 9.72, "learning_rate": 5.790875823210313e-06, "loss": 0.001, "step": 3632 }, { "epoch": 9.73, "learning_rate": 5.780376554323853e-06, "loss": 0.0045, "step": 3634 }, { "epoch": 9.73, "learning_rate": 5.769882941493927e-06, "loss": 0.0036, "step": 3636 }, { "epoch": 9.74, "learning_rate": 5.7593949987863584e-06, "loss": 0.0158, "step": 3638 }, { "epoch": 9.75, "learning_rate": 5.74891274025936e-06, "loss": 0.0128, "step": 3640 }, { "epoch": 9.75, "learning_rate": 5.738436179963545e-06, "loss": 0.0008, "step": 3642 }, { "epoch": 9.76, "learning_rate": 5.727965331941872e-06, "loss": 0.001, "step": 3644 }, { "epoch": 9.76, "learning_rate": 5.7175002102296455e-06, "loss": 0.0359, "step": 3646 }, { "epoch": 9.77, "learning_rate": 5.707040828854496e-06, "loss": 0.0016, "step": 3648 }, { "epoch": 9.77, "learning_rate": 5.696587201836363e-06, "loss": 0.0253, "step": 3650 }, { "epoch": 9.78, "learning_rate": 5.686139343187468e-06, "loss": 0.0067, "step": 3652 }, { "epoch": 9.78, "learning_rate": 5.6756972669123025e-06, "loss": 0.001, "step": 3654 }, { "epoch": 9.79, "learning_rate": 5.665260987007606e-06, "loss": 0.0011, "step": 3656 }, { "epoch": 9.79, "learning_rate": 5.65483051746235e-06, "loss": 0.0004, "step": 3658 }, { "epoch": 9.8, "learning_rate": 5.644405872257716e-06, "loss": 0.0986, "step": 3660 }, { "epoch": 9.8, "learning_rate": 5.633987065367077e-06, "loss": 0.0004, "step": 3662 }, { "epoch": 9.81, "learning_rate": 5.623574110755991e-06, "loss": 0.019, "step": 3664 }, { "epoch": 9.82, "learning_rate": 5.613167022382157e-06, "loss": 0.0032, "step": 3666 }, { "epoch": 9.82, "learning_rate": 5.602765814195418e-06, "loss": 0.0021, "step": 3668 }, { "epoch": 9.83, "learning_rate": 5.592370500137734e-06, "loss": 0.0175, "step": 3670 }, { "epoch": 9.83, "learning_rate": 5.581981094143165e-06, "loss": 0.0047, "step": 3672 }, { "epoch": 9.84, "learning_rate": 5.571597610137851e-06, "loss": 0.0614, "step": 3674 }, { "epoch": 9.84, "learning_rate": 5.561220062039989e-06, "loss": 0.0031, "step": 3676 }, { "epoch": 9.85, "learning_rate": 5.550848463759835e-06, "loss": 0.0058, "step": 3678 }, { "epoch": 9.85, "learning_rate": 5.540482829199654e-06, "loss": 0.0134, "step": 3680 }, { "epoch": 9.86, "learning_rate": 5.5301231722537215e-06, "loss": 0.0101, "step": 3682 }, { "epoch": 9.86, "learning_rate": 5.519769506808303e-06, "loss": 0.0013, "step": 3684 }, { "epoch": 9.87, "learning_rate": 5.509421846741632e-06, "loss": 0.0062, "step": 3686 }, { "epoch": 9.87, "learning_rate": 5.499080205923888e-06, "loss": 0.0042, "step": 3688 }, { "epoch": 9.88, "learning_rate": 5.48874459821719e-06, "loss": 0.0048, "step": 3690 }, { "epoch": 9.88, "learning_rate": 5.478415037475568e-06, "loss": 0.0015, "step": 3692 }, { "epoch": 9.89, "learning_rate": 5.46809153754494e-06, "loss": 0.0009, "step": 3694 }, { "epoch": 9.9, "learning_rate": 5.457774112263106e-06, "loss": 0.001, "step": 3696 }, { "epoch": 9.9, "learning_rate": 5.447462775459722e-06, "loss": 0.0013, "step": 3698 }, { "epoch": 9.91, "learning_rate": 5.4371575409562815e-06, "loss": 0.005, "step": 3700 }, { "epoch": 9.91, "learning_rate": 5.426858422566103e-06, "loss": 0.0014, "step": 3702 }, { "epoch": 9.92, "learning_rate": 5.416565434094301e-06, "loss": 0.0085, "step": 3704 }, { "epoch": 9.92, "learning_rate": 5.406278589337777e-06, "loss": 0.0193, "step": 3706 }, { "epoch": 9.93, "learning_rate": 5.395997902085197e-06, "loss": 0.0061, "step": 3708 }, { "epoch": 9.93, "learning_rate": 5.385723386116967e-06, "loss": 0.0016, "step": 3710 }, { "epoch": 9.94, "learning_rate": 5.375455055205234e-06, "loss": 0.055, "step": 3712 }, { "epoch": 9.94, "learning_rate": 5.365192923113847e-06, "loss": 0.0037, "step": 3714 }, { "epoch": 9.95, "learning_rate": 5.354937003598341e-06, "loss": 0.1714, "step": 3716 }, { "epoch": 9.95, "learning_rate": 5.3446873104059335e-06, "loss": 0.1282, "step": 3718 }, { "epoch": 9.96, "learning_rate": 5.334443857275488e-06, "loss": 0.0022, "step": 3720 }, { "epoch": 9.97, "learning_rate": 5.324206657937508e-06, "loss": 0.0013, "step": 3722 }, { "epoch": 9.97, "learning_rate": 5.313975726114111e-06, "loss": 0.0018, "step": 3724 }, { "epoch": 9.98, "learning_rate": 5.303751075519019e-06, "loss": 0.0191, "step": 3726 }, { "epoch": 9.98, "learning_rate": 5.29353271985753e-06, "loss": 0.0132, "step": 3728 }, { "epoch": 9.99, "learning_rate": 5.283320672826506e-06, "loss": 0.0015, "step": 3730 }, { "epoch": 9.99, "learning_rate": 5.273114948114346e-06, "loss": 0.0031, "step": 3732 }, { "epoch": 10.0, "learning_rate": 5.262915559400992e-06, "loss": 0.0514, "step": 3734 }, { "epoch": 10.0, "learning_rate": 5.252722520357873e-06, "loss": 0.0109, "step": 3736 }, { "epoch": 10.01, "learning_rate": 5.24253584464792e-06, "loss": 0.0114, "step": 3738 }, { "epoch": 10.01, "learning_rate": 5.232355545925529e-06, "loss": 0.0016, "step": 3740 }, { "epoch": 10.02, "learning_rate": 5.2221816378365475e-06, "loss": 0.0006, "step": 3742 }, { "epoch": 10.02, "learning_rate": 5.2120141340182595e-06, "loss": 0.0012, "step": 3744 }, { "epoch": 10.03, "learning_rate": 5.201853048099364e-06, "loss": 0.069, "step": 3746 }, { "epoch": 10.03, "learning_rate": 5.191698393699955e-06, "loss": 0.0017, "step": 3748 }, { "epoch": 10.04, "learning_rate": 5.181550184431511e-06, "loss": 0.0131, "step": 3750 }, { "epoch": 10.05, "learning_rate": 5.171408433896862e-06, "loss": 0.0007, "step": 3752 }, { "epoch": 10.05, "learning_rate": 5.161273155690194e-06, "loss": 0.0172, "step": 3754 }, { "epoch": 10.06, "learning_rate": 5.151144363397007e-06, "loss": 0.0016, "step": 3756 }, { "epoch": 10.06, "learning_rate": 5.141022070594109e-06, "loss": 0.0074, "step": 3758 }, { "epoch": 10.07, "learning_rate": 5.130906290849599e-06, "loss": 0.0014, "step": 3760 }, { "epoch": 10.07, "learning_rate": 5.120797037722841e-06, "loss": 0.0032, "step": 3762 }, { "epoch": 10.08, "learning_rate": 5.110694324764456e-06, "loss": 0.0125, "step": 3764 }, { "epoch": 10.08, "learning_rate": 5.100598165516295e-06, "loss": 0.0011, "step": 3766 }, { "epoch": 10.09, "learning_rate": 5.0905085735114235e-06, "loss": 0.0264, "step": 3768 }, { "epoch": 10.09, "learning_rate": 5.080425562274108e-06, "loss": 0.0107, "step": 3770 }, { "epoch": 10.1, "learning_rate": 5.070349145319791e-06, "loss": 0.0022, "step": 3772 }, { "epoch": 10.1, "learning_rate": 5.060279336155074e-06, "loss": 0.0014, "step": 3774 }, { "epoch": 10.11, "learning_rate": 5.050216148277709e-06, "loss": 0.0015, "step": 3776 }, { "epoch": 10.12, "learning_rate": 5.040159595176567e-06, "loss": 0.0031, "step": 3778 }, { "epoch": 10.12, "learning_rate": 5.030109690331625e-06, "loss": 0.0052, "step": 3780 }, { "epoch": 10.13, "learning_rate": 5.020066447213949e-06, "loss": 0.0138, "step": 3782 }, { "epoch": 10.13, "learning_rate": 5.010029879285678e-06, "loss": 0.0014, "step": 3784 }, { "epoch": 10.14, "learning_rate": 5.000000000000003e-06, "loss": 0.0236, "step": 3786 }, { "epoch": 10.14, "learning_rate": 4.989976822801145e-06, "loss": 0.0005, "step": 3788 }, { "epoch": 10.15, "learning_rate": 4.979960361124346e-06, "loss": 0.0005, "step": 3790 }, { "epoch": 10.15, "learning_rate": 4.969950628395848e-06, "loss": 0.0175, "step": 3792 }, { "epoch": 10.16, "learning_rate": 4.959947638032866e-06, "loss": 0.0063, "step": 3794 }, { "epoch": 10.16, "learning_rate": 4.949951403443581e-06, "loss": 0.0665, "step": 3796 }, { "epoch": 10.17, "learning_rate": 4.939961938027127e-06, "loss": 0.0031, "step": 3798 }, { "epoch": 10.17, "learning_rate": 4.929979255173553e-06, "loss": 0.012, "step": 3800 }, { "epoch": 10.18, "learning_rate": 4.920003368263818e-06, "loss": 0.018, "step": 3802 }, { "epoch": 10.18, "learning_rate": 4.910034290669777e-06, "loss": 0.0006, "step": 3804 }, { "epoch": 10.19, "learning_rate": 4.900072035754154e-06, "loss": 0.0022, "step": 3806 }, { "epoch": 10.2, "learning_rate": 4.890116616870528e-06, "loss": 0.0013, "step": 3808 }, { "epoch": 10.2, "learning_rate": 4.880168047363312e-06, "loss": 0.0008, "step": 3810 }, { "epoch": 10.21, "learning_rate": 4.870226340567747e-06, "loss": 0.0014, "step": 3812 }, { "epoch": 10.21, "learning_rate": 4.860291509809869e-06, "loss": 0.0128, "step": 3814 }, { "epoch": 10.22, "learning_rate": 4.8503635684064955e-06, "loss": 0.0017, "step": 3816 }, { "epoch": 10.22, "learning_rate": 4.840442529665212e-06, "loss": 0.0086, "step": 3818 }, { "epoch": 10.23, "learning_rate": 4.83052840688435e-06, "loss": 0.0009, "step": 3820 }, { "epoch": 10.23, "learning_rate": 4.82062121335297e-06, "loss": 0.0099, "step": 3822 }, { "epoch": 10.24, "learning_rate": 4.810720962350853e-06, "loss": 0.004, "step": 3824 }, { "epoch": 10.24, "learning_rate": 4.800827667148463e-06, "loss": 0.0317, "step": 3826 }, { "epoch": 10.25, "learning_rate": 4.790941341006946e-06, "loss": 0.0012, "step": 3828 }, { "epoch": 10.25, "learning_rate": 4.781061997178104e-06, "loss": 0.0032, "step": 3830 }, { "epoch": 10.26, "learning_rate": 4.7711896489043795e-06, "loss": 0.0013, "step": 3832 }, { "epoch": 10.27, "learning_rate": 4.761324309418843e-06, "loss": 0.001, "step": 3834 }, { "epoch": 10.27, "learning_rate": 4.751465991945165e-06, "loss": 0.0032, "step": 3836 }, { "epoch": 10.28, "learning_rate": 4.7416147096976065e-06, "loss": 0.0159, "step": 3838 }, { "epoch": 10.28, "learning_rate": 4.731770475880995e-06, "loss": 0.0007, "step": 3840 }, { "epoch": 10.29, "learning_rate": 4.7219333036907146e-06, "loss": 0.0041, "step": 3842 }, { "epoch": 10.29, "learning_rate": 4.712103206312677e-06, "loss": 0.0018, "step": 3844 }, { "epoch": 10.3, "learning_rate": 4.702280196923323e-06, "loss": 0.0022, "step": 3846 }, { "epoch": 10.3, "learning_rate": 4.692464288689581e-06, "loss": 0.0346, "step": 3848 }, { "epoch": 10.31, "learning_rate": 4.682655494768866e-06, "loss": 0.003, "step": 3850 }, { "epoch": 10.31, "learning_rate": 4.672853828309056e-06, "loss": 0.0067, "step": 3852 }, { "epoch": 10.32, "learning_rate": 4.663059302448472e-06, "loss": 0.0002, "step": 3854 }, { "epoch": 10.32, "learning_rate": 4.653271930315868e-06, "loss": 0.0042, "step": 3856 }, { "epoch": 10.33, "learning_rate": 4.643491725030408e-06, "loss": 0.0009, "step": 3858 }, { "epoch": 10.33, "learning_rate": 4.63371869970165e-06, "loss": 0.0011, "step": 3860 }, { "epoch": 10.34, "learning_rate": 4.623952867429526e-06, "loss": 0.001, "step": 3862 }, { "epoch": 10.35, "learning_rate": 4.614194241304325e-06, "loss": 0.0031, "step": 3864 }, { "epoch": 10.35, "learning_rate": 4.604442834406678e-06, "loss": 0.003, "step": 3866 }, { "epoch": 10.36, "learning_rate": 4.594698659807546e-06, "loss": 0.0006, "step": 3868 }, { "epoch": 10.36, "learning_rate": 4.584961730568188e-06, "loss": 0.0083, "step": 3870 }, { "epoch": 10.37, "learning_rate": 4.575232059740151e-06, "loss": 0.0031, "step": 3872 }, { "epoch": 10.37, "learning_rate": 4.565509660365255e-06, "loss": 0.007, "step": 3874 }, { "epoch": 10.38, "learning_rate": 4.555794545475574e-06, "loss": 0.0016, "step": 3876 }, { "epoch": 10.38, "learning_rate": 4.546086728093416e-06, "loss": 0.0024, "step": 3878 }, { "epoch": 10.39, "learning_rate": 4.536386221231308e-06, "loss": 0.0109, "step": 3880 }, { "epoch": 10.39, "learning_rate": 4.5266930378919785e-06, "loss": 0.0169, "step": 3882 }, { "epoch": 10.4, "learning_rate": 4.5170071910683366e-06, "loss": 0.0039, "step": 3884 }, { "epoch": 10.4, "learning_rate": 4.507328693743463e-06, "loss": 0.002, "step": 3886 }, { "epoch": 10.41, "learning_rate": 4.497657558890577e-06, "loss": 0.0057, "step": 3888 }, { "epoch": 10.41, "learning_rate": 4.4879937994730435e-06, "loss": 0.001, "step": 3890 }, { "epoch": 10.42, "learning_rate": 4.478337428444331e-06, "loss": 0.023, "step": 3892 }, { "epoch": 10.43, "learning_rate": 4.468688458748006e-06, "loss": 0.0062, "step": 3894 }, { "epoch": 10.43, "learning_rate": 4.4590469033177165e-06, "loss": 0.0003, "step": 3896 }, { "epoch": 10.44, "learning_rate": 4.449412775077168e-06, "loss": 0.0145, "step": 3898 }, { "epoch": 10.44, "learning_rate": 4.439786086940116e-06, "loss": 0.0008, "step": 3900 }, { "epoch": 10.45, "learning_rate": 4.430166851810338e-06, "loss": 0.0009, "step": 3902 }, { "epoch": 10.45, "learning_rate": 4.4205550825816245e-06, "loss": 0.0032, "step": 3904 }, { "epoch": 10.46, "learning_rate": 4.410950792137761e-06, "loss": 0.001, "step": 3906 }, { "epoch": 10.46, "learning_rate": 4.4013539933525e-06, "loss": 0.0019, "step": 3908 }, { "epoch": 10.47, "learning_rate": 4.391764699089557e-06, "loss": 0.0013, "step": 3910 }, { "epoch": 10.47, "learning_rate": 4.382182922202596e-06, "loss": 0.0018, "step": 3912 }, { "epoch": 10.48, "learning_rate": 4.372608675535191e-06, "loss": 0.0022, "step": 3914 }, { "epoch": 10.48, "learning_rate": 4.36304197192083e-06, "loss": 0.0098, "step": 3916 }, { "epoch": 10.49, "learning_rate": 4.35348282418289e-06, "loss": 0.0437, "step": 3918 }, { "epoch": 10.5, "learning_rate": 4.343931245134616e-06, "loss": 0.0204, "step": 3920 }, { "epoch": 10.5, "learning_rate": 4.334387247579112e-06, "loss": 0.0014, "step": 3922 }, { "epoch": 10.51, "learning_rate": 4.324850844309318e-06, "loss": 0.0004, "step": 3924 }, { "epoch": 10.51, "learning_rate": 4.315322048107994e-06, "loss": 0.0002, "step": 3926 }, { "epoch": 10.52, "learning_rate": 4.305800871747703e-06, "loss": 0.0131, "step": 3928 }, { "epoch": 10.52, "learning_rate": 4.296287327990797e-06, "loss": 0.003, "step": 3930 }, { "epoch": 10.53, "learning_rate": 4.2867814295893906e-06, "loss": 0.0003, "step": 3932 }, { "epoch": 10.53, "learning_rate": 4.2772831892853624e-06, "loss": 0.0028, "step": 3934 }, { "epoch": 10.54, "learning_rate": 4.267792619810316e-06, "loss": 0.0012, "step": 3936 }, { "epoch": 10.54, "learning_rate": 4.258309733885574e-06, "loss": 0.0005, "step": 3938 }, { "epoch": 10.55, "learning_rate": 4.248834544222162e-06, "loss": 0.0035, "step": 3940 }, { "epoch": 10.55, "learning_rate": 4.2393670635207864e-06, "loss": 0.003, "step": 3942 }, { "epoch": 10.56, "learning_rate": 4.229907304471826e-06, "loss": 0.005, "step": 3944 }, { "epoch": 10.56, "learning_rate": 4.2204552797553075e-06, "loss": 0.0068, "step": 3946 }, { "epoch": 10.57, "learning_rate": 4.211011002040885e-06, "loss": 0.0001, "step": 3948 }, { "epoch": 10.58, "learning_rate": 4.201574483987836e-06, "loss": 0.0008, "step": 3950 }, { "epoch": 10.58, "learning_rate": 4.1921457382450294e-06, "loss": 0.0076, "step": 3952 }, { "epoch": 10.59, "learning_rate": 4.1827247774509216e-06, "loss": 0.0014, "step": 3954 }, { "epoch": 10.59, "learning_rate": 4.1733116142335316e-06, "loss": 0.01, "step": 3956 }, { "epoch": 10.6, "learning_rate": 4.163906261210424e-06, "loss": 0.0238, "step": 3958 }, { "epoch": 10.6, "learning_rate": 4.154508730988704e-06, "loss": 0.0005, "step": 3960 }, { "epoch": 10.61, "learning_rate": 4.14511903616498e-06, "loss": 0.0082, "step": 3962 }, { "epoch": 10.61, "learning_rate": 4.135737189325364e-06, "loss": 0.0006, "step": 3964 }, { "epoch": 10.62, "learning_rate": 4.126363203045444e-06, "loss": 0.1039, "step": 3966 }, { "epoch": 10.62, "learning_rate": 4.1169970898902745e-06, "loss": 0.0009, "step": 3968 }, { "epoch": 10.63, "learning_rate": 4.107638862414358e-06, "loss": 0.001, "step": 3970 }, { "epoch": 10.63, "learning_rate": 4.098288533161623e-06, "loss": 0.0007, "step": 3972 }, { "epoch": 10.64, "learning_rate": 4.088946114665414e-06, "loss": 0.0008, "step": 3974 }, { "epoch": 10.65, "learning_rate": 4.0796116194484715e-06, "loss": 0.0005, "step": 3976 }, { "epoch": 10.65, "learning_rate": 4.070285060022914e-06, "loss": 0.0042, "step": 3978 }, { "epoch": 10.66, "learning_rate": 4.06096644889022e-06, "loss": 0.0375, "step": 3980 }, { "epoch": 10.66, "learning_rate": 4.051655798541227e-06, "loss": 0.0007, "step": 3982 }, { "epoch": 10.67, "learning_rate": 4.042353121456086e-06, "loss": 0.0008, "step": 3984 }, { "epoch": 10.67, "learning_rate": 4.033058430104269e-06, "loss": 0.0021, "step": 3986 }, { "epoch": 10.68, "learning_rate": 4.0237717369445394e-06, "loss": 0.0015, "step": 3988 }, { "epoch": 10.68, "learning_rate": 4.0144930544249436e-06, "loss": 0.002, "step": 3990 }, { "epoch": 10.69, "learning_rate": 4.005222394982789e-06, "loss": 0.0012, "step": 3992 }, { "epoch": 10.69, "learning_rate": 3.995959771044629e-06, "loss": 0.0025, "step": 3994 }, { "epoch": 10.7, "learning_rate": 3.986705195026243e-06, "loss": 0.0003, "step": 3996 }, { "epoch": 10.7, "learning_rate": 3.977458679332628e-06, "loss": 0.0021, "step": 3998 }, { "epoch": 10.71, "learning_rate": 3.968220236357969e-06, "loss": 0.0048, "step": 4000 }, { "epoch": 10.71, "learning_rate": 3.958989878485644e-06, "loss": 0.0003, "step": 4002 }, { "epoch": 10.72, "learning_rate": 3.9497676180881794e-06, "loss": 0.0024, "step": 4004 }, { "epoch": 10.73, "learning_rate": 3.940553467527255e-06, "loss": 0.0007, "step": 4006 }, { "epoch": 10.73, "learning_rate": 3.931347439153677e-06, "loss": 0.0037, "step": 4008 }, { "epoch": 10.74, "learning_rate": 3.922149545307365e-06, "loss": 0.0013, "step": 4010 }, { "epoch": 10.74, "learning_rate": 3.912959798317338e-06, "loss": 0.0013, "step": 4012 }, { "epoch": 10.75, "learning_rate": 3.903778210501692e-06, "loss": 0.0188, "step": 4014 }, { "epoch": 10.75, "learning_rate": 3.894604794167585e-06, "loss": 0.0028, "step": 4016 }, { "epoch": 10.76, "learning_rate": 3.885439561611224e-06, "loss": 0.0097, "step": 4018 }, { "epoch": 10.76, "learning_rate": 3.876282525117847e-06, "loss": 0.0114, "step": 4020 }, { "epoch": 10.77, "learning_rate": 3.8671336969617e-06, "loss": 0.0006, "step": 4022 }, { "epoch": 10.77, "learning_rate": 3.8579930894060405e-06, "loss": 0.0026, "step": 4024 }, { "epoch": 10.78, "learning_rate": 3.84886071470309e-06, "loss": 0.0032, "step": 4026 }, { "epoch": 10.78, "learning_rate": 3.839736585094045e-06, "loss": 0.0005, "step": 4028 }, { "epoch": 10.79, "learning_rate": 3.830620712809047e-06, "loss": 0.0016, "step": 4030 }, { "epoch": 10.8, "learning_rate": 3.821513110067169e-06, "loss": 0.0009, "step": 4032 }, { "epoch": 10.8, "learning_rate": 3.8124137890764003e-06, "loss": 0.0084, "step": 4034 }, { "epoch": 10.81, "learning_rate": 3.803322762033628e-06, "loss": 0.0004, "step": 4036 }, { "epoch": 10.81, "learning_rate": 3.7942400411246226e-06, "loss": 0.0009, "step": 4038 }, { "epoch": 10.82, "learning_rate": 3.785165638524022e-06, "loss": 0.0002, "step": 4040 }, { "epoch": 10.82, "learning_rate": 3.7760995663953125e-06, "loss": 0.0004, "step": 4042 }, { "epoch": 10.83, "learning_rate": 3.7670418368908112e-06, "loss": 0.0005, "step": 4044 }, { "epoch": 10.83, "learning_rate": 3.7579924621516615e-06, "loss": 0.0032, "step": 4046 }, { "epoch": 10.84, "learning_rate": 3.748951454307801e-06, "loss": 0.0041, "step": 4048 }, { "epoch": 10.84, "learning_rate": 3.7399188254779527e-06, "loss": 0.0007, "step": 4050 }, { "epoch": 10.85, "learning_rate": 3.73089458776961e-06, "loss": 0.0107, "step": 4052 }, { "epoch": 10.85, "learning_rate": 3.7218787532790167e-06, "loss": 0.0019, "step": 4054 }, { "epoch": 10.86, "learning_rate": 3.712871334091154e-06, "loss": 0.0015, "step": 4056 }, { "epoch": 10.86, "learning_rate": 3.703872342279723e-06, "loss": 0.0004, "step": 4058 }, { "epoch": 10.87, "learning_rate": 3.694881789907129e-06, "loss": 0.0024, "step": 4060 }, { "epoch": 10.88, "learning_rate": 3.6858996890244638e-06, "loss": 0.0004, "step": 4062 }, { "epoch": 10.88, "learning_rate": 3.6769260516714935e-06, "loss": 0.0003, "step": 4064 }, { "epoch": 10.89, "learning_rate": 3.6679608898766306e-06, "loss": 0.005, "step": 4066 }, { "epoch": 10.89, "learning_rate": 3.659004215656943e-06, "loss": 0.001, "step": 4068 }, { "epoch": 10.9, "learning_rate": 3.6500560410181084e-06, "loss": 0.0004, "step": 4070 }, { "epoch": 10.9, "learning_rate": 3.6411163779544145e-06, "loss": 0.0004, "step": 4072 }, { "epoch": 10.91, "learning_rate": 3.63218523844874e-06, "loss": 0.0011, "step": 4074 }, { "epoch": 10.91, "learning_rate": 3.623262634472543e-06, "loss": 0.0037, "step": 4076 }, { "epoch": 10.92, "learning_rate": 3.6143485779858343e-06, "loss": 0.0929, "step": 4078 }, { "epoch": 10.92, "learning_rate": 3.6054430809371723e-06, "loss": 0.0006, "step": 4080 }, { "epoch": 10.93, "learning_rate": 3.5965461552636373e-06, "loss": 0.0004, "step": 4082 }, { "epoch": 10.93, "learning_rate": 3.5876578128908235e-06, "loss": 0.0015, "step": 4084 }, { "epoch": 10.94, "learning_rate": 3.5787780657328198e-06, "loss": 0.0003, "step": 4086 }, { "epoch": 10.95, "learning_rate": 3.5699069256921936e-06, "loss": 0.0024, "step": 4088 }, { "epoch": 10.95, "learning_rate": 3.5610444046599746e-06, "loss": 0.0008, "step": 4090 }, { "epoch": 10.96, "learning_rate": 3.5521905145156366e-06, "loss": 0.0059, "step": 4092 }, { "epoch": 10.96, "learning_rate": 3.543345267127094e-06, "loss": 0.0004, "step": 4094 }, { "epoch": 10.97, "learning_rate": 3.5345086743506675e-06, "loss": 0.0008, "step": 4096 }, { "epoch": 10.97, "learning_rate": 3.5256807480310793e-06, "loss": 0.0005, "step": 4098 }, { "epoch": 10.98, "learning_rate": 3.516861500001435e-06, "loss": 0.0007, "step": 4100 }, { "epoch": 10.98, "learning_rate": 3.5080509420832075e-06, "loss": 0.0152, "step": 4102 }, { "epoch": 10.99, "learning_rate": 3.4992490860862215e-06, "loss": 0.0009, "step": 4104 }, { "epoch": 10.99, "learning_rate": 3.4904559438086385e-06, "loss": 0.0017, "step": 4106 }, { "epoch": 11.0, "learning_rate": 3.4816715270369385e-06, "loss": 0.0018, "step": 4108 }, { "epoch": 11.0, "learning_rate": 3.4728958475459052e-06, "loss": 0.0012, "step": 4110 }, { "epoch": 11.01, "learning_rate": 3.4641289170986135e-06, "loss": 0.0004, "step": 4112 }, { "epoch": 11.01, "learning_rate": 3.455370747446406e-06, "loss": 0.0007, "step": 4114 }, { "epoch": 11.02, "learning_rate": 3.4466213503288905e-06, "loss": 0.002, "step": 4116 }, { "epoch": 11.03, "learning_rate": 3.4378807374739086e-06, "loss": 0.0029, "step": 4118 }, { "epoch": 11.03, "learning_rate": 3.429148920597529e-06, "loss": 0.001, "step": 4120 }, { "epoch": 11.04, "learning_rate": 3.4204259114040317e-06, "loss": 0.0007, "step": 4122 }, { "epoch": 11.04, "learning_rate": 3.4117117215858875e-06, "loss": 0.0027, "step": 4124 }, { "epoch": 11.05, "learning_rate": 3.4030063628237496e-06, "loss": 0.0005, "step": 4126 }, { "epoch": 11.05, "learning_rate": 3.394309846786432e-06, "loss": 0.0018, "step": 4128 }, { "epoch": 11.06, "learning_rate": 3.3856221851308947e-06, "loss": 0.0001, "step": 4130 }, { "epoch": 11.06, "learning_rate": 3.376943389502232e-06, "loss": 0.0011, "step": 4132 }, { "epoch": 11.07, "learning_rate": 3.3682734715336494e-06, "loss": 0.0004, "step": 4134 }, { "epoch": 11.07, "learning_rate": 3.359612442846455e-06, "loss": 0.0063, "step": 4136 }, { "epoch": 11.08, "learning_rate": 3.350960315050048e-06, "loss": 0.0056, "step": 4138 }, { "epoch": 11.08, "learning_rate": 3.342317099741886e-06, "loss": 0.0002, "step": 4140 }, { "epoch": 11.09, "learning_rate": 3.333682808507487e-06, "loss": 0.0007, "step": 4142 }, { "epoch": 11.1, "learning_rate": 3.325057452920403e-06, "loss": 0.0004, "step": 4144 }, { "epoch": 11.1, "learning_rate": 3.3164410445422103e-06, "loss": 0.0003, "step": 4146 }, { "epoch": 11.11, "learning_rate": 3.307833594922494e-06, "loss": 0.0014, "step": 4148 }, { "epoch": 11.11, "learning_rate": 3.299235115598829e-06, "loss": 0.0009, "step": 4150 }, { "epoch": 11.12, "learning_rate": 3.2906456180967638e-06, "loss": 0.0006, "step": 4152 }, { "epoch": 11.12, "learning_rate": 3.2820651139298133e-06, "loss": 0.0145, "step": 4154 }, { "epoch": 11.13, "learning_rate": 3.2734936145994312e-06, "loss": 0.0007, "step": 4156 }, { "epoch": 11.13, "learning_rate": 3.2649311315950028e-06, "loss": 0.0006, "step": 4158 }, { "epoch": 11.14, "learning_rate": 3.2563776763938358e-06, "loss": 0.0019, "step": 4160 }, { "epoch": 11.14, "learning_rate": 3.247833260461125e-06, "loss": 0.0009, "step": 4162 }, { "epoch": 11.15, "learning_rate": 3.2392978952499553e-06, "loss": 0.0163, "step": 4164 }, { "epoch": 11.15, "learning_rate": 3.2307715922012774e-06, "loss": 0.0012, "step": 4166 }, { "epoch": 11.16, "learning_rate": 3.2222543627438972e-06, "loss": 0.0008, "step": 4168 }, { "epoch": 11.16, "learning_rate": 3.2137462182944557e-06, "loss": 0.0003, "step": 4170 }, { "epoch": 11.17, "learning_rate": 3.2052471702574184e-06, "loss": 0.0006, "step": 4172 }, { "epoch": 11.18, "learning_rate": 3.196757230025056e-06, "loss": 0.0006, "step": 4174 }, { "epoch": 11.18, "learning_rate": 3.1882764089774333e-06, "loss": 0.001, "step": 4176 }, { "epoch": 11.19, "learning_rate": 3.1798047184823844e-06, "loss": 0.0021, "step": 4178 }, { "epoch": 11.19, "learning_rate": 3.1713421698955192e-06, "loss": 0.0046, "step": 4180 }, { "epoch": 11.2, "learning_rate": 3.162888774560181e-06, "loss": 0.0003, "step": 4182 }, { "epoch": 11.2, "learning_rate": 3.1544445438074466e-06, "loss": 0.0004, "step": 4184 }, { "epoch": 11.21, "learning_rate": 3.1460094889561098e-06, "loss": 0.0008, "step": 4186 }, { "epoch": 11.21, "learning_rate": 3.1375836213126653e-06, "loss": 0.0005, "step": 4188 }, { "epoch": 11.22, "learning_rate": 3.129166952171293e-06, "loss": 0.1135, "step": 4190 }, { "epoch": 11.22, "learning_rate": 3.1207594928138405e-06, "loss": 0.0052, "step": 4192 }, { "epoch": 11.23, "learning_rate": 3.1123612545098138e-06, "loss": 0.0203, "step": 4194 }, { "epoch": 11.23, "learning_rate": 3.103972248516357e-06, "loss": 0.0005, "step": 4196 }, { "epoch": 11.24, "learning_rate": 3.0955924860782384e-06, "loss": 0.0009, "step": 4198 }, { "epoch": 11.24, "learning_rate": 3.0872219784278357e-06, "loss": 0.0139, "step": 4200 }, { "epoch": 11.25, "learning_rate": 3.0788607367851264e-06, "loss": 0.0008, "step": 4202 }, { "epoch": 11.26, "learning_rate": 3.0705087723576645e-06, "loss": 0.0003, "step": 4204 }, { "epoch": 11.26, "learning_rate": 3.062166096340562e-06, "loss": 0.0006, "step": 4206 }, { "epoch": 11.27, "learning_rate": 3.053832719916493e-06, "loss": 0.0006, "step": 4208 }, { "epoch": 11.27, "learning_rate": 3.045508654255659e-06, "loss": 0.0016, "step": 4210 }, { "epoch": 11.28, "learning_rate": 3.037193910515781e-06, "loss": 0.0006, "step": 4212 }, { "epoch": 11.28, "learning_rate": 3.0288884998420852e-06, "loss": 0.002, "step": 4214 }, { "epoch": 11.29, "learning_rate": 3.0205924333672886e-06, "loss": 0.0009, "step": 4216 }, { "epoch": 11.29, "learning_rate": 3.0123057222115835e-06, "loss": 0.0004, "step": 4218 }, { "epoch": 11.3, "learning_rate": 3.004028377482622e-06, "loss": 0.0002, "step": 4220 }, { "epoch": 11.3, "learning_rate": 2.995760410275498e-06, "loss": 0.0002, "step": 4222 }, { "epoch": 11.31, "learning_rate": 2.98750183167274e-06, "loss": 0.0007, "step": 4224 }, { "epoch": 11.31, "learning_rate": 2.9792526527442876e-06, "loss": 0.0099, "step": 4226 }, { "epoch": 11.32, "learning_rate": 2.9710128845474897e-06, "loss": 0.0002, "step": 4228 }, { "epoch": 11.33, "learning_rate": 2.9627825381270704e-06, "loss": 0.0006, "step": 4230 }, { "epoch": 11.33, "learning_rate": 2.9545616245151288e-06, "loss": 0.0006, "step": 4232 }, { "epoch": 11.34, "learning_rate": 2.9463501547311212e-06, "loss": 0.0028, "step": 4234 }, { "epoch": 11.34, "learning_rate": 2.9381481397818446e-06, "loss": 0.0074, "step": 4236 }, { "epoch": 11.35, "learning_rate": 2.9299555906614217e-06, "loss": 0.0011, "step": 4238 }, { "epoch": 11.35, "learning_rate": 2.9217725183512868e-06, "loss": 0.0002, "step": 4240 }, { "epoch": 11.36, "learning_rate": 2.913598933820174e-06, "loss": 0.0038, "step": 4242 }, { "epoch": 11.36, "learning_rate": 2.9054348480240955e-06, "loss": 0.0012, "step": 4244 }, { "epoch": 11.37, "learning_rate": 2.8972802719063366e-06, "loss": 0.0032, "step": 4246 }, { "epoch": 11.37, "learning_rate": 2.8891352163974264e-06, "loss": 0.0058, "step": 4248 }, { "epoch": 11.38, "learning_rate": 2.880999692415147e-06, "loss": 0.0003, "step": 4250 }, { "epoch": 11.38, "learning_rate": 2.8728737108644935e-06, "loss": 0.0017, "step": 4252 }, { "epoch": 11.39, "learning_rate": 2.86475728263767e-06, "loss": 0.001, "step": 4254 }, { "epoch": 11.39, "learning_rate": 2.8566504186140807e-06, "loss": 0.0073, "step": 4256 }, { "epoch": 11.4, "learning_rate": 2.8485531296603052e-06, "loss": 0.0002, "step": 4258 }, { "epoch": 11.41, "learning_rate": 2.840465426630091e-06, "loss": 0.0014, "step": 4260 }, { "epoch": 11.41, "learning_rate": 2.8323873203643346e-06, "loss": 0.0004, "step": 4262 }, { "epoch": 11.42, "learning_rate": 2.824318821691071e-06, "loss": 0.0006, "step": 4264 }, { "epoch": 11.42, "learning_rate": 2.8162599414254556e-06, "loss": 0.0008, "step": 4266 }, { "epoch": 11.43, "learning_rate": 2.8082106903697516e-06, "loss": 0.0005, "step": 4268 }, { "epoch": 11.43, "learning_rate": 2.800171079313312e-06, "loss": 0.0037, "step": 4270 }, { "epoch": 11.44, "learning_rate": 2.7921411190325753e-06, "loss": 0.003, "step": 4272 }, { "epoch": 11.44, "learning_rate": 2.7841208202910395e-06, "loss": 0.0009, "step": 4274 }, { "epoch": 11.45, "learning_rate": 2.776110193839251e-06, "loss": 0.0002, "step": 4276 }, { "epoch": 11.45, "learning_rate": 2.768109250414791e-06, "loss": 0.0003, "step": 4278 }, { "epoch": 11.46, "learning_rate": 2.7601180007422657e-06, "loss": 0.0041, "step": 4280 }, { "epoch": 11.46, "learning_rate": 2.752136455533284e-06, "loss": 0.0001, "step": 4282 }, { "epoch": 11.47, "learning_rate": 2.744164625486446e-06, "loss": 0.0003, "step": 4284 }, { "epoch": 11.48, "learning_rate": 2.7362025212873346e-06, "loss": 0.0041, "step": 4286 }, { "epoch": 11.48, "learning_rate": 2.7282501536084895e-06, "loss": 0.0004, "step": 4288 }, { "epoch": 11.49, "learning_rate": 2.720307533109402e-06, "loss": 0.013, "step": 4290 }, { "epoch": 11.49, "learning_rate": 2.7123746704364984e-06, "loss": 0.0032, "step": 4292 }, { "epoch": 11.5, "learning_rate": 2.7044515762231294e-06, "loss": 0.0014, "step": 4294 }, { "epoch": 11.5, "learning_rate": 2.6965382610895463e-06, "loss": 0.0009, "step": 4296 }, { "epoch": 11.51, "learning_rate": 2.6886347356428932e-06, "loss": 0.0007, "step": 4298 }, { "epoch": 11.51, "learning_rate": 2.6807410104771937e-06, "loss": 0.0003, "step": 4300 }, { "epoch": 11.52, "learning_rate": 2.6728570961733335e-06, "loss": 0.0004, "step": 4302 }, { "epoch": 11.52, "learning_rate": 2.6649830032990477e-06, "loss": 0.0005, "step": 4304 }, { "epoch": 11.53, "learning_rate": 2.6571187424089072e-06, "loss": 0.0002, "step": 4306 }, { "epoch": 11.53, "learning_rate": 2.649264324044306e-06, "loss": 0.0283, "step": 4308 }, { "epoch": 11.54, "learning_rate": 2.64141975873344e-06, "loss": 0.0003, "step": 4310 }, { "epoch": 11.54, "learning_rate": 2.6335850569913022e-06, "loss": 0.0006, "step": 4312 }, { "epoch": 11.55, "learning_rate": 2.625760229319659e-06, "loss": 0.0002, "step": 4314 }, { "epoch": 11.56, "learning_rate": 2.617945286207053e-06, "loss": 0.0003, "step": 4316 }, { "epoch": 11.56, "learning_rate": 2.6101402381287654e-06, "loss": 0.0011, "step": 4318 }, { "epoch": 11.57, "learning_rate": 2.6023450955468176e-06, "loss": 0.0002, "step": 4320 }, { "epoch": 11.57, "learning_rate": 2.594559868909956e-06, "loss": 0.0005, "step": 4322 }, { "epoch": 11.58, "learning_rate": 2.5867845686536306e-06, "loss": 0.0038, "step": 4324 }, { "epoch": 11.58, "learning_rate": 2.579019205199992e-06, "loss": 0.0017, "step": 4326 }, { "epoch": 11.59, "learning_rate": 2.571263788957866e-06, "loss": 0.0001, "step": 4328 }, { "epoch": 11.59, "learning_rate": 2.5635183303227495e-06, "loss": 0.0008, "step": 4330 }, { "epoch": 11.6, "learning_rate": 2.555782839676787e-06, "loss": 0.0031, "step": 4332 }, { "epoch": 11.6, "learning_rate": 2.5480573273887654e-06, "loss": 0.0002, "step": 4334 }, { "epoch": 11.61, "learning_rate": 2.5403418038140937e-06, "loss": 0.0003, "step": 4336 }, { "epoch": 11.61, "learning_rate": 2.532636279294799e-06, "loss": 0.0005, "step": 4338 }, { "epoch": 11.62, "learning_rate": 2.5249407641594936e-06, "loss": 0.0002, "step": 4340 }, { "epoch": 11.63, "learning_rate": 2.517255268723385e-06, "loss": 0.0004, "step": 4342 }, { "epoch": 11.63, "learning_rate": 2.5095798032882447e-06, "loss": 0.0001, "step": 4344 }, { "epoch": 11.64, "learning_rate": 2.501914378142396e-06, "loss": 0.0002, "step": 4346 }, { "epoch": 11.64, "learning_rate": 2.4942590035607104e-06, "loss": 0.0018, "step": 4348 }, { "epoch": 11.65, "learning_rate": 2.4866136898045844e-06, "loss": 0.0005, "step": 4350 }, { "epoch": 11.65, "learning_rate": 2.4789784471219293e-06, "loss": 0.0604, "step": 4352 }, { "epoch": 11.66, "learning_rate": 2.4713532857471568e-06, "loss": 0.0004, "step": 4354 }, { "epoch": 11.66, "learning_rate": 2.4637382159011656e-06, "loss": 0.0004, "step": 4356 }, { "epoch": 11.67, "learning_rate": 2.456133247791328e-06, "loss": 0.0004, "step": 4358 }, { "epoch": 11.67, "learning_rate": 2.4485383916114747e-06, "loss": 0.0036, "step": 4360 }, { "epoch": 11.68, "learning_rate": 2.440953657541879e-06, "loss": 0.0007, "step": 4362 }, { "epoch": 11.68, "learning_rate": 2.433379055749259e-06, "loss": 0.0028, "step": 4364 }, { "epoch": 11.69, "learning_rate": 2.425814596386735e-06, "loss": 0.0002, "step": 4366 }, { "epoch": 11.69, "learning_rate": 2.4182602895938447e-06, "loss": 0.0017, "step": 4368 }, { "epoch": 11.7, "learning_rate": 2.4107161454965088e-06, "loss": 0.0018, "step": 4370 }, { "epoch": 11.71, "learning_rate": 2.4031821742070293e-06, "loss": 0.0001, "step": 4372 }, { "epoch": 11.71, "learning_rate": 2.3956583858240736e-06, "loss": 0.0003, "step": 4374 }, { "epoch": 11.72, "learning_rate": 2.388144790432657e-06, "loss": 0.0004, "step": 4376 }, { "epoch": 11.72, "learning_rate": 2.3806413981041344e-06, "loss": 0.0002, "step": 4378 }, { "epoch": 11.73, "learning_rate": 2.373148218896182e-06, "loss": 0.0007, "step": 4380 }, { "epoch": 11.73, "learning_rate": 2.3656652628527877e-06, "loss": 0.0002, "step": 4382 }, { "epoch": 11.74, "learning_rate": 2.358192540004233e-06, "loss": 0.0003, "step": 4384 }, { "epoch": 11.74, "learning_rate": 2.35073006036709e-06, "loss": 0.0003, "step": 4386 }, { "epoch": 11.75, "learning_rate": 2.3432778339441953e-06, "loss": 0.0002, "step": 4388 }, { "epoch": 11.75, "learning_rate": 2.335835870724641e-06, "loss": 0.0014, "step": 4390 }, { "epoch": 11.76, "learning_rate": 2.3284041806837643e-06, "loss": 0.0007, "step": 4392 }, { "epoch": 11.76, "learning_rate": 2.3209827737831315e-06, "loss": 0.0002, "step": 4394 }, { "epoch": 11.77, "learning_rate": 2.313571659970526e-06, "loss": 0.0022, "step": 4396 }, { "epoch": 11.78, "learning_rate": 2.306170849179932e-06, "loss": 0.0004, "step": 4398 }, { "epoch": 11.78, "learning_rate": 2.2987803513315256e-06, "loss": 0.0023, "step": 4400 }, { "epoch": 11.79, "learning_rate": 2.2914001763316583e-06, "loss": 0.0005, "step": 4402 }, { "epoch": 11.79, "learning_rate": 2.284030334072842e-06, "loss": 0.0031, "step": 4404 }, { "epoch": 11.8, "learning_rate": 2.276670834433746e-06, "loss": 0.0005, "step": 4406 }, { "epoch": 11.8, "learning_rate": 2.269321687279171e-06, "loss": 0.0009, "step": 4408 }, { "epoch": 11.81, "learning_rate": 2.2619829024600394e-06, "loss": 0.0002, "step": 4410 }, { "epoch": 11.81, "learning_rate": 2.2546544898133873e-06, "loss": 0.0002, "step": 4412 }, { "epoch": 11.82, "learning_rate": 2.2473364591623435e-06, "loss": 0.0007, "step": 4414 }, { "epoch": 11.82, "learning_rate": 2.2400288203161268e-06, "loss": 0.0011, "step": 4416 }, { "epoch": 11.83, "learning_rate": 2.23273158307002e-06, "loss": 0.0002, "step": 4418 }, { "epoch": 11.83, "learning_rate": 2.2254447572053694e-06, "loss": 0.0065, "step": 4420 }, { "epoch": 11.84, "learning_rate": 2.2181683524895615e-06, "loss": 0.0007, "step": 4422 }, { "epoch": 11.84, "learning_rate": 2.210902378676015e-06, "loss": 0.0005, "step": 4424 }, { "epoch": 11.85, "learning_rate": 2.203646845504166e-06, "loss": 0.0004, "step": 4426 }, { "epoch": 11.86, "learning_rate": 2.1964017626994617e-06, "loss": 0.0017, "step": 4428 }, { "epoch": 11.86, "learning_rate": 2.189167139973335e-06, "loss": 0.0004, "step": 4430 }, { "epoch": 11.87, "learning_rate": 2.181942987023199e-06, "loss": 0.0004, "step": 4432 }, { "epoch": 11.87, "learning_rate": 2.174729313532433e-06, "loss": 0.0009, "step": 4434 }, { "epoch": 11.88, "learning_rate": 2.1675261291703733e-06, "loss": 0.0003, "step": 4436 }, { "epoch": 11.88, "learning_rate": 2.1603334435922897e-06, "loss": 0.0001, "step": 4438 }, { "epoch": 11.89, "learning_rate": 2.153151266439384e-06, "loss": 0.0004, "step": 4440 }, { "epoch": 11.89, "learning_rate": 2.145979607338773e-06, "loss": 0.0013, "step": 4442 }, { "epoch": 11.9, "learning_rate": 2.1388184759034703e-06, "loss": 0.0007, "step": 4444 }, { "epoch": 11.9, "learning_rate": 2.131667881732382e-06, "loss": 0.0006, "step": 4446 }, { "epoch": 11.91, "learning_rate": 2.124527834410287e-06, "loss": 0.0005, "step": 4448 }, { "epoch": 11.91, "learning_rate": 2.1173983435078325e-06, "loss": 0.0002, "step": 4450 }, { "epoch": 11.92, "learning_rate": 2.11027941858151e-06, "loss": 0.0003, "step": 4452 }, { "epoch": 11.93, "learning_rate": 2.1031710691736507e-06, "loss": 0.0009, "step": 4454 }, { "epoch": 11.93, "learning_rate": 2.0960733048124082e-06, "loss": 0.0003, "step": 4456 }, { "epoch": 11.94, "learning_rate": 2.08898613501175e-06, "loss": 0.0002, "step": 4458 }, { "epoch": 11.94, "learning_rate": 2.081909569271442e-06, "loss": 0.0007, "step": 4460 }, { "epoch": 11.95, "learning_rate": 2.0748436170770336e-06, "loss": 0.0005, "step": 4462 }, { "epoch": 11.95, "learning_rate": 2.067788287899852e-06, "loss": 0.0008, "step": 4464 }, { "epoch": 11.96, "learning_rate": 2.060743591196983e-06, "loss": 0.005, "step": 4466 }, { "epoch": 11.96, "learning_rate": 2.0537095364112593e-06, "loss": 0.001, "step": 4468 }, { "epoch": 11.97, "learning_rate": 2.0466861329712473e-06, "loss": 0.0005, "step": 4470 }, { "epoch": 11.97, "learning_rate": 2.0396733902912446e-06, "loss": 0.0002, "step": 4472 }, { "epoch": 11.98, "learning_rate": 2.0326713177712477e-06, "loss": 0.0002, "step": 4474 }, { "epoch": 11.98, "learning_rate": 2.0256799247969627e-06, "loss": 0.0004, "step": 4476 }, { "epoch": 11.99, "learning_rate": 2.0186992207397706e-06, "loss": 0.0002, "step": 4478 }, { "epoch": 11.99, "learning_rate": 2.011729214956728e-06, "loss": 0.0003, "step": 4480 }, { "epoch": 12.0, "learning_rate": 2.004769916790552e-06, "loss": 0.0018, "step": 4482 }, { "epoch": 12.01, "learning_rate": 1.9978213355696074e-06, "loss": 0.0002, "step": 4484 }, { "epoch": 12.01, "learning_rate": 1.9908834806078915e-06, "loss": 0.0001, "step": 4486 }, { "epoch": 12.02, "learning_rate": 1.9839563612050273e-06, "loss": 0.0003, "step": 4488 }, { "epoch": 12.02, "learning_rate": 1.977039986646244e-06, "loss": 0.0011, "step": 4490 }, { "epoch": 12.03, "learning_rate": 1.970134366202373e-06, "loss": 0.0001, "step": 4492 }, { "epoch": 12.03, "learning_rate": 1.963239509129825e-06, "loss": 0.0027, "step": 4494 }, { "epoch": 12.04, "learning_rate": 1.956355424670585e-06, "loss": 0.0002, "step": 4496 }, { "epoch": 12.04, "learning_rate": 1.949482122052204e-06, "loss": 0.0003, "step": 4498 }, { "epoch": 12.05, "learning_rate": 1.9426196104877737e-06, "loss": 0.0002, "step": 4500 }, { "epoch": 12.05, "learning_rate": 1.9357678991759244e-06, "loss": 0.0004, "step": 4502 }, { "epoch": 12.06, "learning_rate": 1.9289269973008084e-06, "loss": 0.0027, "step": 4504 }, { "epoch": 12.06, "learning_rate": 1.922096914032089e-06, "loss": 0.0004, "step": 4506 }, { "epoch": 12.07, "learning_rate": 1.915277658524929e-06, "loss": 0.0007, "step": 4508 }, { "epoch": 12.07, "learning_rate": 1.9084692399199755e-06, "loss": 0.0004, "step": 4510 }, { "epoch": 12.08, "learning_rate": 1.9016716673433532e-06, "loss": 0.0007, "step": 4512 }, { "epoch": 12.09, "learning_rate": 1.8948849499066456e-06, "loss": 0.0038, "step": 4514 }, { "epoch": 12.09, "learning_rate": 1.888109096706886e-06, "loss": 0.0007, "step": 4516 }, { "epoch": 12.1, "learning_rate": 1.8813441168265435e-06, "loss": 0.0004, "step": 4518 }, { "epoch": 12.1, "learning_rate": 1.8745900193335198e-06, "loss": 0.001, "step": 4520 }, { "epoch": 12.11, "learning_rate": 1.8678468132811211e-06, "loss": 0.0008, "step": 4522 }, { "epoch": 12.11, "learning_rate": 1.8611145077080595e-06, "loss": 0.0004, "step": 4524 }, { "epoch": 12.12, "learning_rate": 1.8543931116384327e-06, "loss": 0.0007, "step": 4526 }, { "epoch": 12.12, "learning_rate": 1.8476826340817177e-06, "loss": 0.0002, "step": 4528 }, { "epoch": 12.13, "learning_rate": 1.8409830840327546e-06, "loss": 0.0, "step": 4530 }, { "epoch": 12.13, "learning_rate": 1.834294470471737e-06, "loss": 0.0009, "step": 4532 }, { "epoch": 12.14, "learning_rate": 1.8276168023641982e-06, "loss": 0.0002, "step": 4534 }, { "epoch": 12.14, "learning_rate": 1.820950088661001e-06, "loss": 0.0001, "step": 4536 }, { "epoch": 12.15, "learning_rate": 1.8142943382983247e-06, "loss": 0.0009, "step": 4538 }, { "epoch": 12.16, "learning_rate": 1.8076495601976484e-06, "loss": 0.0005, "step": 4540 }, { "epoch": 12.16, "learning_rate": 1.8010157632657544e-06, "loss": 0.0002, "step": 4542 }, { "epoch": 12.17, "learning_rate": 1.794392956394696e-06, "loss": 0.0003, "step": 4544 }, { "epoch": 12.17, "learning_rate": 1.787781148461799e-06, "loss": 0.0008, "step": 4546 }, { "epoch": 12.18, "learning_rate": 1.7811803483296464e-06, "loss": 0.0002, "step": 4548 }, { "epoch": 12.18, "learning_rate": 1.7745905648460637e-06, "loss": 0.0004, "step": 4550 }, { "epoch": 12.19, "learning_rate": 1.768011806844112e-06, "loss": 0.0003, "step": 4552 }, { "epoch": 12.19, "learning_rate": 1.7614440831420732e-06, "loss": 0.0005, "step": 4554 }, { "epoch": 12.2, "learning_rate": 1.754887402543437e-06, "loss": 0.0019, "step": 4556 }, { "epoch": 12.2, "learning_rate": 1.7483417738368923e-06, "loss": 0.0002, "step": 4558 }, { "epoch": 12.21, "learning_rate": 1.7418072057963143e-06, "loss": 0.0021, "step": 4560 }, { "epoch": 12.21, "learning_rate": 1.7352837071807482e-06, "loss": 0.0023, "step": 4562 }, { "epoch": 12.22, "learning_rate": 1.7287712867344099e-06, "loss": 0.0004, "step": 4564 }, { "epoch": 12.22, "learning_rate": 1.7222699531866583e-06, "loss": 0.0025, "step": 4566 }, { "epoch": 12.23, "learning_rate": 1.715779715251994e-06, "loss": 0.0002, "step": 4568 }, { "epoch": 12.24, "learning_rate": 1.7093005816300445e-06, "loss": 0.0005, "step": 4570 }, { "epoch": 12.24, "learning_rate": 1.702832561005553e-06, "loss": 0.0001, "step": 4572 }, { "epoch": 12.25, "learning_rate": 1.6963756620483673e-06, "loss": 0.0002, "step": 4574 }, { "epoch": 12.25, "learning_rate": 1.6899298934134256e-06, "loss": 0.0002, "step": 4576 }, { "epoch": 12.26, "learning_rate": 1.6834952637407487e-06, "loss": 0.0002, "step": 4578 }, { "epoch": 12.26, "learning_rate": 1.677071781655426e-06, "loss": 0.0003, "step": 4580 }, { "epoch": 12.27, "learning_rate": 1.6706594557676047e-06, "loss": 0.0002, "step": 4582 }, { "epoch": 12.27, "learning_rate": 1.664258294672475e-06, "loss": 0.0229, "step": 4584 }, { "epoch": 12.28, "learning_rate": 1.6578683069502698e-06, "loss": 0.0003, "step": 4586 }, { "epoch": 12.28, "learning_rate": 1.651489501166237e-06, "loss": 0.0003, "step": 4588 }, { "epoch": 12.29, "learning_rate": 1.6451218858706374e-06, "loss": 0.0006, "step": 4590 }, { "epoch": 12.29, "learning_rate": 1.6387654695987332e-06, "loss": 0.0016, "step": 4592 }, { "epoch": 12.3, "learning_rate": 1.6324202608707772e-06, "loss": 0.0014, "step": 4594 }, { "epoch": 12.31, "learning_rate": 1.6260862681919965e-06, "loss": 0.0001, "step": 4596 }, { "epoch": 12.31, "learning_rate": 1.6197635000525835e-06, "loss": 0.0001, "step": 4598 }, { "epoch": 12.32, "learning_rate": 1.613451964927688e-06, "loss": 0.0002, "step": 4600 }, { "epoch": 12.32, "learning_rate": 1.6071516712774005e-06, "loss": 0.0143, "step": 4602 }, { "epoch": 12.33, "learning_rate": 1.600862627546741e-06, "loss": 0.0001, "step": 4604 }, { "epoch": 12.33, "learning_rate": 1.5945848421656595e-06, "loss": 0.0002, "step": 4606 }, { "epoch": 12.34, "learning_rate": 1.588318323549004e-06, "loss": 0.0003, "step": 4608 }, { "epoch": 12.34, "learning_rate": 1.5820630800965253e-06, "loss": 0.0002, "step": 4610 }, { "epoch": 12.35, "learning_rate": 1.575819120192862e-06, "loss": 0.0001, "step": 4612 }, { "epoch": 12.35, "learning_rate": 1.5695864522075256e-06, "loss": 0.0018, "step": 4614 }, { "epoch": 12.36, "learning_rate": 1.5633650844948922e-06, "loss": 0.0006, "step": 4616 }, { "epoch": 12.36, "learning_rate": 1.5571550253941925e-06, "loss": 0.0005, "step": 4618 }, { "epoch": 12.37, "learning_rate": 1.5509562832294944e-06, "loss": 0.0004, "step": 4620 }, { "epoch": 12.37, "learning_rate": 1.5447688663097027e-06, "loss": 0.0014, "step": 4622 }, { "epoch": 12.38, "learning_rate": 1.5385927829285364e-06, "loss": 0.0003, "step": 4624 }, { "epoch": 12.39, "learning_rate": 1.5324280413645254e-06, "loss": 0.0008, "step": 4626 }, { "epoch": 12.39, "learning_rate": 1.5262746498809976e-06, "loss": 0.0005, "step": 4628 }, { "epoch": 12.4, "learning_rate": 1.5201326167260644e-06, "loss": 0.0002, "step": 4630 }, { "epoch": 12.4, "learning_rate": 1.514001950132611e-06, "loss": 0.0004, "step": 4632 }, { "epoch": 12.41, "learning_rate": 1.507882658318296e-06, "loss": 0.0002, "step": 4634 }, { "epoch": 12.41, "learning_rate": 1.5017747494855194e-06, "loss": 0.0003, "step": 4636 }, { "epoch": 12.42, "learning_rate": 1.4956782318214302e-06, "loss": 0.0003, "step": 4638 }, { "epoch": 12.42, "learning_rate": 1.489593113497907e-06, "loss": 0.0002, "step": 4640 }, { "epoch": 12.43, "learning_rate": 1.483519402671546e-06, "loss": 0.0001, "step": 4642 }, { "epoch": 12.43, "learning_rate": 1.4774571074836564e-06, "loss": 0.0011, "step": 4644 }, { "epoch": 12.44, "learning_rate": 1.4714062360602422e-06, "loss": 0.0002, "step": 4646 }, { "epoch": 12.44, "learning_rate": 1.4653667965119978e-06, "loss": 0.0004, "step": 4648 }, { "epoch": 12.45, "learning_rate": 1.459338796934293e-06, "loss": 0.0004, "step": 4650 }, { "epoch": 12.46, "learning_rate": 1.4533222454071605e-06, "loss": 0.0017, "step": 4652 }, { "epoch": 12.46, "learning_rate": 1.4473171499952954e-06, "loss": 0.0005, "step": 4654 }, { "epoch": 12.47, "learning_rate": 1.441323518748029e-06, "loss": 0.0003, "step": 4656 }, { "epoch": 12.47, "learning_rate": 1.4353413596993282e-06, "loss": 0.0025, "step": 4658 }, { "epoch": 12.48, "learning_rate": 1.4293706808677831e-06, "loss": 0.0009, "step": 4660 }, { "epoch": 12.48, "learning_rate": 1.4234114902565965e-06, "loss": 0.0004, "step": 4662 }, { "epoch": 12.49, "learning_rate": 1.4174637958535698e-06, "loss": 0.0006, "step": 4664 }, { "epoch": 12.49, "learning_rate": 1.4115276056310968e-06, "loss": 0.0003, "step": 4666 }, { "epoch": 12.5, "learning_rate": 1.405602927546148e-06, "loss": 0.0018, "step": 4668 }, { "epoch": 12.5, "learning_rate": 1.3996897695402679e-06, "loss": 0.001, "step": 4670 }, { "epoch": 12.51, "learning_rate": 1.3937881395395525e-06, "loss": 0.0003, "step": 4672 }, { "epoch": 12.51, "learning_rate": 1.3878980454546499e-06, "loss": 0.0009, "step": 4674 }, { "epoch": 12.52, "learning_rate": 1.3820194951807475e-06, "loss": 0.0009, "step": 4676 }, { "epoch": 12.52, "learning_rate": 1.3761524965975547e-06, "loss": 0.0002, "step": 4678 }, { "epoch": 12.53, "learning_rate": 1.3702970575692975e-06, "loss": 0.0001, "step": 4680 }, { "epoch": 12.54, "learning_rate": 1.3644531859447074e-06, "loss": 0.0023, "step": 4682 }, { "epoch": 12.54, "learning_rate": 1.358620889557014e-06, "loss": 0.0007, "step": 4684 }, { "epoch": 12.55, "learning_rate": 1.3528001762239263e-06, "loss": 0.0009, "step": 4686 }, { "epoch": 12.55, "learning_rate": 1.3469910537476294e-06, "loss": 0.0003, "step": 4688 }, { "epoch": 12.56, "learning_rate": 1.3411935299147739e-06, "loss": 0.0003, "step": 4690 }, { "epoch": 12.56, "learning_rate": 1.3354076124964598e-06, "loss": 0.0005, "step": 4692 }, { "epoch": 12.57, "learning_rate": 1.3296333092482316e-06, "loss": 0.0003, "step": 4694 }, { "epoch": 12.57, "learning_rate": 1.3238706279100643e-06, "loss": 0.0002, "step": 4696 }, { "epoch": 12.58, "learning_rate": 1.3181195762063592e-06, "loss": 0.0043, "step": 4698 }, { "epoch": 12.58, "learning_rate": 1.3123801618459242e-06, "loss": 0.0003, "step": 4700 }, { "epoch": 12.59, "learning_rate": 1.3066523925219699e-06, "loss": 0.0001, "step": 4702 }, { "epoch": 12.59, "learning_rate": 1.300936275912098e-06, "loss": 0.0002, "step": 4704 }, { "epoch": 12.6, "learning_rate": 1.2952318196782898e-06, "loss": 0.0015, "step": 4706 }, { "epoch": 12.61, "learning_rate": 1.2895390314668987e-06, "loss": 0.0003, "step": 4708 }, { "epoch": 12.61, "learning_rate": 1.2838579189086352e-06, "loss": 0.0025, "step": 4710 }, { "epoch": 12.62, "learning_rate": 1.2781884896185625e-06, "loss": 0.0002, "step": 4712 }, { "epoch": 12.62, "learning_rate": 1.2725307511960815e-06, "loss": 0.0001, "step": 4714 }, { "epoch": 12.63, "learning_rate": 1.266884711224924e-06, "loss": 0.0003, "step": 4716 }, { "epoch": 12.63, "learning_rate": 1.2612503772731366e-06, "loss": 0.0026, "step": 4718 }, { "epoch": 12.64, "learning_rate": 1.255627756893083e-06, "loss": 0.0003, "step": 4720 }, { "epoch": 12.64, "learning_rate": 1.25001685762142e-06, "loss": 0.0005, "step": 4722 }, { "epoch": 12.65, "learning_rate": 1.2444176869790925e-06, "loss": 0.0002, "step": 4724 }, { "epoch": 12.65, "learning_rate": 1.238830252471328e-06, "loss": 0.0002, "step": 4726 }, { "epoch": 12.66, "learning_rate": 1.233254561587619e-06, "loss": 0.0002, "step": 4728 }, { "epoch": 12.66, "learning_rate": 1.2276906218017193e-06, "loss": 0.0002, "step": 4730 }, { "epoch": 12.67, "learning_rate": 1.2221384405716308e-06, "loss": 0.0002, "step": 4732 }, { "epoch": 12.67, "learning_rate": 1.2165980253395938e-06, "loss": 0.0005, "step": 4734 }, { "epoch": 12.68, "learning_rate": 1.211069383532073e-06, "loss": 0.0002, "step": 4736 }, { "epoch": 12.69, "learning_rate": 1.205552522559762e-06, "loss": 0.0002, "step": 4738 }, { "epoch": 12.69, "learning_rate": 1.2000474498175552e-06, "loss": 0.0003, "step": 4740 }, { "epoch": 12.7, "learning_rate": 1.1945541726845455e-06, "loss": 0.0002, "step": 4742 }, { "epoch": 12.7, "learning_rate": 1.1890726985240163e-06, "loss": 0.0004, "step": 4744 }, { "epoch": 12.71, "learning_rate": 1.183603034683436e-06, "loss": 0.0007, "step": 4746 }, { "epoch": 12.71, "learning_rate": 1.1781451884944328e-06, "loss": 0.0008, "step": 4748 }, { "epoch": 12.72, "learning_rate": 1.1726991672728006e-06, "loss": 0.0002, "step": 4750 }, { "epoch": 12.72, "learning_rate": 1.1672649783184787e-06, "loss": 0.0044, "step": 4752 }, { "epoch": 12.73, "learning_rate": 1.1618426289155493e-06, "loss": 0.001, "step": 4754 }, { "epoch": 12.73, "learning_rate": 1.1564321263322242e-06, "loss": 0.0003, "step": 4756 }, { "epoch": 12.74, "learning_rate": 1.1510334778208332e-06, "loss": 0.0003, "step": 4758 }, { "epoch": 12.74, "learning_rate": 1.145646690617821e-06, "loss": 0.0002, "step": 4760 }, { "epoch": 12.75, "learning_rate": 1.1402717719437295e-06, "loss": 0.0006, "step": 4762 }, { "epoch": 12.76, "learning_rate": 1.1349087290031924e-06, "loss": 0.0023, "step": 4764 }, { "epoch": 12.76, "learning_rate": 1.1295575689849248e-06, "loss": 0.0001, "step": 4766 }, { "epoch": 12.77, "learning_rate": 1.124218299061718e-06, "loss": 0.0004, "step": 4768 }, { "epoch": 12.77, "learning_rate": 1.118890926390419e-06, "loss": 0.0004, "step": 4770 }, { "epoch": 12.78, "learning_rate": 1.113575458111933e-06, "loss": 0.0005, "step": 4772 }, { "epoch": 12.78, "learning_rate": 1.1082719013512032e-06, "loss": 0.0003, "step": 4774 }, { "epoch": 12.79, "learning_rate": 1.1029802632172116e-06, "loss": 0.0001, "step": 4776 }, { "epoch": 12.79, "learning_rate": 1.0977005508029603e-06, "loss": 0.0002, "step": 4778 }, { "epoch": 12.8, "learning_rate": 1.0924327711854688e-06, "loss": 0.0003, "step": 4780 }, { "epoch": 12.8, "learning_rate": 1.0871769314257586e-06, "loss": 0.0004, "step": 4782 }, { "epoch": 12.81, "learning_rate": 1.0819330385688497e-06, "loss": 0.0001, "step": 4784 }, { "epoch": 12.81, "learning_rate": 1.0767010996437476e-06, "loss": 0.0082, "step": 4786 }, { "epoch": 12.82, "learning_rate": 1.0714811216634314e-06, "loss": 0.0001, "step": 4788 }, { "epoch": 12.82, "learning_rate": 1.0662731116248537e-06, "loss": 0.0005, "step": 4790 }, { "epoch": 12.83, "learning_rate": 1.0610770765089206e-06, "loss": 0.0003, "step": 4792 }, { "epoch": 12.84, "learning_rate": 1.0558930232804877e-06, "loss": 0.0007, "step": 4794 }, { "epoch": 12.84, "learning_rate": 1.0507209588883493e-06, "loss": 0.0002, "step": 4796 }, { "epoch": 12.85, "learning_rate": 1.0455608902652314e-06, "loss": 0.0004, "step": 4798 }, { "epoch": 12.85, "learning_rate": 1.0404128243277778e-06, "loss": 0.0004, "step": 4800 }, { "epoch": 12.86, "learning_rate": 1.0352767679765474e-06, "loss": 0.0012, "step": 4802 }, { "epoch": 12.86, "learning_rate": 1.0301527280959978e-06, "loss": 0.0002, "step": 4804 }, { "epoch": 12.87, "learning_rate": 1.0250407115544825e-06, "loss": 0.0003, "step": 4806 }, { "epoch": 12.87, "learning_rate": 1.0199407252042349e-06, "loss": 0.0005, "step": 4808 }, { "epoch": 12.88, "learning_rate": 1.0148527758813665e-06, "loss": 0.0001, "step": 4810 }, { "epoch": 12.88, "learning_rate": 1.0097768704058542e-06, "loss": 0.0001, "step": 4812 }, { "epoch": 12.89, "learning_rate": 1.0047130155815288e-06, "loss": 0.0002, "step": 4814 }, { "epoch": 12.89, "learning_rate": 9.996612181960696e-07, "loss": 0.0001, "step": 4816 }, { "epoch": 12.9, "learning_rate": 9.946214850209923e-07, "loss": 0.0001, "step": 4818 }, { "epoch": 12.9, "learning_rate": 9.895938228116452e-07, "loss": 0.0023, "step": 4820 }, { "epoch": 12.91, "learning_rate": 9.845782383071912e-07, "loss": 0.0002, "step": 4822 }, { "epoch": 12.92, "learning_rate": 9.795747382306087e-07, "loss": 0.0014, "step": 4824 }, { "epoch": 12.92, "learning_rate": 9.745833292886753e-07, "loss": 0.0004, "step": 4826 }, { "epoch": 12.93, "learning_rate": 9.696040181719623e-07, "loss": 0.0003, "step": 4828 }, { "epoch": 12.93, "learning_rate": 9.646368115548232e-07, "loss": 0.0001, "step": 4830 }, { "epoch": 12.94, "learning_rate": 9.59681716095392e-07, "loss": 0.0011, "step": 4832 }, { "epoch": 12.94, "learning_rate": 9.547387384355622e-07, "loss": 0.0009, "step": 4834 }, { "epoch": 12.95, "learning_rate": 9.498078852009862e-07, "loss": 0.0002, "step": 4836 }, { "epoch": 12.95, "learning_rate": 9.448891630010671e-07, "loss": 0.0002, "step": 4838 }, { "epoch": 12.96, "learning_rate": 9.399825784289441e-07, "loss": 0.0003, "step": 4840 }, { "epoch": 12.96, "learning_rate": 9.350881380614895e-07, "loss": 0.0006, "step": 4842 }, { "epoch": 12.97, "learning_rate": 9.302058484592947e-07, "loss": 0.0001, "step": 4844 }, { "epoch": 12.97, "learning_rate": 9.253357161666676e-07, "loss": 0.0001, "step": 4846 }, { "epoch": 12.98, "learning_rate": 9.204777477116155e-07, "loss": 0.0, "step": 4848 }, { "epoch": 12.99, "learning_rate": 9.156319496058452e-07, "loss": 0.0003, "step": 4850 }, { "epoch": 12.99, "learning_rate": 9.107983283447475e-07, "loss": 0.0001, "step": 4852 }, { "epoch": 13.0, "learning_rate": 9.05976890407394e-07, "loss": 0.0001, "step": 4854 }, { "epoch": 13.0, "learning_rate": 9.01167642256523e-07, "loss": 0.0002, "step": 4856 }, { "epoch": 13.01, "learning_rate": 8.963705903385344e-07, "loss": 0.0003, "step": 4858 }, { "epoch": 13.01, "learning_rate": 8.915857410834793e-07, "loss": 0.0003, "step": 4860 }, { "epoch": 13.02, "learning_rate": 8.86813100905054e-07, "loss": 0.0001, "step": 4862 }, { "epoch": 13.02, "learning_rate": 8.820526762005866e-07, "loss": 0.0003, "step": 4864 }, { "epoch": 13.03, "learning_rate": 8.773044733510338e-07, "loss": 0.0003, "step": 4866 }, { "epoch": 13.03, "learning_rate": 8.72568498720967e-07, "loss": 0.0004, "step": 4868 }, { "epoch": 13.04, "learning_rate": 8.678447586585736e-07, "loss": 0.0001, "step": 4870 }, { "epoch": 13.04, "learning_rate": 8.631332594956343e-07, "loss": 0.0002, "step": 4872 }, { "epoch": 13.05, "learning_rate": 8.584340075475239e-07, "loss": 0.0002, "step": 4874 }, { "epoch": 13.05, "learning_rate": 8.537470091132016e-07, "loss": 0.0006, "step": 4876 }, { "epoch": 13.06, "learning_rate": 8.490722704751997e-07, "loss": 0.0004, "step": 4878 }, { "epoch": 13.07, "learning_rate": 8.444097978996235e-07, "loss": 0.0003, "step": 4880 }, { "epoch": 13.07, "learning_rate": 8.397595976361284e-07, "loss": 0.0001, "step": 4882 }, { "epoch": 13.08, "learning_rate": 8.351216759179249e-07, "loss": 0.0001, "step": 4884 }, { "epoch": 13.08, "learning_rate": 8.304960389617645e-07, "loss": 0.0002, "step": 4886 }, { "epoch": 13.09, "learning_rate": 8.258826929679298e-07, "loss": 0.0008, "step": 4888 }, { "epoch": 13.09, "learning_rate": 8.212816441202309e-07, "loss": 0.0003, "step": 4890 }, { "epoch": 13.1, "learning_rate": 8.166928985859934e-07, "loss": 0.0004, "step": 4892 }, { "epoch": 13.1, "learning_rate": 8.121164625160505e-07, "loss": 0.0003, "step": 4894 }, { "epoch": 13.11, "learning_rate": 8.075523420447384e-07, "loss": 0.0018, "step": 4896 }, { "epoch": 13.11, "learning_rate": 8.030005432898825e-07, "loss": 0.0006, "step": 4898 }, { "epoch": 13.12, "learning_rate": 7.984610723527897e-07, "loss": 0.0007, "step": 4900 }, { "epoch": 13.12, "learning_rate": 7.939339353182518e-07, "loss": 0.0013, "step": 4902 }, { "epoch": 13.13, "learning_rate": 7.89419138254518e-07, "loss": 0.0004, "step": 4904 }, { "epoch": 13.14, "learning_rate": 7.849166872133018e-07, "loss": 0.0007, "step": 4906 }, { "epoch": 13.14, "learning_rate": 7.804265882297668e-07, "loss": 0.0001, "step": 4908 }, { "epoch": 13.15, "learning_rate": 7.759488473225197e-07, "loss": 0.0001, "step": 4910 }, { "epoch": 13.15, "learning_rate": 7.714834704936003e-07, "loss": 0.0003, "step": 4912 }, { "epoch": 13.16, "learning_rate": 7.670304637284798e-07, "loss": 0.0, "step": 4914 }, { "epoch": 13.16, "learning_rate": 7.625898329960447e-07, "loss": 0.0001, "step": 4916 }, { "epoch": 13.17, "learning_rate": 7.581615842485935e-07, "loss": 0.0009, "step": 4918 }, { "epoch": 13.17, "learning_rate": 7.537457234218271e-07, "loss": 0.0012, "step": 4920 }, { "epoch": 13.18, "learning_rate": 7.493422564348418e-07, "loss": 0.0004, "step": 4922 }, { "epoch": 13.18, "learning_rate": 7.44951189190124e-07, "loss": 0.0003, "step": 4924 }, { "epoch": 13.19, "learning_rate": 7.405725275735343e-07, "loss": 0.0001, "step": 4926 }, { "epoch": 13.19, "learning_rate": 7.362062774543066e-07, "loss": 0.0001, "step": 4928 }, { "epoch": 13.2, "learning_rate": 7.318524446850395e-07, "loss": 0.0046, "step": 4930 }, { "epoch": 13.2, "learning_rate": 7.275110351016845e-07, "loss": 0.0002, "step": 4932 }, { "epoch": 13.21, "learning_rate": 7.231820545235435e-07, "loss": 0.0003, "step": 4934 }, { "epoch": 13.22, "learning_rate": 7.188655087532581e-07, "loss": 0.0002, "step": 4936 }, { "epoch": 13.22, "learning_rate": 7.145614035767989e-07, "loss": 0.0001, "step": 4938 }, { "epoch": 13.23, "learning_rate": 7.102697447634643e-07, "loss": 0.0, "step": 4940 }, { "epoch": 13.23, "learning_rate": 7.059905380658694e-07, "loss": 0.0003, "step": 4942 }, { "epoch": 13.24, "learning_rate": 7.017237892199336e-07, "loss": 0.0007, "step": 4944 }, { "epoch": 13.24, "learning_rate": 6.974695039448864e-07, "loss": 0.0001, "step": 4946 }, { "epoch": 13.25, "learning_rate": 6.932276879432421e-07, "loss": 0.0004, "step": 4948 }, { "epoch": 13.25, "learning_rate": 6.889983469008055e-07, "loss": 0.0017, "step": 4950 }, { "epoch": 13.26, "learning_rate": 6.847814864866586e-07, "loss": 0.0003, "step": 4952 }, { "epoch": 13.26, "learning_rate": 6.805771123531546e-07, "loss": 0.0002, "step": 4954 }, { "epoch": 13.27, "learning_rate": 6.763852301359087e-07, "loss": 0.0001, "step": 4956 }, { "epoch": 13.27, "learning_rate": 6.722058454537939e-07, "loss": 0.0002, "step": 4958 }, { "epoch": 13.28, "learning_rate": 6.680389639089291e-07, "loss": 0.0001, "step": 4960 }, { "epoch": 13.29, "learning_rate": 6.638845910866753e-07, "loss": 0.0001, "step": 4962 }, { "epoch": 13.29, "learning_rate": 6.597427325556249e-07, "loss": 0.0002, "step": 4964 }, { "epoch": 13.3, "learning_rate": 6.556133938675968e-07, "loss": 0.0001, "step": 4966 }, { "epoch": 13.3, "learning_rate": 6.514965805576312e-07, "loss": 0.0001, "step": 4968 }, { "epoch": 13.31, "learning_rate": 6.473922981439729e-07, "loss": 0.0002, "step": 4970 }, { "epoch": 13.31, "learning_rate": 6.433005521280756e-07, "loss": 0.0001, "step": 4972 }, { "epoch": 13.32, "learning_rate": 6.392213479945852e-07, "loss": 0.0009, "step": 4974 }, { "epoch": 13.32, "learning_rate": 6.351546912113383e-07, "loss": 0.0002, "step": 4976 }, { "epoch": 13.33, "learning_rate": 6.311005872293518e-07, "loss": 0.0002, "step": 4978 }, { "epoch": 13.33, "learning_rate": 6.27059041482817e-07, "loss": 0.0009, "step": 4980 }, { "epoch": 13.34, "learning_rate": 6.230300593890937e-07, "loss": 0.0013, "step": 4982 }, { "epoch": 13.34, "learning_rate": 6.190136463486973e-07, "loss": 0.0005, "step": 4984 }, { "epoch": 13.35, "learning_rate": 6.150098077452992e-07, "loss": 0.0001, "step": 4986 }, { "epoch": 13.35, "learning_rate": 6.11018548945711e-07, "loss": 0.0001, "step": 4988 }, { "epoch": 13.36, "learning_rate": 6.070398752998896e-07, "loss": 0.0005, "step": 4990 }, { "epoch": 13.37, "learning_rate": 6.030737921409169e-07, "loss": 0.003, "step": 4992 }, { "epoch": 13.37, "learning_rate": 5.99120304784998e-07, "loss": 0.0001, "step": 4994 }, { "epoch": 13.38, "learning_rate": 5.951794185314586e-07, "loss": 0.0003, "step": 4996 }, { "epoch": 13.38, "learning_rate": 5.912511386627285e-07, "loss": 0.0002, "step": 4998 }, { "epoch": 13.39, "learning_rate": 5.873354704443424e-07, "loss": 0.0001, "step": 5000 }, { "epoch": 13.39, "learning_rate": 5.834324191249318e-07, "loss": 0.0005, "step": 5002 }, { "epoch": 13.4, "learning_rate": 5.795419899362143e-07, "loss": 0.0006, "step": 5004 }, { "epoch": 13.4, "learning_rate": 5.756641880929869e-07, "loss": 0.0004, "step": 5006 }, { "epoch": 13.41, "learning_rate": 5.717990187931244e-07, "loss": 0.0001, "step": 5008 }, { "epoch": 13.41, "learning_rate": 5.679464872175666e-07, "loss": 0.0004, "step": 5010 }, { "epoch": 13.42, "learning_rate": 5.641065985303118e-07, "loss": 0.0001, "step": 5012 }, { "epoch": 13.42, "learning_rate": 5.602793578784149e-07, "loss": 0.0003, "step": 5014 }, { "epoch": 13.43, "learning_rate": 5.564647703919757e-07, "loss": 0.0001, "step": 5016 }, { "epoch": 13.44, "learning_rate": 5.52662841184135e-07, "loss": 0.0002, "step": 5018 }, { "epoch": 13.44, "learning_rate": 5.488735753510621e-07, "loss": 0.0001, "step": 5020 }, { "epoch": 13.45, "learning_rate": 5.450969779719561e-07, "loss": 0.0008, "step": 5022 }, { "epoch": 13.45, "learning_rate": 5.413330541090333e-07, "loss": 0.0029, "step": 5024 }, { "epoch": 13.46, "learning_rate": 5.375818088075224e-07, "loss": 0.0001, "step": 5026 }, { "epoch": 13.46, "learning_rate": 5.33843247095659e-07, "loss": 0.0001, "step": 5028 }, { "epoch": 13.47, "learning_rate": 5.301173739846744e-07, "loss": 0.0002, "step": 5030 }, { "epoch": 13.47, "learning_rate": 5.264041944687947e-07, "loss": 0.003, "step": 5032 }, { "epoch": 13.48, "learning_rate": 5.2270371352523e-07, "loss": 0.0001, "step": 5034 }, { "epoch": 13.48, "learning_rate": 5.190159361141678e-07, "loss": 0.0016, "step": 5036 }, { "epoch": 13.49, "learning_rate": 5.153408671787719e-07, "loss": 0.0001, "step": 5038 }, { "epoch": 13.49, "learning_rate": 5.116785116451661e-07, "loss": 0.0006, "step": 5040 }, { "epoch": 13.5, "learning_rate": 5.080288744224359e-07, "loss": 0.0002, "step": 5042 }, { "epoch": 13.5, "learning_rate": 5.043919604026204e-07, "loss": 0.0001, "step": 5044 }, { "epoch": 13.51, "learning_rate": 5.007677744606987e-07, "loss": 0.005, "step": 5046 }, { "epoch": 13.52, "learning_rate": 4.97156321454596e-07, "loss": 0.0003, "step": 5048 }, { "epoch": 13.52, "learning_rate": 4.935576062251635e-07, "loss": 0.0003, "step": 5050 }, { "epoch": 13.53, "learning_rate": 4.899716335961846e-07, "loss": 0.0002, "step": 5052 }, { "epoch": 13.53, "learning_rate": 4.863984083743566e-07, "loss": 0.0001, "step": 5054 }, { "epoch": 13.54, "learning_rate": 4.828379353492951e-07, "loss": 0.0019, "step": 5056 }, { "epoch": 13.54, "learning_rate": 4.792902192935167e-07, "loss": 0.0002, "step": 5058 }, { "epoch": 13.55, "learning_rate": 4.757552649624442e-07, "loss": 0.0002, "step": 5060 }, { "epoch": 13.55, "learning_rate": 4.722330770943917e-07, "loss": 0.0009, "step": 5062 }, { "epoch": 13.56, "learning_rate": 4.687236604105616e-07, "loss": 0.0002, "step": 5064 }, { "epoch": 13.56, "learning_rate": 4.6522701961503436e-07, "loss": 0.0003, "step": 5066 }, { "epoch": 13.57, "learning_rate": 4.6174315939476963e-07, "loss": 0.0003, "step": 5068 }, { "epoch": 13.57, "learning_rate": 4.5827208441959426e-07, "loss": 0.0003, "step": 5070 }, { "epoch": 13.58, "learning_rate": 4.548137993421975e-07, "loss": 0.0005, "step": 5072 }, { "epoch": 13.59, "learning_rate": 4.513683087981235e-07, "loss": 0.0001, "step": 5074 }, { "epoch": 13.59, "learning_rate": 4.479356174057692e-07, "loss": 0.0001, "step": 5076 }, { "epoch": 13.6, "learning_rate": 4.4451572976637157e-07, "loss": 0.0001, "step": 5078 }, { "epoch": 13.6, "learning_rate": 4.411086504640105e-07, "loss": 0.0002, "step": 5080 }, { "epoch": 13.61, "learning_rate": 4.3771438406559173e-07, "loss": 0.0001, "step": 5082 }, { "epoch": 13.61, "learning_rate": 4.3433293512085027e-07, "loss": 0.0001, "step": 5084 }, { "epoch": 13.62, "learning_rate": 4.3096430816233826e-07, "loss": 0.0002, "step": 5086 }, { "epoch": 13.62, "learning_rate": 4.2760850770542263e-07, "loss": 0.0019, "step": 5088 }, { "epoch": 13.63, "learning_rate": 4.242655382482752e-07, "loss": 0.003, "step": 5090 }, { "epoch": 13.63, "learning_rate": 4.2093540427187054e-07, "loss": 0.0001, "step": 5092 }, { "epoch": 13.64, "learning_rate": 4.17618110239979e-07, "loss": 0.0003, "step": 5094 }, { "epoch": 13.64, "learning_rate": 4.1431366059915934e-07, "loss": 0.0001, "step": 5096 }, { "epoch": 13.65, "learning_rate": 4.1102205977875176e-07, "loss": 0.0004, "step": 5098 }, { "epoch": 13.65, "learning_rate": 4.077433121908747e-07, "loss": 0.0003, "step": 5100 }, { "epoch": 13.66, "learning_rate": 4.0447742223042044e-07, "loss": 0.0001, "step": 5102 }, { "epoch": 13.67, "learning_rate": 4.0122439427504377e-07, "loss": 0.0002, "step": 5104 }, { "epoch": 13.67, "learning_rate": 3.9798423268516015e-07, "loss": 0.0001, "step": 5106 }, { "epoch": 13.68, "learning_rate": 3.9475694180393876e-07, "loss": 0.0002, "step": 5108 }, { "epoch": 13.68, "learning_rate": 3.915425259572947e-07, "loss": 0.0018, "step": 5110 }, { "epoch": 13.69, "learning_rate": 3.883409894538881e-07, "loss": 0.0013, "step": 5112 }, { "epoch": 13.69, "learning_rate": 3.8515233658511287e-07, "loss": 0.0003, "step": 5114 }, { "epoch": 13.7, "learning_rate": 3.819765716250967e-07, "loss": 0.0002, "step": 5116 }, { "epoch": 13.7, "learning_rate": 3.7881369883068787e-07, "loss": 0.0005, "step": 5118 }, { "epoch": 13.71, "learning_rate": 3.7566372244145723e-07, "loss": 0.0007, "step": 5120 }, { "epoch": 13.71, "learning_rate": 3.725266466796873e-07, "loss": 0.0004, "step": 5122 }, { "epoch": 13.72, "learning_rate": 3.6940247575037e-07, "loss": 0.0002, "step": 5124 }, { "epoch": 13.72, "learning_rate": 3.662912138411967e-07, "loss": 0.0001, "step": 5126 }, { "epoch": 13.73, "learning_rate": 3.6319286512255913e-07, "loss": 0.0001, "step": 5128 }, { "epoch": 13.73, "learning_rate": 3.601074337475352e-07, "loss": 0.0001, "step": 5130 }, { "epoch": 13.74, "learning_rate": 3.570349238518911e-07, "loss": 0.0002, "step": 5132 }, { "epoch": 13.75, "learning_rate": 3.539753395540735e-07, "loss": 0.0002, "step": 5134 }, { "epoch": 13.75, "learning_rate": 3.5092868495520294e-07, "loss": 0.0001, "step": 5136 }, { "epoch": 13.76, "learning_rate": 3.4789496413906723e-07, "loss": 0.0003, "step": 5138 }, { "epoch": 13.76, "learning_rate": 3.448741811721179e-07, "loss": 0.0001, "step": 5140 }, { "epoch": 13.77, "learning_rate": 3.4186634010346496e-07, "loss": 0.0001, "step": 5142 }, { "epoch": 13.77, "learning_rate": 3.3887144496487224e-07, "loss": 0.0005, "step": 5144 }, { "epoch": 13.78, "learning_rate": 3.3588949977074737e-07, "loss": 0.0002, "step": 5146 }, { "epoch": 13.78, "learning_rate": 3.3292050851814084e-07, "loss": 0.0003, "step": 5148 }, { "epoch": 13.79, "learning_rate": 3.2996447518674255e-07, "loss": 0.0004, "step": 5150 }, { "epoch": 13.79, "learning_rate": 3.270214037388708e-07, "loss": 0.0003, "step": 5152 }, { "epoch": 13.8, "learning_rate": 3.2409129811946773e-07, "loss": 0.0001, "step": 5154 }, { "epoch": 13.8, "learning_rate": 3.211741622560993e-07, "loss": 0.0005, "step": 5156 }, { "epoch": 13.81, "learning_rate": 3.182700000589445e-07, "loss": 0.0004, "step": 5158 }, { "epoch": 13.82, "learning_rate": 3.153788154207926e-07, "loss": 0.0002, "step": 5160 }, { "epoch": 13.82, "learning_rate": 3.125006122170382e-07, "loss": 0.0001, "step": 5162 }, { "epoch": 13.83, "learning_rate": 3.09635394305674e-07, "loss": 0.0002, "step": 5164 }, { "epoch": 13.83, "learning_rate": 3.0678316552728907e-07, "loss": 0.0003, "step": 5166 }, { "epoch": 13.84, "learning_rate": 3.039439297050595e-07, "loss": 0.0002, "step": 5168 }, { "epoch": 13.84, "learning_rate": 3.011176906447444e-07, "loss": 0.0005, "step": 5170 }, { "epoch": 13.85, "learning_rate": 2.9830445213468784e-07, "loss": 0.001, "step": 5172 }, { "epoch": 13.85, "learning_rate": 2.955042179458012e-07, "loss": 0.0002, "step": 5174 }, { "epoch": 13.86, "learning_rate": 2.9271699183156754e-07, "loss": 0.0005, "step": 5176 }, { "epoch": 13.86, "learning_rate": 2.899427775280328e-07, "loss": 0.0002, "step": 5178 }, { "epoch": 13.87, "learning_rate": 2.871815787538024e-07, "loss": 0.0001, "step": 5180 }, { "epoch": 13.87, "learning_rate": 2.844333992100334e-07, "loss": 0.0002, "step": 5182 }, { "epoch": 13.88, "learning_rate": 2.81698242580436e-07, "loss": 0.0001, "step": 5184 }, { "epoch": 13.88, "learning_rate": 2.7897611253126066e-07, "loss": 0.0001, "step": 5186 }, { "epoch": 13.89, "learning_rate": 2.7626701271129654e-07, "loss": 0.0003, "step": 5188 }, { "epoch": 13.9, "learning_rate": 2.735709467518699e-07, "loss": 0.022, "step": 5190 }, { "epoch": 13.9, "learning_rate": 2.7088791826683224e-07, "loss": 0.0002, "step": 5192 }, { "epoch": 13.91, "learning_rate": 2.6821793085256456e-07, "loss": 0.0002, "step": 5194 }, { "epoch": 13.91, "learning_rate": 2.655609880879617e-07, "loss": 0.0, "step": 5196 }, { "epoch": 13.92, "learning_rate": 2.6291709353443715e-07, "loss": 0.0, "step": 5198 }, { "epoch": 13.92, "learning_rate": 2.602862507359127e-07, "loss": 0.0001, "step": 5200 }, { "epoch": 13.93, "learning_rate": 2.5766846321881533e-07, "loss": 0.0005, "step": 5202 }, { "epoch": 13.93, "learning_rate": 2.550637344920737e-07, "loss": 0.0001, "step": 5204 }, { "epoch": 13.94, "learning_rate": 2.5247206804711046e-07, "loss": 0.0003, "step": 5206 }, { "epoch": 13.94, "learning_rate": 2.4989346735784124e-07, "loss": 0.0054, "step": 5208 }, { "epoch": 13.95, "learning_rate": 2.4732793588066794e-07, "loss": 0.0005, "step": 5210 }, { "epoch": 13.95, "learning_rate": 2.447754770544708e-07, "loss": 0.0001, "step": 5212 }, { "epoch": 13.96, "learning_rate": 2.4223609430061077e-07, "loss": 0.0001, "step": 5214 }, { "epoch": 13.97, "learning_rate": 2.397097910229229e-07, "loss": 0.0012, "step": 5216 }, { "epoch": 13.97, "learning_rate": 2.3719657060770618e-07, "loss": 0.0001, "step": 5218 }, { "epoch": 13.98, "learning_rate": 2.3469643642372587e-07, "loss": 0.0003, "step": 5220 }, { "epoch": 13.98, "learning_rate": 2.3220939182220347e-07, "loss": 0.0001, "step": 5222 }, { "epoch": 13.99, "learning_rate": 2.2973544013681792e-07, "loss": 0.0004, "step": 5224 }, { "epoch": 13.99, "learning_rate": 2.2727458468369657e-07, "loss": 0.0012, "step": 5226 }, { "epoch": 14.0, "learning_rate": 2.2482682876141416e-07, "loss": 0.0017, "step": 5228 }, { "epoch": 14.0, "learning_rate": 2.223921756509828e-07, "loss": 0.0001, "step": 5230 }, { "epoch": 14.01, "learning_rate": 2.1997062861585649e-07, "loss": 0.0002, "step": 5232 }, { "epoch": 14.01, "learning_rate": 2.1756219090191655e-07, "loss": 0.0002, "step": 5234 }, { "epoch": 14.02, "learning_rate": 2.1516686573747503e-07, "loss": 0.0001, "step": 5236 }, { "epoch": 14.02, "learning_rate": 2.1278465633327028e-07, "loss": 0.0001, "step": 5238 }, { "epoch": 14.03, "learning_rate": 2.104155658824536e-07, "loss": 0.0003, "step": 5240 }, { "epoch": 14.03, "learning_rate": 2.0805959756059702e-07, "loss": 0.0003, "step": 5242 }, { "epoch": 14.04, "learning_rate": 2.0571675452567997e-07, "loss": 0.001, "step": 5244 }, { "epoch": 14.05, "learning_rate": 2.033870399180915e-07, "loss": 0.0001, "step": 5246 }, { "epoch": 14.05, "learning_rate": 2.0107045686062033e-07, "loss": 0.0001, "step": 5248 }, { "epoch": 14.06, "learning_rate": 1.9876700845845475e-07, "loss": 0.0042, "step": 5250 }, { "epoch": 14.06, "learning_rate": 1.964766977991772e-07, "loss": 0.0004, "step": 5252 }, { "epoch": 14.07, "learning_rate": 1.9419952795275977e-07, "loss": 0.0001, "step": 5254 }, { "epoch": 14.07, "learning_rate": 1.9193550197155962e-07, "loss": 0.0003, "step": 5256 }, { "epoch": 14.08, "learning_rate": 1.896846228903193e-07, "loss": 0.0002, "step": 5258 }, { "epoch": 14.08, "learning_rate": 1.874468937261531e-07, "loss": 0.0001, "step": 5260 }, { "epoch": 14.09, "learning_rate": 1.852223174785539e-07, "loss": 0.0007, "step": 5262 }, { "epoch": 14.09, "learning_rate": 1.8301089712938203e-07, "loss": 0.0031, "step": 5264 }, { "epoch": 14.1, "learning_rate": 1.808126356428619e-07, "loss": 0.0003, "step": 5266 }, { "epoch": 14.1, "learning_rate": 1.7862753596558424e-07, "loss": 0.0003, "step": 5268 }, { "epoch": 14.11, "learning_rate": 1.7645560102649396e-07, "loss": 0.0006, "step": 5270 }, { "epoch": 14.12, "learning_rate": 1.7429683373688888e-07, "loss": 0.0006, "step": 5272 }, { "epoch": 14.12, "learning_rate": 1.721512369904188e-07, "loss": 0.0002, "step": 5274 }, { "epoch": 14.13, "learning_rate": 1.7001881366307872e-07, "loss": 0.0002, "step": 5276 }, { "epoch": 14.13, "learning_rate": 1.6789956661320662e-07, "loss": 0.0003, "step": 5278 }, { "epoch": 14.14, "learning_rate": 1.6579349868147688e-07, "loss": 0.0001, "step": 5280 }, { "epoch": 14.14, "learning_rate": 1.637006126909013e-07, "loss": 0.0001, "step": 5282 }, { "epoch": 14.15, "learning_rate": 1.6162091144681812e-07, "loss": 0.0003, "step": 5284 }, { "epoch": 14.15, "learning_rate": 1.595543977368974e-07, "loss": 0.0003, "step": 5286 }, { "epoch": 14.16, "learning_rate": 1.5750107433112893e-07, "loss": 0.0001, "step": 5288 }, { "epoch": 14.16, "learning_rate": 1.554609439818233e-07, "loss": 0.0001, "step": 5290 }, { "epoch": 14.17, "learning_rate": 1.5343400942360642e-07, "loss": 0.0003, "step": 5292 }, { "epoch": 14.17, "learning_rate": 1.5142027337341715e-07, "loss": 0.0002, "step": 5294 }, { "epoch": 14.18, "learning_rate": 1.4941973853050073e-07, "loss": 0.0006, "step": 5296 }, { "epoch": 14.18, "learning_rate": 1.474324075764111e-07, "loss": 0.0004, "step": 5298 }, { "epoch": 14.19, "learning_rate": 1.4545828317499844e-07, "loss": 0.0015, "step": 5300 }, { "epoch": 14.2, "learning_rate": 1.434973679724161e-07, "loss": 0.0, "step": 5302 }, { "epoch": 14.2, "learning_rate": 1.4154966459710594e-07, "loss": 0.0005, "step": 5304 }, { "epoch": 14.21, "learning_rate": 1.3961517565980632e-07, "loss": 0.0002, "step": 5306 }, { "epoch": 14.21, "learning_rate": 1.3769390375353852e-07, "loss": 0.0001, "step": 5308 }, { "epoch": 14.22, "learning_rate": 1.3578585145360812e-07, "loss": 0.0003, "step": 5310 }, { "epoch": 14.22, "learning_rate": 1.3389102131760367e-07, "loss": 0.0001, "step": 5312 }, { "epoch": 14.23, "learning_rate": 1.3200941588538575e-07, "loss": 0.0003, "step": 5314 }, { "epoch": 14.23, "learning_rate": 1.3014103767909236e-07, "loss": 0.0001, "step": 5316 }, { "epoch": 14.24, "learning_rate": 1.2828588920312912e-07, "loss": 0.0003, "step": 5318 }, { "epoch": 14.24, "learning_rate": 1.2644397294417132e-07, "loss": 0.0002, "step": 5320 }, { "epoch": 14.25, "learning_rate": 1.246152913711529e-07, "loss": 0.0001, "step": 5322 }, { "epoch": 14.25, "learning_rate": 1.2279984693527315e-07, "loss": 0.0002, "step": 5324 }, { "epoch": 14.26, "learning_rate": 1.209976420699821e-07, "loss": 0.0002, "step": 5326 }, { "epoch": 14.27, "learning_rate": 1.1920867919098856e-07, "loss": 0.0004, "step": 5328 }, { "epoch": 14.27, "learning_rate": 1.1743296069624988e-07, "loss": 0.0003, "step": 5330 }, { "epoch": 14.28, "learning_rate": 1.1567048896596877e-07, "loss": 0.0001, "step": 5332 }, { "epoch": 14.28, "learning_rate": 1.1392126636259326e-07, "loss": 0.0002, "step": 5334 }, { "epoch": 14.29, "learning_rate": 1.1218529523081224e-07, "loss": 0.0013, "step": 5336 }, { "epoch": 14.29, "learning_rate": 1.1046257789755099e-07, "loss": 0.0004, "step": 5338 }, { "epoch": 14.3, "learning_rate": 1.0875311667196908e-07, "loss": 0.0012, "step": 5340 }, { "epoch": 14.3, "learning_rate": 1.0705691384545913e-07, "loss": 0.0003, "step": 5342 }, { "epoch": 14.31, "learning_rate": 1.0537397169164022e-07, "loss": 0.0003, "step": 5344 }, { "epoch": 14.31, "learning_rate": 1.0370429246635783e-07, "loss": 0.0008, "step": 5346 }, { "epoch": 14.32, "learning_rate": 1.0204787840767838e-07, "loss": 0.0004, "step": 5348 }, { "epoch": 14.32, "learning_rate": 1.0040473173588805e-07, "loss": 0.0001, "step": 5350 }, { "epoch": 14.33, "learning_rate": 9.877485465349057e-08, "loss": 0.0002, "step": 5352 }, { "epoch": 14.33, "learning_rate": 9.715824934519947e-08, "loss": 0.0001, "step": 5354 }, { "epoch": 14.34, "learning_rate": 9.555491797794136e-08, "loss": 0.0005, "step": 5356 }, { "epoch": 14.35, "learning_rate": 9.396486270085048e-08, "loss": 0.0002, "step": 5358 }, { "epoch": 14.35, "learning_rate": 9.238808564526303e-08, "loss": 0.0001, "step": 5360 }, { "epoch": 14.36, "learning_rate": 9.082458892471834e-08, "loss": 0.0007, "step": 5362 }, { "epoch": 14.36, "learning_rate": 8.927437463495558e-08, "loss": 0.0003, "step": 5364 }, { "epoch": 14.37, "learning_rate": 8.773744485390811e-08, "loss": 0.0001, "step": 5366 }, { "epoch": 14.37, "learning_rate": 8.621380164170467e-08, "loss": 0.0005, "step": 5368 }, { "epoch": 14.38, "learning_rate": 8.470344704066047e-08, "loss": 0.0011, "step": 5370 }, { "epoch": 14.38, "learning_rate": 8.320638307528384e-08, "loss": 0.0003, "step": 5372 }, { "epoch": 14.39, "learning_rate": 8.172261175226293e-08, "loss": 0.0001, "step": 5374 }, { "epoch": 14.39, "learning_rate": 8.025213506047346e-08, "loss": 0.0001, "step": 5376 }, { "epoch": 14.4, "learning_rate": 7.879495497096434e-08, "loss": 0.0003, "step": 5378 }, { "epoch": 14.4, "learning_rate": 7.735107343696868e-08, "loss": 0.0002, "step": 5380 }, { "epoch": 14.41, "learning_rate": 7.592049239388721e-08, "loss": 0.0003, "step": 5382 }, { "epoch": 14.41, "learning_rate": 7.450321375929825e-08, "loss": 0.0001, "step": 5384 }, { "epoch": 14.42, "learning_rate": 7.309923943294439e-08, "loss": 0.0003, "step": 5386 }, { "epoch": 14.43, "learning_rate": 7.170857129673914e-08, "loss": 0.0001, "step": 5388 }, { "epoch": 14.43, "learning_rate": 7.033121121475694e-08, "loss": 0.0003, "step": 5390 }, { "epoch": 14.44, "learning_rate": 6.89671610332343e-08, "loss": 0.0001, "step": 5392 }, { "epoch": 14.44, "learning_rate": 6.761642258056977e-08, "loss": 0.0001, "step": 5394 }, { "epoch": 14.45, "learning_rate": 6.627899766731728e-08, "loss": 0.0002, "step": 5396 }, { "epoch": 14.45, "learning_rate": 6.495488808618055e-08, "loss": 0.0003, "step": 5398 }, { "epoch": 14.46, "learning_rate": 6.364409561202323e-08, "loss": 0.0002, "step": 5400 }, { "epoch": 14.46, "learning_rate": 6.234662200185204e-08, "loss": 0.0001, "step": 5402 }, { "epoch": 14.47, "learning_rate": 6.10624689948236e-08, "loss": 0.0001, "step": 5404 }, { "epoch": 14.47, "learning_rate": 5.979163831223988e-08, "loss": 0.005, "step": 5406 }, { "epoch": 14.48, "learning_rate": 5.8534131657542735e-08, "loss": 0.0045, "step": 5408 }, { "epoch": 14.48, "learning_rate": 5.728995071631716e-08, "loss": 0.0002, "step": 5410 }, { "epoch": 14.49, "learning_rate": 5.605909715628355e-08, "loss": 0.0031, "step": 5412 }, { "epoch": 14.5, "learning_rate": 5.484157262730105e-08, "loss": 0.0003, "step": 5414 }, { "epoch": 14.5, "learning_rate": 5.3637378761359774e-08, "loss": 0.0002, "step": 5416 }, { "epoch": 14.51, "learning_rate": 5.244651717258076e-08, "loss": 0.0002, "step": 5418 }, { "epoch": 14.51, "learning_rate": 5.1268989457218254e-08, "loss": 0.0005, "step": 5420 }, { "epoch": 14.52, "learning_rate": 5.010479719364969e-08, "loss": 0.0003, "step": 5422 }, { "epoch": 14.52, "learning_rate": 4.895394194237901e-08, "loss": 0.0001, "step": 5424 }, { "epoch": 14.53, "learning_rate": 4.781642524603114e-08, "loss": 0.0002, "step": 5426 }, { "epoch": 14.53, "learning_rate": 4.6692248629354174e-08, "loss": 0.0001, "step": 5428 }, { "epoch": 14.54, "learning_rate": 4.558141359921386e-08, "loss": 0.0002, "step": 5430 }, { "epoch": 14.54, "learning_rate": 4.448392164459359e-08, "loss": 0.0004, "step": 5432 }, { "epoch": 14.55, "learning_rate": 4.339977423658881e-08, "loss": 0.0001, "step": 5434 }, { "epoch": 14.55, "learning_rate": 4.232897282841153e-08, "loss": 0.0118, "step": 5436 }, { "epoch": 14.56, "learning_rate": 4.127151885538139e-08, "loss": 0.0002, "step": 5438 }, { "epoch": 14.56, "learning_rate": 4.0227413734926776e-08, "loss": 0.0003, "step": 5440 }, { "epoch": 14.57, "learning_rate": 3.919665886658708e-08, "loss": 0.0001, "step": 5442 }, { "epoch": 14.58, "learning_rate": 3.817925563200375e-08, "loss": 0.0, "step": 5444 }, { "epoch": 14.58, "learning_rate": 3.7175205394919255e-08, "loss": 0.0002, "step": 5446 }, { "epoch": 14.59, "learning_rate": 3.6184509501182574e-08, "loss": 0.0001, "step": 5448 }, { "epoch": 14.59, "learning_rate": 3.520716927873813e-08, "loss": 0.0001, "step": 5450 }, { "epoch": 14.6, "learning_rate": 3.424318603763022e-08, "loss": 0.0001, "step": 5452 }, { "epoch": 14.6, "learning_rate": 3.3292561069998566e-08, "loss": 0.0003, "step": 5454 }, { "epoch": 14.61, "learning_rate": 3.2355295650077226e-08, "loss": 0.0003, "step": 5456 }, { "epoch": 14.61, "learning_rate": 3.143139103419346e-08, "loss": 0.0001, "step": 5458 }, { "epoch": 14.62, "learning_rate": 3.0520848460765525e-08, "loss": 0.0001, "step": 5460 }, { "epoch": 14.62, "learning_rate": 2.962366915029824e-08, "loss": 0.0001, "step": 5462 }, { "epoch": 14.63, "learning_rate": 2.8739854305389614e-08, "loss": 0.0002, "step": 5464 }, { "epoch": 14.63, "learning_rate": 2.7869405110718674e-08, "loss": 0.0003, "step": 5466 }, { "epoch": 14.64, "learning_rate": 2.7012322733049878e-08, "loss": 0.0003, "step": 5468 }, { "epoch": 14.65, "learning_rate": 2.6168608321233134e-08, "loss": 0.0004, "step": 5470 }, { "epoch": 14.65, "learning_rate": 2.533826300619602e-08, "loss": 0.0004, "step": 5472 }, { "epoch": 14.66, "learning_rate": 2.452128790094932e-08, "loss": 0.0002, "step": 5474 }, { "epoch": 14.66, "learning_rate": 2.3717684100579286e-08, "loss": 0.0005, "step": 5476 }, { "epoch": 14.67, "learning_rate": 2.292745268225205e-08, "loss": 0.0002, "step": 5478 }, { "epoch": 14.67, "learning_rate": 2.2150594705206972e-08, "loss": 0.0001, "step": 5480 }, { "epoch": 14.68, "learning_rate": 2.138711121075665e-08, "loss": 0.0007, "step": 5482 }, { "epoch": 14.68, "learning_rate": 2.0637003222289122e-08, "loss": 0.0001, "step": 5484 }, { "epoch": 14.69, "learning_rate": 1.9900271745261214e-08, "loss": 0.0012, "step": 5486 }, { "epoch": 14.69, "learning_rate": 1.9176917767201876e-08, "loss": 0.0005, "step": 5488 }, { "epoch": 14.7, "learning_rate": 1.846694225770551e-08, "loss": 0.0001, "step": 5490 }, { "epoch": 14.7, "learning_rate": 1.7770346168437535e-08, "loss": 0.0011, "step": 5492 }, { "epoch": 14.71, "learning_rate": 1.7087130433126596e-08, "loss": 0.0004, "step": 5494 }, { "epoch": 14.71, "learning_rate": 1.6417295967565695e-08, "loss": 0.0002, "step": 5496 }, { "epoch": 14.72, "learning_rate": 1.5760843669615498e-08, "loss": 0.0009, "step": 5498 }, { "epoch": 14.73, "learning_rate": 1.5117774419193265e-08, "loss": 0.0019, "step": 5500 }, { "epoch": 14.73, "learning_rate": 1.4488089078280587e-08, "loss": 0.0005, "step": 5502 }, { "epoch": 14.74, "learning_rate": 1.387178849092008e-08, "loss": 0.0002, "step": 5504 }, { "epoch": 14.74, "learning_rate": 1.326887348321093e-08, "loss": 0.0001, "step": 5506 }, { "epoch": 14.75, "learning_rate": 1.2679344863311127e-08, "loss": 0.0003, "step": 5508 }, { "epoch": 14.75, "learning_rate": 1.2103203421434117e-08, "loss": 0.0007, "step": 5510 }, { "epoch": 14.76, "learning_rate": 1.1540449929851038e-08, "loss": 0.0004, "step": 5512 }, { "epoch": 14.76, "learning_rate": 1.099108514288627e-08, "loss": 0.0002, "step": 5514 }, { "epoch": 14.77, "learning_rate": 1.0455109796916329e-08, "loss": 0.0003, "step": 5516 }, { "epoch": 14.77, "learning_rate": 9.932524610374305e-09, "loss": 0.0007, "step": 5518 }, { "epoch": 14.78, "learning_rate": 9.423330283742093e-09, "loss": 0.0002, "step": 5520 }, { "epoch": 14.78, "learning_rate": 8.927527499550393e-09, "loss": 0.0002, "step": 5522 }, { "epoch": 14.79, "learning_rate": 8.445116922384255e-09, "loss": 0.0001, "step": 5524 }, { "epoch": 14.8, "learning_rate": 7.97609919887421e-09, "loss": 0.0003, "step": 5526 }, { "epoch": 14.8, "learning_rate": 7.520474957699586e-09, "loss": 0.0004, "step": 5528 }, { "epoch": 14.81, "learning_rate": 7.078244809587409e-09, "loss": 0.0002, "step": 5530 }, { "epoch": 14.81, "learning_rate": 6.649409347310176e-09, "loss": 0.0008, "step": 5532 }, { "epoch": 14.82, "learning_rate": 6.2339691456869685e-09, "loss": 0.0004, "step": 5534 }, { "epoch": 14.82, "learning_rate": 5.831924761581231e-09, "loss": 0.0002, "step": 5536 }, { "epoch": 14.83, "learning_rate": 5.4432767338985506e-09, "loss": 0.0003, "step": 5538 }, { "epoch": 14.83, "learning_rate": 5.068025583592207e-09, "loss": 0.0002, "step": 5540 }, { "epoch": 14.84, "learning_rate": 4.706171813653182e-09, "loss": 0.0003, "step": 5542 }, { "epoch": 14.84, "learning_rate": 4.357715909116822e-09, "loss": 0.0001, "step": 5544 }, { "epoch": 14.85, "learning_rate": 4.022658337060614e-09, "loss": 0.0008, "step": 5546 }, { "epoch": 14.85, "learning_rate": 3.7009995466008586e-09, "loss": 0.0006, "step": 5548 }, { "epoch": 14.86, "learning_rate": 3.3927399688948868e-09, "loss": 0.0001, "step": 5550 }, { "epoch": 14.86, "learning_rate": 3.097880017137733e-09, "loss": 0.0003, "step": 5552 }, { "epoch": 14.87, "learning_rate": 2.8164200865665737e-09, "loss": 0.0004, "step": 5554 }, { "epoch": 14.88, "learning_rate": 2.548360554455176e-09, "loss": 0.0002, "step": 5556 }, { "epoch": 14.88, "learning_rate": 2.2937017801138996e-09, "loss": 0.0002, "step": 5558 }, { "epoch": 14.89, "learning_rate": 2.052444104891915e-09, "loss": 0.0002, "step": 5560 }, { "epoch": 14.89, "learning_rate": 1.8245878521772065e-09, "loss": 0.0001, "step": 5562 }, { "epoch": 14.9, "learning_rate": 1.6101333273899067e-09, "loss": 0.0002, "step": 5564 }, { "epoch": 14.9, "learning_rate": 1.4090808179889614e-09, "loss": 0.0007, "step": 5566 }, { "epoch": 14.91, "learning_rate": 1.2214305934699078e-09, "loss": 0.0004, "step": 5568 }, { "epoch": 14.91, "learning_rate": 1.0471829053615434e-09, "loss": 0.0002, "step": 5570 }, { "epoch": 14.92, "learning_rate": 8.863379872281474e-10, "loss": 0.0008, "step": 5572 }, { "epoch": 14.92, "learning_rate": 7.388960546694801e-10, "loss": 0.0002, "step": 5574 }, { "epoch": 14.93, "learning_rate": 6.048573053196726e-10, "loss": 0.0005, "step": 5576 }, { "epoch": 14.93, "learning_rate": 4.842219188461173e-10, "loss": 0.0007, "step": 5578 }, { "epoch": 14.94, "learning_rate": 3.7699005695057687e-10, "loss": 0.0002, "step": 5580 }, { "epoch": 14.95, "learning_rate": 2.831618633680755e-10, "loss": 0.0009, "step": 5582 }, { "epoch": 14.95, "learning_rate": 2.0273746386911818e-10, "loss": 0.0001, "step": 5584 }, { "epoch": 14.96, "learning_rate": 1.3571696625414022e-10, "loss": 0.0001, "step": 5586 }, { "epoch": 14.96, "learning_rate": 8.210046035905805e-11, "loss": 0.001, "step": 5588 }, { "epoch": 14.97, "learning_rate": 4.188801805304898e-11, "loss": 0.0002, "step": 5590 }, { "epoch": 14.97, "learning_rate": 1.5079693236330627e-11, "loss": 0.0001, "step": 5592 }, { "epoch": 14.98, "learning_rate": 1.6755218446018462e-12, "loss": 0.0002, "step": 5594 } ], "logging_steps": 2, "max_steps": 5595, "num_input_tokens_seen": 0, "num_train_epochs": 15, "save_steps": 3.0, "total_flos": 34211827015680.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }