{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.984008395592314, "eval_steps": 1500, "global_step": 10004, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00019989505509607455, "grad_norm": 12.8125, "learning_rate": 1.0000000000000001e-07, "loss": 1.476, "step": 1 }, { "epoch": 0.00019989505509607455, "eval_loss": 1.7460540533065796, "eval_runtime": 595.8959, "eval_samples_per_second": 3.588, "eval_steps_per_second": 3.588, "step": 1 }, { "epoch": 0.0003997901101921491, "grad_norm": 12.625, "learning_rate": 2.0000000000000002e-07, "loss": 1.6568, "step": 2 }, { "epoch": 0.0005996851652882237, "grad_norm": 13.125, "learning_rate": 3.0000000000000004e-07, "loss": 1.5199, "step": 3 }, { "epoch": 0.0007995802203842982, "grad_norm": 11.75, "learning_rate": 4.0000000000000003e-07, "loss": 1.5059, "step": 4 }, { "epoch": 0.0009994752754803728, "grad_norm": 10.125, "learning_rate": 5.000000000000001e-07, "loss": 1.568, "step": 5 }, { "epoch": 0.0011993703305764475, "grad_norm": 10.5625, "learning_rate": 6.000000000000001e-07, "loss": 1.5257, "step": 6 }, { "epoch": 0.001399265385672522, "grad_norm": 10.6875, "learning_rate": 7.000000000000001e-07, "loss": 1.4327, "step": 7 }, { "epoch": 0.0015991604407685964, "grad_norm": 9.1875, "learning_rate": 8.000000000000001e-07, "loss": 1.5052, "step": 8 }, { "epoch": 0.001799055495864671, "grad_norm": 9.5625, "learning_rate": 9.000000000000001e-07, "loss": 1.5233, "step": 9 }, { "epoch": 0.0019989505509607455, "grad_norm": 8.875, "learning_rate": 1.0000000000000002e-06, "loss": 1.5549, "step": 10 }, { "epoch": 0.0021988456060568202, "grad_norm": 7.4375, "learning_rate": 1.1e-06, "loss": 1.5139, "step": 11 }, { "epoch": 0.002398740661152895, "grad_norm": 6.625, "learning_rate": 1.2000000000000002e-06, "loss": 1.4168, "step": 12 }, { "epoch": 0.002598635716248969, "grad_norm": 6.65625, "learning_rate": 1.3e-06, "loss": 1.452, "step": 13 }, { "epoch": 0.002798530771345044, "grad_norm": 5.6875, "learning_rate": 1.4000000000000001e-06, "loss": 1.384, "step": 14 }, { "epoch": 0.0029984258264411185, "grad_norm": 5.1875, "learning_rate": 1.5e-06, "loss": 1.4018, "step": 15 }, { "epoch": 0.003198320881537193, "grad_norm": 5.09375, "learning_rate": 1.6000000000000001e-06, "loss": 1.3572, "step": 16 }, { "epoch": 0.0033982159366332675, "grad_norm": 4.9375, "learning_rate": 1.7000000000000002e-06, "loss": 1.3639, "step": 17 }, { "epoch": 0.003598110991729342, "grad_norm": 4.84375, "learning_rate": 1.8000000000000001e-06, "loss": 1.4174, "step": 18 }, { "epoch": 0.003798006046825417, "grad_norm": 4.625, "learning_rate": 1.9000000000000002e-06, "loss": 1.3139, "step": 19 }, { "epoch": 0.003997901101921491, "grad_norm": 4.4375, "learning_rate": 2.0000000000000003e-06, "loss": 1.468, "step": 20 }, { "epoch": 0.004197796157017566, "grad_norm": 4.09375, "learning_rate": 2.1000000000000002e-06, "loss": 1.3801, "step": 21 }, { "epoch": 0.0043976912121136405, "grad_norm": 3.609375, "learning_rate": 2.2e-06, "loss": 1.3115, "step": 22 }, { "epoch": 0.004597586267209715, "grad_norm": 3.96875, "learning_rate": 2.3000000000000004e-06, "loss": 1.4058, "step": 23 }, { "epoch": 0.00479748132230579, "grad_norm": 3.828125, "learning_rate": 2.4000000000000003e-06, "loss": 1.3053, "step": 24 }, { "epoch": 0.004997376377401864, "grad_norm": 3.671875, "learning_rate": 2.5e-06, "loss": 1.285, "step": 25 }, { "epoch": 0.005197271432497938, "grad_norm": 3.8125, "learning_rate": 2.6e-06, "loss": 1.4823, "step": 26 }, { "epoch": 0.005397166487594013, "grad_norm": 3.484375, "learning_rate": 2.7000000000000004e-06, "loss": 1.3546, "step": 27 }, { "epoch": 0.005597061542690088, "grad_norm": 3.53125, "learning_rate": 2.8000000000000003e-06, "loss": 1.3501, "step": 28 }, { "epoch": 0.005796956597786162, "grad_norm": 3.8125, "learning_rate": 2.9e-06, "loss": 1.3478, "step": 29 }, { "epoch": 0.005996851652882237, "grad_norm": 3.28125, "learning_rate": 3e-06, "loss": 1.2747, "step": 30 }, { "epoch": 0.006196746707978312, "grad_norm": 3.515625, "learning_rate": 3.1000000000000004e-06, "loss": 1.4141, "step": 31 }, { "epoch": 0.006396641763074386, "grad_norm": 3.3125, "learning_rate": 3.2000000000000003e-06, "loss": 1.3632, "step": 32 }, { "epoch": 0.00659653681817046, "grad_norm": 2.90625, "learning_rate": 3.3000000000000006e-06, "loss": 1.2251, "step": 33 }, { "epoch": 0.006796431873266535, "grad_norm": 3.03125, "learning_rate": 3.4000000000000005e-06, "loss": 1.3892, "step": 34 }, { "epoch": 0.00699632692836261, "grad_norm": 3.015625, "learning_rate": 3.5e-06, "loss": 1.3128, "step": 35 }, { "epoch": 0.007196221983458684, "grad_norm": 2.765625, "learning_rate": 3.6000000000000003e-06, "loss": 1.2615, "step": 36 }, { "epoch": 0.007396117038554759, "grad_norm": 3.25, "learning_rate": 3.7e-06, "loss": 1.3802, "step": 37 }, { "epoch": 0.007596012093650834, "grad_norm": 2.9375, "learning_rate": 3.8000000000000005e-06, "loss": 1.3061, "step": 38 }, { "epoch": 0.0077959071487469075, "grad_norm": 2.828125, "learning_rate": 3.900000000000001e-06, "loss": 1.2044, "step": 39 }, { "epoch": 0.007995802203842982, "grad_norm": 2.671875, "learning_rate": 4.000000000000001e-06, "loss": 1.3376, "step": 40 }, { "epoch": 0.008195697258939058, "grad_norm": 2.671875, "learning_rate": 4.1e-06, "loss": 1.3461, "step": 41 }, { "epoch": 0.008395592314035132, "grad_norm": 2.53125, "learning_rate": 4.2000000000000004e-06, "loss": 1.2514, "step": 42 }, { "epoch": 0.008595487369131205, "grad_norm": 2.5625, "learning_rate": 4.3e-06, "loss": 1.2735, "step": 43 }, { "epoch": 0.008795382424227281, "grad_norm": 2.578125, "learning_rate": 4.4e-06, "loss": 1.2665, "step": 44 }, { "epoch": 0.008995277479323355, "grad_norm": 2.59375, "learning_rate": 4.5e-06, "loss": 1.3754, "step": 45 }, { "epoch": 0.00919517253441943, "grad_norm": 2.578125, "learning_rate": 4.600000000000001e-06, "loss": 1.2434, "step": 46 }, { "epoch": 0.009395067589515504, "grad_norm": 2.515625, "learning_rate": 4.7e-06, "loss": 1.3023, "step": 47 }, { "epoch": 0.00959496264461158, "grad_norm": 2.4375, "learning_rate": 4.800000000000001e-06, "loss": 1.2093, "step": 48 }, { "epoch": 0.009794857699707653, "grad_norm": 2.421875, "learning_rate": 4.9000000000000005e-06, "loss": 1.3093, "step": 49 }, { "epoch": 0.009994752754803727, "grad_norm": 2.5, "learning_rate": 5e-06, "loss": 1.299, "step": 50 }, { "epoch": 0.010194647809899803, "grad_norm": 2.578125, "learning_rate": 5.1e-06, "loss": 1.2736, "step": 51 }, { "epoch": 0.010394542864995877, "grad_norm": 2.546875, "learning_rate": 5.2e-06, "loss": 1.2607, "step": 52 }, { "epoch": 0.010594437920091952, "grad_norm": 2.5, "learning_rate": 5.300000000000001e-06, "loss": 1.2765, "step": 53 }, { "epoch": 0.010794332975188026, "grad_norm": 2.390625, "learning_rate": 5.400000000000001e-06, "loss": 1.2538, "step": 54 }, { "epoch": 0.010994228030284102, "grad_norm": 2.328125, "learning_rate": 5.500000000000001e-06, "loss": 1.2165, "step": 55 }, { "epoch": 0.011194123085380175, "grad_norm": 2.40625, "learning_rate": 5.600000000000001e-06, "loss": 1.2152, "step": 56 }, { "epoch": 0.01139401814047625, "grad_norm": 2.375, "learning_rate": 5.7e-06, "loss": 1.2222, "step": 57 }, { "epoch": 0.011593913195572325, "grad_norm": 2.375, "learning_rate": 5.8e-06, "loss": 1.2779, "step": 58 }, { "epoch": 0.011793808250668399, "grad_norm": 2.375, "learning_rate": 5.9e-06, "loss": 1.239, "step": 59 }, { "epoch": 0.011993703305764474, "grad_norm": 2.34375, "learning_rate": 6e-06, "loss": 1.3348, "step": 60 }, { "epoch": 0.012193598360860548, "grad_norm": 2.28125, "learning_rate": 6.1e-06, "loss": 1.2105, "step": 61 }, { "epoch": 0.012393493415956624, "grad_norm": 2.4375, "learning_rate": 6.200000000000001e-06, "loss": 1.2447, "step": 62 }, { "epoch": 0.012593388471052697, "grad_norm": 2.453125, "learning_rate": 6.300000000000001e-06, "loss": 1.2812, "step": 63 }, { "epoch": 0.012793283526148771, "grad_norm": 2.46875, "learning_rate": 6.4000000000000006e-06, "loss": 1.1675, "step": 64 }, { "epoch": 0.012993178581244847, "grad_norm": 2.390625, "learning_rate": 6.5000000000000004e-06, "loss": 1.3107, "step": 65 }, { "epoch": 0.01319307363634092, "grad_norm": 2.28125, "learning_rate": 6.600000000000001e-06, "loss": 1.2161, "step": 66 }, { "epoch": 0.013392968691436996, "grad_norm": 2.40625, "learning_rate": 6.700000000000001e-06, "loss": 1.2651, "step": 67 }, { "epoch": 0.01359286374653307, "grad_norm": 2.4375, "learning_rate": 6.800000000000001e-06, "loss": 1.2281, "step": 68 }, { "epoch": 0.013792758801629145, "grad_norm": 2.515625, "learning_rate": 6.9e-06, "loss": 1.3876, "step": 69 }, { "epoch": 0.01399265385672522, "grad_norm": 2.578125, "learning_rate": 7e-06, "loss": 1.3301, "step": 70 }, { "epoch": 0.014192548911821293, "grad_norm": 2.28125, "learning_rate": 7.100000000000001e-06, "loss": 1.234, "step": 71 }, { "epoch": 0.014392443966917369, "grad_norm": 2.5, "learning_rate": 7.2000000000000005e-06, "loss": 1.2861, "step": 72 }, { "epoch": 0.014592339022013442, "grad_norm": 2.53125, "learning_rate": 7.3e-06, "loss": 1.2072, "step": 73 }, { "epoch": 0.014792234077109518, "grad_norm": 2.484375, "learning_rate": 7.4e-06, "loss": 1.2894, "step": 74 }, { "epoch": 0.014992129132205592, "grad_norm": 2.328125, "learning_rate": 7.500000000000001e-06, "loss": 1.1828, "step": 75 }, { "epoch": 0.015192024187301667, "grad_norm": 2.34375, "learning_rate": 7.600000000000001e-06, "loss": 1.1208, "step": 76 }, { "epoch": 0.015391919242397741, "grad_norm": 2.21875, "learning_rate": 7.7e-06, "loss": 1.1869, "step": 77 }, { "epoch": 0.015591814297493815, "grad_norm": 2.453125, "learning_rate": 7.800000000000002e-06, "loss": 1.232, "step": 78 }, { "epoch": 0.01579170935258989, "grad_norm": 2.3125, "learning_rate": 7.9e-06, "loss": 1.1701, "step": 79 }, { "epoch": 0.015991604407685964, "grad_norm": 2.640625, "learning_rate": 8.000000000000001e-06, "loss": 1.1675, "step": 80 }, { "epoch": 0.01619149946278204, "grad_norm": 2.515625, "learning_rate": 8.1e-06, "loss": 1.3752, "step": 81 }, { "epoch": 0.016391394517878115, "grad_norm": 2.453125, "learning_rate": 8.2e-06, "loss": 1.1496, "step": 82 }, { "epoch": 0.016591289572974188, "grad_norm": 2.265625, "learning_rate": 8.3e-06, "loss": 1.1925, "step": 83 }, { "epoch": 0.016791184628070263, "grad_norm": 2.5, "learning_rate": 8.400000000000001e-06, "loss": 1.2526, "step": 84 }, { "epoch": 0.01699107968316634, "grad_norm": 2.296875, "learning_rate": 8.5e-06, "loss": 1.2725, "step": 85 }, { "epoch": 0.01719097473826241, "grad_norm": 2.484375, "learning_rate": 8.6e-06, "loss": 1.2927, "step": 86 }, { "epoch": 0.017390869793358486, "grad_norm": 2.375, "learning_rate": 8.700000000000001e-06, "loss": 1.2508, "step": 87 }, { "epoch": 0.017590764848454562, "grad_norm": 2.234375, "learning_rate": 8.8e-06, "loss": 1.164, "step": 88 }, { "epoch": 0.017790659903550637, "grad_norm": 2.40625, "learning_rate": 8.900000000000001e-06, "loss": 1.2274, "step": 89 }, { "epoch": 0.01799055495864671, "grad_norm": 2.234375, "learning_rate": 9e-06, "loss": 1.221, "step": 90 }, { "epoch": 0.018190450013742785, "grad_norm": 2.3125, "learning_rate": 9.100000000000001e-06, "loss": 1.2804, "step": 91 }, { "epoch": 0.01839034506883886, "grad_norm": 2.359375, "learning_rate": 9.200000000000002e-06, "loss": 1.1848, "step": 92 }, { "epoch": 0.018590240123934933, "grad_norm": 2.3125, "learning_rate": 9.3e-06, "loss": 1.2262, "step": 93 }, { "epoch": 0.018790135179031008, "grad_norm": 2.25, "learning_rate": 9.4e-06, "loss": 1.2016, "step": 94 }, { "epoch": 0.018990030234127084, "grad_norm": 2.359375, "learning_rate": 9.5e-06, "loss": 1.2149, "step": 95 }, { "epoch": 0.01918992528922316, "grad_norm": 2.578125, "learning_rate": 9.600000000000001e-06, "loss": 1.2341, "step": 96 }, { "epoch": 0.01938982034431923, "grad_norm": 2.390625, "learning_rate": 9.7e-06, "loss": 1.2795, "step": 97 }, { "epoch": 0.019589715399415307, "grad_norm": 2.21875, "learning_rate": 9.800000000000001e-06, "loss": 1.2028, "step": 98 }, { "epoch": 0.019789610454511383, "grad_norm": 2.234375, "learning_rate": 9.9e-06, "loss": 1.1827, "step": 99 }, { "epoch": 0.019989505509607455, "grad_norm": 2.328125, "learning_rate": 1e-05, "loss": 1.2169, "step": 100 }, { "epoch": 0.02018940056470353, "grad_norm": 2.28125, "learning_rate": 9.999999888950268e-06, "loss": 1.1576, "step": 101 }, { "epoch": 0.020389295619799606, "grad_norm": 2.234375, "learning_rate": 9.999999555801075e-06, "loss": 1.1927, "step": 102 }, { "epoch": 0.02058919067489568, "grad_norm": 2.40625, "learning_rate": 9.999999000552435e-06, "loss": 1.1975, "step": 103 }, { "epoch": 0.020789085729991753, "grad_norm": 2.390625, "learning_rate": 9.999998223204373e-06, "loss": 1.2024, "step": 104 }, { "epoch": 0.02098898078508783, "grad_norm": 2.328125, "learning_rate": 9.999997223756924e-06, "loss": 1.267, "step": 105 }, { "epoch": 0.021188875840183904, "grad_norm": 2.296875, "learning_rate": 9.99999600221013e-06, "loss": 1.2559, "step": 106 }, { "epoch": 0.021388770895279977, "grad_norm": 2.40625, "learning_rate": 9.999994558564052e-06, "loss": 1.179, "step": 107 }, { "epoch": 0.021588665950376052, "grad_norm": 2.34375, "learning_rate": 9.999992892818746e-06, "loss": 1.0606, "step": 108 }, { "epoch": 0.021788561005472128, "grad_norm": 2.46875, "learning_rate": 9.999991004974292e-06, "loss": 1.1812, "step": 109 }, { "epoch": 0.021988456060568203, "grad_norm": 2.265625, "learning_rate": 9.999988895030772e-06, "loss": 1.1652, "step": 110 }, { "epoch": 0.022188351115664275, "grad_norm": 2.171875, "learning_rate": 9.999986562988278e-06, "loss": 1.1498, "step": 111 }, { "epoch": 0.02238824617076035, "grad_norm": 2.359375, "learning_rate": 9.999984008846914e-06, "loss": 1.3272, "step": 112 }, { "epoch": 0.022588141225856426, "grad_norm": 2.296875, "learning_rate": 9.999981232606796e-06, "loss": 1.2065, "step": 113 }, { "epoch": 0.0227880362809525, "grad_norm": 2.296875, "learning_rate": 9.999978234268047e-06, "loss": 1.1944, "step": 114 }, { "epoch": 0.022987931336048574, "grad_norm": 2.234375, "learning_rate": 9.999975013830797e-06, "loss": 1.1866, "step": 115 }, { "epoch": 0.02318782639114465, "grad_norm": 2.1875, "learning_rate": 9.99997157129519e-06, "loss": 1.1906, "step": 116 }, { "epoch": 0.023387721446240725, "grad_norm": 2.265625, "learning_rate": 9.99996790666138e-06, "loss": 1.1623, "step": 117 }, { "epoch": 0.023587616501336797, "grad_norm": 2.53125, "learning_rate": 9.99996401992953e-06, "loss": 1.4069, "step": 118 }, { "epoch": 0.023787511556432873, "grad_norm": 2.421875, "learning_rate": 9.999959911099814e-06, "loss": 1.1261, "step": 119 }, { "epoch": 0.02398740661152895, "grad_norm": 2.515625, "learning_rate": 9.999955580172411e-06, "loss": 1.2397, "step": 120 }, { "epoch": 0.02418730166662502, "grad_norm": 2.40625, "learning_rate": 9.999951027147514e-06, "loss": 1.3055, "step": 121 }, { "epoch": 0.024387196721721096, "grad_norm": 2.484375, "learning_rate": 9.999946252025329e-06, "loss": 1.1119, "step": 122 }, { "epoch": 0.02458709177681717, "grad_norm": 2.390625, "learning_rate": 9.999941254806065e-06, "loss": 1.2665, "step": 123 }, { "epoch": 0.024786986831913247, "grad_norm": 2.328125, "learning_rate": 9.999936035489943e-06, "loss": 1.1914, "step": 124 }, { "epoch": 0.02498688188700932, "grad_norm": 2.171875, "learning_rate": 9.999930594077199e-06, "loss": 1.2627, "step": 125 }, { "epoch": 0.025186776942105395, "grad_norm": 2.453125, "learning_rate": 9.99992493056807e-06, "loss": 1.2579, "step": 126 }, { "epoch": 0.02538667199720147, "grad_norm": 2.265625, "learning_rate": 9.999919044962809e-06, "loss": 1.2031, "step": 127 }, { "epoch": 0.025586567052297542, "grad_norm": 2.296875, "learning_rate": 9.999912937261679e-06, "loss": 1.2567, "step": 128 }, { "epoch": 0.025786462107393618, "grad_norm": 2.34375, "learning_rate": 9.99990660746495e-06, "loss": 1.1828, "step": 129 }, { "epoch": 0.025986357162489693, "grad_norm": 2.203125, "learning_rate": 9.999900055572904e-06, "loss": 1.2224, "step": 130 }, { "epoch": 0.02618625221758577, "grad_norm": 2.21875, "learning_rate": 9.99989328158583e-06, "loss": 1.1482, "step": 131 }, { "epoch": 0.02638614727268184, "grad_norm": 2.359375, "learning_rate": 9.999886285504033e-06, "loss": 1.1253, "step": 132 }, { "epoch": 0.026586042327777917, "grad_norm": 2.265625, "learning_rate": 9.99987906732782e-06, "loss": 1.1303, "step": 133 }, { "epoch": 0.026785937382873992, "grad_norm": 2.359375, "learning_rate": 9.999871627057511e-06, "loss": 1.2155, "step": 134 }, { "epoch": 0.026985832437970064, "grad_norm": 2.390625, "learning_rate": 9.999863964693441e-06, "loss": 1.1278, "step": 135 }, { "epoch": 0.02718572749306614, "grad_norm": 2.40625, "learning_rate": 9.999856080235947e-06, "loss": 1.1915, "step": 136 }, { "epoch": 0.027385622548162215, "grad_norm": 2.265625, "learning_rate": 9.99984797368538e-06, "loss": 1.1856, "step": 137 }, { "epoch": 0.02758551760325829, "grad_norm": 2.265625, "learning_rate": 9.9998396450421e-06, "loss": 1.2587, "step": 138 }, { "epoch": 0.027785412658354363, "grad_norm": 2.578125, "learning_rate": 9.999831094306475e-06, "loss": 1.2584, "step": 139 }, { "epoch": 0.02798530771345044, "grad_norm": 2.359375, "learning_rate": 9.999822321478889e-06, "loss": 1.2475, "step": 140 }, { "epoch": 0.028185202768546514, "grad_norm": 2.375, "learning_rate": 9.999813326559728e-06, "loss": 1.149, "step": 141 }, { "epoch": 0.028385097823642586, "grad_norm": 3.03125, "learning_rate": 9.999804109549397e-06, "loss": 1.2239, "step": 142 }, { "epoch": 0.028584992878738662, "grad_norm": 2.234375, "learning_rate": 9.999794670448298e-06, "loss": 1.1599, "step": 143 }, { "epoch": 0.028784887933834737, "grad_norm": 2.328125, "learning_rate": 9.999785009256853e-06, "loss": 1.2722, "step": 144 }, { "epoch": 0.028984782988930813, "grad_norm": 2.140625, "learning_rate": 9.999775125975492e-06, "loss": 1.1292, "step": 145 }, { "epoch": 0.029184678044026885, "grad_norm": 2.171875, "learning_rate": 9.999765020604655e-06, "loss": 1.1786, "step": 146 }, { "epoch": 0.02938457309912296, "grad_norm": 2.203125, "learning_rate": 9.99975469314479e-06, "loss": 1.1699, "step": 147 }, { "epoch": 0.029584468154219036, "grad_norm": 2.328125, "learning_rate": 9.999744143596354e-06, "loss": 1.25, "step": 148 }, { "epoch": 0.029784363209315108, "grad_norm": 2.28125, "learning_rate": 9.99973337195982e-06, "loss": 1.2246, "step": 149 }, { "epoch": 0.029984258264411184, "grad_norm": 2.265625, "learning_rate": 9.999722378235661e-06, "loss": 1.173, "step": 150 }, { "epoch": 0.03018415331950726, "grad_norm": 2.265625, "learning_rate": 9.99971116242437e-06, "loss": 1.1347, "step": 151 }, { "epoch": 0.030384048374603335, "grad_norm": 2.109375, "learning_rate": 9.99969972452644e-06, "loss": 1.1403, "step": 152 }, { "epoch": 0.030583943429699407, "grad_norm": 2.25, "learning_rate": 9.999688064542385e-06, "loss": 1.2141, "step": 153 }, { "epoch": 0.030783838484795482, "grad_norm": 2.375, "learning_rate": 9.99967618247272e-06, "loss": 1.1917, "step": 154 }, { "epoch": 0.030983733539891558, "grad_norm": 2.46875, "learning_rate": 9.999664078317972e-06, "loss": 1.1414, "step": 155 }, { "epoch": 0.03118362859498763, "grad_norm": 2.265625, "learning_rate": 9.999651752078681e-06, "loss": 1.2256, "step": 156 }, { "epoch": 0.031383523650083706, "grad_norm": 2.25, "learning_rate": 9.999639203755392e-06, "loss": 1.1904, "step": 157 }, { "epoch": 0.03158341870517978, "grad_norm": 2.234375, "learning_rate": 9.999626433348664e-06, "loss": 1.2102, "step": 158 }, { "epoch": 0.03178331376027586, "grad_norm": 2.28125, "learning_rate": 9.999613440859064e-06, "loss": 1.1549, "step": 159 }, { "epoch": 0.03198320881537193, "grad_norm": 2.484375, "learning_rate": 9.999600226287168e-06, "loss": 1.1984, "step": 160 }, { "epoch": 0.032183103870468, "grad_norm": 2.3125, "learning_rate": 9.999586789633565e-06, "loss": 1.1532, "step": 161 }, { "epoch": 0.03238299892556408, "grad_norm": 2.40625, "learning_rate": 9.99957313089885e-06, "loss": 1.2764, "step": 162 }, { "epoch": 0.03258289398066015, "grad_norm": 2.484375, "learning_rate": 9.999559250083631e-06, "loss": 1.2102, "step": 163 }, { "epoch": 0.03278278903575623, "grad_norm": 2.265625, "learning_rate": 9.999545147188523e-06, "loss": 1.1573, "step": 164 }, { "epoch": 0.0329826840908523, "grad_norm": 2.25, "learning_rate": 9.999530822214154e-06, "loss": 1.2516, "step": 165 }, { "epoch": 0.033182579145948375, "grad_norm": 2.40625, "learning_rate": 9.99951627516116e-06, "loss": 1.2498, "step": 166 }, { "epoch": 0.033382474201044454, "grad_norm": 2.265625, "learning_rate": 9.999501506030187e-06, "loss": 1.0588, "step": 167 }, { "epoch": 0.033582369256140526, "grad_norm": 2.21875, "learning_rate": 9.999486514821892e-06, "loss": 1.1712, "step": 168 }, { "epoch": 0.0337822643112366, "grad_norm": 2.3125, "learning_rate": 9.999471301536938e-06, "loss": 1.0943, "step": 169 }, { "epoch": 0.03398215936633268, "grad_norm": 2.328125, "learning_rate": 9.999455866176004e-06, "loss": 1.1701, "step": 170 }, { "epoch": 0.03418205442142875, "grad_norm": 2.1875, "learning_rate": 9.999440208739775e-06, "loss": 1.216, "step": 171 }, { "epoch": 0.03438194947652482, "grad_norm": 2.25, "learning_rate": 9.999424329228944e-06, "loss": 1.1877, "step": 172 }, { "epoch": 0.0345818445316209, "grad_norm": 2.1875, "learning_rate": 9.999408227644221e-06, "loss": 1.191, "step": 173 }, { "epoch": 0.03478173958671697, "grad_norm": 2.3125, "learning_rate": 9.999391903986316e-06, "loss": 1.1894, "step": 174 }, { "epoch": 0.034981634641813045, "grad_norm": 2.21875, "learning_rate": 9.999375358255959e-06, "loss": 1.2301, "step": 175 }, { "epoch": 0.035181529696909124, "grad_norm": 2.25, "learning_rate": 9.99935859045388e-06, "loss": 1.1464, "step": 176 }, { "epoch": 0.035381424752005196, "grad_norm": 2.265625, "learning_rate": 9.999341600580827e-06, "loss": 1.1863, "step": 177 }, { "epoch": 0.035581319807101275, "grad_norm": 2.15625, "learning_rate": 9.999324388637554e-06, "loss": 1.1387, "step": 178 }, { "epoch": 0.03578121486219735, "grad_norm": 2.34375, "learning_rate": 9.999306954624826e-06, "loss": 1.2182, "step": 179 }, { "epoch": 0.03598110991729342, "grad_norm": 2.21875, "learning_rate": 9.999289298543417e-06, "loss": 1.1852, "step": 180 }, { "epoch": 0.0361810049723895, "grad_norm": 2.46875, "learning_rate": 9.999271420394112e-06, "loss": 1.1018, "step": 181 }, { "epoch": 0.03638090002748557, "grad_norm": 2.3125, "learning_rate": 9.999253320177704e-06, "loss": 1.3003, "step": 182 }, { "epoch": 0.03658079508258164, "grad_norm": 2.25, "learning_rate": 9.999234997894998e-06, "loss": 1.15, "step": 183 }, { "epoch": 0.03678069013767772, "grad_norm": 2.375, "learning_rate": 9.999216453546805e-06, "loss": 1.1683, "step": 184 }, { "epoch": 0.03698058519277379, "grad_norm": 2.3125, "learning_rate": 9.999197687133953e-06, "loss": 1.1442, "step": 185 }, { "epoch": 0.037180480247869865, "grad_norm": 2.421875, "learning_rate": 9.999178698657273e-06, "loss": 1.3008, "step": 186 }, { "epoch": 0.037380375302965944, "grad_norm": 2.515625, "learning_rate": 9.99915948811761e-06, "loss": 1.1976, "step": 187 }, { "epoch": 0.037580270358062016, "grad_norm": 2.1875, "learning_rate": 9.999140055515817e-06, "loss": 1.2784, "step": 188 }, { "epoch": 0.03778016541315809, "grad_norm": 2.34375, "learning_rate": 9.999120400852756e-06, "loss": 1.251, "step": 189 }, { "epoch": 0.03798006046825417, "grad_norm": 2.296875, "learning_rate": 9.999100524129299e-06, "loss": 1.2041, "step": 190 }, { "epoch": 0.03817995552335024, "grad_norm": 2.234375, "learning_rate": 9.999080425346333e-06, "loss": 1.2354, "step": 191 }, { "epoch": 0.03837985057844632, "grad_norm": 2.171875, "learning_rate": 9.999060104504746e-06, "loss": 1.1894, "step": 192 }, { "epoch": 0.03857974563354239, "grad_norm": 2.3125, "learning_rate": 9.999039561605445e-06, "loss": 1.2374, "step": 193 }, { "epoch": 0.03877964068863846, "grad_norm": 2.3125, "learning_rate": 9.99901879664934e-06, "loss": 1.1506, "step": 194 }, { "epoch": 0.03897953574373454, "grad_norm": 2.40625, "learning_rate": 9.998997809637354e-06, "loss": 1.1861, "step": 195 }, { "epoch": 0.039179430798830614, "grad_norm": 2.265625, "learning_rate": 9.998976600570418e-06, "loss": 1.1206, "step": 196 }, { "epoch": 0.039379325853926686, "grad_norm": 2.40625, "learning_rate": 9.998955169449479e-06, "loss": 1.1798, "step": 197 }, { "epoch": 0.039579220909022765, "grad_norm": 2.21875, "learning_rate": 9.998933516275482e-06, "loss": 1.1726, "step": 198 }, { "epoch": 0.03977911596411884, "grad_norm": 2.40625, "learning_rate": 9.998911641049393e-06, "loss": 1.2349, "step": 199 }, { "epoch": 0.03997901101921491, "grad_norm": 2.296875, "learning_rate": 9.998889543772182e-06, "loss": 1.0874, "step": 200 }, { "epoch": 0.04017890607431099, "grad_norm": 2.484375, "learning_rate": 9.998867224444832e-06, "loss": 1.141, "step": 201 }, { "epoch": 0.04037880112940706, "grad_norm": 2.359375, "learning_rate": 9.998844683068335e-06, "loss": 1.2468, "step": 202 }, { "epoch": 0.04057869618450313, "grad_norm": 2.328125, "learning_rate": 9.998821919643689e-06, "loss": 1.0987, "step": 203 }, { "epoch": 0.04077859123959921, "grad_norm": 2.265625, "learning_rate": 9.998798934171908e-06, "loss": 1.127, "step": 204 }, { "epoch": 0.040978486294695284, "grad_norm": 2.375, "learning_rate": 9.998775726654014e-06, "loss": 1.1358, "step": 205 }, { "epoch": 0.04117838134979136, "grad_norm": 2.1875, "learning_rate": 9.998752297091033e-06, "loss": 1.195, "step": 206 }, { "epoch": 0.041378276404887435, "grad_norm": 2.296875, "learning_rate": 9.99872864548401e-06, "loss": 1.142, "step": 207 }, { "epoch": 0.04157817145998351, "grad_norm": 2.1875, "learning_rate": 9.998704771833996e-06, "loss": 1.1762, "step": 208 }, { "epoch": 0.041778066515079586, "grad_norm": 2.390625, "learning_rate": 9.998680676142048e-06, "loss": 1.162, "step": 209 }, { "epoch": 0.04197796157017566, "grad_norm": 2.359375, "learning_rate": 9.99865635840924e-06, "loss": 1.2461, "step": 210 }, { "epoch": 0.04217785662527173, "grad_norm": 2.3125, "learning_rate": 9.998631818636648e-06, "loss": 1.159, "step": 211 }, { "epoch": 0.04237775168036781, "grad_norm": 2.421875, "learning_rate": 9.998607056825367e-06, "loss": 1.1312, "step": 212 }, { "epoch": 0.04257764673546388, "grad_norm": 2.203125, "learning_rate": 9.998582072976493e-06, "loss": 1.1496, "step": 213 }, { "epoch": 0.04277754179055995, "grad_norm": 2.1875, "learning_rate": 9.998556867091137e-06, "loss": 1.1292, "step": 214 }, { "epoch": 0.04297743684565603, "grad_norm": 2.296875, "learning_rate": 9.99853143917042e-06, "loss": 1.1355, "step": 215 }, { "epoch": 0.043177331900752104, "grad_norm": 2.390625, "learning_rate": 9.998505789215469e-06, "loss": 1.2097, "step": 216 }, { "epoch": 0.043377226955848176, "grad_norm": 2.28125, "learning_rate": 9.998479917227427e-06, "loss": 1.2135, "step": 217 }, { "epoch": 0.043577122010944255, "grad_norm": 2.25, "learning_rate": 9.998453823207437e-06, "loss": 1.1332, "step": 218 }, { "epoch": 0.04377701706604033, "grad_norm": 2.390625, "learning_rate": 9.998427507156665e-06, "loss": 1.1971, "step": 219 }, { "epoch": 0.043976912121136406, "grad_norm": 2.25, "learning_rate": 9.998400969076276e-06, "loss": 1.1738, "step": 220 }, { "epoch": 0.04417680717623248, "grad_norm": 2.3125, "learning_rate": 9.998374208967451e-06, "loss": 1.2697, "step": 221 }, { "epoch": 0.04437670223132855, "grad_norm": 2.421875, "learning_rate": 9.998347226831375e-06, "loss": 1.1589, "step": 222 }, { "epoch": 0.04457659728642463, "grad_norm": 2.359375, "learning_rate": 9.998320022669253e-06, "loss": 1.2381, "step": 223 }, { "epoch": 0.0447764923415207, "grad_norm": 2.1875, "learning_rate": 9.998292596482286e-06, "loss": 1.1596, "step": 224 }, { "epoch": 0.044976387396616774, "grad_norm": 2.328125, "learning_rate": 9.998264948271696e-06, "loss": 1.2429, "step": 225 }, { "epoch": 0.04517628245171285, "grad_norm": 2.1875, "learning_rate": 9.998237078038712e-06, "loss": 1.1608, "step": 226 }, { "epoch": 0.045376177506808925, "grad_norm": 2.265625, "learning_rate": 9.99820898578457e-06, "loss": 1.1371, "step": 227 }, { "epoch": 0.045576072561905, "grad_norm": 2.265625, "learning_rate": 9.99818067151052e-06, "loss": 1.1286, "step": 228 }, { "epoch": 0.045775967617001076, "grad_norm": 2.28125, "learning_rate": 9.998152135217816e-06, "loss": 1.2278, "step": 229 }, { "epoch": 0.04597586267209715, "grad_norm": 2.328125, "learning_rate": 9.99812337690773e-06, "loss": 1.1407, "step": 230 }, { "epoch": 0.04617575772719322, "grad_norm": 2.4375, "learning_rate": 9.998094396581538e-06, "loss": 1.1912, "step": 231 }, { "epoch": 0.0463756527822893, "grad_norm": 2.296875, "learning_rate": 9.998065194240524e-06, "loss": 1.252, "step": 232 }, { "epoch": 0.04657554783738537, "grad_norm": 2.296875, "learning_rate": 9.99803576988599e-06, "loss": 1.166, "step": 233 }, { "epoch": 0.04677544289248145, "grad_norm": 2.15625, "learning_rate": 9.99800612351924e-06, "loss": 1.1465, "step": 234 }, { "epoch": 0.04697533794757752, "grad_norm": 2.25, "learning_rate": 9.997976255141593e-06, "loss": 1.23, "step": 235 }, { "epoch": 0.047175233002673594, "grad_norm": 2.46875, "learning_rate": 9.997946164754373e-06, "loss": 1.1665, "step": 236 }, { "epoch": 0.04737512805776967, "grad_norm": 2.28125, "learning_rate": 9.99791585235892e-06, "loss": 1.1391, "step": 237 }, { "epoch": 0.047575023112865746, "grad_norm": 2.21875, "learning_rate": 9.997885317956577e-06, "loss": 1.152, "step": 238 }, { "epoch": 0.04777491816796182, "grad_norm": 2.234375, "learning_rate": 9.997854561548702e-06, "loss": 1.153, "step": 239 }, { "epoch": 0.0479748132230579, "grad_norm": 2.328125, "learning_rate": 9.99782358313666e-06, "loss": 1.189, "step": 240 }, { "epoch": 0.04817470827815397, "grad_norm": 2.21875, "learning_rate": 9.997792382721831e-06, "loss": 1.1307, "step": 241 }, { "epoch": 0.04837460333325004, "grad_norm": 2.09375, "learning_rate": 9.997760960305598e-06, "loss": 1.0662, "step": 242 }, { "epoch": 0.04857449838834612, "grad_norm": 2.1875, "learning_rate": 9.997729315889356e-06, "loss": 1.0934, "step": 243 }, { "epoch": 0.04877439344344219, "grad_norm": 2.125, "learning_rate": 9.99769744947451e-06, "loss": 1.2033, "step": 244 }, { "epoch": 0.048974288498538264, "grad_norm": 2.1875, "learning_rate": 9.99766536106248e-06, "loss": 1.153, "step": 245 }, { "epoch": 0.04917418355363434, "grad_norm": 2.234375, "learning_rate": 9.997633050654687e-06, "loss": 1.172, "step": 246 }, { "epoch": 0.049374078608730415, "grad_norm": 2.40625, "learning_rate": 9.997600518252568e-06, "loss": 1.23, "step": 247 }, { "epoch": 0.049573973663826494, "grad_norm": 2.328125, "learning_rate": 9.997567763857566e-06, "loss": 1.1873, "step": 248 }, { "epoch": 0.049773868718922566, "grad_norm": 2.375, "learning_rate": 9.997534787471139e-06, "loss": 1.1962, "step": 249 }, { "epoch": 0.04997376377401864, "grad_norm": 2.171875, "learning_rate": 9.997501589094752e-06, "loss": 1.1827, "step": 250 }, { "epoch": 0.05017365882911472, "grad_norm": 2.140625, "learning_rate": 9.997468168729876e-06, "loss": 1.0892, "step": 251 }, { "epoch": 0.05037355388421079, "grad_norm": 2.28125, "learning_rate": 9.997434526377998e-06, "loss": 1.1739, "step": 252 }, { "epoch": 0.05057344893930686, "grad_norm": 2.234375, "learning_rate": 9.997400662040613e-06, "loss": 1.0799, "step": 253 }, { "epoch": 0.05077334399440294, "grad_norm": 2.234375, "learning_rate": 9.997366575719223e-06, "loss": 1.1721, "step": 254 }, { "epoch": 0.05097323904949901, "grad_norm": 2.25, "learning_rate": 9.997332267415345e-06, "loss": 1.2029, "step": 255 }, { "epoch": 0.051173134104595085, "grad_norm": 2.15625, "learning_rate": 9.9972977371305e-06, "loss": 1.1735, "step": 256 }, { "epoch": 0.051373029159691164, "grad_norm": 2.296875, "learning_rate": 9.997262984866225e-06, "loss": 1.1807, "step": 257 }, { "epoch": 0.051572924214787236, "grad_norm": 2.453125, "learning_rate": 9.997228010624061e-06, "loss": 1.2564, "step": 258 }, { "epoch": 0.05177281926988331, "grad_norm": 2.171875, "learning_rate": 9.997192814405561e-06, "loss": 1.1182, "step": 259 }, { "epoch": 0.05197271432497939, "grad_norm": 2.1875, "learning_rate": 9.99715739621229e-06, "loss": 1.2241, "step": 260 }, { "epoch": 0.05217260938007546, "grad_norm": 2.203125, "learning_rate": 9.997121756045824e-06, "loss": 1.1293, "step": 261 }, { "epoch": 0.05237250443517154, "grad_norm": 2.265625, "learning_rate": 9.997085893907742e-06, "loss": 1.1547, "step": 262 }, { "epoch": 0.05257239949026761, "grad_norm": 2.34375, "learning_rate": 9.997049809799639e-06, "loss": 1.1602, "step": 263 }, { "epoch": 0.05277229454536368, "grad_norm": 2.3125, "learning_rate": 9.997013503723116e-06, "loss": 1.157, "step": 264 }, { "epoch": 0.05297218960045976, "grad_norm": 2.421875, "learning_rate": 9.996976975679786e-06, "loss": 1.186, "step": 265 }, { "epoch": 0.05317208465555583, "grad_norm": 2.1875, "learning_rate": 9.996940225671275e-06, "loss": 1.1212, "step": 266 }, { "epoch": 0.053371979710651905, "grad_norm": 2.296875, "learning_rate": 9.996903253699211e-06, "loss": 1.1478, "step": 267 }, { "epoch": 0.053571874765747984, "grad_norm": 2.1875, "learning_rate": 9.996866059765238e-06, "loss": 1.1511, "step": 268 }, { "epoch": 0.053771769820844056, "grad_norm": 2.3125, "learning_rate": 9.996828643871008e-06, "loss": 1.119, "step": 269 }, { "epoch": 0.05397166487594013, "grad_norm": 2.328125, "learning_rate": 9.996791006018185e-06, "loss": 1.1387, "step": 270 }, { "epoch": 0.05417155993103621, "grad_norm": 2.359375, "learning_rate": 9.996753146208438e-06, "loss": 1.1503, "step": 271 }, { "epoch": 0.05437145498613228, "grad_norm": 2.25, "learning_rate": 9.996715064443449e-06, "loss": 1.2386, "step": 272 }, { "epoch": 0.05457135004122835, "grad_norm": 2.21875, "learning_rate": 9.996676760724913e-06, "loss": 1.1869, "step": 273 }, { "epoch": 0.05477124509632443, "grad_norm": 2.3125, "learning_rate": 9.996638235054527e-06, "loss": 1.1408, "step": 274 }, { "epoch": 0.0549711401514205, "grad_norm": 2.4375, "learning_rate": 9.996599487434007e-06, "loss": 1.2918, "step": 275 }, { "epoch": 0.05517103520651658, "grad_norm": 2.328125, "learning_rate": 9.996560517865069e-06, "loss": 1.1994, "step": 276 }, { "epoch": 0.055370930261612654, "grad_norm": 2.53125, "learning_rate": 9.996521326349447e-06, "loss": 1.2154, "step": 277 }, { "epoch": 0.055570825316708726, "grad_norm": 2.234375, "learning_rate": 9.996481912888881e-06, "loss": 1.2332, "step": 278 }, { "epoch": 0.055770720371804805, "grad_norm": 2.203125, "learning_rate": 9.996442277485122e-06, "loss": 1.1615, "step": 279 }, { "epoch": 0.05597061542690088, "grad_norm": 2.28125, "learning_rate": 9.99640242013993e-06, "loss": 1.2607, "step": 280 }, { "epoch": 0.05617051048199695, "grad_norm": 2.1875, "learning_rate": 9.996362340855076e-06, "loss": 1.1624, "step": 281 }, { "epoch": 0.05637040553709303, "grad_norm": 2.203125, "learning_rate": 9.996322039632343e-06, "loss": 1.1052, "step": 282 }, { "epoch": 0.0565703005921891, "grad_norm": 2.234375, "learning_rate": 9.996281516473517e-06, "loss": 1.1284, "step": 283 }, { "epoch": 0.05677019564728517, "grad_norm": 2.171875, "learning_rate": 9.9962407713804e-06, "loss": 1.1039, "step": 284 }, { "epoch": 0.05697009070238125, "grad_norm": 2.125, "learning_rate": 9.9961998043548e-06, "loss": 1.1431, "step": 285 }, { "epoch": 0.057169985757477323, "grad_norm": 2.21875, "learning_rate": 9.996158615398541e-06, "loss": 1.1113, "step": 286 }, { "epoch": 0.057369880812573396, "grad_norm": 2.5, "learning_rate": 9.996117204513448e-06, "loss": 1.1783, "step": 287 }, { "epoch": 0.057569775867669475, "grad_norm": 2.15625, "learning_rate": 9.996075571701363e-06, "loss": 1.0627, "step": 288 }, { "epoch": 0.05776967092276555, "grad_norm": 2.34375, "learning_rate": 9.996033716964136e-06, "loss": 1.1035, "step": 289 }, { "epoch": 0.057969565977861626, "grad_norm": 2.15625, "learning_rate": 9.995991640303622e-06, "loss": 1.1161, "step": 290 }, { "epoch": 0.0581694610329577, "grad_norm": 2.265625, "learning_rate": 9.995949341721695e-06, "loss": 1.1651, "step": 291 }, { "epoch": 0.05836935608805377, "grad_norm": 2.1875, "learning_rate": 9.995906821220231e-06, "loss": 1.1721, "step": 292 }, { "epoch": 0.05856925114314985, "grad_norm": 2.109375, "learning_rate": 9.99586407880112e-06, "loss": 1.1163, "step": 293 }, { "epoch": 0.05876914619824592, "grad_norm": 2.328125, "learning_rate": 9.99582111446626e-06, "loss": 1.1774, "step": 294 }, { "epoch": 0.05896904125334199, "grad_norm": 2.40625, "learning_rate": 9.99577792821756e-06, "loss": 1.2022, "step": 295 }, { "epoch": 0.05916893630843807, "grad_norm": 2.25, "learning_rate": 9.995734520056936e-06, "loss": 1.1483, "step": 296 }, { "epoch": 0.059368831363534144, "grad_norm": 2.3125, "learning_rate": 9.995690889986321e-06, "loss": 1.1886, "step": 297 }, { "epoch": 0.059568726418630216, "grad_norm": 2.140625, "learning_rate": 9.99564703800765e-06, "loss": 1.0558, "step": 298 }, { "epoch": 0.059768621473726295, "grad_norm": 2.296875, "learning_rate": 9.99560296412287e-06, "loss": 1.1211, "step": 299 }, { "epoch": 0.05996851652882237, "grad_norm": 2.40625, "learning_rate": 9.99555866833394e-06, "loss": 1.1445, "step": 300 }, { "epoch": 0.06016841158391844, "grad_norm": 2.203125, "learning_rate": 9.995514150642827e-06, "loss": 1.122, "step": 301 }, { "epoch": 0.06036830663901452, "grad_norm": 2.109375, "learning_rate": 9.995469411051511e-06, "loss": 1.0816, "step": 302 }, { "epoch": 0.06056820169411059, "grad_norm": 2.171875, "learning_rate": 9.995424449561974e-06, "loss": 1.2716, "step": 303 }, { "epoch": 0.06076809674920667, "grad_norm": 2.296875, "learning_rate": 9.99537926617622e-06, "loss": 1.1145, "step": 304 }, { "epoch": 0.06096799180430274, "grad_norm": 2.3125, "learning_rate": 9.99533386089625e-06, "loss": 1.112, "step": 305 }, { "epoch": 0.061167886859398814, "grad_norm": 2.234375, "learning_rate": 9.995288233724084e-06, "loss": 1.1509, "step": 306 }, { "epoch": 0.06136778191449489, "grad_norm": 2.265625, "learning_rate": 9.995242384661748e-06, "loss": 1.186, "step": 307 }, { "epoch": 0.061567676969590965, "grad_norm": 2.171875, "learning_rate": 9.995196313711279e-06, "loss": 1.1512, "step": 308 }, { "epoch": 0.06176757202468704, "grad_norm": 2.296875, "learning_rate": 9.995150020874724e-06, "loss": 1.136, "step": 309 }, { "epoch": 0.061967467079783116, "grad_norm": 2.265625, "learning_rate": 9.995103506154138e-06, "loss": 1.2276, "step": 310 }, { "epoch": 0.06216736213487919, "grad_norm": 2.328125, "learning_rate": 9.995056769551587e-06, "loss": 1.1692, "step": 311 }, { "epoch": 0.06236725718997526, "grad_norm": 2.40625, "learning_rate": 9.99500981106915e-06, "loss": 1.3003, "step": 312 }, { "epoch": 0.06256715224507134, "grad_norm": 2.25, "learning_rate": 9.99496263070891e-06, "loss": 1.0791, "step": 313 }, { "epoch": 0.06276704730016741, "grad_norm": 2.3125, "learning_rate": 9.99491522847296e-06, "loss": 1.2507, "step": 314 }, { "epoch": 0.06296694235526348, "grad_norm": 2.34375, "learning_rate": 9.994867604363415e-06, "loss": 1.3307, "step": 315 }, { "epoch": 0.06316683741035956, "grad_norm": 2.265625, "learning_rate": 9.994819758382379e-06, "loss": 1.0752, "step": 316 }, { "epoch": 0.06336673246545564, "grad_norm": 2.203125, "learning_rate": 9.994771690531986e-06, "loss": 1.137, "step": 317 }, { "epoch": 0.06356662752055171, "grad_norm": 2.234375, "learning_rate": 9.994723400814367e-06, "loss": 1.1592, "step": 318 }, { "epoch": 0.06376652257564779, "grad_norm": 2.1875, "learning_rate": 9.994674889231668e-06, "loss": 1.1348, "step": 319 }, { "epoch": 0.06396641763074386, "grad_norm": 2.375, "learning_rate": 9.994626155786044e-06, "loss": 1.1495, "step": 320 }, { "epoch": 0.06416631268583993, "grad_norm": 2.40625, "learning_rate": 9.994577200479659e-06, "loss": 1.1524, "step": 321 }, { "epoch": 0.064366207740936, "grad_norm": 2.78125, "learning_rate": 9.994528023314689e-06, "loss": 1.1618, "step": 322 }, { "epoch": 0.06456610279603209, "grad_norm": 2.4375, "learning_rate": 9.994478624293317e-06, "loss": 1.1259, "step": 323 }, { "epoch": 0.06476599785112816, "grad_norm": 2.1875, "learning_rate": 9.994429003417739e-06, "loss": 1.2054, "step": 324 }, { "epoch": 0.06496589290622423, "grad_norm": 2.203125, "learning_rate": 9.994379160690156e-06, "loss": 1.164, "step": 325 }, { "epoch": 0.0651657879613203, "grad_norm": 2.28125, "learning_rate": 9.994329096112786e-06, "loss": 1.1282, "step": 326 }, { "epoch": 0.06536568301641638, "grad_norm": 2.140625, "learning_rate": 9.994278809687849e-06, "loss": 1.1085, "step": 327 }, { "epoch": 0.06556557807151246, "grad_norm": 2.28125, "learning_rate": 9.994228301417584e-06, "loss": 1.1176, "step": 328 }, { "epoch": 0.06576547312660853, "grad_norm": 2.265625, "learning_rate": 9.994177571304228e-06, "loss": 1.1428, "step": 329 }, { "epoch": 0.0659653681817046, "grad_norm": 2.0625, "learning_rate": 9.99412661935004e-06, "loss": 1.0369, "step": 330 }, { "epoch": 0.06616526323680068, "grad_norm": 2.328125, "learning_rate": 9.99407544555728e-06, "loss": 1.1183, "step": 331 }, { "epoch": 0.06636515829189675, "grad_norm": 2.21875, "learning_rate": 9.994024049928222e-06, "loss": 1.2004, "step": 332 }, { "epoch": 0.06656505334699282, "grad_norm": 2.328125, "learning_rate": 9.99397243246515e-06, "loss": 1.0809, "step": 333 }, { "epoch": 0.06676494840208891, "grad_norm": 2.21875, "learning_rate": 9.993920593170355e-06, "loss": 1.153, "step": 334 }, { "epoch": 0.06696484345718498, "grad_norm": 2.3125, "learning_rate": 9.99386853204614e-06, "loss": 1.1371, "step": 335 }, { "epoch": 0.06716473851228105, "grad_norm": 2.25, "learning_rate": 9.993816249094818e-06, "loss": 1.0658, "step": 336 }, { "epoch": 0.06736463356737712, "grad_norm": 2.3125, "learning_rate": 9.993763744318711e-06, "loss": 1.1894, "step": 337 }, { "epoch": 0.0675645286224732, "grad_norm": 2.28125, "learning_rate": 9.993711017720155e-06, "loss": 1.1231, "step": 338 }, { "epoch": 0.06776442367756928, "grad_norm": 2.234375, "learning_rate": 9.993658069301487e-06, "loss": 1.1764, "step": 339 }, { "epoch": 0.06796431873266535, "grad_norm": 2.359375, "learning_rate": 9.993604899065061e-06, "loss": 1.1511, "step": 340 }, { "epoch": 0.06816421378776143, "grad_norm": 2.234375, "learning_rate": 9.993551507013239e-06, "loss": 1.1341, "step": 341 }, { "epoch": 0.0683641088428575, "grad_norm": 2.46875, "learning_rate": 9.993497893148391e-06, "loss": 1.1729, "step": 342 }, { "epoch": 0.06856400389795357, "grad_norm": 2.25, "learning_rate": 9.993444057472902e-06, "loss": 1.2644, "step": 343 }, { "epoch": 0.06876389895304964, "grad_norm": 2.3125, "learning_rate": 9.99338999998916e-06, "loss": 1.1547, "step": 344 }, { "epoch": 0.06896379400814573, "grad_norm": 2.171875, "learning_rate": 9.99333572069957e-06, "loss": 1.0723, "step": 345 }, { "epoch": 0.0691636890632418, "grad_norm": 2.171875, "learning_rate": 9.993281219606537e-06, "loss": 1.1183, "step": 346 }, { "epoch": 0.06936358411833787, "grad_norm": 2.4375, "learning_rate": 9.993226496712488e-06, "loss": 1.1073, "step": 347 }, { "epoch": 0.06956347917343395, "grad_norm": 2.25, "learning_rate": 9.993171552019849e-06, "loss": 1.1318, "step": 348 }, { "epoch": 0.06976337422853002, "grad_norm": 2.234375, "learning_rate": 9.993116385531064e-06, "loss": 1.202, "step": 349 }, { "epoch": 0.06996326928362609, "grad_norm": 2.15625, "learning_rate": 9.993060997248582e-06, "loss": 1.1376, "step": 350 }, { "epoch": 0.07016316433872218, "grad_norm": 2.203125, "learning_rate": 9.993005387174865e-06, "loss": 1.1328, "step": 351 }, { "epoch": 0.07036305939381825, "grad_norm": 2.171875, "learning_rate": 9.99294955531238e-06, "loss": 1.2125, "step": 352 }, { "epoch": 0.07056295444891432, "grad_norm": 2.375, "learning_rate": 9.992893501663613e-06, "loss": 1.2495, "step": 353 }, { "epoch": 0.07076284950401039, "grad_norm": 2.171875, "learning_rate": 9.992837226231046e-06, "loss": 1.0926, "step": 354 }, { "epoch": 0.07096274455910646, "grad_norm": 2.265625, "learning_rate": 9.992780729017184e-06, "loss": 1.1878, "step": 355 }, { "epoch": 0.07116263961420255, "grad_norm": 2.25, "learning_rate": 9.992724010024536e-06, "loss": 1.2096, "step": 356 }, { "epoch": 0.07136253466929862, "grad_norm": 2.28125, "learning_rate": 9.99266706925562e-06, "loss": 1.1596, "step": 357 }, { "epoch": 0.0715624297243947, "grad_norm": 2.421875, "learning_rate": 9.992609906712967e-06, "loss": 1.1814, "step": 358 }, { "epoch": 0.07176232477949077, "grad_norm": 2.265625, "learning_rate": 9.992552522399112e-06, "loss": 1.213, "step": 359 }, { "epoch": 0.07196221983458684, "grad_norm": 2.21875, "learning_rate": 9.99249491631661e-06, "loss": 1.1729, "step": 360 }, { "epoch": 0.07216211488968291, "grad_norm": 2.203125, "learning_rate": 9.992437088468016e-06, "loss": 1.058, "step": 361 }, { "epoch": 0.072362009944779, "grad_norm": 2.15625, "learning_rate": 9.992379038855902e-06, "loss": 1.131, "step": 362 }, { "epoch": 0.07256190499987507, "grad_norm": 2.1875, "learning_rate": 9.992320767482842e-06, "loss": 1.0648, "step": 363 }, { "epoch": 0.07276180005497114, "grad_norm": 2.53125, "learning_rate": 9.992262274351427e-06, "loss": 1.1781, "step": 364 }, { "epoch": 0.07296169511006721, "grad_norm": 2.625, "learning_rate": 9.992203559464256e-06, "loss": 1.1641, "step": 365 }, { "epoch": 0.07316159016516328, "grad_norm": 2.40625, "learning_rate": 9.992144622823933e-06, "loss": 1.144, "step": 366 }, { "epoch": 0.07336148522025937, "grad_norm": 2.203125, "learning_rate": 9.992085464433084e-06, "loss": 1.0778, "step": 367 }, { "epoch": 0.07356138027535544, "grad_norm": 2.1875, "learning_rate": 9.992026084294328e-06, "loss": 1.1035, "step": 368 }, { "epoch": 0.07376127533045151, "grad_norm": 2.25, "learning_rate": 9.991966482410307e-06, "loss": 1.1741, "step": 369 }, { "epoch": 0.07396117038554759, "grad_norm": 2.28125, "learning_rate": 9.99190665878367e-06, "loss": 1.1478, "step": 370 }, { "epoch": 0.07416106544064366, "grad_norm": 2.34375, "learning_rate": 9.99184661341707e-06, "loss": 1.1733, "step": 371 }, { "epoch": 0.07436096049573973, "grad_norm": 2.328125, "learning_rate": 9.991786346313178e-06, "loss": 1.2256, "step": 372 }, { "epoch": 0.07456085555083582, "grad_norm": 2.1875, "learning_rate": 9.991725857474668e-06, "loss": 1.1197, "step": 373 }, { "epoch": 0.07476075060593189, "grad_norm": 2.234375, "learning_rate": 9.99166514690423e-06, "loss": 1.0933, "step": 374 }, { "epoch": 0.07496064566102796, "grad_norm": 2.234375, "learning_rate": 9.99160421460456e-06, "loss": 1.1109, "step": 375 }, { "epoch": 0.07516054071612403, "grad_norm": 2.1875, "learning_rate": 9.991543060578363e-06, "loss": 1.1302, "step": 376 }, { "epoch": 0.0753604357712201, "grad_norm": 2.21875, "learning_rate": 9.991481684828356e-06, "loss": 1.0812, "step": 377 }, { "epoch": 0.07556033082631618, "grad_norm": 2.21875, "learning_rate": 9.991420087357266e-06, "loss": 1.1587, "step": 378 }, { "epoch": 0.07576022588141226, "grad_norm": 2.203125, "learning_rate": 9.991358268167828e-06, "loss": 1.0641, "step": 379 }, { "epoch": 0.07596012093650834, "grad_norm": 2.296875, "learning_rate": 9.99129622726279e-06, "loss": 1.0763, "step": 380 }, { "epoch": 0.07616001599160441, "grad_norm": 2.296875, "learning_rate": 9.991233964644906e-06, "loss": 1.2373, "step": 381 }, { "epoch": 0.07635991104670048, "grad_norm": 2.375, "learning_rate": 9.991171480316944e-06, "loss": 1.2241, "step": 382 }, { "epoch": 0.07655980610179655, "grad_norm": 2.390625, "learning_rate": 9.991108774281676e-06, "loss": 1.2159, "step": 383 }, { "epoch": 0.07675970115689264, "grad_norm": 2.25, "learning_rate": 9.99104584654189e-06, "loss": 1.0728, "step": 384 }, { "epoch": 0.07695959621198871, "grad_norm": 2.078125, "learning_rate": 9.99098269710038e-06, "loss": 1.0239, "step": 385 }, { "epoch": 0.07715949126708478, "grad_norm": 2.125, "learning_rate": 9.990919325959952e-06, "loss": 1.1313, "step": 386 }, { "epoch": 0.07735938632218085, "grad_norm": 2.140625, "learning_rate": 9.990855733123421e-06, "loss": 1.0394, "step": 387 }, { "epoch": 0.07755928137727693, "grad_norm": 2.140625, "learning_rate": 9.99079191859361e-06, "loss": 1.1339, "step": 388 }, { "epoch": 0.077759176432373, "grad_norm": 2.234375, "learning_rate": 9.990727882373357e-06, "loss": 1.1582, "step": 389 }, { "epoch": 0.07795907148746908, "grad_norm": 2.375, "learning_rate": 9.990663624465504e-06, "loss": 1.1049, "step": 390 }, { "epoch": 0.07815896654256516, "grad_norm": 2.34375, "learning_rate": 9.990599144872905e-06, "loss": 1.1503, "step": 391 }, { "epoch": 0.07835886159766123, "grad_norm": 2.078125, "learning_rate": 9.990534443598425e-06, "loss": 1.1675, "step": 392 }, { "epoch": 0.0785587566527573, "grad_norm": 2.109375, "learning_rate": 9.99046952064494e-06, "loss": 1.084, "step": 393 }, { "epoch": 0.07875865170785337, "grad_norm": 2.234375, "learning_rate": 9.99040437601533e-06, "loss": 1.1882, "step": 394 }, { "epoch": 0.07895854676294946, "grad_norm": 2.265625, "learning_rate": 9.99033900971249e-06, "loss": 1.0875, "step": 395 }, { "epoch": 0.07915844181804553, "grad_norm": 2.109375, "learning_rate": 9.990273421739325e-06, "loss": 1.0732, "step": 396 }, { "epoch": 0.0793583368731416, "grad_norm": 2.25, "learning_rate": 9.990207612098748e-06, "loss": 1.2069, "step": 397 }, { "epoch": 0.07955823192823767, "grad_norm": 2.171875, "learning_rate": 9.990141580793682e-06, "loss": 1.1131, "step": 398 }, { "epoch": 0.07975812698333375, "grad_norm": 2.109375, "learning_rate": 9.990075327827058e-06, "loss": 1.0508, "step": 399 }, { "epoch": 0.07995802203842982, "grad_norm": 2.3125, "learning_rate": 9.990008853201823e-06, "loss": 1.1659, "step": 400 }, { "epoch": 0.0801579170935259, "grad_norm": 2.140625, "learning_rate": 9.989942156920926e-06, "loss": 1.1182, "step": 401 }, { "epoch": 0.08035781214862198, "grad_norm": 2.25, "learning_rate": 9.989875238987333e-06, "loss": 1.1216, "step": 402 }, { "epoch": 0.08055770720371805, "grad_norm": 2.265625, "learning_rate": 9.989808099404015e-06, "loss": 1.104, "step": 403 }, { "epoch": 0.08075760225881412, "grad_norm": 2.25, "learning_rate": 9.989740738173953e-06, "loss": 1.1707, "step": 404 }, { "epoch": 0.08095749731391019, "grad_norm": 2.15625, "learning_rate": 9.989673155300141e-06, "loss": 1.1789, "step": 405 }, { "epoch": 0.08115739236900626, "grad_norm": 2.265625, "learning_rate": 9.98960535078558e-06, "loss": 1.2305, "step": 406 }, { "epoch": 0.08135728742410235, "grad_norm": 2.25, "learning_rate": 9.989537324633283e-06, "loss": 1.0805, "step": 407 }, { "epoch": 0.08155718247919842, "grad_norm": 2.28125, "learning_rate": 9.98946907684627e-06, "loss": 1.2271, "step": 408 }, { "epoch": 0.0817570775342945, "grad_norm": 2.265625, "learning_rate": 9.989400607427574e-06, "loss": 1.0816, "step": 409 }, { "epoch": 0.08195697258939057, "grad_norm": 2.25, "learning_rate": 9.989331916380233e-06, "loss": 1.1939, "step": 410 }, { "epoch": 0.08215686764448664, "grad_norm": 2.078125, "learning_rate": 9.989263003707306e-06, "loss": 1.0495, "step": 411 }, { "epoch": 0.08235676269958273, "grad_norm": 1.984375, "learning_rate": 9.989193869411846e-06, "loss": 1.0046, "step": 412 }, { "epoch": 0.0825566577546788, "grad_norm": 2.34375, "learning_rate": 9.989124513496929e-06, "loss": 1.1798, "step": 413 }, { "epoch": 0.08275655280977487, "grad_norm": 2.1875, "learning_rate": 9.989054935965631e-06, "loss": 1.1185, "step": 414 }, { "epoch": 0.08295644786487094, "grad_norm": 2.234375, "learning_rate": 9.98898513682105e-06, "loss": 1.142, "step": 415 }, { "epoch": 0.08315634291996701, "grad_norm": 2.28125, "learning_rate": 9.98891511606628e-06, "loss": 1.2098, "step": 416 }, { "epoch": 0.08335623797506309, "grad_norm": 2.5625, "learning_rate": 9.988844873704433e-06, "loss": 1.1389, "step": 417 }, { "epoch": 0.08355613303015917, "grad_norm": 2.1875, "learning_rate": 9.988774409738628e-06, "loss": 1.1318, "step": 418 }, { "epoch": 0.08375602808525524, "grad_norm": 2.390625, "learning_rate": 9.988703724172e-06, "loss": 1.1519, "step": 419 }, { "epoch": 0.08395592314035132, "grad_norm": 2.28125, "learning_rate": 9.988632817007683e-06, "loss": 1.1853, "step": 420 }, { "epoch": 0.08415581819544739, "grad_norm": 2.109375, "learning_rate": 9.98856168824883e-06, "loss": 1.0061, "step": 421 }, { "epoch": 0.08435571325054346, "grad_norm": 2.078125, "learning_rate": 9.9884903378986e-06, "loss": 1.1319, "step": 422 }, { "epoch": 0.08455560830563955, "grad_norm": 2.3125, "learning_rate": 9.988418765960161e-06, "loss": 1.1209, "step": 423 }, { "epoch": 0.08475550336073562, "grad_norm": 2.125, "learning_rate": 9.988346972436693e-06, "loss": 1.1002, "step": 424 }, { "epoch": 0.08495539841583169, "grad_norm": 2.203125, "learning_rate": 9.988274957331385e-06, "loss": 1.0286, "step": 425 }, { "epoch": 0.08515529347092776, "grad_norm": 2.109375, "learning_rate": 9.988202720647438e-06, "loss": 0.9752, "step": 426 }, { "epoch": 0.08535518852602383, "grad_norm": 2.234375, "learning_rate": 9.988130262388058e-06, "loss": 1.131, "step": 427 }, { "epoch": 0.0855550835811199, "grad_norm": 2.21875, "learning_rate": 9.988057582556465e-06, "loss": 1.1002, "step": 428 }, { "epoch": 0.08575497863621599, "grad_norm": 2.375, "learning_rate": 9.987984681155884e-06, "loss": 1.2404, "step": 429 }, { "epoch": 0.08595487369131206, "grad_norm": 2.25, "learning_rate": 9.98791155818956e-06, "loss": 1.13, "step": 430 }, { "epoch": 0.08615476874640814, "grad_norm": 2.203125, "learning_rate": 9.987838213660736e-06, "loss": 1.1198, "step": 431 }, { "epoch": 0.08635466380150421, "grad_norm": 2.21875, "learning_rate": 9.987764647572671e-06, "loss": 1.1485, "step": 432 }, { "epoch": 0.08655455885660028, "grad_norm": 2.21875, "learning_rate": 9.987690859928633e-06, "loss": 1.1426, "step": 433 }, { "epoch": 0.08675445391169635, "grad_norm": 2.265625, "learning_rate": 9.987616850731899e-06, "loss": 1.1378, "step": 434 }, { "epoch": 0.08695434896679244, "grad_norm": 2.15625, "learning_rate": 9.987542619985758e-06, "loss": 1.2654, "step": 435 }, { "epoch": 0.08715424402188851, "grad_norm": 2.328125, "learning_rate": 9.987468167693507e-06, "loss": 1.1183, "step": 436 }, { "epoch": 0.08735413907698458, "grad_norm": 2.140625, "learning_rate": 9.987393493858453e-06, "loss": 1.1122, "step": 437 }, { "epoch": 0.08755403413208065, "grad_norm": 2.390625, "learning_rate": 9.98731859848391e-06, "loss": 1.1731, "step": 438 }, { "epoch": 0.08775392918717673, "grad_norm": 2.171875, "learning_rate": 9.98724348157321e-06, "loss": 1.0737, "step": 439 }, { "epoch": 0.08795382424227281, "grad_norm": 2.21875, "learning_rate": 9.987168143129687e-06, "loss": 1.1597, "step": 440 }, { "epoch": 0.08815371929736888, "grad_norm": 2.28125, "learning_rate": 9.987092583156688e-06, "loss": 1.1576, "step": 441 }, { "epoch": 0.08835361435246496, "grad_norm": 2.1875, "learning_rate": 9.987016801657568e-06, "loss": 1.0758, "step": 442 }, { "epoch": 0.08855350940756103, "grad_norm": 2.171875, "learning_rate": 9.986940798635694e-06, "loss": 1.1427, "step": 443 }, { "epoch": 0.0887534044626571, "grad_norm": 2.203125, "learning_rate": 9.986864574094443e-06, "loss": 1.1078, "step": 444 }, { "epoch": 0.08895329951775317, "grad_norm": 2.171875, "learning_rate": 9.986788128037202e-06, "loss": 1.1726, "step": 445 }, { "epoch": 0.08915319457284926, "grad_norm": 2.296875, "learning_rate": 9.986711460467362e-06, "loss": 1.158, "step": 446 }, { "epoch": 0.08935308962794533, "grad_norm": 2.203125, "learning_rate": 9.986634571388333e-06, "loss": 1.211, "step": 447 }, { "epoch": 0.0895529846830414, "grad_norm": 2.03125, "learning_rate": 9.986557460803527e-06, "loss": 1.0706, "step": 448 }, { "epoch": 0.08975287973813748, "grad_norm": 2.171875, "learning_rate": 9.986480128716374e-06, "loss": 1.0745, "step": 449 }, { "epoch": 0.08995277479323355, "grad_norm": 2.28125, "learning_rate": 9.986402575130305e-06, "loss": 1.1669, "step": 450 }, { "epoch": 0.09015266984832963, "grad_norm": 2.15625, "learning_rate": 9.986324800048767e-06, "loss": 1.0876, "step": 451 }, { "epoch": 0.0903525649034257, "grad_norm": 2.25, "learning_rate": 9.986246803475213e-06, "loss": 1.2014, "step": 452 }, { "epoch": 0.09055245995852178, "grad_norm": 2.484375, "learning_rate": 9.986168585413108e-06, "loss": 1.2562, "step": 453 }, { "epoch": 0.09075235501361785, "grad_norm": 2.203125, "learning_rate": 9.98609014586593e-06, "loss": 1.1761, "step": 454 }, { "epoch": 0.09095225006871392, "grad_norm": 2.140625, "learning_rate": 9.986011484837157e-06, "loss": 1.1152, "step": 455 }, { "epoch": 0.09115214512381, "grad_norm": 2.0625, "learning_rate": 9.985932602330287e-06, "loss": 1.047, "step": 456 }, { "epoch": 0.09135204017890608, "grad_norm": 2.09375, "learning_rate": 9.985853498348823e-06, "loss": 1.1492, "step": 457 }, { "epoch": 0.09155193523400215, "grad_norm": 2.25, "learning_rate": 9.985774172896281e-06, "loss": 1.1138, "step": 458 }, { "epoch": 0.09175183028909822, "grad_norm": 2.265625, "learning_rate": 9.985694625976181e-06, "loss": 1.106, "step": 459 }, { "epoch": 0.0919517253441943, "grad_norm": 2.3125, "learning_rate": 9.985614857592058e-06, "loss": 1.1019, "step": 460 }, { "epoch": 0.09215162039929037, "grad_norm": 2.40625, "learning_rate": 9.985534867747457e-06, "loss": 1.1848, "step": 461 }, { "epoch": 0.09235151545438644, "grad_norm": 2.171875, "learning_rate": 9.985454656445928e-06, "loss": 1.1093, "step": 462 }, { "epoch": 0.09255141050948253, "grad_norm": 2.21875, "learning_rate": 9.985374223691039e-06, "loss": 1.1284, "step": 463 }, { "epoch": 0.0927513055645786, "grad_norm": 2.3125, "learning_rate": 9.985293569486356e-06, "loss": 1.2787, "step": 464 }, { "epoch": 0.09295120061967467, "grad_norm": 2.171875, "learning_rate": 9.985212693835465e-06, "loss": 1.1167, "step": 465 }, { "epoch": 0.09315109567477074, "grad_norm": 2.0625, "learning_rate": 9.985131596741959e-06, "loss": 1.158, "step": 466 }, { "epoch": 0.09335099072986681, "grad_norm": 2.1875, "learning_rate": 9.985050278209442e-06, "loss": 1.2403, "step": 467 }, { "epoch": 0.0935508857849629, "grad_norm": 2.234375, "learning_rate": 9.984968738241522e-06, "loss": 1.1573, "step": 468 }, { "epoch": 0.09375078084005897, "grad_norm": 2.078125, "learning_rate": 9.984886976841824e-06, "loss": 1.0895, "step": 469 }, { "epoch": 0.09395067589515504, "grad_norm": 2.109375, "learning_rate": 9.98480499401398e-06, "loss": 1.0453, "step": 470 }, { "epoch": 0.09415057095025112, "grad_norm": 2.34375, "learning_rate": 9.984722789761627e-06, "loss": 1.2329, "step": 471 }, { "epoch": 0.09435046600534719, "grad_norm": 2.28125, "learning_rate": 9.984640364088422e-06, "loss": 1.213, "step": 472 }, { "epoch": 0.09455036106044326, "grad_norm": 2.234375, "learning_rate": 9.984557716998026e-06, "loss": 1.0911, "step": 473 }, { "epoch": 0.09475025611553935, "grad_norm": 2.21875, "learning_rate": 9.984474848494107e-06, "loss": 1.0302, "step": 474 }, { "epoch": 0.09495015117063542, "grad_norm": 2.1875, "learning_rate": 9.984391758580347e-06, "loss": 1.0809, "step": 475 }, { "epoch": 0.09515004622573149, "grad_norm": 2.25, "learning_rate": 9.984308447260437e-06, "loss": 1.1552, "step": 476 }, { "epoch": 0.09534994128082756, "grad_norm": 2.25, "learning_rate": 9.98422491453808e-06, "loss": 1.1293, "step": 477 }, { "epoch": 0.09554983633592364, "grad_norm": 2.3125, "learning_rate": 9.984141160416985e-06, "loss": 1.0692, "step": 478 }, { "epoch": 0.09574973139101972, "grad_norm": 2.375, "learning_rate": 9.98405718490087e-06, "loss": 1.2213, "step": 479 }, { "epoch": 0.0959496264461158, "grad_norm": 2.0625, "learning_rate": 9.983972987993468e-06, "loss": 1.1687, "step": 480 }, { "epoch": 0.09614952150121187, "grad_norm": 2.203125, "learning_rate": 9.983888569698519e-06, "loss": 1.1588, "step": 481 }, { "epoch": 0.09634941655630794, "grad_norm": 2.1875, "learning_rate": 9.983803930019771e-06, "loss": 1.1676, "step": 482 }, { "epoch": 0.09654931161140401, "grad_norm": 2.234375, "learning_rate": 9.983719068960985e-06, "loss": 1.1553, "step": 483 }, { "epoch": 0.09674920666650008, "grad_norm": 2.28125, "learning_rate": 9.983633986525932e-06, "loss": 1.2282, "step": 484 }, { "epoch": 0.09694910172159617, "grad_norm": 2.28125, "learning_rate": 9.983548682718388e-06, "loss": 1.17, "step": 485 }, { "epoch": 0.09714899677669224, "grad_norm": 2.359375, "learning_rate": 9.983463157542142e-06, "loss": 1.1835, "step": 486 }, { "epoch": 0.09734889183178831, "grad_norm": 2.140625, "learning_rate": 9.983377411000996e-06, "loss": 1.1484, "step": 487 }, { "epoch": 0.09754878688688438, "grad_norm": 2.25, "learning_rate": 9.983291443098759e-06, "loss": 1.1656, "step": 488 }, { "epoch": 0.09774868194198046, "grad_norm": 2.234375, "learning_rate": 9.983205253839247e-06, "loss": 1.1873, "step": 489 }, { "epoch": 0.09794857699707653, "grad_norm": 2.09375, "learning_rate": 9.98311884322629e-06, "loss": 1.0371, "step": 490 }, { "epoch": 0.09814847205217261, "grad_norm": 2.15625, "learning_rate": 9.983032211263725e-06, "loss": 1.1887, "step": 491 }, { "epoch": 0.09834836710726869, "grad_norm": 2.234375, "learning_rate": 9.982945357955406e-06, "loss": 1.0655, "step": 492 }, { "epoch": 0.09854826216236476, "grad_norm": 2.296875, "learning_rate": 9.982858283305181e-06, "loss": 1.1893, "step": 493 }, { "epoch": 0.09874815721746083, "grad_norm": 2.171875, "learning_rate": 9.982770987316926e-06, "loss": 1.1584, "step": 494 }, { "epoch": 0.0989480522725569, "grad_norm": 2.109375, "learning_rate": 9.982683469994515e-06, "loss": 1.1144, "step": 495 }, { "epoch": 0.09914794732765299, "grad_norm": 2.15625, "learning_rate": 9.982595731341838e-06, "loss": 1.153, "step": 496 }, { "epoch": 0.09934784238274906, "grad_norm": 2.234375, "learning_rate": 9.982507771362789e-06, "loss": 1.0859, "step": 497 }, { "epoch": 0.09954773743784513, "grad_norm": 2.15625, "learning_rate": 9.982419590061277e-06, "loss": 1.1539, "step": 498 }, { "epoch": 0.0997476324929412, "grad_norm": 2.3125, "learning_rate": 9.98233118744122e-06, "loss": 1.1645, "step": 499 }, { "epoch": 0.09994752754803728, "grad_norm": 2.25, "learning_rate": 9.982242563506543e-06, "loss": 1.2078, "step": 500 }, { "epoch": 0.10014742260313335, "grad_norm": 2.203125, "learning_rate": 9.982153718261183e-06, "loss": 1.1374, "step": 501 }, { "epoch": 0.10034731765822943, "grad_norm": 2.203125, "learning_rate": 9.982064651709088e-06, "loss": 1.1488, "step": 502 }, { "epoch": 0.1005472127133255, "grad_norm": 2.3125, "learning_rate": 9.981975363854212e-06, "loss": 1.11, "step": 503 }, { "epoch": 0.10074710776842158, "grad_norm": 2.234375, "learning_rate": 9.981885854700524e-06, "loss": 1.1718, "step": 504 }, { "epoch": 0.10094700282351765, "grad_norm": 2.28125, "learning_rate": 9.981796124251999e-06, "loss": 1.1763, "step": 505 }, { "epoch": 0.10114689787861372, "grad_norm": 2.109375, "learning_rate": 9.98170617251262e-06, "loss": 1.103, "step": 506 }, { "epoch": 0.10134679293370981, "grad_norm": 2.421875, "learning_rate": 9.981615999486386e-06, "loss": 1.1321, "step": 507 }, { "epoch": 0.10154668798880588, "grad_norm": 2.265625, "learning_rate": 9.981525605177301e-06, "loss": 1.1236, "step": 508 }, { "epoch": 0.10174658304390195, "grad_norm": 2.125, "learning_rate": 9.981434989589382e-06, "loss": 1.0633, "step": 509 }, { "epoch": 0.10194647809899803, "grad_norm": 2.25, "learning_rate": 9.981344152726651e-06, "loss": 1.2384, "step": 510 }, { "epoch": 0.1021463731540941, "grad_norm": 2.21875, "learning_rate": 9.981253094593147e-06, "loss": 1.0622, "step": 511 }, { "epoch": 0.10234626820919017, "grad_norm": 2.34375, "learning_rate": 9.98116181519291e-06, "loss": 1.1921, "step": 512 }, { "epoch": 0.10254616326428626, "grad_norm": 2.234375, "learning_rate": 9.981070314529998e-06, "loss": 1.0312, "step": 513 }, { "epoch": 0.10274605831938233, "grad_norm": 2.40625, "learning_rate": 9.980978592608475e-06, "loss": 1.1928, "step": 514 }, { "epoch": 0.1029459533744784, "grad_norm": 2.3125, "learning_rate": 9.980886649432413e-06, "loss": 1.2021, "step": 515 }, { "epoch": 0.10314584842957447, "grad_norm": 2.296875, "learning_rate": 9.9807944850059e-06, "loss": 1.1377, "step": 516 }, { "epoch": 0.10334574348467054, "grad_norm": 2.203125, "learning_rate": 9.980702099333029e-06, "loss": 1.0915, "step": 517 }, { "epoch": 0.10354563853976662, "grad_norm": 2.234375, "learning_rate": 9.980609492417901e-06, "loss": 1.1397, "step": 518 }, { "epoch": 0.1037455335948627, "grad_norm": 2.15625, "learning_rate": 9.980516664264632e-06, "loss": 1.0924, "step": 519 }, { "epoch": 0.10394542864995877, "grad_norm": 2.234375, "learning_rate": 9.980423614877344e-06, "loss": 1.2099, "step": 520 }, { "epoch": 0.10414532370505485, "grad_norm": 2.21875, "learning_rate": 9.980330344260172e-06, "loss": 1.124, "step": 521 }, { "epoch": 0.10434521876015092, "grad_norm": 2.28125, "learning_rate": 9.980236852417256e-06, "loss": 1.1106, "step": 522 }, { "epoch": 0.10454511381524699, "grad_norm": 2.203125, "learning_rate": 9.980143139352753e-06, "loss": 1.1668, "step": 523 }, { "epoch": 0.10474500887034308, "grad_norm": 2.078125, "learning_rate": 9.980049205070824e-06, "loss": 1.0663, "step": 524 }, { "epoch": 0.10494490392543915, "grad_norm": 2.203125, "learning_rate": 9.979955049575639e-06, "loss": 1.1818, "step": 525 }, { "epoch": 0.10514479898053522, "grad_norm": 2.25, "learning_rate": 9.979860672871384e-06, "loss": 1.1244, "step": 526 }, { "epoch": 0.10534469403563129, "grad_norm": 2.21875, "learning_rate": 9.979766074962249e-06, "loss": 1.1311, "step": 527 }, { "epoch": 0.10554458909072736, "grad_norm": 2.1875, "learning_rate": 9.979671255852437e-06, "loss": 1.1327, "step": 528 }, { "epoch": 0.10574448414582344, "grad_norm": 2.25, "learning_rate": 9.979576215546161e-06, "loss": 1.1418, "step": 529 }, { "epoch": 0.10594437920091952, "grad_norm": 2.125, "learning_rate": 9.97948095404764e-06, "loss": 1.1011, "step": 530 }, { "epoch": 0.1061442742560156, "grad_norm": 2.21875, "learning_rate": 9.979385471361108e-06, "loss": 1.0591, "step": 531 }, { "epoch": 0.10634416931111167, "grad_norm": 2.3125, "learning_rate": 9.979289767490803e-06, "loss": 1.1741, "step": 532 }, { "epoch": 0.10654406436620774, "grad_norm": 2.171875, "learning_rate": 9.97919384244098e-06, "loss": 1.1312, "step": 533 }, { "epoch": 0.10674395942130381, "grad_norm": 2.265625, "learning_rate": 9.979097696215898e-06, "loss": 1.1005, "step": 534 }, { "epoch": 0.1069438544763999, "grad_norm": 2.1875, "learning_rate": 9.979001328819828e-06, "loss": 1.0767, "step": 535 }, { "epoch": 0.10714374953149597, "grad_norm": 2.421875, "learning_rate": 9.978904740257051e-06, "loss": 1.2502, "step": 536 }, { "epoch": 0.10734364458659204, "grad_norm": 2.15625, "learning_rate": 9.978807930531857e-06, "loss": 1.0616, "step": 537 }, { "epoch": 0.10754353964168811, "grad_norm": 2.203125, "learning_rate": 9.978710899648547e-06, "loss": 1.1741, "step": 538 }, { "epoch": 0.10774343469678418, "grad_norm": 2.28125, "learning_rate": 9.978613647611429e-06, "loss": 1.1575, "step": 539 }, { "epoch": 0.10794332975188026, "grad_norm": 2.3125, "learning_rate": 9.978516174424826e-06, "loss": 1.2128, "step": 540 }, { "epoch": 0.10814322480697634, "grad_norm": 1.9765625, "learning_rate": 9.978418480093065e-06, "loss": 1.0435, "step": 541 }, { "epoch": 0.10834311986207242, "grad_norm": 2.28125, "learning_rate": 9.97832056462049e-06, "loss": 1.1376, "step": 542 }, { "epoch": 0.10854301491716849, "grad_norm": 2.1875, "learning_rate": 9.978222428011444e-06, "loss": 1.2298, "step": 543 }, { "epoch": 0.10874290997226456, "grad_norm": 2.21875, "learning_rate": 9.97812407027029e-06, "loss": 1.168, "step": 544 }, { "epoch": 0.10894280502736063, "grad_norm": 2.15625, "learning_rate": 9.978025491401397e-06, "loss": 1.1013, "step": 545 }, { "epoch": 0.1091427000824567, "grad_norm": 2.234375, "learning_rate": 9.977926691409143e-06, "loss": 1.194, "step": 546 }, { "epoch": 0.10934259513755279, "grad_norm": 2.171875, "learning_rate": 9.977827670297917e-06, "loss": 1.0775, "step": 547 }, { "epoch": 0.10954249019264886, "grad_norm": 2.140625, "learning_rate": 9.977728428072118e-06, "loss": 1.1678, "step": 548 }, { "epoch": 0.10974238524774493, "grad_norm": 2.171875, "learning_rate": 9.977628964736153e-06, "loss": 1.0741, "step": 549 }, { "epoch": 0.109942280302841, "grad_norm": 2.3125, "learning_rate": 9.977529280294442e-06, "loss": 1.2779, "step": 550 }, { "epoch": 0.11014217535793708, "grad_norm": 2.3125, "learning_rate": 9.977429374751411e-06, "loss": 1.2253, "step": 551 }, { "epoch": 0.11034207041303316, "grad_norm": 2.09375, "learning_rate": 9.9773292481115e-06, "loss": 1.065, "step": 552 }, { "epoch": 0.11054196546812924, "grad_norm": 2.125, "learning_rate": 9.977228900379155e-06, "loss": 1.154, "step": 553 }, { "epoch": 0.11074186052322531, "grad_norm": 2.359375, "learning_rate": 9.977128331558834e-06, "loss": 1.1185, "step": 554 }, { "epoch": 0.11094175557832138, "grad_norm": 2.359375, "learning_rate": 9.977027541655003e-06, "loss": 1.1429, "step": 555 }, { "epoch": 0.11114165063341745, "grad_norm": 2.25, "learning_rate": 9.97692653067214e-06, "loss": 1.1406, "step": 556 }, { "epoch": 0.11134154568851352, "grad_norm": 2.234375, "learning_rate": 9.976825298614734e-06, "loss": 1.1413, "step": 557 }, { "epoch": 0.11154144074360961, "grad_norm": 2.125, "learning_rate": 9.976723845487278e-06, "loss": 1.0157, "step": 558 }, { "epoch": 0.11174133579870568, "grad_norm": 2.40625, "learning_rate": 9.97662217129428e-06, "loss": 1.2004, "step": 559 }, { "epoch": 0.11194123085380175, "grad_norm": 2.3125, "learning_rate": 9.97652027604026e-06, "loss": 1.159, "step": 560 }, { "epoch": 0.11214112590889783, "grad_norm": 2.171875, "learning_rate": 9.976418159729737e-06, "loss": 1.0831, "step": 561 }, { "epoch": 0.1123410209639939, "grad_norm": 2.140625, "learning_rate": 9.976315822367254e-06, "loss": 1.1762, "step": 562 }, { "epoch": 0.11254091601908998, "grad_norm": 2.203125, "learning_rate": 9.97621326395735e-06, "loss": 1.1847, "step": 563 }, { "epoch": 0.11274081107418606, "grad_norm": 2.15625, "learning_rate": 9.976110484504587e-06, "loss": 1.0756, "step": 564 }, { "epoch": 0.11294070612928213, "grad_norm": 2.234375, "learning_rate": 9.976007484013528e-06, "loss": 1.0894, "step": 565 }, { "epoch": 0.1131406011843782, "grad_norm": 2.1875, "learning_rate": 9.975904262488747e-06, "loss": 1.0933, "step": 566 }, { "epoch": 0.11334049623947427, "grad_norm": 2.21875, "learning_rate": 9.97580081993483e-06, "loss": 1.0876, "step": 567 }, { "epoch": 0.11354039129457034, "grad_norm": 2.28125, "learning_rate": 9.975697156356372e-06, "loss": 1.0503, "step": 568 }, { "epoch": 0.11374028634966643, "grad_norm": 2.140625, "learning_rate": 9.975593271757977e-06, "loss": 1.0898, "step": 569 }, { "epoch": 0.1139401814047625, "grad_norm": 2.171875, "learning_rate": 9.975489166144262e-06, "loss": 1.1363, "step": 570 }, { "epoch": 0.11414007645985857, "grad_norm": 2.15625, "learning_rate": 9.975384839519849e-06, "loss": 1.1002, "step": 571 }, { "epoch": 0.11433997151495465, "grad_norm": 2.140625, "learning_rate": 9.975280291889373e-06, "loss": 1.0651, "step": 572 }, { "epoch": 0.11453986657005072, "grad_norm": 2.234375, "learning_rate": 9.975175523257477e-06, "loss": 1.0237, "step": 573 }, { "epoch": 0.11473976162514679, "grad_norm": 2.28125, "learning_rate": 9.975070533628817e-06, "loss": 1.072, "step": 574 }, { "epoch": 0.11493965668024288, "grad_norm": 2.265625, "learning_rate": 9.974965323008055e-06, "loss": 1.0854, "step": 575 }, { "epoch": 0.11513955173533895, "grad_norm": 2.4375, "learning_rate": 9.974859891399863e-06, "loss": 1.1208, "step": 576 }, { "epoch": 0.11533944679043502, "grad_norm": 2.1875, "learning_rate": 9.974754238808927e-06, "loss": 1.1068, "step": 577 }, { "epoch": 0.1155393418455311, "grad_norm": 2.125, "learning_rate": 9.974648365239938e-06, "loss": 1.0612, "step": 578 }, { "epoch": 0.11573923690062717, "grad_norm": 2.21875, "learning_rate": 9.974542270697602e-06, "loss": 1.1479, "step": 579 }, { "epoch": 0.11593913195572325, "grad_norm": 2.125, "learning_rate": 9.974435955186628e-06, "loss": 1.1138, "step": 580 }, { "epoch": 0.11613902701081932, "grad_norm": 2.234375, "learning_rate": 9.974329418711742e-06, "loss": 1.1786, "step": 581 }, { "epoch": 0.1163389220659154, "grad_norm": 2.21875, "learning_rate": 9.974222661277672e-06, "loss": 1.1402, "step": 582 }, { "epoch": 0.11653881712101147, "grad_norm": 2.265625, "learning_rate": 9.974115682889164e-06, "loss": 1.0779, "step": 583 }, { "epoch": 0.11673871217610754, "grad_norm": 2.203125, "learning_rate": 9.97400848355097e-06, "loss": 1.1407, "step": 584 }, { "epoch": 0.11693860723120361, "grad_norm": 2.515625, "learning_rate": 9.973901063267848e-06, "loss": 1.1509, "step": 585 }, { "epoch": 0.1171385022862997, "grad_norm": 2.234375, "learning_rate": 9.973793422044573e-06, "loss": 1.1145, "step": 586 }, { "epoch": 0.11733839734139577, "grad_norm": 2.3125, "learning_rate": 9.973685559885927e-06, "loss": 1.1153, "step": 587 }, { "epoch": 0.11753829239649184, "grad_norm": 2.25, "learning_rate": 9.973577476796697e-06, "loss": 1.1684, "step": 588 }, { "epoch": 0.11773818745158791, "grad_norm": 2.234375, "learning_rate": 9.973469172781688e-06, "loss": 1.1161, "step": 589 }, { "epoch": 0.11793808250668399, "grad_norm": 2.265625, "learning_rate": 9.973360647845708e-06, "loss": 1.1282, "step": 590 }, { "epoch": 0.11813797756178007, "grad_norm": 2.203125, "learning_rate": 9.973251901993582e-06, "loss": 1.0917, "step": 591 }, { "epoch": 0.11833787261687614, "grad_norm": 2.296875, "learning_rate": 9.973142935230135e-06, "loss": 1.222, "step": 592 }, { "epoch": 0.11853776767197222, "grad_norm": 2.25, "learning_rate": 9.97303374756021e-06, "loss": 1.1674, "step": 593 }, { "epoch": 0.11873766272706829, "grad_norm": 2.171875, "learning_rate": 9.972924338988658e-06, "loss": 1.095, "step": 594 }, { "epoch": 0.11893755778216436, "grad_norm": 2.140625, "learning_rate": 9.972814709520339e-06, "loss": 1.0656, "step": 595 }, { "epoch": 0.11913745283726043, "grad_norm": 2.265625, "learning_rate": 9.97270485916012e-06, "loss": 1.0735, "step": 596 }, { "epoch": 0.11933734789235652, "grad_norm": 2.109375, "learning_rate": 9.972594787912884e-06, "loss": 0.9428, "step": 597 }, { "epoch": 0.11953724294745259, "grad_norm": 2.046875, "learning_rate": 9.972484495783518e-06, "loss": 1.092, "step": 598 }, { "epoch": 0.11973713800254866, "grad_norm": 2.125, "learning_rate": 9.972373982776922e-06, "loss": 1.0373, "step": 599 }, { "epoch": 0.11993703305764473, "grad_norm": 2.15625, "learning_rate": 9.972263248898004e-06, "loss": 1.1532, "step": 600 }, { "epoch": 0.1201369281127408, "grad_norm": 2.234375, "learning_rate": 9.972152294151682e-06, "loss": 1.0678, "step": 601 }, { "epoch": 0.12033682316783688, "grad_norm": 2.171875, "learning_rate": 9.972041118542889e-06, "loss": 1.0426, "step": 602 }, { "epoch": 0.12053671822293296, "grad_norm": 2.171875, "learning_rate": 9.97192972207656e-06, "loss": 1.1312, "step": 603 }, { "epoch": 0.12073661327802904, "grad_norm": 2.125, "learning_rate": 9.971818104757643e-06, "loss": 1.0671, "step": 604 }, { "epoch": 0.12093650833312511, "grad_norm": 2.21875, "learning_rate": 9.971706266591097e-06, "loss": 1.1811, "step": 605 }, { "epoch": 0.12113640338822118, "grad_norm": 2.1875, "learning_rate": 9.97159420758189e-06, "loss": 1.1151, "step": 606 }, { "epoch": 0.12133629844331725, "grad_norm": 2.0625, "learning_rate": 9.971481927734998e-06, "loss": 1.0453, "step": 607 }, { "epoch": 0.12153619349841334, "grad_norm": 2.125, "learning_rate": 9.97136942705541e-06, "loss": 1.0407, "step": 608 }, { "epoch": 0.12173608855350941, "grad_norm": 2.078125, "learning_rate": 9.971256705548125e-06, "loss": 1.0148, "step": 609 }, { "epoch": 0.12193598360860548, "grad_norm": 2.21875, "learning_rate": 9.971143763218145e-06, "loss": 1.1297, "step": 610 }, { "epoch": 0.12213587866370156, "grad_norm": 2.078125, "learning_rate": 9.971030600070493e-06, "loss": 1.0892, "step": 611 }, { "epoch": 0.12233577371879763, "grad_norm": 2.25, "learning_rate": 9.970917216110192e-06, "loss": 1.1809, "step": 612 }, { "epoch": 0.1225356687738937, "grad_norm": 2.1875, "learning_rate": 9.970803611342278e-06, "loss": 1.1111, "step": 613 }, { "epoch": 0.12273556382898979, "grad_norm": 2.203125, "learning_rate": 9.970689785771798e-06, "loss": 1.0206, "step": 614 }, { "epoch": 0.12293545888408586, "grad_norm": 2.359375, "learning_rate": 9.97057573940381e-06, "loss": 1.2189, "step": 615 }, { "epoch": 0.12313535393918193, "grad_norm": 2.234375, "learning_rate": 9.970461472243378e-06, "loss": 1.114, "step": 616 }, { "epoch": 0.123335248994278, "grad_norm": 2.09375, "learning_rate": 9.97034698429558e-06, "loss": 1.0699, "step": 617 }, { "epoch": 0.12353514404937407, "grad_norm": 2.109375, "learning_rate": 9.970232275565497e-06, "loss": 1.1188, "step": 618 }, { "epoch": 0.12373503910447016, "grad_norm": 2.234375, "learning_rate": 9.970117346058229e-06, "loss": 1.138, "step": 619 }, { "epoch": 0.12393493415956623, "grad_norm": 2.265625, "learning_rate": 9.970002195778879e-06, "loss": 1.2651, "step": 620 }, { "epoch": 0.1241348292146623, "grad_norm": 2.265625, "learning_rate": 9.969886824732561e-06, "loss": 1.1851, "step": 621 }, { "epoch": 0.12433472426975838, "grad_norm": 2.21875, "learning_rate": 9.969771232924404e-06, "loss": 1.0908, "step": 622 }, { "epoch": 0.12453461932485445, "grad_norm": 2.21875, "learning_rate": 9.969655420359537e-06, "loss": 1.0961, "step": 623 }, { "epoch": 0.12473451437995052, "grad_norm": 2.21875, "learning_rate": 9.969539387043106e-06, "loss": 1.0848, "step": 624 }, { "epoch": 0.1249344094350466, "grad_norm": 2.125, "learning_rate": 9.969423132980269e-06, "loss": 1.1229, "step": 625 }, { "epoch": 0.12513430449014268, "grad_norm": 2.140625, "learning_rate": 9.969306658176185e-06, "loss": 1.1251, "step": 626 }, { "epoch": 0.12533419954523875, "grad_norm": 2.140625, "learning_rate": 9.969189962636032e-06, "loss": 1.0888, "step": 627 }, { "epoch": 0.12553409460033482, "grad_norm": 2.265625, "learning_rate": 9.96907304636499e-06, "loss": 1.1758, "step": 628 }, { "epoch": 0.1257339896554309, "grad_norm": 2.21875, "learning_rate": 9.968955909368256e-06, "loss": 1.2216, "step": 629 }, { "epoch": 0.12593388471052697, "grad_norm": 2.15625, "learning_rate": 9.96883855165103e-06, "loss": 1.1126, "step": 630 }, { "epoch": 0.12613377976562304, "grad_norm": 2.234375, "learning_rate": 9.968720973218525e-06, "loss": 1.0975, "step": 631 }, { "epoch": 0.1263336748207191, "grad_norm": 2.328125, "learning_rate": 9.968603174075967e-06, "loss": 1.1615, "step": 632 }, { "epoch": 0.1265335698758152, "grad_norm": 2.1875, "learning_rate": 9.968485154228584e-06, "loss": 1.0805, "step": 633 }, { "epoch": 0.12673346493091128, "grad_norm": 2.390625, "learning_rate": 9.968366913681624e-06, "loss": 1.0741, "step": 634 }, { "epoch": 0.12693335998600735, "grad_norm": 2.34375, "learning_rate": 9.968248452440335e-06, "loss": 1.1183, "step": 635 }, { "epoch": 0.12713325504110343, "grad_norm": 2.140625, "learning_rate": 9.96812977050998e-06, "loss": 1.069, "step": 636 }, { "epoch": 0.1273331500961995, "grad_norm": 2.125, "learning_rate": 9.968010867895832e-06, "loss": 1.1027, "step": 637 }, { "epoch": 0.12753304515129557, "grad_norm": 2.265625, "learning_rate": 9.967891744603173e-06, "loss": 1.1573, "step": 638 }, { "epoch": 0.12773294020639164, "grad_norm": 2.28125, "learning_rate": 9.967772400637292e-06, "loss": 1.1458, "step": 639 }, { "epoch": 0.12793283526148772, "grad_norm": 2.21875, "learning_rate": 9.96765283600349e-06, "loss": 1.1447, "step": 640 }, { "epoch": 0.1281327303165838, "grad_norm": 2.296875, "learning_rate": 9.967533050707081e-06, "loss": 1.1196, "step": 641 }, { "epoch": 0.12833262537167986, "grad_norm": 2.171875, "learning_rate": 9.967413044753385e-06, "loss": 1.1622, "step": 642 }, { "epoch": 0.12853252042677593, "grad_norm": 2.15625, "learning_rate": 9.967292818147731e-06, "loss": 1.1221, "step": 643 }, { "epoch": 0.128732415481872, "grad_norm": 2.109375, "learning_rate": 9.967172370895462e-06, "loss": 1.0577, "step": 644 }, { "epoch": 0.1289323105369681, "grad_norm": 2.125, "learning_rate": 9.967051703001926e-06, "loss": 0.9921, "step": 645 }, { "epoch": 0.12913220559206418, "grad_norm": 2.265625, "learning_rate": 9.966930814472484e-06, "loss": 1.2149, "step": 646 }, { "epoch": 0.12933210064716025, "grad_norm": 2.28125, "learning_rate": 9.966809705312506e-06, "loss": 1.1856, "step": 647 }, { "epoch": 0.12953199570225632, "grad_norm": 2.171875, "learning_rate": 9.96668837552737e-06, "loss": 1.1351, "step": 648 }, { "epoch": 0.1297318907573524, "grad_norm": 2.171875, "learning_rate": 9.966566825122467e-06, "loss": 1.059, "step": 649 }, { "epoch": 0.12993178581244846, "grad_norm": 2.390625, "learning_rate": 9.966445054103198e-06, "loss": 1.1092, "step": 650 }, { "epoch": 0.13013168086754454, "grad_norm": 2.296875, "learning_rate": 9.96632306247497e-06, "loss": 1.126, "step": 651 }, { "epoch": 0.1303315759226406, "grad_norm": 2.1875, "learning_rate": 9.9662008502432e-06, "loss": 1.1809, "step": 652 }, { "epoch": 0.13053147097773668, "grad_norm": 2.15625, "learning_rate": 9.96607841741332e-06, "loss": 1.1616, "step": 653 }, { "epoch": 0.13073136603283275, "grad_norm": 2.265625, "learning_rate": 9.965955763990765e-06, "loss": 1.0585, "step": 654 }, { "epoch": 0.13093126108792882, "grad_norm": 2.203125, "learning_rate": 9.965832889980987e-06, "loss": 1.0355, "step": 655 }, { "epoch": 0.13113115614302492, "grad_norm": 2.25, "learning_rate": 9.965709795389441e-06, "loss": 1.2128, "step": 656 }, { "epoch": 0.131331051198121, "grad_norm": 2.265625, "learning_rate": 9.965586480221599e-06, "loss": 1.1607, "step": 657 }, { "epoch": 0.13153094625321707, "grad_norm": 2.46875, "learning_rate": 9.965462944482935e-06, "loss": 1.2712, "step": 658 }, { "epoch": 0.13173084130831314, "grad_norm": 2.28125, "learning_rate": 9.965339188178936e-06, "loss": 1.143, "step": 659 }, { "epoch": 0.1319307363634092, "grad_norm": 2.296875, "learning_rate": 9.965215211315103e-06, "loss": 1.1531, "step": 660 }, { "epoch": 0.13213063141850528, "grad_norm": 2.15625, "learning_rate": 9.965091013896937e-06, "loss": 1.0669, "step": 661 }, { "epoch": 0.13233052647360136, "grad_norm": 2.125, "learning_rate": 9.964966595929961e-06, "loss": 1.0308, "step": 662 }, { "epoch": 0.13253042152869743, "grad_norm": 2.265625, "learning_rate": 9.9648419574197e-06, "loss": 1.1347, "step": 663 }, { "epoch": 0.1327303165837935, "grad_norm": 2.078125, "learning_rate": 9.964717098371687e-06, "loss": 1.1258, "step": 664 }, { "epoch": 0.13293021163888957, "grad_norm": 2.125, "learning_rate": 9.964592018791473e-06, "loss": 1.2204, "step": 665 }, { "epoch": 0.13313010669398564, "grad_norm": 2.078125, "learning_rate": 9.964466718684609e-06, "loss": 1.1216, "step": 666 }, { "epoch": 0.13333000174908174, "grad_norm": 2.1875, "learning_rate": 9.964341198056665e-06, "loss": 1.1338, "step": 667 }, { "epoch": 0.13352989680417782, "grad_norm": 2.21875, "learning_rate": 9.964215456913215e-06, "loss": 1.1389, "step": 668 }, { "epoch": 0.1337297918592739, "grad_norm": 2.25, "learning_rate": 9.964089495259846e-06, "loss": 1.2297, "step": 669 }, { "epoch": 0.13392968691436996, "grad_norm": 2.1875, "learning_rate": 9.96396331310215e-06, "loss": 1.1715, "step": 670 }, { "epoch": 0.13412958196946603, "grad_norm": 2.046875, "learning_rate": 9.963836910445735e-06, "loss": 1.0458, "step": 671 }, { "epoch": 0.1343294770245621, "grad_norm": 2.25, "learning_rate": 9.963710287296213e-06, "loss": 1.1225, "step": 672 }, { "epoch": 0.13452937207965818, "grad_norm": 2.203125, "learning_rate": 9.963583443659213e-06, "loss": 1.1095, "step": 673 }, { "epoch": 0.13472926713475425, "grad_norm": 2.140625, "learning_rate": 9.963456379540364e-06, "loss": 1.1456, "step": 674 }, { "epoch": 0.13492916218985032, "grad_norm": 2.1875, "learning_rate": 9.963329094945313e-06, "loss": 1.1126, "step": 675 }, { "epoch": 0.1351290572449464, "grad_norm": 2.234375, "learning_rate": 9.963201589879715e-06, "loss": 1.0257, "step": 676 }, { "epoch": 0.13532895230004247, "grad_norm": 2.09375, "learning_rate": 9.963073864349232e-06, "loss": 1.0276, "step": 677 }, { "epoch": 0.13552884735513857, "grad_norm": 2.21875, "learning_rate": 9.962945918359537e-06, "loss": 1.031, "step": 678 }, { "epoch": 0.13572874241023464, "grad_norm": 2.125, "learning_rate": 9.962817751916316e-06, "loss": 1.0606, "step": 679 }, { "epoch": 0.1359286374653307, "grad_norm": 2.078125, "learning_rate": 9.962689365025259e-06, "loss": 1.0507, "step": 680 }, { "epoch": 0.13612853252042678, "grad_norm": 2.296875, "learning_rate": 9.962560757692072e-06, "loss": 1.2604, "step": 681 }, { "epoch": 0.13632842757552285, "grad_norm": 2.125, "learning_rate": 9.962431929922464e-06, "loss": 1.1347, "step": 682 }, { "epoch": 0.13652832263061893, "grad_norm": 2.171875, "learning_rate": 9.962302881722162e-06, "loss": 1.0511, "step": 683 }, { "epoch": 0.136728217685715, "grad_norm": 2.34375, "learning_rate": 9.962173613096895e-06, "loss": 1.0503, "step": 684 }, { "epoch": 0.13692811274081107, "grad_norm": 2.140625, "learning_rate": 9.962044124052406e-06, "loss": 1.0673, "step": 685 }, { "epoch": 0.13712800779590714, "grad_norm": 2.34375, "learning_rate": 9.961914414594447e-06, "loss": 1.2253, "step": 686 }, { "epoch": 0.13732790285100321, "grad_norm": 2.15625, "learning_rate": 9.96178448472878e-06, "loss": 1.0849, "step": 687 }, { "epoch": 0.13752779790609929, "grad_norm": 2.0625, "learning_rate": 9.961654334461175e-06, "loss": 1.0873, "step": 688 }, { "epoch": 0.13772769296119539, "grad_norm": 2.078125, "learning_rate": 9.961523963797415e-06, "loss": 1.1177, "step": 689 }, { "epoch": 0.13792758801629146, "grad_norm": 2.25, "learning_rate": 9.961393372743291e-06, "loss": 1.1586, "step": 690 }, { "epoch": 0.13812748307138753, "grad_norm": 2.296875, "learning_rate": 9.961262561304604e-06, "loss": 1.1439, "step": 691 }, { "epoch": 0.1383273781264836, "grad_norm": 2.296875, "learning_rate": 9.961131529487161e-06, "loss": 1.143, "step": 692 }, { "epoch": 0.13852727318157967, "grad_norm": 2.203125, "learning_rate": 9.961000277296788e-06, "loss": 1.121, "step": 693 }, { "epoch": 0.13872716823667575, "grad_norm": 2.203125, "learning_rate": 9.960868804739312e-06, "loss": 1.1314, "step": 694 }, { "epoch": 0.13892706329177182, "grad_norm": 2.25, "learning_rate": 9.960737111820572e-06, "loss": 1.1114, "step": 695 }, { "epoch": 0.1391269583468679, "grad_norm": 2.28125, "learning_rate": 9.96060519854642e-06, "loss": 1.22, "step": 696 }, { "epoch": 0.13932685340196396, "grad_norm": 2.28125, "learning_rate": 9.960473064922716e-06, "loss": 1.2051, "step": 697 }, { "epoch": 0.13952674845706003, "grad_norm": 2.15625, "learning_rate": 9.960340710955327e-06, "loss": 1.0331, "step": 698 }, { "epoch": 0.1397266435121561, "grad_norm": 2.21875, "learning_rate": 9.960208136650137e-06, "loss": 1.1898, "step": 699 }, { "epoch": 0.13992653856725218, "grad_norm": 2.265625, "learning_rate": 9.960075342013027e-06, "loss": 1.1642, "step": 700 }, { "epoch": 0.14012643362234828, "grad_norm": 2.46875, "learning_rate": 9.959942327049901e-06, "loss": 1.1635, "step": 701 }, { "epoch": 0.14032632867744435, "grad_norm": 2.21875, "learning_rate": 9.959809091766667e-06, "loss": 1.1644, "step": 702 }, { "epoch": 0.14052622373254042, "grad_norm": 2.234375, "learning_rate": 9.959675636169242e-06, "loss": 1.1439, "step": 703 }, { "epoch": 0.1407261187876365, "grad_norm": 2.1875, "learning_rate": 9.959541960263557e-06, "loss": 1.0681, "step": 704 }, { "epoch": 0.14092601384273257, "grad_norm": 2.15625, "learning_rate": 9.959408064055547e-06, "loss": 1.1353, "step": 705 }, { "epoch": 0.14112590889782864, "grad_norm": 2.109375, "learning_rate": 9.95927394755116e-06, "loss": 1.1704, "step": 706 }, { "epoch": 0.1413258039529247, "grad_norm": 2.234375, "learning_rate": 9.959139610756353e-06, "loss": 1.2734, "step": 707 }, { "epoch": 0.14152569900802078, "grad_norm": 2.171875, "learning_rate": 9.959005053677096e-06, "loss": 1.1302, "step": 708 }, { "epoch": 0.14172559406311686, "grad_norm": 2.21875, "learning_rate": 9.958870276319364e-06, "loss": 1.1517, "step": 709 }, { "epoch": 0.14192548911821293, "grad_norm": 2.03125, "learning_rate": 9.958735278689143e-06, "loss": 1.0484, "step": 710 }, { "epoch": 0.142125384173309, "grad_norm": 2.3125, "learning_rate": 9.958600060792429e-06, "loss": 1.1174, "step": 711 }, { "epoch": 0.1423252792284051, "grad_norm": 2.203125, "learning_rate": 9.958464622635233e-06, "loss": 1.1346, "step": 712 }, { "epoch": 0.14252517428350117, "grad_norm": 2.171875, "learning_rate": 9.958328964223566e-06, "loss": 1.1224, "step": 713 }, { "epoch": 0.14272506933859724, "grad_norm": 2.234375, "learning_rate": 9.958193085563456e-06, "loss": 1.1046, "step": 714 }, { "epoch": 0.14292496439369332, "grad_norm": 2.1875, "learning_rate": 9.95805698666094e-06, "loss": 1.0429, "step": 715 }, { "epoch": 0.1431248594487894, "grad_norm": 2.25, "learning_rate": 9.957920667522063e-06, "loss": 1.2122, "step": 716 }, { "epoch": 0.14332475450388546, "grad_norm": 2.15625, "learning_rate": 9.957784128152877e-06, "loss": 1.1542, "step": 717 }, { "epoch": 0.14352464955898153, "grad_norm": 2.21875, "learning_rate": 9.957647368559451e-06, "loss": 1.1105, "step": 718 }, { "epoch": 0.1437245446140776, "grad_norm": 2.203125, "learning_rate": 9.957510388747858e-06, "loss": 1.2298, "step": 719 }, { "epoch": 0.14392443966917368, "grad_norm": 2.125, "learning_rate": 9.957373188724184e-06, "loss": 1.0803, "step": 720 }, { "epoch": 0.14412433472426975, "grad_norm": 2.109375, "learning_rate": 9.95723576849452e-06, "loss": 1.0925, "step": 721 }, { "epoch": 0.14432422977936582, "grad_norm": 2.3125, "learning_rate": 9.957098128064974e-06, "loss": 1.1767, "step": 722 }, { "epoch": 0.14452412483446192, "grad_norm": 2.15625, "learning_rate": 9.95696026744166e-06, "loss": 1.0755, "step": 723 }, { "epoch": 0.144724019889558, "grad_norm": 2.171875, "learning_rate": 9.956822186630697e-06, "loss": 1.1333, "step": 724 }, { "epoch": 0.14492391494465406, "grad_norm": 2.234375, "learning_rate": 9.956683885638225e-06, "loss": 1.1836, "step": 725 }, { "epoch": 0.14512380999975014, "grad_norm": 2.234375, "learning_rate": 9.956545364470383e-06, "loss": 1.1527, "step": 726 }, { "epoch": 0.1453237050548462, "grad_norm": 2.03125, "learning_rate": 9.956406623133326e-06, "loss": 1.0995, "step": 727 }, { "epoch": 0.14552360010994228, "grad_norm": 2.21875, "learning_rate": 9.956267661633215e-06, "loss": 1.1751, "step": 728 }, { "epoch": 0.14572349516503835, "grad_norm": 2.234375, "learning_rate": 9.956128479976223e-06, "loss": 1.2085, "step": 729 }, { "epoch": 0.14592339022013442, "grad_norm": 2.171875, "learning_rate": 9.955989078168535e-06, "loss": 1.1981, "step": 730 }, { "epoch": 0.1461232852752305, "grad_norm": 2.0625, "learning_rate": 9.955849456216343e-06, "loss": 1.109, "step": 731 }, { "epoch": 0.14632318033032657, "grad_norm": 2.171875, "learning_rate": 9.955709614125844e-06, "loss": 1.1483, "step": 732 }, { "epoch": 0.14652307538542264, "grad_norm": 2.15625, "learning_rate": 9.955569551903255e-06, "loss": 1.1024, "step": 733 }, { "epoch": 0.14672297044051874, "grad_norm": 2.046875, "learning_rate": 9.955429269554795e-06, "loss": 1.0162, "step": 734 }, { "epoch": 0.1469228654956148, "grad_norm": 2.125, "learning_rate": 9.955288767086698e-06, "loss": 1.1791, "step": 735 }, { "epoch": 0.14712276055071088, "grad_norm": 2.1875, "learning_rate": 9.9551480445052e-06, "loss": 1.156, "step": 736 }, { "epoch": 0.14732265560580696, "grad_norm": 2.1875, "learning_rate": 9.955007101816558e-06, "loss": 1.1466, "step": 737 }, { "epoch": 0.14752255066090303, "grad_norm": 2.03125, "learning_rate": 9.954865939027028e-06, "loss": 1.0374, "step": 738 }, { "epoch": 0.1477224457159991, "grad_norm": 2.0625, "learning_rate": 9.954724556142884e-06, "loss": 1.0774, "step": 739 }, { "epoch": 0.14792234077109517, "grad_norm": 2.1875, "learning_rate": 9.954582953170403e-06, "loss": 1.0355, "step": 740 }, { "epoch": 0.14812223582619125, "grad_norm": 2.21875, "learning_rate": 9.954441130115876e-06, "loss": 1.1776, "step": 741 }, { "epoch": 0.14832213088128732, "grad_norm": 2.203125, "learning_rate": 9.954299086985604e-06, "loss": 1.1553, "step": 742 }, { "epoch": 0.1485220259363834, "grad_norm": 2.171875, "learning_rate": 9.954156823785896e-06, "loss": 1.0897, "step": 743 }, { "epoch": 0.14872192099147946, "grad_norm": 2.171875, "learning_rate": 9.95401434052307e-06, "loss": 1.141, "step": 744 }, { "epoch": 0.14892181604657556, "grad_norm": 2.125, "learning_rate": 9.953871637203456e-06, "loss": 1.0888, "step": 745 }, { "epoch": 0.14912171110167163, "grad_norm": 2.109375, "learning_rate": 9.953728713833395e-06, "loss": 1.0699, "step": 746 }, { "epoch": 0.1493216061567677, "grad_norm": 2.078125, "learning_rate": 9.95358557041923e-06, "loss": 1.0551, "step": 747 }, { "epoch": 0.14952150121186378, "grad_norm": 2.203125, "learning_rate": 9.953442206967327e-06, "loss": 1.0822, "step": 748 }, { "epoch": 0.14972139626695985, "grad_norm": 2.078125, "learning_rate": 9.953298623484049e-06, "loss": 1.0417, "step": 749 }, { "epoch": 0.14992129132205592, "grad_norm": 2.296875, "learning_rate": 9.953154819975773e-06, "loss": 1.2122, "step": 750 }, { "epoch": 0.150121186377152, "grad_norm": 2.125, "learning_rate": 9.95301079644889e-06, "loss": 1.0791, "step": 751 }, { "epoch": 0.15032108143224807, "grad_norm": 2.25, "learning_rate": 9.952866552909797e-06, "loss": 1.1611, "step": 752 }, { "epoch": 0.15052097648734414, "grad_norm": 2.265625, "learning_rate": 9.9527220893649e-06, "loss": 1.1831, "step": 753 }, { "epoch": 0.1507208715424402, "grad_norm": 2.21875, "learning_rate": 9.952577405820618e-06, "loss": 1.1212, "step": 754 }, { "epoch": 0.15092076659753628, "grad_norm": 2.1875, "learning_rate": 9.952432502283378e-06, "loss": 1.0424, "step": 755 }, { "epoch": 0.15112066165263235, "grad_norm": 2.0625, "learning_rate": 9.952287378759613e-06, "loss": 1.0381, "step": 756 }, { "epoch": 0.15132055670772845, "grad_norm": 2.1875, "learning_rate": 9.95214203525577e-06, "loss": 1.1073, "step": 757 }, { "epoch": 0.15152045176282453, "grad_norm": 2.203125, "learning_rate": 9.951996471778308e-06, "loss": 1.092, "step": 758 }, { "epoch": 0.1517203468179206, "grad_norm": 2.390625, "learning_rate": 9.951850688333693e-06, "loss": 1.1301, "step": 759 }, { "epoch": 0.15192024187301667, "grad_norm": 2.234375, "learning_rate": 9.951704684928398e-06, "loss": 1.123, "step": 760 }, { "epoch": 0.15212013692811274, "grad_norm": 2.25, "learning_rate": 9.95155846156891e-06, "loss": 1.0566, "step": 761 }, { "epoch": 0.15232003198320881, "grad_norm": 2.203125, "learning_rate": 9.951412018261724e-06, "loss": 1.0931, "step": 762 }, { "epoch": 0.1525199270383049, "grad_norm": 2.0625, "learning_rate": 9.951265355013345e-06, "loss": 0.9994, "step": 763 }, { "epoch": 0.15271982209340096, "grad_norm": 2.21875, "learning_rate": 9.951118471830287e-06, "loss": 1.0814, "step": 764 }, { "epoch": 0.15291971714849703, "grad_norm": 2.3125, "learning_rate": 9.950971368719077e-06, "loss": 1.1147, "step": 765 }, { "epoch": 0.1531196122035931, "grad_norm": 2.109375, "learning_rate": 9.950824045686247e-06, "loss": 1.0999, "step": 766 }, { "epoch": 0.15331950725868917, "grad_norm": 2.265625, "learning_rate": 9.95067650273834e-06, "loss": 1.1821, "step": 767 }, { "epoch": 0.15351940231378527, "grad_norm": 2.046875, "learning_rate": 9.950528739881915e-06, "loss": 1.1256, "step": 768 }, { "epoch": 0.15371929736888135, "grad_norm": 2.03125, "learning_rate": 9.95038075712353e-06, "loss": 1.1463, "step": 769 }, { "epoch": 0.15391919242397742, "grad_norm": 2.1875, "learning_rate": 9.950232554469759e-06, "loss": 1.0449, "step": 770 }, { "epoch": 0.1541190874790735, "grad_norm": 2.15625, "learning_rate": 9.950084131927188e-06, "loss": 1.0059, "step": 771 }, { "epoch": 0.15431898253416956, "grad_norm": 2.109375, "learning_rate": 9.949935489502409e-06, "loss": 1.0697, "step": 772 }, { "epoch": 0.15451887758926564, "grad_norm": 2.28125, "learning_rate": 9.949786627202023e-06, "loss": 1.1426, "step": 773 }, { "epoch": 0.1547187726443617, "grad_norm": 2.15625, "learning_rate": 9.949637545032644e-06, "loss": 1.2014, "step": 774 }, { "epoch": 0.15491866769945778, "grad_norm": 2.15625, "learning_rate": 9.949488243000895e-06, "loss": 1.0977, "step": 775 }, { "epoch": 0.15511856275455385, "grad_norm": 2.109375, "learning_rate": 9.949338721113406e-06, "loss": 1.0963, "step": 776 }, { "epoch": 0.15531845780964992, "grad_norm": 2.1875, "learning_rate": 9.94918897937682e-06, "loss": 1.0902, "step": 777 }, { "epoch": 0.155518352864746, "grad_norm": 2.0625, "learning_rate": 9.949039017797788e-06, "loss": 1.0116, "step": 778 }, { "epoch": 0.1557182479198421, "grad_norm": 2.203125, "learning_rate": 9.948888836382971e-06, "loss": 1.1572, "step": 779 }, { "epoch": 0.15591814297493817, "grad_norm": 2.1875, "learning_rate": 9.948738435139042e-06, "loss": 1.0033, "step": 780 }, { "epoch": 0.15611803803003424, "grad_norm": 2.3125, "learning_rate": 9.948587814072679e-06, "loss": 0.9917, "step": 781 }, { "epoch": 0.1563179330851303, "grad_norm": 2.125, "learning_rate": 9.948436973190574e-06, "loss": 1.1254, "step": 782 }, { "epoch": 0.15651782814022638, "grad_norm": 2.1875, "learning_rate": 9.948285912499427e-06, "loss": 1.04, "step": 783 }, { "epoch": 0.15671772319532246, "grad_norm": 2.171875, "learning_rate": 9.948134632005948e-06, "loss": 1.1461, "step": 784 }, { "epoch": 0.15691761825041853, "grad_norm": 2.140625, "learning_rate": 9.947983131716858e-06, "loss": 1.1382, "step": 785 }, { "epoch": 0.1571175133055146, "grad_norm": 2.140625, "learning_rate": 9.947831411638884e-06, "loss": 1.0689, "step": 786 }, { "epoch": 0.15731740836061067, "grad_norm": 2.21875, "learning_rate": 9.947679471778768e-06, "loss": 1.0443, "step": 787 }, { "epoch": 0.15751730341570674, "grad_norm": 2.078125, "learning_rate": 9.947527312143259e-06, "loss": 1.0321, "step": 788 }, { "epoch": 0.15771719847080282, "grad_norm": 2.34375, "learning_rate": 9.947374932739115e-06, "loss": 1.233, "step": 789 }, { "epoch": 0.15791709352589892, "grad_norm": 2.265625, "learning_rate": 9.947222333573105e-06, "loss": 1.1441, "step": 790 }, { "epoch": 0.158116988580995, "grad_norm": 2.078125, "learning_rate": 9.947069514652006e-06, "loss": 1.055, "step": 791 }, { "epoch": 0.15831688363609106, "grad_norm": 2.296875, "learning_rate": 9.94691647598261e-06, "loss": 1.1694, "step": 792 }, { "epoch": 0.15851677869118713, "grad_norm": 2.078125, "learning_rate": 9.946763217571712e-06, "loss": 0.9969, "step": 793 }, { "epoch": 0.1587166737462832, "grad_norm": 2.140625, "learning_rate": 9.946609739426119e-06, "loss": 1.1176, "step": 794 }, { "epoch": 0.15891656880137928, "grad_norm": 2.296875, "learning_rate": 9.94645604155265e-06, "loss": 1.1539, "step": 795 }, { "epoch": 0.15911646385647535, "grad_norm": 2.09375, "learning_rate": 9.94630212395813e-06, "loss": 1.1059, "step": 796 }, { "epoch": 0.15931635891157142, "grad_norm": 2.15625, "learning_rate": 9.9461479866494e-06, "loss": 1.1263, "step": 797 }, { "epoch": 0.1595162539666675, "grad_norm": 2.125, "learning_rate": 9.945993629633305e-06, "loss": 1.1279, "step": 798 }, { "epoch": 0.15971614902176356, "grad_norm": 2.234375, "learning_rate": 9.945839052916702e-06, "loss": 1.0915, "step": 799 }, { "epoch": 0.15991604407685964, "grad_norm": 2.359375, "learning_rate": 9.945684256506454e-06, "loss": 1.2039, "step": 800 }, { "epoch": 0.16011593913195574, "grad_norm": 2.265625, "learning_rate": 9.945529240409442e-06, "loss": 1.1241, "step": 801 }, { "epoch": 0.1603158341870518, "grad_norm": 2.28125, "learning_rate": 9.945374004632547e-06, "loss": 1.1659, "step": 802 }, { "epoch": 0.16051572924214788, "grad_norm": 2.265625, "learning_rate": 9.945218549182668e-06, "loss": 1.1306, "step": 803 }, { "epoch": 0.16071562429724395, "grad_norm": 2.3125, "learning_rate": 9.945062874066709e-06, "loss": 1.0898, "step": 804 }, { "epoch": 0.16091551935234003, "grad_norm": 2.25, "learning_rate": 9.944906979291587e-06, "loss": 1.1502, "step": 805 }, { "epoch": 0.1611154144074361, "grad_norm": 2.25, "learning_rate": 9.944750864864224e-06, "loss": 1.1441, "step": 806 }, { "epoch": 0.16131530946253217, "grad_norm": 2.09375, "learning_rate": 9.944594530791553e-06, "loss": 1.0863, "step": 807 }, { "epoch": 0.16151520451762824, "grad_norm": 2.078125, "learning_rate": 9.944437977080525e-06, "loss": 1.1074, "step": 808 }, { "epoch": 0.1617150995727243, "grad_norm": 2.03125, "learning_rate": 9.944281203738087e-06, "loss": 1.0624, "step": 809 }, { "epoch": 0.16191499462782039, "grad_norm": 2.203125, "learning_rate": 9.944124210771209e-06, "loss": 1.2287, "step": 810 }, { "epoch": 0.16211488968291646, "grad_norm": 2.25, "learning_rate": 9.94396699818686e-06, "loss": 1.1621, "step": 811 }, { "epoch": 0.16231478473801253, "grad_norm": 2.265625, "learning_rate": 9.943809565992024e-06, "loss": 1.1585, "step": 812 }, { "epoch": 0.16251467979310863, "grad_norm": 2.15625, "learning_rate": 9.943651914193697e-06, "loss": 1.0682, "step": 813 }, { "epoch": 0.1627145748482047, "grad_norm": 2.15625, "learning_rate": 9.943494042798878e-06, "loss": 1.101, "step": 814 }, { "epoch": 0.16291446990330077, "grad_norm": 2.3125, "learning_rate": 9.943335951814583e-06, "loss": 1.1353, "step": 815 }, { "epoch": 0.16311436495839685, "grad_norm": 2.140625, "learning_rate": 9.943177641247833e-06, "loss": 1.0897, "step": 816 }, { "epoch": 0.16331426001349292, "grad_norm": 5.25, "learning_rate": 9.94301911110566e-06, "loss": 0.9894, "step": 817 }, { "epoch": 0.163514155068589, "grad_norm": 2.234375, "learning_rate": 9.942860361395106e-06, "loss": 1.095, "step": 818 }, { "epoch": 0.16371405012368506, "grad_norm": 2.328125, "learning_rate": 9.942701392123222e-06, "loss": 1.1018, "step": 819 }, { "epoch": 0.16391394517878113, "grad_norm": 2.125, "learning_rate": 9.942542203297072e-06, "loss": 1.1963, "step": 820 }, { "epoch": 0.1641138402338772, "grad_norm": 2.09375, "learning_rate": 9.942382794923723e-06, "loss": 1.0891, "step": 821 }, { "epoch": 0.16431373528897328, "grad_norm": 2.1875, "learning_rate": 9.94222316701026e-06, "loss": 1.1304, "step": 822 }, { "epoch": 0.16451363034406935, "grad_norm": 2.125, "learning_rate": 9.942063319563769e-06, "loss": 1.0862, "step": 823 }, { "epoch": 0.16471352539916545, "grad_norm": 3.203125, "learning_rate": 9.941903252591356e-06, "loss": 1.1904, "step": 824 }, { "epoch": 0.16491342045426152, "grad_norm": 2.09375, "learning_rate": 9.941742966100128e-06, "loss": 1.1039, "step": 825 }, { "epoch": 0.1651133155093576, "grad_norm": 2.296875, "learning_rate": 9.941582460097203e-06, "loss": 1.0461, "step": 826 }, { "epoch": 0.16531321056445367, "grad_norm": 2.328125, "learning_rate": 9.941421734589715e-06, "loss": 1.0681, "step": 827 }, { "epoch": 0.16551310561954974, "grad_norm": 2.03125, "learning_rate": 9.9412607895848e-06, "loss": 1.0501, "step": 828 }, { "epoch": 0.1657130006746458, "grad_norm": 2.3125, "learning_rate": 9.94109962508961e-06, "loss": 1.1685, "step": 829 }, { "epoch": 0.16591289572974188, "grad_norm": 2.3125, "learning_rate": 9.9409382411113e-06, "loss": 1.0765, "step": 830 }, { "epoch": 0.16611279078483795, "grad_norm": 2.109375, "learning_rate": 9.940776637657044e-06, "loss": 1.0488, "step": 831 }, { "epoch": 0.16631268583993403, "grad_norm": 2.046875, "learning_rate": 9.940614814734015e-06, "loss": 1.0233, "step": 832 }, { "epoch": 0.1665125808950301, "grad_norm": 2.125, "learning_rate": 9.940452772349405e-06, "loss": 1.1514, "step": 833 }, { "epoch": 0.16671247595012617, "grad_norm": 2.0625, "learning_rate": 9.940290510510411e-06, "loss": 1.0156, "step": 834 }, { "epoch": 0.16691237100522227, "grad_norm": 2.1875, "learning_rate": 9.940128029224239e-06, "loss": 1.0412, "step": 835 }, { "epoch": 0.16711226606031834, "grad_norm": 2.109375, "learning_rate": 9.939965328498107e-06, "loss": 1.1049, "step": 836 }, { "epoch": 0.16731216111541442, "grad_norm": 2.328125, "learning_rate": 9.939802408339244e-06, "loss": 1.173, "step": 837 }, { "epoch": 0.1675120561705105, "grad_norm": 2.3125, "learning_rate": 9.939639268754886e-06, "loss": 1.1309, "step": 838 }, { "epoch": 0.16771195122560656, "grad_norm": 2.15625, "learning_rate": 9.939475909752278e-06, "loss": 1.0505, "step": 839 }, { "epoch": 0.16791184628070263, "grad_norm": 2.296875, "learning_rate": 9.939312331338678e-06, "loss": 1.0639, "step": 840 }, { "epoch": 0.1681117413357987, "grad_norm": 2.03125, "learning_rate": 9.939148533521353e-06, "loss": 1.0412, "step": 841 }, { "epoch": 0.16831163639089478, "grad_norm": 2.15625, "learning_rate": 9.938984516307575e-06, "loss": 1.063, "step": 842 }, { "epoch": 0.16851153144599085, "grad_norm": 2.109375, "learning_rate": 9.938820279704635e-06, "loss": 1.1225, "step": 843 }, { "epoch": 0.16871142650108692, "grad_norm": 2.0625, "learning_rate": 9.938655823719823e-06, "loss": 1.1079, "step": 844 }, { "epoch": 0.168911321556183, "grad_norm": 2.1875, "learning_rate": 9.938491148360448e-06, "loss": 1.1533, "step": 845 }, { "epoch": 0.1691112166112791, "grad_norm": 2.28125, "learning_rate": 9.938326253633825e-06, "loss": 1.1815, "step": 846 }, { "epoch": 0.16931111166637516, "grad_norm": 2.15625, "learning_rate": 9.938161139547276e-06, "loss": 1.0423, "step": 847 }, { "epoch": 0.16951100672147124, "grad_norm": 2.0625, "learning_rate": 9.937995806108135e-06, "loss": 1.1081, "step": 848 }, { "epoch": 0.1697109017765673, "grad_norm": 2.1875, "learning_rate": 9.93783025332375e-06, "loss": 1.0819, "step": 849 }, { "epoch": 0.16991079683166338, "grad_norm": 2.21875, "learning_rate": 9.937664481201472e-06, "loss": 1.0859, "step": 850 }, { "epoch": 0.17011069188675945, "grad_norm": 2.171875, "learning_rate": 9.937498489748665e-06, "loss": 1.1142, "step": 851 }, { "epoch": 0.17031058694185552, "grad_norm": 2.3125, "learning_rate": 9.937332278972703e-06, "loss": 1.1683, "step": 852 }, { "epoch": 0.1705104819969516, "grad_norm": 2.109375, "learning_rate": 9.937165848880968e-06, "loss": 1.0265, "step": 853 }, { "epoch": 0.17071037705204767, "grad_norm": 2.15625, "learning_rate": 9.936999199480854e-06, "loss": 1.0466, "step": 854 }, { "epoch": 0.17091027210714374, "grad_norm": 2.140625, "learning_rate": 9.936832330779761e-06, "loss": 1.0995, "step": 855 }, { "epoch": 0.1711101671622398, "grad_norm": 2.21875, "learning_rate": 9.936665242785105e-06, "loss": 1.256, "step": 856 }, { "epoch": 0.1713100622173359, "grad_norm": 2.15625, "learning_rate": 9.936497935504306e-06, "loss": 1.0735, "step": 857 }, { "epoch": 0.17150995727243198, "grad_norm": 2.21875, "learning_rate": 9.936330408944794e-06, "loss": 1.1574, "step": 858 }, { "epoch": 0.17170985232752806, "grad_norm": 2.125, "learning_rate": 9.936162663114014e-06, "loss": 1.0946, "step": 859 }, { "epoch": 0.17190974738262413, "grad_norm": 2.4375, "learning_rate": 9.935994698019416e-06, "loss": 1.1149, "step": 860 }, { "epoch": 0.1721096424377202, "grad_norm": 2.09375, "learning_rate": 9.93582651366846e-06, "loss": 1.0987, "step": 861 }, { "epoch": 0.17230953749281627, "grad_norm": 2.125, "learning_rate": 9.935658110068618e-06, "loss": 1.0275, "step": 862 }, { "epoch": 0.17250943254791234, "grad_norm": 2.203125, "learning_rate": 9.93548948722737e-06, "loss": 1.0396, "step": 863 }, { "epoch": 0.17270932760300842, "grad_norm": 2.15625, "learning_rate": 9.935320645152205e-06, "loss": 1.0945, "step": 864 }, { "epoch": 0.1729092226581045, "grad_norm": 2.171875, "learning_rate": 9.935151583850624e-06, "loss": 1.1701, "step": 865 }, { "epoch": 0.17310911771320056, "grad_norm": 2.171875, "learning_rate": 9.934982303330138e-06, "loss": 1.0033, "step": 866 }, { "epoch": 0.17330901276829663, "grad_norm": 2.21875, "learning_rate": 9.934812803598265e-06, "loss": 1.027, "step": 867 }, { "epoch": 0.1735089078233927, "grad_norm": 2.140625, "learning_rate": 9.934643084662533e-06, "loss": 1.1306, "step": 868 }, { "epoch": 0.1737088028784888, "grad_norm": 2.28125, "learning_rate": 9.934473146530483e-06, "loss": 1.1985, "step": 869 }, { "epoch": 0.17390869793358488, "grad_norm": 2.234375, "learning_rate": 9.934302989209663e-06, "loss": 1.2504, "step": 870 }, { "epoch": 0.17410859298868095, "grad_norm": 2.109375, "learning_rate": 9.934132612707631e-06, "loss": 1.0985, "step": 871 }, { "epoch": 0.17430848804377702, "grad_norm": 2.1875, "learning_rate": 9.933962017031957e-06, "loss": 1.1471, "step": 872 }, { "epoch": 0.1745083830988731, "grad_norm": 2.296875, "learning_rate": 9.933791202190215e-06, "loss": 1.0599, "step": 873 }, { "epoch": 0.17470827815396917, "grad_norm": 2.203125, "learning_rate": 9.933620168189995e-06, "loss": 1.162, "step": 874 }, { "epoch": 0.17490817320906524, "grad_norm": 2.125, "learning_rate": 9.933448915038895e-06, "loss": 1.083, "step": 875 }, { "epoch": 0.1751080682641613, "grad_norm": 2.15625, "learning_rate": 9.93327744274452e-06, "loss": 1.116, "step": 876 }, { "epoch": 0.17530796331925738, "grad_norm": 2.0625, "learning_rate": 9.933105751314489e-06, "loss": 0.98, "step": 877 }, { "epoch": 0.17550785837435345, "grad_norm": 2.109375, "learning_rate": 9.932933840756428e-06, "loss": 1.0147, "step": 878 }, { "epoch": 0.17570775342944953, "grad_norm": 2.140625, "learning_rate": 9.93276171107797e-06, "loss": 1.0976, "step": 879 }, { "epoch": 0.17590764848454563, "grad_norm": 2.1875, "learning_rate": 9.932589362286766e-06, "loss": 1.0766, "step": 880 }, { "epoch": 0.1761075435396417, "grad_norm": 2.0625, "learning_rate": 9.932416794390467e-06, "loss": 1.0134, "step": 881 }, { "epoch": 0.17630743859473777, "grad_norm": 2.015625, "learning_rate": 9.932244007396742e-06, "loss": 1.0746, "step": 882 }, { "epoch": 0.17650733364983384, "grad_norm": 2.15625, "learning_rate": 9.932071001313265e-06, "loss": 1.066, "step": 883 }, { "epoch": 0.17670722870492991, "grad_norm": 2.203125, "learning_rate": 9.931897776147724e-06, "loss": 1.1709, "step": 884 }, { "epoch": 0.17690712376002599, "grad_norm": 2.3125, "learning_rate": 9.931724331907806e-06, "loss": 1.15, "step": 885 }, { "epoch": 0.17710701881512206, "grad_norm": 2.34375, "learning_rate": 9.931550668601222e-06, "loss": 1.1492, "step": 886 }, { "epoch": 0.17730691387021813, "grad_norm": 2.265625, "learning_rate": 9.931376786235684e-06, "loss": 1.1116, "step": 887 }, { "epoch": 0.1775068089253142, "grad_norm": 2.4375, "learning_rate": 9.931202684818914e-06, "loss": 1.1235, "step": 888 }, { "epoch": 0.17770670398041027, "grad_norm": 2.125, "learning_rate": 9.931028364358651e-06, "loss": 1.0914, "step": 889 }, { "epoch": 0.17790659903550635, "grad_norm": 2.21875, "learning_rate": 9.930853824862632e-06, "loss": 1.1155, "step": 890 }, { "epoch": 0.17810649409060245, "grad_norm": 2.046875, "learning_rate": 9.930679066338613e-06, "loss": 1.0528, "step": 891 }, { "epoch": 0.17830638914569852, "grad_norm": 2.109375, "learning_rate": 9.930504088794356e-06, "loss": 1.1009, "step": 892 }, { "epoch": 0.1785062842007946, "grad_norm": 2.15625, "learning_rate": 9.930328892237636e-06, "loss": 1.1056, "step": 893 }, { "epoch": 0.17870617925589066, "grad_norm": 2.09375, "learning_rate": 9.930153476676231e-06, "loss": 1.0138, "step": 894 }, { "epoch": 0.17890607431098673, "grad_norm": 2.28125, "learning_rate": 9.929977842117935e-06, "loss": 1.0252, "step": 895 }, { "epoch": 0.1791059693660828, "grad_norm": 2.140625, "learning_rate": 9.92980198857055e-06, "loss": 1.0429, "step": 896 }, { "epoch": 0.17930586442117888, "grad_norm": 2.109375, "learning_rate": 9.92962591604189e-06, "loss": 1.0689, "step": 897 }, { "epoch": 0.17950575947627495, "grad_norm": 2.125, "learning_rate": 9.929449624539772e-06, "loss": 1.0321, "step": 898 }, { "epoch": 0.17970565453137102, "grad_norm": 2.125, "learning_rate": 9.929273114072027e-06, "loss": 1.0742, "step": 899 }, { "epoch": 0.1799055495864671, "grad_norm": 2.078125, "learning_rate": 9.929096384646498e-06, "loss": 1.1502, "step": 900 }, { "epoch": 0.18010544464156317, "grad_norm": 2.125, "learning_rate": 9.928919436271032e-06, "loss": 1.1121, "step": 901 }, { "epoch": 0.18030533969665927, "grad_norm": 2.140625, "learning_rate": 9.928742268953493e-06, "loss": 1.1333, "step": 902 }, { "epoch": 0.18050523475175534, "grad_norm": 2.125, "learning_rate": 9.928564882701749e-06, "loss": 1.1531, "step": 903 }, { "epoch": 0.1807051298068514, "grad_norm": 2.28125, "learning_rate": 9.928387277523676e-06, "loss": 1.1334, "step": 904 }, { "epoch": 0.18090502486194748, "grad_norm": 2.140625, "learning_rate": 9.92820945342717e-06, "loss": 1.2134, "step": 905 }, { "epoch": 0.18110491991704356, "grad_norm": 2.25, "learning_rate": 9.928031410420125e-06, "loss": 1.0337, "step": 906 }, { "epoch": 0.18130481497213963, "grad_norm": 2.1875, "learning_rate": 9.927853148510451e-06, "loss": 1.0733, "step": 907 }, { "epoch": 0.1815047100272357, "grad_norm": 2.40625, "learning_rate": 9.92767466770607e-06, "loss": 1.065, "step": 908 }, { "epoch": 0.18170460508233177, "grad_norm": 2.109375, "learning_rate": 9.927495968014903e-06, "loss": 1.1061, "step": 909 }, { "epoch": 0.18190450013742784, "grad_norm": 2.140625, "learning_rate": 9.92731704944489e-06, "loss": 1.1142, "step": 910 }, { "epoch": 0.18210439519252392, "grad_norm": 2.109375, "learning_rate": 9.92713791200398e-06, "loss": 1.0194, "step": 911 }, { "epoch": 0.18230429024762, "grad_norm": 2.1875, "learning_rate": 9.926958555700134e-06, "loss": 1.1316, "step": 912 }, { "epoch": 0.1825041853027161, "grad_norm": 2.078125, "learning_rate": 9.926778980541314e-06, "loss": 1.0904, "step": 913 }, { "epoch": 0.18270408035781216, "grad_norm": 2.109375, "learning_rate": 9.926599186535496e-06, "loss": 1.1301, "step": 914 }, { "epoch": 0.18290397541290823, "grad_norm": 2.015625, "learning_rate": 9.92641917369067e-06, "loss": 1.0447, "step": 915 }, { "epoch": 0.1831038704680043, "grad_norm": 2.140625, "learning_rate": 9.92623894201483e-06, "loss": 1.0149, "step": 916 }, { "epoch": 0.18330376552310038, "grad_norm": 2.09375, "learning_rate": 9.926058491515982e-06, "loss": 1.1744, "step": 917 }, { "epoch": 0.18350366057819645, "grad_norm": 2.125, "learning_rate": 9.92587782220214e-06, "loss": 1.0773, "step": 918 }, { "epoch": 0.18370355563329252, "grad_norm": 2.15625, "learning_rate": 9.925696934081335e-06, "loss": 1.0894, "step": 919 }, { "epoch": 0.1839034506883886, "grad_norm": 2.078125, "learning_rate": 9.925515827161596e-06, "loss": 1.1613, "step": 920 }, { "epoch": 0.18410334574348466, "grad_norm": 2.171875, "learning_rate": 9.925334501450972e-06, "loss": 1.1258, "step": 921 }, { "epoch": 0.18430324079858074, "grad_norm": 2.078125, "learning_rate": 9.925152956957513e-06, "loss": 1.0427, "step": 922 }, { "epoch": 0.1845031358536768, "grad_norm": 2.21875, "learning_rate": 9.924971193689287e-06, "loss": 1.0592, "step": 923 }, { "epoch": 0.18470303090877288, "grad_norm": 2.125, "learning_rate": 9.924789211654367e-06, "loss": 1.1031, "step": 924 }, { "epoch": 0.18490292596386898, "grad_norm": 2.0625, "learning_rate": 9.924607010860833e-06, "loss": 1.077, "step": 925 }, { "epoch": 0.18510282101896505, "grad_norm": 2.25, "learning_rate": 9.924424591316785e-06, "loss": 1.1194, "step": 926 }, { "epoch": 0.18530271607406112, "grad_norm": 2.125, "learning_rate": 9.924241953030323e-06, "loss": 1.0626, "step": 927 }, { "epoch": 0.1855026111291572, "grad_norm": 2.171875, "learning_rate": 9.924059096009556e-06, "loss": 1.1046, "step": 928 }, { "epoch": 0.18570250618425327, "grad_norm": 2.265625, "learning_rate": 9.923876020262613e-06, "loss": 1.1724, "step": 929 }, { "epoch": 0.18590240123934934, "grad_norm": 2.28125, "learning_rate": 9.923692725797622e-06, "loss": 1.1293, "step": 930 }, { "epoch": 0.1861022962944454, "grad_norm": 2.171875, "learning_rate": 9.923509212622726e-06, "loss": 1.1245, "step": 931 }, { "epoch": 0.18630219134954148, "grad_norm": 2.171875, "learning_rate": 9.923325480746077e-06, "loss": 1.0509, "step": 932 }, { "epoch": 0.18650208640463756, "grad_norm": 2.234375, "learning_rate": 9.923141530175835e-06, "loss": 1.0779, "step": 933 }, { "epoch": 0.18670198145973363, "grad_norm": 2.125, "learning_rate": 9.922957360920173e-06, "loss": 1.1274, "step": 934 }, { "epoch": 0.1869018765148297, "grad_norm": 2.140625, "learning_rate": 9.922772972987271e-06, "loss": 1.1678, "step": 935 }, { "epoch": 0.1871017715699258, "grad_norm": 2.140625, "learning_rate": 9.922588366385319e-06, "loss": 1.1291, "step": 936 }, { "epoch": 0.18730166662502187, "grad_norm": 2.234375, "learning_rate": 9.922403541122516e-06, "loss": 0.9955, "step": 937 }, { "epoch": 0.18750156168011795, "grad_norm": 2.234375, "learning_rate": 9.922218497207075e-06, "loss": 1.0822, "step": 938 }, { "epoch": 0.18770145673521402, "grad_norm": 2.140625, "learning_rate": 9.922033234647213e-06, "loss": 1.0365, "step": 939 }, { "epoch": 0.1879013517903101, "grad_norm": 2.03125, "learning_rate": 9.921847753451162e-06, "loss": 1.0929, "step": 940 }, { "epoch": 0.18810124684540616, "grad_norm": 2.34375, "learning_rate": 9.92166205362716e-06, "loss": 1.1478, "step": 941 }, { "epoch": 0.18830114190050223, "grad_norm": 2.0625, "learning_rate": 9.921476135183452e-06, "loss": 1.037, "step": 942 }, { "epoch": 0.1885010369555983, "grad_norm": 2.09375, "learning_rate": 9.921289998128303e-06, "loss": 1.0561, "step": 943 }, { "epoch": 0.18870093201069438, "grad_norm": 2.3125, "learning_rate": 9.921103642469976e-06, "loss": 1.0575, "step": 944 }, { "epoch": 0.18890082706579045, "grad_norm": 2.109375, "learning_rate": 9.92091706821675e-06, "loss": 1.1551, "step": 945 }, { "epoch": 0.18910072212088652, "grad_norm": 2.15625, "learning_rate": 9.920730275376915e-06, "loss": 1.09, "step": 946 }, { "epoch": 0.18930061717598262, "grad_norm": 2.15625, "learning_rate": 9.920543263958767e-06, "loss": 1.0898, "step": 947 }, { "epoch": 0.1895005122310787, "grad_norm": 2.484375, "learning_rate": 9.920356033970613e-06, "loss": 1.1994, "step": 948 }, { "epoch": 0.18970040728617477, "grad_norm": 2.296875, "learning_rate": 9.920168585420768e-06, "loss": 1.2403, "step": 949 }, { "epoch": 0.18990030234127084, "grad_norm": 2.125, "learning_rate": 9.91998091831756e-06, "loss": 1.2184, "step": 950 }, { "epoch": 0.1901001973963669, "grad_norm": 2.28125, "learning_rate": 9.919793032669324e-06, "loss": 1.2009, "step": 951 }, { "epoch": 0.19030009245146298, "grad_norm": 2.078125, "learning_rate": 9.91960492848441e-06, "loss": 1.0286, "step": 952 }, { "epoch": 0.19049998750655905, "grad_norm": 2.21875, "learning_rate": 9.91941660577117e-06, "loss": 1.0413, "step": 953 }, { "epoch": 0.19069988256165513, "grad_norm": 2.15625, "learning_rate": 9.919228064537968e-06, "loss": 1.0637, "step": 954 }, { "epoch": 0.1908997776167512, "grad_norm": 2.3125, "learning_rate": 9.91903930479318e-06, "loss": 1.206, "step": 955 }, { "epoch": 0.19109967267184727, "grad_norm": 2.046875, "learning_rate": 9.918850326545195e-06, "loss": 1.0584, "step": 956 }, { "epoch": 0.19129956772694334, "grad_norm": 2.15625, "learning_rate": 9.918661129802402e-06, "loss": 1.1496, "step": 957 }, { "epoch": 0.19149946278203944, "grad_norm": 2.046875, "learning_rate": 9.918471714573205e-06, "loss": 1.0116, "step": 958 }, { "epoch": 0.19169935783713551, "grad_norm": 2.21875, "learning_rate": 9.918282080866022e-06, "loss": 1.1686, "step": 959 }, { "epoch": 0.1918992528922316, "grad_norm": 2.265625, "learning_rate": 9.918092228689276e-06, "loss": 1.0539, "step": 960 }, { "epoch": 0.19209914794732766, "grad_norm": 2.125, "learning_rate": 9.917902158051395e-06, "loss": 1.1964, "step": 961 }, { "epoch": 0.19229904300242373, "grad_norm": 2.203125, "learning_rate": 9.917711868960826e-06, "loss": 1.1666, "step": 962 }, { "epoch": 0.1924989380575198, "grad_norm": 2.109375, "learning_rate": 9.917521361426023e-06, "loss": 1.0984, "step": 963 }, { "epoch": 0.19269883311261587, "grad_norm": 2.140625, "learning_rate": 9.917330635455445e-06, "loss": 1.137, "step": 964 }, { "epoch": 0.19289872816771195, "grad_norm": 2.0625, "learning_rate": 9.917139691057565e-06, "loss": 1.084, "step": 965 }, { "epoch": 0.19309862322280802, "grad_norm": 1.9921875, "learning_rate": 9.916948528240865e-06, "loss": 1.056, "step": 966 }, { "epoch": 0.1932985182779041, "grad_norm": 2.09375, "learning_rate": 9.916757147013837e-06, "loss": 1.0956, "step": 967 }, { "epoch": 0.19349841333300016, "grad_norm": 2.25, "learning_rate": 9.916565547384981e-06, "loss": 1.1427, "step": 968 }, { "epoch": 0.19369830838809626, "grad_norm": 2.046875, "learning_rate": 9.91637372936281e-06, "loss": 1.0469, "step": 969 }, { "epoch": 0.19389820344319234, "grad_norm": 1.9765625, "learning_rate": 9.916181692955841e-06, "loss": 1.0015, "step": 970 }, { "epoch": 0.1940980984982884, "grad_norm": 2.375, "learning_rate": 9.915989438172608e-06, "loss": 1.1941, "step": 971 }, { "epoch": 0.19429799355338448, "grad_norm": 2.328125, "learning_rate": 9.915796965021648e-06, "loss": 1.109, "step": 972 }, { "epoch": 0.19449788860848055, "grad_norm": 2.140625, "learning_rate": 9.915604273511514e-06, "loss": 1.0788, "step": 973 }, { "epoch": 0.19469778366357662, "grad_norm": 2.015625, "learning_rate": 9.915411363650762e-06, "loss": 1.0593, "step": 974 }, { "epoch": 0.1948976787186727, "grad_norm": 2.3125, "learning_rate": 9.915218235447962e-06, "loss": 1.1721, "step": 975 }, { "epoch": 0.19509757377376877, "grad_norm": 2.328125, "learning_rate": 9.915024888911692e-06, "loss": 1.1182, "step": 976 }, { "epoch": 0.19529746882886484, "grad_norm": 2.171875, "learning_rate": 9.914831324050542e-06, "loss": 1.1052, "step": 977 }, { "epoch": 0.1954973638839609, "grad_norm": 2.140625, "learning_rate": 9.914637540873112e-06, "loss": 1.1633, "step": 978 }, { "epoch": 0.19569725893905698, "grad_norm": 2.046875, "learning_rate": 9.914443539388003e-06, "loss": 1.1462, "step": 979 }, { "epoch": 0.19589715399415306, "grad_norm": 2.21875, "learning_rate": 9.914249319603839e-06, "loss": 1.0912, "step": 980 }, { "epoch": 0.19609704904924916, "grad_norm": 2.078125, "learning_rate": 9.914054881529245e-06, "loss": 1.0723, "step": 981 }, { "epoch": 0.19629694410434523, "grad_norm": 2.125, "learning_rate": 9.91386022517286e-06, "loss": 1.1248, "step": 982 }, { "epoch": 0.1964968391594413, "grad_norm": 2.109375, "learning_rate": 9.913665350543324e-06, "loss": 1.0391, "step": 983 }, { "epoch": 0.19669673421453737, "grad_norm": 2.234375, "learning_rate": 9.913470257649303e-06, "loss": 1.1673, "step": 984 }, { "epoch": 0.19689662926963344, "grad_norm": 2.078125, "learning_rate": 9.913274946499453e-06, "loss": 1.1034, "step": 985 }, { "epoch": 0.19709652432472952, "grad_norm": 2.140625, "learning_rate": 9.913079417102458e-06, "loss": 1.1082, "step": 986 }, { "epoch": 0.1972964193798256, "grad_norm": 2.265625, "learning_rate": 9.912883669467e-06, "loss": 1.1261, "step": 987 }, { "epoch": 0.19749631443492166, "grad_norm": 2.21875, "learning_rate": 9.912687703601774e-06, "loss": 1.1113, "step": 988 }, { "epoch": 0.19769620949001773, "grad_norm": 2.078125, "learning_rate": 9.912491519515484e-06, "loss": 1.1534, "step": 989 }, { "epoch": 0.1978961045451138, "grad_norm": 2.234375, "learning_rate": 9.912295117216844e-06, "loss": 1.0584, "step": 990 }, { "epoch": 0.19809599960020988, "grad_norm": 2.109375, "learning_rate": 9.912098496714582e-06, "loss": 1.0466, "step": 991 }, { "epoch": 0.19829589465530598, "grad_norm": 2.109375, "learning_rate": 9.911901658017428e-06, "loss": 1.0825, "step": 992 }, { "epoch": 0.19849578971040205, "grad_norm": 2.109375, "learning_rate": 9.911704601134127e-06, "loss": 1.1456, "step": 993 }, { "epoch": 0.19869568476549812, "grad_norm": 2.296875, "learning_rate": 9.911507326073433e-06, "loss": 1.1568, "step": 994 }, { "epoch": 0.1988955798205942, "grad_norm": 2.15625, "learning_rate": 9.911309832844108e-06, "loss": 1.1335, "step": 995 }, { "epoch": 0.19909547487569026, "grad_norm": 2.078125, "learning_rate": 9.911112121454925e-06, "loss": 1.0891, "step": 996 }, { "epoch": 0.19929536993078634, "grad_norm": 2.21875, "learning_rate": 9.910914191914664e-06, "loss": 1.1071, "step": 997 }, { "epoch": 0.1994952649858824, "grad_norm": 2.171875, "learning_rate": 9.910716044232122e-06, "loss": 1.0985, "step": 998 }, { "epoch": 0.19969516004097848, "grad_norm": 2.015625, "learning_rate": 9.910517678416097e-06, "loss": 1.055, "step": 999 }, { "epoch": 0.19989505509607455, "grad_norm": 2.03125, "learning_rate": 9.9103190944754e-06, "loss": 1.092, "step": 1000 }, { "epoch": 0.20009495015117063, "grad_norm": 2.125, "learning_rate": 9.910120292418855e-06, "loss": 1.1513, "step": 1001 }, { "epoch": 0.2002948452062667, "grad_norm": 2.3125, "learning_rate": 9.909921272255289e-06, "loss": 1.0961, "step": 1002 }, { "epoch": 0.2004947402613628, "grad_norm": 2.171875, "learning_rate": 9.909722033993546e-06, "loss": 1.1312, "step": 1003 }, { "epoch": 0.20069463531645887, "grad_norm": 2.0625, "learning_rate": 9.909522577642474e-06, "loss": 1.088, "step": 1004 }, { "epoch": 0.20089453037155494, "grad_norm": 2.140625, "learning_rate": 9.909322903210934e-06, "loss": 1.0347, "step": 1005 }, { "epoch": 0.201094425426651, "grad_norm": 2.234375, "learning_rate": 9.909123010707793e-06, "loss": 1.097, "step": 1006 }, { "epoch": 0.20129432048174709, "grad_norm": 2.15625, "learning_rate": 9.908922900141935e-06, "loss": 1.158, "step": 1007 }, { "epoch": 0.20149421553684316, "grad_norm": 2.09375, "learning_rate": 9.908722571522244e-06, "loss": 1.1167, "step": 1008 }, { "epoch": 0.20169411059193923, "grad_norm": 2.015625, "learning_rate": 9.90852202485762e-06, "loss": 1.0482, "step": 1009 }, { "epoch": 0.2018940056470353, "grad_norm": 2.03125, "learning_rate": 9.908321260156975e-06, "loss": 1.0526, "step": 1010 }, { "epoch": 0.20209390070213137, "grad_norm": 2.21875, "learning_rate": 9.908120277429224e-06, "loss": 1.1366, "step": 1011 }, { "epoch": 0.20229379575722745, "grad_norm": 2.09375, "learning_rate": 9.90791907668329e-06, "loss": 1.0403, "step": 1012 }, { "epoch": 0.20249369081232352, "grad_norm": 2.140625, "learning_rate": 9.907717657928117e-06, "loss": 1.0606, "step": 1013 }, { "epoch": 0.20269358586741962, "grad_norm": 2.390625, "learning_rate": 9.907516021172652e-06, "loss": 1.0486, "step": 1014 }, { "epoch": 0.2028934809225157, "grad_norm": 2.1875, "learning_rate": 9.907314166425847e-06, "loss": 1.1866, "step": 1015 }, { "epoch": 0.20309337597761176, "grad_norm": 2.078125, "learning_rate": 9.907112093696672e-06, "loss": 1.0322, "step": 1016 }, { "epoch": 0.20329327103270783, "grad_norm": 2.109375, "learning_rate": 9.906909802994101e-06, "loss": 1.1012, "step": 1017 }, { "epoch": 0.2034931660878039, "grad_norm": 2.015625, "learning_rate": 9.90670729432712e-06, "loss": 1.0504, "step": 1018 }, { "epoch": 0.20369306114289998, "grad_norm": 2.078125, "learning_rate": 9.906504567704727e-06, "loss": 1.0983, "step": 1019 }, { "epoch": 0.20389295619799605, "grad_norm": 2.125, "learning_rate": 9.906301623135925e-06, "loss": 1.1327, "step": 1020 }, { "epoch": 0.20409285125309212, "grad_norm": 2.109375, "learning_rate": 9.906098460629728e-06, "loss": 1.0782, "step": 1021 }, { "epoch": 0.2042927463081882, "grad_norm": 2.078125, "learning_rate": 9.905895080195161e-06, "loss": 1.0306, "step": 1022 }, { "epoch": 0.20449264136328427, "grad_norm": 2.15625, "learning_rate": 9.90569148184126e-06, "loss": 1.0627, "step": 1023 }, { "epoch": 0.20469253641838034, "grad_norm": 1.9921875, "learning_rate": 9.905487665577067e-06, "loss": 1.0469, "step": 1024 }, { "epoch": 0.20489243147347644, "grad_norm": 2.109375, "learning_rate": 9.905283631411635e-06, "loss": 0.9674, "step": 1025 }, { "epoch": 0.2050923265285725, "grad_norm": 2.1875, "learning_rate": 9.905079379354028e-06, "loss": 1.1038, "step": 1026 }, { "epoch": 0.20529222158366858, "grad_norm": 2.171875, "learning_rate": 9.904874909413318e-06, "loss": 1.0563, "step": 1027 }, { "epoch": 0.20549211663876465, "grad_norm": 2.09375, "learning_rate": 9.90467022159859e-06, "loss": 1.079, "step": 1028 }, { "epoch": 0.20569201169386073, "grad_norm": 2.125, "learning_rate": 9.904465315918934e-06, "loss": 1.0519, "step": 1029 }, { "epoch": 0.2058919067489568, "grad_norm": 2.328125, "learning_rate": 9.904260192383452e-06, "loss": 1.1527, "step": 1030 }, { "epoch": 0.20609180180405287, "grad_norm": 2.140625, "learning_rate": 9.904054851001257e-06, "loss": 1.0425, "step": 1031 }, { "epoch": 0.20629169685914894, "grad_norm": 2.21875, "learning_rate": 9.903849291781468e-06, "loss": 1.0826, "step": 1032 }, { "epoch": 0.20649159191424502, "grad_norm": 2.28125, "learning_rate": 9.903643514733218e-06, "loss": 1.0684, "step": 1033 }, { "epoch": 0.2066914869693411, "grad_norm": 2.1875, "learning_rate": 9.903437519865648e-06, "loss": 1.0938, "step": 1034 }, { "epoch": 0.20689138202443716, "grad_norm": 2.15625, "learning_rate": 9.903231307187906e-06, "loss": 1.1274, "step": 1035 }, { "epoch": 0.20709127707953323, "grad_norm": 2.171875, "learning_rate": 9.903024876709154e-06, "loss": 1.1545, "step": 1036 }, { "epoch": 0.20729117213462933, "grad_norm": 2.015625, "learning_rate": 9.902818228438557e-06, "loss": 1.0743, "step": 1037 }, { "epoch": 0.2074910671897254, "grad_norm": 2.171875, "learning_rate": 9.902611362385302e-06, "loss": 1.0972, "step": 1038 }, { "epoch": 0.20769096224482148, "grad_norm": 4.625, "learning_rate": 9.902404278558573e-06, "loss": 1.0203, "step": 1039 }, { "epoch": 0.20789085729991755, "grad_norm": 2.09375, "learning_rate": 9.902196976967568e-06, "loss": 1.0272, "step": 1040 }, { "epoch": 0.20809075235501362, "grad_norm": 2.328125, "learning_rate": 9.901989457621497e-06, "loss": 1.2083, "step": 1041 }, { "epoch": 0.2082906474101097, "grad_norm": 2.03125, "learning_rate": 9.90178172052958e-06, "loss": 1.0203, "step": 1042 }, { "epoch": 0.20849054246520576, "grad_norm": 2.140625, "learning_rate": 9.90157376570104e-06, "loss": 1.1863, "step": 1043 }, { "epoch": 0.20869043752030184, "grad_norm": 2.1875, "learning_rate": 9.901365593145119e-06, "loss": 1.2206, "step": 1044 }, { "epoch": 0.2088903325753979, "grad_norm": 2.28125, "learning_rate": 9.90115720287106e-06, "loss": 1.1041, "step": 1045 }, { "epoch": 0.20909022763049398, "grad_norm": 2.1875, "learning_rate": 9.900948594888122e-06, "loss": 1.1631, "step": 1046 }, { "epoch": 0.20929012268559005, "grad_norm": 2.15625, "learning_rate": 9.900739769205571e-06, "loss": 1.0053, "step": 1047 }, { "epoch": 0.20949001774068615, "grad_norm": 2.28125, "learning_rate": 9.900530725832685e-06, "loss": 1.0736, "step": 1048 }, { "epoch": 0.20968991279578222, "grad_norm": 2.171875, "learning_rate": 9.900321464778745e-06, "loss": 1.1255, "step": 1049 }, { "epoch": 0.2098898078508783, "grad_norm": 2.234375, "learning_rate": 9.90011198605305e-06, "loss": 1.0491, "step": 1050 }, { "epoch": 0.21008970290597437, "grad_norm": 2.15625, "learning_rate": 9.899902289664902e-06, "loss": 1.0678, "step": 1051 }, { "epoch": 0.21028959796107044, "grad_norm": 2.078125, "learning_rate": 9.899692375623619e-06, "loss": 1.0399, "step": 1052 }, { "epoch": 0.2104894930161665, "grad_norm": 2.203125, "learning_rate": 9.899482243938526e-06, "loss": 1.0988, "step": 1053 }, { "epoch": 0.21068938807126258, "grad_norm": 2.0625, "learning_rate": 9.89927189461895e-06, "loss": 1.0065, "step": 1054 }, { "epoch": 0.21088928312635866, "grad_norm": 2.171875, "learning_rate": 9.899061327674245e-06, "loss": 1.0968, "step": 1055 }, { "epoch": 0.21108917818145473, "grad_norm": 2.1875, "learning_rate": 9.898850543113756e-06, "loss": 1.1262, "step": 1056 }, { "epoch": 0.2112890732365508, "grad_norm": 2.09375, "learning_rate": 9.89863954094685e-06, "loss": 0.9881, "step": 1057 }, { "epoch": 0.21148896829164687, "grad_norm": 2.140625, "learning_rate": 9.8984283211829e-06, "loss": 1.1341, "step": 1058 }, { "epoch": 0.21168886334674297, "grad_norm": 2.109375, "learning_rate": 9.898216883831284e-06, "loss": 1.0557, "step": 1059 }, { "epoch": 0.21188875840183904, "grad_norm": 2.125, "learning_rate": 9.898005228901399e-06, "loss": 1.1488, "step": 1060 }, { "epoch": 0.21208865345693512, "grad_norm": 2.21875, "learning_rate": 9.897793356402646e-06, "loss": 1.1246, "step": 1061 }, { "epoch": 0.2122885485120312, "grad_norm": 2.171875, "learning_rate": 9.897581266344434e-06, "loss": 1.1322, "step": 1062 }, { "epoch": 0.21248844356712726, "grad_norm": 2.21875, "learning_rate": 9.897368958736185e-06, "loss": 1.0576, "step": 1063 }, { "epoch": 0.21268833862222333, "grad_norm": 2.0625, "learning_rate": 9.89715643358733e-06, "loss": 1.0027, "step": 1064 }, { "epoch": 0.2128882336773194, "grad_norm": 1.953125, "learning_rate": 9.89694369090731e-06, "loss": 1.091, "step": 1065 }, { "epoch": 0.21308812873241548, "grad_norm": 2.125, "learning_rate": 9.896730730705574e-06, "loss": 1.0713, "step": 1066 }, { "epoch": 0.21328802378751155, "grad_norm": 2.25, "learning_rate": 9.896517552991581e-06, "loss": 1.1078, "step": 1067 }, { "epoch": 0.21348791884260762, "grad_norm": 2.25, "learning_rate": 9.896304157774802e-06, "loss": 1.1173, "step": 1068 }, { "epoch": 0.2136878138977037, "grad_norm": 2.125, "learning_rate": 9.896090545064717e-06, "loss": 1.0837, "step": 1069 }, { "epoch": 0.2138877089527998, "grad_norm": 2.28125, "learning_rate": 9.895876714870809e-06, "loss": 1.1325, "step": 1070 }, { "epoch": 0.21408760400789587, "grad_norm": 2.25, "learning_rate": 9.895662667202583e-06, "loss": 1.0625, "step": 1071 }, { "epoch": 0.21428749906299194, "grad_norm": 2.0, "learning_rate": 9.895448402069543e-06, "loss": 1.0352, "step": 1072 }, { "epoch": 0.214487394118088, "grad_norm": 2.15625, "learning_rate": 9.895233919481208e-06, "loss": 1.1547, "step": 1073 }, { "epoch": 0.21468728917318408, "grad_norm": 2.140625, "learning_rate": 9.895019219447107e-06, "loss": 1.1479, "step": 1074 }, { "epoch": 0.21488718422828015, "grad_norm": 2.0625, "learning_rate": 9.894804301976773e-06, "loss": 0.9864, "step": 1075 }, { "epoch": 0.21508707928337623, "grad_norm": 1.9921875, "learning_rate": 9.894589167079754e-06, "loss": 1.0687, "step": 1076 }, { "epoch": 0.2152869743384723, "grad_norm": 2.15625, "learning_rate": 9.894373814765609e-06, "loss": 1.063, "step": 1077 }, { "epoch": 0.21548686939356837, "grad_norm": 2.09375, "learning_rate": 9.894158245043902e-06, "loss": 1.114, "step": 1078 }, { "epoch": 0.21568676444866444, "grad_norm": 2.046875, "learning_rate": 9.893942457924206e-06, "loss": 1.1266, "step": 1079 }, { "epoch": 0.21588665950376051, "grad_norm": 2.203125, "learning_rate": 9.89372645341611e-06, "loss": 1.0792, "step": 1080 }, { "epoch": 0.21608655455885661, "grad_norm": 2.1875, "learning_rate": 9.893510231529209e-06, "loss": 1.1263, "step": 1081 }, { "epoch": 0.21628644961395269, "grad_norm": 2.078125, "learning_rate": 9.893293792273104e-06, "loss": 1.0622, "step": 1082 }, { "epoch": 0.21648634466904876, "grad_norm": 2.21875, "learning_rate": 9.893077135657413e-06, "loss": 1.1443, "step": 1083 }, { "epoch": 0.21668623972414483, "grad_norm": 2.125, "learning_rate": 9.892860261691756e-06, "loss": 1.0848, "step": 1084 }, { "epoch": 0.2168861347792409, "grad_norm": 2.171875, "learning_rate": 9.892643170385771e-06, "loss": 1.0369, "step": 1085 }, { "epoch": 0.21708602983433697, "grad_norm": 2.078125, "learning_rate": 9.8924258617491e-06, "loss": 1.0839, "step": 1086 }, { "epoch": 0.21728592488943305, "grad_norm": 1.96875, "learning_rate": 9.892208335791392e-06, "loss": 1.0948, "step": 1087 }, { "epoch": 0.21748581994452912, "grad_norm": 2.203125, "learning_rate": 9.891990592522314e-06, "loss": 1.0512, "step": 1088 }, { "epoch": 0.2176857149996252, "grad_norm": 2.0625, "learning_rate": 9.891772631951535e-06, "loss": 1.0416, "step": 1089 }, { "epoch": 0.21788561005472126, "grad_norm": 2.0625, "learning_rate": 9.891554454088738e-06, "loss": 1.1323, "step": 1090 }, { "epoch": 0.21808550510981733, "grad_norm": 2.125, "learning_rate": 9.891336058943617e-06, "loss": 1.1509, "step": 1091 }, { "epoch": 0.2182854001649134, "grad_norm": 2.1875, "learning_rate": 9.891117446525869e-06, "loss": 1.0703, "step": 1092 }, { "epoch": 0.2184852952200095, "grad_norm": 2.21875, "learning_rate": 9.890898616845206e-06, "loss": 1.1602, "step": 1093 }, { "epoch": 0.21868519027510558, "grad_norm": 2.234375, "learning_rate": 9.890679569911349e-06, "loss": 1.0828, "step": 1094 }, { "epoch": 0.21888508533020165, "grad_norm": 2.0, "learning_rate": 9.890460305734028e-06, "loss": 1.0106, "step": 1095 }, { "epoch": 0.21908498038529772, "grad_norm": 2.0625, "learning_rate": 9.890240824322983e-06, "loss": 1.1057, "step": 1096 }, { "epoch": 0.2192848754403938, "grad_norm": 2.140625, "learning_rate": 9.890021125687962e-06, "loss": 1.1892, "step": 1097 }, { "epoch": 0.21948477049548987, "grad_norm": 2.03125, "learning_rate": 9.889801209838725e-06, "loss": 1.0643, "step": 1098 }, { "epoch": 0.21968466555058594, "grad_norm": 2.234375, "learning_rate": 9.889581076785042e-06, "loss": 1.1837, "step": 1099 }, { "epoch": 0.219884560605682, "grad_norm": 2.703125, "learning_rate": 9.889360726536687e-06, "loss": 1.2431, "step": 1100 }, { "epoch": 0.22008445566077808, "grad_norm": 2.28125, "learning_rate": 9.889140159103454e-06, "loss": 1.1512, "step": 1101 }, { "epoch": 0.22028435071587416, "grad_norm": 2.34375, "learning_rate": 9.888919374495134e-06, "loss": 1.1054, "step": 1102 }, { "epoch": 0.22048424577097023, "grad_norm": 2.046875, "learning_rate": 9.88869837272154e-06, "loss": 1.0885, "step": 1103 }, { "epoch": 0.22068414082606633, "grad_norm": 2.1875, "learning_rate": 9.888477153792486e-06, "loss": 1.0958, "step": 1104 }, { "epoch": 0.2208840358811624, "grad_norm": 2.265625, "learning_rate": 9.888255717717798e-06, "loss": 1.1226, "step": 1105 }, { "epoch": 0.22108393093625847, "grad_norm": 2.140625, "learning_rate": 9.888034064507314e-06, "loss": 1.1548, "step": 1106 }, { "epoch": 0.22128382599135454, "grad_norm": 2.078125, "learning_rate": 9.887812194170878e-06, "loss": 1.0003, "step": 1107 }, { "epoch": 0.22148372104645062, "grad_norm": 2.1875, "learning_rate": 9.887590106718348e-06, "loss": 1.0862, "step": 1108 }, { "epoch": 0.2216836161015467, "grad_norm": 2.046875, "learning_rate": 9.887367802159587e-06, "loss": 1.0139, "step": 1109 }, { "epoch": 0.22188351115664276, "grad_norm": 2.3125, "learning_rate": 9.887145280504468e-06, "loss": 1.072, "step": 1110 }, { "epoch": 0.22208340621173883, "grad_norm": 2.125, "learning_rate": 9.88692254176288e-06, "loss": 0.973, "step": 1111 }, { "epoch": 0.2222833012668349, "grad_norm": 2.265625, "learning_rate": 9.886699585944715e-06, "loss": 1.1539, "step": 1112 }, { "epoch": 0.22248319632193098, "grad_norm": 2.03125, "learning_rate": 9.886476413059874e-06, "loss": 0.9878, "step": 1113 }, { "epoch": 0.22268309137702705, "grad_norm": 2.234375, "learning_rate": 9.886253023118276e-06, "loss": 1.1039, "step": 1114 }, { "epoch": 0.22288298643212315, "grad_norm": 2.1875, "learning_rate": 9.886029416129837e-06, "loss": 1.0656, "step": 1115 }, { "epoch": 0.22308288148721922, "grad_norm": 2.140625, "learning_rate": 9.885805592104494e-06, "loss": 1.0074, "step": 1116 }, { "epoch": 0.2232827765423153, "grad_norm": 2.28125, "learning_rate": 9.88558155105219e-06, "loss": 1.1455, "step": 1117 }, { "epoch": 0.22348267159741136, "grad_norm": 2.140625, "learning_rate": 9.885357292982873e-06, "loss": 1.1776, "step": 1118 }, { "epoch": 0.22368256665250744, "grad_norm": 2.125, "learning_rate": 9.885132817906509e-06, "loss": 1.1606, "step": 1119 }, { "epoch": 0.2238824617076035, "grad_norm": 2.171875, "learning_rate": 9.884908125833066e-06, "loss": 1.1144, "step": 1120 }, { "epoch": 0.22408235676269958, "grad_norm": 2.109375, "learning_rate": 9.884683216772527e-06, "loss": 1.1148, "step": 1121 }, { "epoch": 0.22428225181779565, "grad_norm": 2.140625, "learning_rate": 9.88445809073488e-06, "loss": 1.0931, "step": 1122 }, { "epoch": 0.22448214687289172, "grad_norm": 2.140625, "learning_rate": 9.884232747730125e-06, "loss": 1.1479, "step": 1123 }, { "epoch": 0.2246820419279878, "grad_norm": 2.078125, "learning_rate": 9.884007187768275e-06, "loss": 0.9995, "step": 1124 }, { "epoch": 0.22488193698308387, "grad_norm": 2.234375, "learning_rate": 9.883781410859347e-06, "loss": 1.1488, "step": 1125 }, { "epoch": 0.22508183203817997, "grad_norm": 2.140625, "learning_rate": 9.88355541701337e-06, "loss": 1.0807, "step": 1126 }, { "epoch": 0.22528172709327604, "grad_norm": 2.09375, "learning_rate": 9.883329206240383e-06, "loss": 1.0489, "step": 1127 }, { "epoch": 0.2254816221483721, "grad_norm": 2.03125, "learning_rate": 9.883102778550434e-06, "loss": 1.0662, "step": 1128 }, { "epoch": 0.22568151720346818, "grad_norm": 2.171875, "learning_rate": 9.882876133953582e-06, "loss": 1.1357, "step": 1129 }, { "epoch": 0.22588141225856426, "grad_norm": 2.09375, "learning_rate": 9.882649272459892e-06, "loss": 1.0505, "step": 1130 }, { "epoch": 0.22608130731366033, "grad_norm": 2.125, "learning_rate": 9.882422194079444e-06, "loss": 1.1083, "step": 1131 }, { "epoch": 0.2262812023687564, "grad_norm": 2.171875, "learning_rate": 9.882194898822324e-06, "loss": 1.1381, "step": 1132 }, { "epoch": 0.22648109742385247, "grad_norm": 2.140625, "learning_rate": 9.881967386698627e-06, "loss": 1.0367, "step": 1133 }, { "epoch": 0.22668099247894855, "grad_norm": 2.328125, "learning_rate": 9.881739657718462e-06, "loss": 1.2138, "step": 1134 }, { "epoch": 0.22688088753404462, "grad_norm": 2.125, "learning_rate": 9.881511711891941e-06, "loss": 1.1267, "step": 1135 }, { "epoch": 0.2270807825891407, "grad_norm": 2.21875, "learning_rate": 9.88128354922919e-06, "loss": 1.0884, "step": 1136 }, { "epoch": 0.2272806776442368, "grad_norm": 2.140625, "learning_rate": 9.881055169740347e-06, "loss": 1.1466, "step": 1137 }, { "epoch": 0.22748057269933286, "grad_norm": 2.0625, "learning_rate": 9.880826573435555e-06, "loss": 1.16, "step": 1138 }, { "epoch": 0.22768046775442893, "grad_norm": 2.296875, "learning_rate": 9.880597760324966e-06, "loss": 1.1587, "step": 1139 }, { "epoch": 0.227880362809525, "grad_norm": 2.109375, "learning_rate": 9.880368730418749e-06, "loss": 1.1319, "step": 1140 }, { "epoch": 0.22808025786462108, "grad_norm": 2.171875, "learning_rate": 9.880139483727071e-06, "loss": 1.1318, "step": 1141 }, { "epoch": 0.22828015291971715, "grad_norm": 2.0625, "learning_rate": 9.879910020260119e-06, "loss": 1.0074, "step": 1142 }, { "epoch": 0.22848004797481322, "grad_norm": 2.21875, "learning_rate": 9.879680340028087e-06, "loss": 1.0707, "step": 1143 }, { "epoch": 0.2286799430299093, "grad_norm": 2.140625, "learning_rate": 9.879450443041172e-06, "loss": 1.0146, "step": 1144 }, { "epoch": 0.22887983808500537, "grad_norm": 2.203125, "learning_rate": 9.879220329309591e-06, "loss": 1.1047, "step": 1145 }, { "epoch": 0.22907973314010144, "grad_norm": 2.234375, "learning_rate": 9.878989998843565e-06, "loss": 1.0841, "step": 1146 }, { "epoch": 0.2292796281951975, "grad_norm": 2.1875, "learning_rate": 9.878759451653323e-06, "loss": 1.1131, "step": 1147 }, { "epoch": 0.22947952325029358, "grad_norm": 2.140625, "learning_rate": 9.87852868774911e-06, "loss": 1.1771, "step": 1148 }, { "epoch": 0.22967941830538968, "grad_norm": 2.25, "learning_rate": 9.878297707141172e-06, "loss": 1.1596, "step": 1149 }, { "epoch": 0.22987931336048575, "grad_norm": 2.203125, "learning_rate": 9.87806650983977e-06, "loss": 1.1953, "step": 1150 }, { "epoch": 0.23007920841558183, "grad_norm": 2.109375, "learning_rate": 9.877835095855174e-06, "loss": 1.0653, "step": 1151 }, { "epoch": 0.2302791034706779, "grad_norm": 2.1875, "learning_rate": 9.877603465197667e-06, "loss": 1.0236, "step": 1152 }, { "epoch": 0.23047899852577397, "grad_norm": 2.171875, "learning_rate": 9.877371617877533e-06, "loss": 1.1288, "step": 1153 }, { "epoch": 0.23067889358087004, "grad_norm": 2.109375, "learning_rate": 9.877139553905072e-06, "loss": 1.0238, "step": 1154 }, { "epoch": 0.23087878863596611, "grad_norm": 2.296875, "learning_rate": 9.876907273290594e-06, "loss": 1.0817, "step": 1155 }, { "epoch": 0.2310786836910622, "grad_norm": 2.1875, "learning_rate": 9.876674776044417e-06, "loss": 1.1418, "step": 1156 }, { "epoch": 0.23127857874615826, "grad_norm": 2.09375, "learning_rate": 9.876442062176866e-06, "loss": 1.1305, "step": 1157 }, { "epoch": 0.23147847380125433, "grad_norm": 2.03125, "learning_rate": 9.87620913169828e-06, "loss": 1.0216, "step": 1158 }, { "epoch": 0.2316783688563504, "grad_norm": 2.140625, "learning_rate": 9.875975984619004e-06, "loss": 1.0736, "step": 1159 }, { "epoch": 0.2318782639114465, "grad_norm": 2.171875, "learning_rate": 9.875742620949395e-06, "loss": 1.0892, "step": 1160 }, { "epoch": 0.23207815896654257, "grad_norm": 2.1875, "learning_rate": 9.875509040699821e-06, "loss": 1.0612, "step": 1161 }, { "epoch": 0.23227805402163865, "grad_norm": 2.109375, "learning_rate": 9.875275243880657e-06, "loss": 1.1469, "step": 1162 }, { "epoch": 0.23247794907673472, "grad_norm": 2.15625, "learning_rate": 9.875041230502286e-06, "loss": 1.1195, "step": 1163 }, { "epoch": 0.2326778441318308, "grad_norm": 2.125, "learning_rate": 9.874807000575105e-06, "loss": 1.0738, "step": 1164 }, { "epoch": 0.23287773918692686, "grad_norm": 2.15625, "learning_rate": 9.874572554109517e-06, "loss": 1.0748, "step": 1165 }, { "epoch": 0.23307763424202294, "grad_norm": 2.125, "learning_rate": 9.874337891115938e-06, "loss": 1.0728, "step": 1166 }, { "epoch": 0.233277529297119, "grad_norm": 2.0625, "learning_rate": 9.874103011604788e-06, "loss": 1.0373, "step": 1167 }, { "epoch": 0.23347742435221508, "grad_norm": 2.1875, "learning_rate": 9.873867915586504e-06, "loss": 1.0788, "step": 1168 }, { "epoch": 0.23367731940731115, "grad_norm": 2.15625, "learning_rate": 9.873632603071528e-06, "loss": 1.1423, "step": 1169 }, { "epoch": 0.23387721446240722, "grad_norm": 2.265625, "learning_rate": 9.873397074070312e-06, "loss": 1.1078, "step": 1170 }, { "epoch": 0.23407710951750332, "grad_norm": 2.140625, "learning_rate": 9.873161328593319e-06, "loss": 1.1371, "step": 1171 }, { "epoch": 0.2342770045725994, "grad_norm": 2.03125, "learning_rate": 9.87292536665102e-06, "loss": 1.0824, "step": 1172 }, { "epoch": 0.23447689962769547, "grad_norm": 2.328125, "learning_rate": 9.872689188253895e-06, "loss": 1.191, "step": 1173 }, { "epoch": 0.23467679468279154, "grad_norm": 2.0, "learning_rate": 9.872452793412439e-06, "loss": 1.0575, "step": 1174 }, { "epoch": 0.2348766897378876, "grad_norm": 2.109375, "learning_rate": 9.872216182137148e-06, "loss": 1.1062, "step": 1175 }, { "epoch": 0.23507658479298368, "grad_norm": 2.203125, "learning_rate": 9.871979354438539e-06, "loss": 1.1298, "step": 1176 }, { "epoch": 0.23527647984807976, "grad_norm": 2.28125, "learning_rate": 9.871742310327124e-06, "loss": 1.1012, "step": 1177 }, { "epoch": 0.23547637490317583, "grad_norm": 2.15625, "learning_rate": 9.871505049813436e-06, "loss": 1.0347, "step": 1178 }, { "epoch": 0.2356762699582719, "grad_norm": 2.09375, "learning_rate": 9.871267572908015e-06, "loss": 1.0454, "step": 1179 }, { "epoch": 0.23587616501336797, "grad_norm": 2.140625, "learning_rate": 9.871029879621408e-06, "loss": 1.1042, "step": 1180 }, { "epoch": 0.23607606006846404, "grad_norm": 2.15625, "learning_rate": 9.870791969964173e-06, "loss": 1.0733, "step": 1181 }, { "epoch": 0.23627595512356014, "grad_norm": 2.21875, "learning_rate": 9.870553843946879e-06, "loss": 1.0674, "step": 1182 }, { "epoch": 0.23647585017865622, "grad_norm": 2.203125, "learning_rate": 9.870315501580106e-06, "loss": 1.0334, "step": 1183 }, { "epoch": 0.2366757452337523, "grad_norm": 2.109375, "learning_rate": 9.870076942874435e-06, "loss": 1.0535, "step": 1184 }, { "epoch": 0.23687564028884836, "grad_norm": 2.1875, "learning_rate": 9.86983816784047e-06, "loss": 1.093, "step": 1185 }, { "epoch": 0.23707553534394443, "grad_norm": 2.234375, "learning_rate": 9.869599176488812e-06, "loss": 1.0887, "step": 1186 }, { "epoch": 0.2372754303990405, "grad_norm": 2.21875, "learning_rate": 9.869359968830078e-06, "loss": 1.0941, "step": 1187 }, { "epoch": 0.23747532545413658, "grad_norm": 2.15625, "learning_rate": 9.869120544874895e-06, "loss": 1.0711, "step": 1188 }, { "epoch": 0.23767522050923265, "grad_norm": 2.0625, "learning_rate": 9.868880904633898e-06, "loss": 1.2008, "step": 1189 }, { "epoch": 0.23787511556432872, "grad_norm": 2.1875, "learning_rate": 9.86864104811773e-06, "loss": 1.1192, "step": 1190 }, { "epoch": 0.2380750106194248, "grad_norm": 2.15625, "learning_rate": 9.868400975337046e-06, "loss": 1.0715, "step": 1191 }, { "epoch": 0.23827490567452086, "grad_norm": 2.296875, "learning_rate": 9.868160686302513e-06, "loss": 1.1605, "step": 1192 }, { "epoch": 0.23847480072961696, "grad_norm": 2.1875, "learning_rate": 9.867920181024802e-06, "loss": 1.1909, "step": 1193 }, { "epoch": 0.23867469578471304, "grad_norm": 1.984375, "learning_rate": 9.867679459514596e-06, "loss": 1.0285, "step": 1194 }, { "epoch": 0.2388745908398091, "grad_norm": 2.125, "learning_rate": 9.867438521782586e-06, "loss": 1.1216, "step": 1195 }, { "epoch": 0.23907448589490518, "grad_norm": 2.078125, "learning_rate": 9.867197367839481e-06, "loss": 1.0404, "step": 1196 }, { "epoch": 0.23927438095000125, "grad_norm": 2.171875, "learning_rate": 9.866955997695984e-06, "loss": 1.1369, "step": 1197 }, { "epoch": 0.23947427600509733, "grad_norm": 2.15625, "learning_rate": 9.866714411362825e-06, "loss": 1.0819, "step": 1198 }, { "epoch": 0.2396741710601934, "grad_norm": 2.09375, "learning_rate": 9.86647260885073e-06, "loss": 1.0858, "step": 1199 }, { "epoch": 0.23987406611528947, "grad_norm": 2.15625, "learning_rate": 9.866230590170442e-06, "loss": 1.081, "step": 1200 }, { "epoch": 0.24007396117038554, "grad_norm": 2.078125, "learning_rate": 9.86598835533271e-06, "loss": 1.0809, "step": 1201 }, { "epoch": 0.2402738562254816, "grad_norm": 2.1875, "learning_rate": 9.865745904348296e-06, "loss": 1.1058, "step": 1202 }, { "epoch": 0.24047375128057769, "grad_norm": 2.0625, "learning_rate": 9.86550323722797e-06, "loss": 1.1048, "step": 1203 }, { "epoch": 0.24067364633567376, "grad_norm": 2.109375, "learning_rate": 9.865260353982506e-06, "loss": 1.0469, "step": 1204 }, { "epoch": 0.24087354139076986, "grad_norm": 2.203125, "learning_rate": 9.8650172546227e-06, "loss": 1.082, "step": 1205 }, { "epoch": 0.24107343644586593, "grad_norm": 2.0625, "learning_rate": 9.864773939159346e-06, "loss": 1.1258, "step": 1206 }, { "epoch": 0.241273331500962, "grad_norm": 2.3125, "learning_rate": 9.864530407603253e-06, "loss": 1.1412, "step": 1207 }, { "epoch": 0.24147322655605807, "grad_norm": 2.1875, "learning_rate": 9.86428665996524e-06, "loss": 0.947, "step": 1208 }, { "epoch": 0.24167312161115415, "grad_norm": 2.109375, "learning_rate": 9.864042696256132e-06, "loss": 1.1225, "step": 1209 }, { "epoch": 0.24187301666625022, "grad_norm": 2.109375, "learning_rate": 9.863798516486767e-06, "loss": 1.1101, "step": 1210 }, { "epoch": 0.2420729117213463, "grad_norm": 2.0625, "learning_rate": 9.86355412066799e-06, "loss": 1.0778, "step": 1211 }, { "epoch": 0.24227280677644236, "grad_norm": 2.203125, "learning_rate": 9.86330950881066e-06, "loss": 1.151, "step": 1212 }, { "epoch": 0.24247270183153843, "grad_norm": 2.125, "learning_rate": 9.863064680925643e-06, "loss": 1.0395, "step": 1213 }, { "epoch": 0.2426725968866345, "grad_norm": 2.09375, "learning_rate": 9.86281963702381e-06, "loss": 1.03, "step": 1214 }, { "epoch": 0.24287249194173058, "grad_norm": 2.1875, "learning_rate": 9.862574377116048e-06, "loss": 1.0622, "step": 1215 }, { "epoch": 0.24307238699682668, "grad_norm": 2.09375, "learning_rate": 9.862328901213253e-06, "loss": 1.0321, "step": 1216 }, { "epoch": 0.24327228205192275, "grad_norm": 2.078125, "learning_rate": 9.862083209326326e-06, "loss": 1.1011, "step": 1217 }, { "epoch": 0.24347217710701882, "grad_norm": 2.078125, "learning_rate": 9.861837301466182e-06, "loss": 1.1446, "step": 1218 }, { "epoch": 0.2436720721621149, "grad_norm": 2.21875, "learning_rate": 9.861591177643744e-06, "loss": 1.1048, "step": 1219 }, { "epoch": 0.24387196721721097, "grad_norm": 2.09375, "learning_rate": 9.861344837869947e-06, "loss": 1.1738, "step": 1220 }, { "epoch": 0.24407186227230704, "grad_norm": 2.15625, "learning_rate": 9.86109828215573e-06, "loss": 1.0178, "step": 1221 }, { "epoch": 0.2442717573274031, "grad_norm": 2.140625, "learning_rate": 9.860851510512046e-06, "loss": 1.0646, "step": 1222 }, { "epoch": 0.24447165238249918, "grad_norm": 2.109375, "learning_rate": 9.860604522949859e-06, "loss": 1.0765, "step": 1223 }, { "epoch": 0.24467154743759525, "grad_norm": 2.1875, "learning_rate": 9.860357319480137e-06, "loss": 1.1155, "step": 1224 }, { "epoch": 0.24487144249269133, "grad_norm": 2.25, "learning_rate": 9.860109900113861e-06, "loss": 1.1207, "step": 1225 }, { "epoch": 0.2450713375477874, "grad_norm": 2.265625, "learning_rate": 9.859862264862023e-06, "loss": 1.1647, "step": 1226 }, { "epoch": 0.2452712326028835, "grad_norm": 2.09375, "learning_rate": 9.859614413735623e-06, "loss": 1.0827, "step": 1227 }, { "epoch": 0.24547112765797957, "grad_norm": 2.140625, "learning_rate": 9.85936634674567e-06, "loss": 1.0661, "step": 1228 }, { "epoch": 0.24567102271307564, "grad_norm": 1.96875, "learning_rate": 9.859118063903182e-06, "loss": 1.1036, "step": 1229 }, { "epoch": 0.24587091776817172, "grad_norm": 2.109375, "learning_rate": 9.858869565219189e-06, "loss": 1.0841, "step": 1230 }, { "epoch": 0.2460708128232678, "grad_norm": 2.09375, "learning_rate": 9.85862085070473e-06, "loss": 1.0565, "step": 1231 }, { "epoch": 0.24627070787836386, "grad_norm": 2.1875, "learning_rate": 9.85837192037085e-06, "loss": 1.1141, "step": 1232 }, { "epoch": 0.24647060293345993, "grad_norm": 2.015625, "learning_rate": 9.858122774228609e-06, "loss": 1.0912, "step": 1233 }, { "epoch": 0.246670497988556, "grad_norm": 1.9921875, "learning_rate": 9.857873412289071e-06, "loss": 0.9702, "step": 1234 }, { "epoch": 0.24687039304365208, "grad_norm": 2.21875, "learning_rate": 9.85762383456332e-06, "loss": 1.1655, "step": 1235 }, { "epoch": 0.24707028809874815, "grad_norm": 2.109375, "learning_rate": 9.857374041062433e-06, "loss": 1.0594, "step": 1236 }, { "epoch": 0.24727018315384422, "grad_norm": 2.09375, "learning_rate": 9.85712403179751e-06, "loss": 1.0415, "step": 1237 }, { "epoch": 0.24747007820894032, "grad_norm": 2.0625, "learning_rate": 9.856873806779656e-06, "loss": 1.0599, "step": 1238 }, { "epoch": 0.2476699732640364, "grad_norm": 2.125, "learning_rate": 9.856623366019988e-06, "loss": 1.0679, "step": 1239 }, { "epoch": 0.24786986831913246, "grad_norm": 2.265625, "learning_rate": 9.85637270952963e-06, "loss": 1.1581, "step": 1240 }, { "epoch": 0.24806976337422854, "grad_norm": 1.984375, "learning_rate": 9.856121837319712e-06, "loss": 0.9953, "step": 1241 }, { "epoch": 0.2482696584293246, "grad_norm": 2.125, "learning_rate": 9.855870749401381e-06, "loss": 1.0956, "step": 1242 }, { "epoch": 0.24846955348442068, "grad_norm": 2.328125, "learning_rate": 9.855619445785791e-06, "loss": 1.0473, "step": 1243 }, { "epoch": 0.24866944853951675, "grad_norm": 2.09375, "learning_rate": 9.855367926484103e-06, "loss": 1.1015, "step": 1244 }, { "epoch": 0.24886934359461282, "grad_norm": 2.1875, "learning_rate": 9.85511619150749e-06, "loss": 1.1346, "step": 1245 }, { "epoch": 0.2490692386497089, "grad_norm": 2.28125, "learning_rate": 9.854864240867137e-06, "loss": 1.1165, "step": 1246 }, { "epoch": 0.24926913370480497, "grad_norm": 2.25, "learning_rate": 9.85461207457423e-06, "loss": 1.1865, "step": 1247 }, { "epoch": 0.24946902875990104, "grad_norm": 2.171875, "learning_rate": 9.854359692639974e-06, "loss": 1.0496, "step": 1248 }, { "epoch": 0.24966892381499714, "grad_norm": 2.140625, "learning_rate": 9.854107095075578e-06, "loss": 1.0782, "step": 1249 }, { "epoch": 0.2498688188700932, "grad_norm": 2.0625, "learning_rate": 9.853854281892265e-06, "loss": 1.0533, "step": 1250 }, { "epoch": 0.25006871392518926, "grad_norm": 2.140625, "learning_rate": 9.853601253101262e-06, "loss": 1.1048, "step": 1251 }, { "epoch": 0.25026860898028536, "grad_norm": 2.09375, "learning_rate": 9.85334800871381e-06, "loss": 1.0275, "step": 1252 }, { "epoch": 0.2504685040353814, "grad_norm": 2.125, "learning_rate": 9.853094548741158e-06, "loss": 1.1582, "step": 1253 }, { "epoch": 0.2506683990904775, "grad_norm": 2.125, "learning_rate": 9.852840873194565e-06, "loss": 1.0982, "step": 1254 }, { "epoch": 0.2508682941455736, "grad_norm": 2.171875, "learning_rate": 9.852586982085298e-06, "loss": 1.0824, "step": 1255 }, { "epoch": 0.25106818920066964, "grad_norm": 2.203125, "learning_rate": 9.852332875424636e-06, "loss": 1.1468, "step": 1256 }, { "epoch": 0.25126808425576574, "grad_norm": 2.109375, "learning_rate": 9.852078553223865e-06, "loss": 1.0221, "step": 1257 }, { "epoch": 0.2514679793108618, "grad_norm": 2.140625, "learning_rate": 9.851824015494284e-06, "loss": 1.049, "step": 1258 }, { "epoch": 0.2516678743659579, "grad_norm": 2.203125, "learning_rate": 9.851569262247198e-06, "loss": 1.1261, "step": 1259 }, { "epoch": 0.25186776942105393, "grad_norm": 2.046875, "learning_rate": 9.851314293493923e-06, "loss": 1.1281, "step": 1260 }, { "epoch": 0.25206766447615003, "grad_norm": 2.09375, "learning_rate": 9.851059109245785e-06, "loss": 1.022, "step": 1261 }, { "epoch": 0.2522675595312461, "grad_norm": 2.140625, "learning_rate": 9.850803709514121e-06, "loss": 1.1081, "step": 1262 }, { "epoch": 0.2524674545863422, "grad_norm": 2.09375, "learning_rate": 9.850548094310273e-06, "loss": 1.0569, "step": 1263 }, { "epoch": 0.2526673496414382, "grad_norm": 2.078125, "learning_rate": 9.850292263645597e-06, "loss": 1.0977, "step": 1264 }, { "epoch": 0.2528672446965343, "grad_norm": 2.265625, "learning_rate": 9.850036217531457e-06, "loss": 1.06, "step": 1265 }, { "epoch": 0.2530671397516304, "grad_norm": 2.046875, "learning_rate": 9.849779955979226e-06, "loss": 1.0829, "step": 1266 }, { "epoch": 0.25326703480672647, "grad_norm": 2.171875, "learning_rate": 9.849523479000287e-06, "loss": 1.1455, "step": 1267 }, { "epoch": 0.25346692986182257, "grad_norm": 2.265625, "learning_rate": 9.849266786606033e-06, "loss": 1.1681, "step": 1268 }, { "epoch": 0.2536668249169186, "grad_norm": 2.078125, "learning_rate": 9.849009878807867e-06, "loss": 1.07, "step": 1269 }, { "epoch": 0.2538667199720147, "grad_norm": 2.25, "learning_rate": 9.848752755617201e-06, "loss": 1.1013, "step": 1270 }, { "epoch": 0.25406661502711075, "grad_norm": 2.171875, "learning_rate": 9.848495417045454e-06, "loss": 1.1068, "step": 1271 }, { "epoch": 0.25426651008220685, "grad_norm": 2.1875, "learning_rate": 9.848237863104057e-06, "loss": 0.9974, "step": 1272 }, { "epoch": 0.2544664051373029, "grad_norm": 2.21875, "learning_rate": 9.847980093804455e-06, "loss": 1.1419, "step": 1273 }, { "epoch": 0.254666300192399, "grad_norm": 2.09375, "learning_rate": 9.847722109158094e-06, "loss": 1.0713, "step": 1274 }, { "epoch": 0.25486619524749504, "grad_norm": 2.234375, "learning_rate": 9.847463909176433e-06, "loss": 1.0939, "step": 1275 }, { "epoch": 0.25506609030259114, "grad_norm": 2.234375, "learning_rate": 9.847205493870944e-06, "loss": 1.1118, "step": 1276 }, { "epoch": 0.25526598535768724, "grad_norm": 2.046875, "learning_rate": 9.846946863253104e-06, "loss": 1.052, "step": 1277 }, { "epoch": 0.2554658804127833, "grad_norm": 2.125, "learning_rate": 9.846688017334405e-06, "loss": 1.0839, "step": 1278 }, { "epoch": 0.2556657754678794, "grad_norm": 2.1875, "learning_rate": 9.846428956126338e-06, "loss": 1.1021, "step": 1279 }, { "epoch": 0.25586567052297543, "grad_norm": 2.109375, "learning_rate": 9.846169679640417e-06, "loss": 1.011, "step": 1280 }, { "epoch": 0.25606556557807153, "grad_norm": 2.203125, "learning_rate": 9.845910187888155e-06, "loss": 1.0385, "step": 1281 }, { "epoch": 0.2562654606331676, "grad_norm": 2.234375, "learning_rate": 9.84565048088108e-06, "loss": 1.1017, "step": 1282 }, { "epoch": 0.2564653556882637, "grad_norm": 2.125, "learning_rate": 9.84539055863073e-06, "loss": 1.1327, "step": 1283 }, { "epoch": 0.2566652507433597, "grad_norm": 2.09375, "learning_rate": 9.845130421148646e-06, "loss": 1.1224, "step": 1284 }, { "epoch": 0.2568651457984558, "grad_norm": 2.046875, "learning_rate": 9.844870068446389e-06, "loss": 1.1337, "step": 1285 }, { "epoch": 0.25706504085355186, "grad_norm": 2.1875, "learning_rate": 9.84460950053552e-06, "loss": 1.0371, "step": 1286 }, { "epoch": 0.25726493590864796, "grad_norm": 2.171875, "learning_rate": 9.844348717427614e-06, "loss": 1.1558, "step": 1287 }, { "epoch": 0.257464830963744, "grad_norm": 2.015625, "learning_rate": 9.844087719134254e-06, "loss": 1.0198, "step": 1288 }, { "epoch": 0.2576647260188401, "grad_norm": 2.1875, "learning_rate": 9.843826505667038e-06, "loss": 1.1347, "step": 1289 }, { "epoch": 0.2578646210739362, "grad_norm": 2.125, "learning_rate": 9.843565077037563e-06, "loss": 1.2295, "step": 1290 }, { "epoch": 0.25806451612903225, "grad_norm": 2.109375, "learning_rate": 9.843303433257447e-06, "loss": 1.1543, "step": 1291 }, { "epoch": 0.25826441118412835, "grad_norm": 2.140625, "learning_rate": 9.843041574338307e-06, "loss": 1.1631, "step": 1292 }, { "epoch": 0.2584643062392244, "grad_norm": 2.25, "learning_rate": 9.84277950029178e-06, "loss": 1.0916, "step": 1293 }, { "epoch": 0.2586642012943205, "grad_norm": 2.0625, "learning_rate": 9.842517211129504e-06, "loss": 1.1489, "step": 1294 }, { "epoch": 0.25886409634941654, "grad_norm": 2.078125, "learning_rate": 9.842254706863131e-06, "loss": 0.9759, "step": 1295 }, { "epoch": 0.25906399140451264, "grad_norm": 2.109375, "learning_rate": 9.84199198750432e-06, "loss": 1.0633, "step": 1296 }, { "epoch": 0.2592638864596087, "grad_norm": 2.171875, "learning_rate": 9.841729053064744e-06, "loss": 1.0705, "step": 1297 }, { "epoch": 0.2594637815147048, "grad_norm": 2.203125, "learning_rate": 9.84146590355608e-06, "loss": 1.1205, "step": 1298 }, { "epoch": 0.2596636765698008, "grad_norm": 2.203125, "learning_rate": 9.841202538990016e-06, "loss": 1.1211, "step": 1299 }, { "epoch": 0.2598635716248969, "grad_norm": 2.15625, "learning_rate": 9.840938959378254e-06, "loss": 1.1488, "step": 1300 }, { "epoch": 0.260063466679993, "grad_norm": 2.203125, "learning_rate": 9.8406751647325e-06, "loss": 1.1001, "step": 1301 }, { "epoch": 0.26026336173508907, "grad_norm": 2.046875, "learning_rate": 9.840411155064472e-06, "loss": 1.0636, "step": 1302 }, { "epoch": 0.26046325679018517, "grad_norm": 2.078125, "learning_rate": 9.840146930385897e-06, "loss": 1.0858, "step": 1303 }, { "epoch": 0.2606631518452812, "grad_norm": 2.078125, "learning_rate": 9.839882490708512e-06, "loss": 0.9908, "step": 1304 }, { "epoch": 0.2608630469003773, "grad_norm": 2.171875, "learning_rate": 9.839617836044065e-06, "loss": 1.1681, "step": 1305 }, { "epoch": 0.26106294195547336, "grad_norm": 2.140625, "learning_rate": 9.83935296640431e-06, "loss": 1.1194, "step": 1306 }, { "epoch": 0.26126283701056946, "grad_norm": 2.1875, "learning_rate": 9.839087881801012e-06, "loss": 1.1615, "step": 1307 }, { "epoch": 0.2614627320656655, "grad_norm": 2.109375, "learning_rate": 9.83882258224595e-06, "loss": 1.0381, "step": 1308 }, { "epoch": 0.2616626271207616, "grad_norm": 2.125, "learning_rate": 9.838557067750903e-06, "loss": 1.1391, "step": 1309 }, { "epoch": 0.26186252217585765, "grad_norm": 2.28125, "learning_rate": 9.83829133832767e-06, "loss": 1.1343, "step": 1310 }, { "epoch": 0.26206241723095375, "grad_norm": 2.125, "learning_rate": 9.838025393988051e-06, "loss": 1.0177, "step": 1311 }, { "epoch": 0.26226231228604985, "grad_norm": 2.3125, "learning_rate": 9.83775923474386e-06, "loss": 1.1457, "step": 1312 }, { "epoch": 0.2624622073411459, "grad_norm": 2.140625, "learning_rate": 9.837492860606923e-06, "loss": 1.1076, "step": 1313 }, { "epoch": 0.262662102396242, "grad_norm": 2.15625, "learning_rate": 9.837226271589067e-06, "loss": 1.0448, "step": 1314 }, { "epoch": 0.26286199745133804, "grad_norm": 2.15625, "learning_rate": 9.836959467702139e-06, "loss": 1.0799, "step": 1315 }, { "epoch": 0.26306189250643414, "grad_norm": 2.0625, "learning_rate": 9.836692448957987e-06, "loss": 1.0002, "step": 1316 }, { "epoch": 0.2632617875615302, "grad_norm": 2.046875, "learning_rate": 9.836425215368472e-06, "loss": 1.1429, "step": 1317 }, { "epoch": 0.2634616826166263, "grad_norm": 2.171875, "learning_rate": 9.836157766945467e-06, "loss": 1.0986, "step": 1318 }, { "epoch": 0.2636615776717223, "grad_norm": 2.25, "learning_rate": 9.835890103700849e-06, "loss": 1.1768, "step": 1319 }, { "epoch": 0.2638614727268184, "grad_norm": 2.171875, "learning_rate": 9.83562222564651e-06, "loss": 1.0491, "step": 1320 }, { "epoch": 0.26406136778191447, "grad_norm": 2.0625, "learning_rate": 9.835354132794349e-06, "loss": 1.0405, "step": 1321 }, { "epoch": 0.26426126283701057, "grad_norm": 2.1875, "learning_rate": 9.835085825156274e-06, "loss": 1.1763, "step": 1322 }, { "epoch": 0.26446115789210667, "grad_norm": 2.078125, "learning_rate": 9.834817302744201e-06, "loss": 1.11, "step": 1323 }, { "epoch": 0.2646610529472027, "grad_norm": 2.140625, "learning_rate": 9.834548565570062e-06, "loss": 1.1882, "step": 1324 }, { "epoch": 0.2648609480022988, "grad_norm": 2.171875, "learning_rate": 9.834279613645791e-06, "loss": 1.1312, "step": 1325 }, { "epoch": 0.26506084305739486, "grad_norm": 2.375, "learning_rate": 9.834010446983335e-06, "loss": 1.1019, "step": 1326 }, { "epoch": 0.26526073811249096, "grad_norm": 2.109375, "learning_rate": 9.833741065594652e-06, "loss": 1.1003, "step": 1327 }, { "epoch": 0.265460633167587, "grad_norm": 2.15625, "learning_rate": 9.833471469491708e-06, "loss": 1.0937, "step": 1328 }, { "epoch": 0.2656605282226831, "grad_norm": 2.171875, "learning_rate": 9.833201658686478e-06, "loss": 1.0801, "step": 1329 }, { "epoch": 0.26586042327777915, "grad_norm": 2.140625, "learning_rate": 9.832931633190943e-06, "loss": 1.0232, "step": 1330 }, { "epoch": 0.26606031833287525, "grad_norm": 2.15625, "learning_rate": 9.832661393017104e-06, "loss": 1.1369, "step": 1331 }, { "epoch": 0.2662602133879713, "grad_norm": 2.09375, "learning_rate": 9.83239093817696e-06, "loss": 0.9929, "step": 1332 }, { "epoch": 0.2664601084430674, "grad_norm": 2.1875, "learning_rate": 9.83212026868253e-06, "loss": 1.1219, "step": 1333 }, { "epoch": 0.2666600034981635, "grad_norm": 2.0625, "learning_rate": 9.831849384545831e-06, "loss": 1.0917, "step": 1334 }, { "epoch": 0.26685989855325953, "grad_norm": 2.203125, "learning_rate": 9.8315782857789e-06, "loss": 1.2081, "step": 1335 }, { "epoch": 0.26705979360835563, "grad_norm": 2.171875, "learning_rate": 9.831306972393778e-06, "loss": 1.0511, "step": 1336 }, { "epoch": 0.2672596886634517, "grad_norm": 2.09375, "learning_rate": 9.831035444402514e-06, "loss": 1.1524, "step": 1337 }, { "epoch": 0.2674595837185478, "grad_norm": 1.9765625, "learning_rate": 9.830763701817173e-06, "loss": 1.0655, "step": 1338 }, { "epoch": 0.2676594787736438, "grad_norm": 2.171875, "learning_rate": 9.830491744649824e-06, "loss": 1.1691, "step": 1339 }, { "epoch": 0.2678593738287399, "grad_norm": 2.078125, "learning_rate": 9.830219572912546e-06, "loss": 1.0408, "step": 1340 }, { "epoch": 0.26805926888383597, "grad_norm": 2.171875, "learning_rate": 9.829947186617432e-06, "loss": 1.0452, "step": 1341 }, { "epoch": 0.26825916393893207, "grad_norm": 2.15625, "learning_rate": 9.82967458577658e-06, "loss": 1.1355, "step": 1342 }, { "epoch": 0.2684590589940281, "grad_norm": 2.125, "learning_rate": 9.829401770402099e-06, "loss": 1.1821, "step": 1343 }, { "epoch": 0.2686589540491242, "grad_norm": 2.203125, "learning_rate": 9.829128740506107e-06, "loss": 1.0181, "step": 1344 }, { "epoch": 0.2688588491042203, "grad_norm": 2.109375, "learning_rate": 9.828855496100733e-06, "loss": 1.1149, "step": 1345 }, { "epoch": 0.26905874415931635, "grad_norm": 2.125, "learning_rate": 9.828582037198111e-06, "loss": 1.0257, "step": 1346 }, { "epoch": 0.26925863921441245, "grad_norm": 2.171875, "learning_rate": 9.828308363810392e-06, "loss": 1.1245, "step": 1347 }, { "epoch": 0.2694585342695085, "grad_norm": 2.0625, "learning_rate": 9.828034475949732e-06, "loss": 1.066, "step": 1348 }, { "epoch": 0.2696584293246046, "grad_norm": 2.234375, "learning_rate": 9.827760373628295e-06, "loss": 1.1373, "step": 1349 }, { "epoch": 0.26985832437970064, "grad_norm": 2.25, "learning_rate": 9.82748605685826e-06, "loss": 1.2466, "step": 1350 }, { "epoch": 0.27005821943479674, "grad_norm": 2.078125, "learning_rate": 9.827211525651808e-06, "loss": 1.1199, "step": 1351 }, { "epoch": 0.2702581144898928, "grad_norm": 1.9921875, "learning_rate": 9.826936780021134e-06, "loss": 0.9884, "step": 1352 }, { "epoch": 0.2704580095449889, "grad_norm": 2.046875, "learning_rate": 9.826661819978446e-06, "loss": 1.0505, "step": 1353 }, { "epoch": 0.27065790460008493, "grad_norm": 2.09375, "learning_rate": 9.826386645535955e-06, "loss": 1.048, "step": 1354 }, { "epoch": 0.27085779965518103, "grad_norm": 2.25, "learning_rate": 9.826111256705885e-06, "loss": 1.2033, "step": 1355 }, { "epoch": 0.27105769471027713, "grad_norm": 2.09375, "learning_rate": 9.825835653500468e-06, "loss": 1.0404, "step": 1356 }, { "epoch": 0.2712575897653732, "grad_norm": 2.078125, "learning_rate": 9.825559835931948e-06, "loss": 1.077, "step": 1357 }, { "epoch": 0.2714574848204693, "grad_norm": 2.28125, "learning_rate": 9.825283804012573e-06, "loss": 1.1831, "step": 1358 }, { "epoch": 0.2716573798755653, "grad_norm": 2.015625, "learning_rate": 9.825007557754608e-06, "loss": 0.9796, "step": 1359 }, { "epoch": 0.2718572749306614, "grad_norm": 2.4375, "learning_rate": 9.824731097170323e-06, "loss": 1.0301, "step": 1360 }, { "epoch": 0.27205716998575746, "grad_norm": 2.3125, "learning_rate": 9.824454422271999e-06, "loss": 1.1681, "step": 1361 }, { "epoch": 0.27225706504085356, "grad_norm": 2.140625, "learning_rate": 9.824177533071922e-06, "loss": 1.0397, "step": 1362 }, { "epoch": 0.2724569600959496, "grad_norm": 2.1875, "learning_rate": 9.823900429582396e-06, "loss": 1.0641, "step": 1363 }, { "epoch": 0.2726568551510457, "grad_norm": 2.15625, "learning_rate": 9.823623111815728e-06, "loss": 1.0351, "step": 1364 }, { "epoch": 0.27285675020614175, "grad_norm": 2.078125, "learning_rate": 9.823345579784236e-06, "loss": 1.076, "step": 1365 }, { "epoch": 0.27305664526123785, "grad_norm": 2.140625, "learning_rate": 9.823067833500248e-06, "loss": 1.0386, "step": 1366 }, { "epoch": 0.27325654031633395, "grad_norm": 2.09375, "learning_rate": 9.822789872976105e-06, "loss": 1.1059, "step": 1367 }, { "epoch": 0.27345643537143, "grad_norm": 2.25, "learning_rate": 9.822511698224147e-06, "loss": 1.1233, "step": 1368 }, { "epoch": 0.2736563304265261, "grad_norm": 2.3125, "learning_rate": 9.822233309256738e-06, "loss": 1.1211, "step": 1369 }, { "epoch": 0.27385622548162214, "grad_norm": 2.109375, "learning_rate": 9.821954706086237e-06, "loss": 1.0413, "step": 1370 }, { "epoch": 0.27405612053671824, "grad_norm": 2.15625, "learning_rate": 9.821675888725025e-06, "loss": 1.0646, "step": 1371 }, { "epoch": 0.2742560155918143, "grad_norm": 2.15625, "learning_rate": 9.821396857185484e-06, "loss": 1.0904, "step": 1372 }, { "epoch": 0.2744559106469104, "grad_norm": 2.3125, "learning_rate": 9.821117611480011e-06, "loss": 1.0808, "step": 1373 }, { "epoch": 0.27465580570200643, "grad_norm": 2.125, "learning_rate": 9.820838151621008e-06, "loss": 1.0683, "step": 1374 }, { "epoch": 0.27485570075710253, "grad_norm": 2.21875, "learning_rate": 9.820558477620888e-06, "loss": 1.198, "step": 1375 }, { "epoch": 0.27505559581219857, "grad_norm": 2.1875, "learning_rate": 9.820278589492076e-06, "loss": 0.9376, "step": 1376 }, { "epoch": 0.27525549086729467, "grad_norm": 2.078125, "learning_rate": 9.819998487247004e-06, "loss": 1.0398, "step": 1377 }, { "epoch": 0.27545538592239077, "grad_norm": 2.03125, "learning_rate": 9.819718170898116e-06, "loss": 1.0014, "step": 1378 }, { "epoch": 0.2756552809774868, "grad_norm": 2.171875, "learning_rate": 9.819437640457858e-06, "loss": 1.0894, "step": 1379 }, { "epoch": 0.2758551760325829, "grad_norm": 2.109375, "learning_rate": 9.819156895938697e-06, "loss": 1.0558, "step": 1380 }, { "epoch": 0.27605507108767896, "grad_norm": 2.0625, "learning_rate": 9.8188759373531e-06, "loss": 1.0198, "step": 1381 }, { "epoch": 0.27625496614277506, "grad_norm": 2.296875, "learning_rate": 9.81859476471355e-06, "loss": 1.0753, "step": 1382 }, { "epoch": 0.2764548611978711, "grad_norm": 2.125, "learning_rate": 9.818313378032535e-06, "loss": 1.1915, "step": 1383 }, { "epoch": 0.2766547562529672, "grad_norm": 2.234375, "learning_rate": 9.818031777322554e-06, "loss": 1.081, "step": 1384 }, { "epoch": 0.27685465130806325, "grad_norm": 2.140625, "learning_rate": 9.817749962596115e-06, "loss": 1.0739, "step": 1385 }, { "epoch": 0.27705454636315935, "grad_norm": 2.171875, "learning_rate": 9.817467933865739e-06, "loss": 1.1438, "step": 1386 }, { "epoch": 0.2772544414182554, "grad_norm": 2.125, "learning_rate": 9.81718569114395e-06, "loss": 1.0289, "step": 1387 }, { "epoch": 0.2774543364733515, "grad_norm": 2.09375, "learning_rate": 9.81690323444329e-06, "loss": 1.1005, "step": 1388 }, { "epoch": 0.2776542315284476, "grad_norm": 2.265625, "learning_rate": 9.8166205637763e-06, "loss": 1.0965, "step": 1389 }, { "epoch": 0.27785412658354364, "grad_norm": 2.203125, "learning_rate": 9.81633767915554e-06, "loss": 1.1429, "step": 1390 }, { "epoch": 0.27805402163863974, "grad_norm": 2.046875, "learning_rate": 9.816054580593575e-06, "loss": 1.0227, "step": 1391 }, { "epoch": 0.2782539166937358, "grad_norm": 2.125, "learning_rate": 9.81577126810298e-06, "loss": 1.1028, "step": 1392 }, { "epoch": 0.2784538117488319, "grad_norm": 2.296875, "learning_rate": 9.815487741696339e-06, "loss": 1.244, "step": 1393 }, { "epoch": 0.2786537068039279, "grad_norm": 2.1875, "learning_rate": 9.815204001386245e-06, "loss": 1.0981, "step": 1394 }, { "epoch": 0.278853601859024, "grad_norm": 2.0625, "learning_rate": 9.814920047185306e-06, "loss": 1.1177, "step": 1395 }, { "epoch": 0.27905349691412007, "grad_norm": 2.09375, "learning_rate": 9.814635879106134e-06, "loss": 1.1771, "step": 1396 }, { "epoch": 0.27925339196921617, "grad_norm": 2.234375, "learning_rate": 9.814351497161348e-06, "loss": 1.1397, "step": 1397 }, { "epoch": 0.2794532870243122, "grad_norm": 2.296875, "learning_rate": 9.814066901363584e-06, "loss": 1.0615, "step": 1398 }, { "epoch": 0.2796531820794083, "grad_norm": 2.15625, "learning_rate": 9.81378209172548e-06, "loss": 1.0988, "step": 1399 }, { "epoch": 0.27985307713450436, "grad_norm": 2.09375, "learning_rate": 9.813497068259692e-06, "loss": 1.0661, "step": 1400 }, { "epoch": 0.28005297218960046, "grad_norm": 2.03125, "learning_rate": 9.813211830978879e-06, "loss": 1.1077, "step": 1401 }, { "epoch": 0.28025286724469656, "grad_norm": 2.171875, "learning_rate": 9.812926379895708e-06, "loss": 1.122, "step": 1402 }, { "epoch": 0.2804527622997926, "grad_norm": 2.171875, "learning_rate": 9.812640715022863e-06, "loss": 1.173, "step": 1403 }, { "epoch": 0.2806526573548887, "grad_norm": 2.078125, "learning_rate": 9.812354836373031e-06, "loss": 1.0561, "step": 1404 }, { "epoch": 0.28085255240998475, "grad_norm": 2.109375, "learning_rate": 9.812068743958912e-06, "loss": 1.1136, "step": 1405 }, { "epoch": 0.28105244746508085, "grad_norm": 2.125, "learning_rate": 9.811782437793211e-06, "loss": 1.0279, "step": 1406 }, { "epoch": 0.2812523425201769, "grad_norm": 2.15625, "learning_rate": 9.81149591788865e-06, "loss": 1.0633, "step": 1407 }, { "epoch": 0.281452237575273, "grad_norm": 2.171875, "learning_rate": 9.811209184257953e-06, "loss": 1.1056, "step": 1408 }, { "epoch": 0.28165213263036903, "grad_norm": 2.125, "learning_rate": 9.81092223691386e-06, "loss": 1.0893, "step": 1409 }, { "epoch": 0.28185202768546513, "grad_norm": 2.125, "learning_rate": 9.810635075869113e-06, "loss": 0.9967, "step": 1410 }, { "epoch": 0.2820519227405612, "grad_norm": 2.09375, "learning_rate": 9.81034770113647e-06, "loss": 1.0597, "step": 1411 }, { "epoch": 0.2822518177956573, "grad_norm": 2.140625, "learning_rate": 9.810060112728696e-06, "loss": 0.9984, "step": 1412 }, { "epoch": 0.2824517128507534, "grad_norm": 2.09375, "learning_rate": 9.809772310658567e-06, "loss": 1.0279, "step": 1413 }, { "epoch": 0.2826516079058494, "grad_norm": 2.234375, "learning_rate": 9.809484294938864e-06, "loss": 1.1348, "step": 1414 }, { "epoch": 0.2828515029609455, "grad_norm": 2.140625, "learning_rate": 9.809196065582383e-06, "loss": 1.0826, "step": 1415 }, { "epoch": 0.28305139801604157, "grad_norm": 2.171875, "learning_rate": 9.808907622601926e-06, "loss": 1.0565, "step": 1416 }, { "epoch": 0.28325129307113767, "grad_norm": 2.140625, "learning_rate": 9.808618966010306e-06, "loss": 1.1655, "step": 1417 }, { "epoch": 0.2834511881262337, "grad_norm": 2.21875, "learning_rate": 9.808330095820346e-06, "loss": 1.1342, "step": 1418 }, { "epoch": 0.2836510831813298, "grad_norm": 2.109375, "learning_rate": 9.808041012044875e-06, "loss": 1.07, "step": 1419 }, { "epoch": 0.28385097823642585, "grad_norm": 2.21875, "learning_rate": 9.807751714696737e-06, "loss": 1.0354, "step": 1420 }, { "epoch": 0.28405087329152195, "grad_norm": 2.125, "learning_rate": 9.807462203788782e-06, "loss": 1.0195, "step": 1421 }, { "epoch": 0.284250768346618, "grad_norm": 2.109375, "learning_rate": 9.807172479333868e-06, "loss": 1.1112, "step": 1422 }, { "epoch": 0.2844506634017141, "grad_norm": 2.046875, "learning_rate": 9.806882541344867e-06, "loss": 1.0059, "step": 1423 }, { "epoch": 0.2846505584568102, "grad_norm": 2.265625, "learning_rate": 9.806592389834654e-06, "loss": 1.1527, "step": 1424 }, { "epoch": 0.28485045351190624, "grad_norm": 2.15625, "learning_rate": 9.806302024816124e-06, "loss": 1.0392, "step": 1425 }, { "epoch": 0.28505034856700234, "grad_norm": 2.203125, "learning_rate": 9.806011446302169e-06, "loss": 1.1161, "step": 1426 }, { "epoch": 0.2852502436220984, "grad_norm": 2.1875, "learning_rate": 9.8057206543057e-06, "loss": 1.0734, "step": 1427 }, { "epoch": 0.2854501386771945, "grad_norm": 2.109375, "learning_rate": 9.805429648839633e-06, "loss": 1.0341, "step": 1428 }, { "epoch": 0.28565003373229053, "grad_norm": 2.109375, "learning_rate": 9.805138429916894e-06, "loss": 1.1391, "step": 1429 }, { "epoch": 0.28584992878738663, "grad_norm": 2.09375, "learning_rate": 9.80484699755042e-06, "loss": 1.1245, "step": 1430 }, { "epoch": 0.2860498238424827, "grad_norm": 2.125, "learning_rate": 9.804555351753153e-06, "loss": 1.1387, "step": 1431 }, { "epoch": 0.2862497188975788, "grad_norm": 2.75, "learning_rate": 9.804263492538054e-06, "loss": 1.0452, "step": 1432 }, { "epoch": 0.2864496139526748, "grad_norm": 2.078125, "learning_rate": 9.80397141991808e-06, "loss": 1.0985, "step": 1433 }, { "epoch": 0.2866495090077709, "grad_norm": 2.09375, "learning_rate": 9.80367913390621e-06, "loss": 0.9788, "step": 1434 }, { "epoch": 0.286849404062867, "grad_norm": 2.09375, "learning_rate": 9.803386634515427e-06, "loss": 1.053, "step": 1435 }, { "epoch": 0.28704929911796306, "grad_norm": 2.34375, "learning_rate": 9.803093921758721e-06, "loss": 1.109, "step": 1436 }, { "epoch": 0.28724919417305916, "grad_norm": 2.265625, "learning_rate": 9.802800995649098e-06, "loss": 1.2099, "step": 1437 }, { "epoch": 0.2874490892281552, "grad_norm": 2.03125, "learning_rate": 9.802507856199567e-06, "loss": 1.0597, "step": 1438 }, { "epoch": 0.2876489842832513, "grad_norm": 2.171875, "learning_rate": 9.802214503423149e-06, "loss": 1.0489, "step": 1439 }, { "epoch": 0.28784887933834735, "grad_norm": 2.140625, "learning_rate": 9.801920937332876e-06, "loss": 1.1361, "step": 1440 }, { "epoch": 0.28804877439344345, "grad_norm": 2.09375, "learning_rate": 9.801627157941788e-06, "loss": 1.0418, "step": 1441 }, { "epoch": 0.2882486694485395, "grad_norm": 2.21875, "learning_rate": 9.801333165262936e-06, "loss": 1.2393, "step": 1442 }, { "epoch": 0.2884485645036356, "grad_norm": 2.1875, "learning_rate": 9.801038959309376e-06, "loss": 1.1047, "step": 1443 }, { "epoch": 0.28864845955873164, "grad_norm": 2.140625, "learning_rate": 9.800744540094178e-06, "loss": 0.9954, "step": 1444 }, { "epoch": 0.28884835461382774, "grad_norm": 2.1875, "learning_rate": 9.80044990763042e-06, "loss": 1.1797, "step": 1445 }, { "epoch": 0.28904824966892384, "grad_norm": 2.234375, "learning_rate": 9.800155061931192e-06, "loss": 1.0957, "step": 1446 }, { "epoch": 0.2892481447240199, "grad_norm": 2.046875, "learning_rate": 9.799860003009587e-06, "loss": 1.1394, "step": 1447 }, { "epoch": 0.289448039779116, "grad_norm": 2.140625, "learning_rate": 9.799564730878713e-06, "loss": 1.0629, "step": 1448 }, { "epoch": 0.28964793483421203, "grad_norm": 2.296875, "learning_rate": 9.799269245551688e-06, "loss": 1.1368, "step": 1449 }, { "epoch": 0.28984782988930813, "grad_norm": 2.140625, "learning_rate": 9.798973547041633e-06, "loss": 1.0617, "step": 1450 }, { "epoch": 0.2900477249444042, "grad_norm": 2.09375, "learning_rate": 9.79867763536169e-06, "loss": 1.0883, "step": 1451 }, { "epoch": 0.2902476199995003, "grad_norm": 2.140625, "learning_rate": 9.798381510524995e-06, "loss": 1.042, "step": 1452 }, { "epoch": 0.2904475150545963, "grad_norm": 2.265625, "learning_rate": 9.798085172544707e-06, "loss": 1.1617, "step": 1453 }, { "epoch": 0.2906474101096924, "grad_norm": 2.015625, "learning_rate": 9.797788621433987e-06, "loss": 1.0817, "step": 1454 }, { "epoch": 0.29084730516478846, "grad_norm": 2.234375, "learning_rate": 9.797491857206009e-06, "loss": 1.2027, "step": 1455 }, { "epoch": 0.29104720021988456, "grad_norm": 1.9453125, "learning_rate": 9.797194879873956e-06, "loss": 1.1056, "step": 1456 }, { "epoch": 0.29124709527498066, "grad_norm": 2.296875, "learning_rate": 9.796897689451019e-06, "loss": 1.1966, "step": 1457 }, { "epoch": 0.2914469903300767, "grad_norm": 2.078125, "learning_rate": 9.7966002859504e-06, "loss": 1.1577, "step": 1458 }, { "epoch": 0.2916468853851728, "grad_norm": 2.0625, "learning_rate": 9.796302669385307e-06, "loss": 1.0255, "step": 1459 }, { "epoch": 0.29184678044026885, "grad_norm": 2.0625, "learning_rate": 9.796004839768962e-06, "loss": 1.1133, "step": 1460 }, { "epoch": 0.29204667549536495, "grad_norm": 2.15625, "learning_rate": 9.795706797114593e-06, "loss": 1.0865, "step": 1461 }, { "epoch": 0.292246570550461, "grad_norm": 2.0625, "learning_rate": 9.795408541435443e-06, "loss": 1.0332, "step": 1462 }, { "epoch": 0.2924464656055571, "grad_norm": 2.109375, "learning_rate": 9.795110072744756e-06, "loss": 1.1121, "step": 1463 }, { "epoch": 0.29264636066065314, "grad_norm": 2.21875, "learning_rate": 9.794811391055793e-06, "loss": 1.0971, "step": 1464 }, { "epoch": 0.29284625571574924, "grad_norm": 2.046875, "learning_rate": 9.79451249638182e-06, "loss": 1.0235, "step": 1465 }, { "epoch": 0.2930461507708453, "grad_norm": 2.078125, "learning_rate": 9.79421338873611e-06, "loss": 1.0476, "step": 1466 }, { "epoch": 0.2932460458259414, "grad_norm": 2.171875, "learning_rate": 9.793914068131959e-06, "loss": 1.1475, "step": 1467 }, { "epoch": 0.2934459408810375, "grad_norm": 2.328125, "learning_rate": 9.793614534582653e-06, "loss": 1.1651, "step": 1468 }, { "epoch": 0.2936458359361335, "grad_norm": 2.15625, "learning_rate": 9.793314788101502e-06, "loss": 1.1456, "step": 1469 }, { "epoch": 0.2938457309912296, "grad_norm": 2.109375, "learning_rate": 9.793014828701822e-06, "loss": 1.0554, "step": 1470 }, { "epoch": 0.29404562604632567, "grad_norm": 2.21875, "learning_rate": 9.792714656396934e-06, "loss": 1.0414, "step": 1471 }, { "epoch": 0.29424552110142177, "grad_norm": 2.09375, "learning_rate": 9.792414271200173e-06, "loss": 1.0277, "step": 1472 }, { "epoch": 0.2944454161565178, "grad_norm": 2.171875, "learning_rate": 9.79211367312488e-06, "loss": 1.1543, "step": 1473 }, { "epoch": 0.2946453112116139, "grad_norm": 2.21875, "learning_rate": 9.791812862184413e-06, "loss": 1.1505, "step": 1474 }, { "epoch": 0.29484520626670996, "grad_norm": 2.375, "learning_rate": 9.791511838392128e-06, "loss": 1.0876, "step": 1475 }, { "epoch": 0.29504510132180606, "grad_norm": 2.09375, "learning_rate": 9.7912106017614e-06, "loss": 1.1306, "step": 1476 }, { "epoch": 0.2952449963769021, "grad_norm": 2.0625, "learning_rate": 9.790909152305609e-06, "loss": 1.0793, "step": 1477 }, { "epoch": 0.2954448914319982, "grad_norm": 2.1875, "learning_rate": 9.790607490038145e-06, "loss": 1.0108, "step": 1478 }, { "epoch": 0.2956447864870943, "grad_norm": 2.125, "learning_rate": 9.790305614972407e-06, "loss": 1.0751, "step": 1479 }, { "epoch": 0.29584468154219035, "grad_norm": 2.0, "learning_rate": 9.790003527121806e-06, "loss": 1.0636, "step": 1480 }, { "epoch": 0.29604457659728645, "grad_norm": 2.03125, "learning_rate": 9.78970122649976e-06, "loss": 1.1285, "step": 1481 }, { "epoch": 0.2962444716523825, "grad_norm": 2.203125, "learning_rate": 9.789398713119696e-06, "loss": 1.0562, "step": 1482 }, { "epoch": 0.2964443667074786, "grad_norm": 2.140625, "learning_rate": 9.789095986995052e-06, "loss": 1.1148, "step": 1483 }, { "epoch": 0.29664426176257463, "grad_norm": 2.09375, "learning_rate": 9.788793048139277e-06, "loss": 1.0267, "step": 1484 }, { "epoch": 0.29684415681767073, "grad_norm": 2.140625, "learning_rate": 9.788489896565827e-06, "loss": 1.0022, "step": 1485 }, { "epoch": 0.2970440518727668, "grad_norm": 2.21875, "learning_rate": 9.788186532288166e-06, "loss": 1.0756, "step": 1486 }, { "epoch": 0.2972439469278629, "grad_norm": 2.21875, "learning_rate": 9.787882955319771e-06, "loss": 1.1432, "step": 1487 }, { "epoch": 0.2974438419829589, "grad_norm": 2.3125, "learning_rate": 9.787579165674129e-06, "loss": 1.1975, "step": 1488 }, { "epoch": 0.297643737038055, "grad_norm": 2.171875, "learning_rate": 9.787275163364729e-06, "loss": 1.087, "step": 1489 }, { "epoch": 0.2978436320931511, "grad_norm": 2.09375, "learning_rate": 9.786970948405077e-06, "loss": 1.0913, "step": 1490 }, { "epoch": 0.29804352714824717, "grad_norm": 2.15625, "learning_rate": 9.786666520808688e-06, "loss": 1.0329, "step": 1491 }, { "epoch": 0.29824342220334327, "grad_norm": 2.03125, "learning_rate": 9.786361880589084e-06, "loss": 1.1613, "step": 1492 }, { "epoch": 0.2984433172584393, "grad_norm": 2.0625, "learning_rate": 9.786057027759796e-06, "loss": 1.0602, "step": 1493 }, { "epoch": 0.2986432123135354, "grad_norm": 2.140625, "learning_rate": 9.785751962334365e-06, "loss": 1.125, "step": 1494 }, { "epoch": 0.29884310736863146, "grad_norm": 2.171875, "learning_rate": 9.785446684326345e-06, "loss": 1.1915, "step": 1495 }, { "epoch": 0.29904300242372756, "grad_norm": 2.03125, "learning_rate": 9.785141193749292e-06, "loss": 1.1184, "step": 1496 }, { "epoch": 0.2992428974788236, "grad_norm": 2.015625, "learning_rate": 9.78483549061678e-06, "loss": 1.0091, "step": 1497 }, { "epoch": 0.2994427925339197, "grad_norm": 2.03125, "learning_rate": 9.784529574942385e-06, "loss": 0.9844, "step": 1498 }, { "epoch": 0.29964268758901574, "grad_norm": 2.15625, "learning_rate": 9.784223446739698e-06, "loss": 1.1051, "step": 1499 }, { "epoch": 0.29984258264411184, "grad_norm": 2.109375, "learning_rate": 9.783917106022316e-06, "loss": 1.1233, "step": 1500 }, { "epoch": 0.29984258264411184, "eval_loss": 0.954179048538208, "eval_runtime": 595.9323, "eval_samples_per_second": 3.588, "eval_steps_per_second": 3.588, "step": 1500 }, { "epoch": 0.30004247769920794, "grad_norm": 2.125, "learning_rate": 9.783610552803849e-06, "loss": 1.0654, "step": 1501 }, { "epoch": 0.300242372754304, "grad_norm": 2.171875, "learning_rate": 9.78330378709791e-06, "loss": 1.0456, "step": 1502 }, { "epoch": 0.3004422678094001, "grad_norm": 2.203125, "learning_rate": 9.782996808918128e-06, "loss": 1.1971, "step": 1503 }, { "epoch": 0.30064216286449613, "grad_norm": 2.234375, "learning_rate": 9.782689618278139e-06, "loss": 1.1156, "step": 1504 }, { "epoch": 0.30084205791959223, "grad_norm": 2.203125, "learning_rate": 9.782382215191589e-06, "loss": 1.1085, "step": 1505 }, { "epoch": 0.3010419529746883, "grad_norm": 2.21875, "learning_rate": 9.782074599672131e-06, "loss": 1.0369, "step": 1506 }, { "epoch": 0.3012418480297844, "grad_norm": 2.09375, "learning_rate": 9.78176677173343e-06, "loss": 1.2029, "step": 1507 }, { "epoch": 0.3014417430848804, "grad_norm": 2.140625, "learning_rate": 9.78145873138916e-06, "loss": 1.1371, "step": 1508 }, { "epoch": 0.3016416381399765, "grad_norm": 2.078125, "learning_rate": 9.781150478653003e-06, "loss": 1.1452, "step": 1509 }, { "epoch": 0.30184153319507256, "grad_norm": 2.15625, "learning_rate": 9.780842013538652e-06, "loss": 1.0721, "step": 1510 }, { "epoch": 0.30204142825016866, "grad_norm": 2.1875, "learning_rate": 9.78053333605981e-06, "loss": 1.1146, "step": 1511 }, { "epoch": 0.3022413233052647, "grad_norm": 2.125, "learning_rate": 9.780224446230188e-06, "loss": 1.1378, "step": 1512 }, { "epoch": 0.3024412183603608, "grad_norm": 2.21875, "learning_rate": 9.779915344063506e-06, "loss": 1.0507, "step": 1513 }, { "epoch": 0.3026411134154569, "grad_norm": 2.171875, "learning_rate": 9.779606029573496e-06, "loss": 1.1384, "step": 1514 }, { "epoch": 0.30284100847055295, "grad_norm": 2.203125, "learning_rate": 9.779296502773896e-06, "loss": 1.0204, "step": 1515 }, { "epoch": 0.30304090352564905, "grad_norm": 2.125, "learning_rate": 9.778986763678455e-06, "loss": 1.0589, "step": 1516 }, { "epoch": 0.3032407985807451, "grad_norm": 2.171875, "learning_rate": 9.778676812300935e-06, "loss": 1.11, "step": 1517 }, { "epoch": 0.3034406936358412, "grad_norm": 2.125, "learning_rate": 9.778366648655098e-06, "loss": 1.0446, "step": 1518 }, { "epoch": 0.30364058869093724, "grad_norm": 2.21875, "learning_rate": 9.778056272754728e-06, "loss": 1.0326, "step": 1519 }, { "epoch": 0.30384048374603334, "grad_norm": 2.109375, "learning_rate": 9.777745684613606e-06, "loss": 1.0558, "step": 1520 }, { "epoch": 0.3040403788011294, "grad_norm": 2.28125, "learning_rate": 9.777434884245533e-06, "loss": 1.0122, "step": 1521 }, { "epoch": 0.3042402738562255, "grad_norm": 2.21875, "learning_rate": 9.77712387166431e-06, "loss": 1.0617, "step": 1522 }, { "epoch": 0.30444016891132153, "grad_norm": 2.171875, "learning_rate": 9.776812646883758e-06, "loss": 1.1102, "step": 1523 }, { "epoch": 0.30464006396641763, "grad_norm": 2.109375, "learning_rate": 9.776501209917697e-06, "loss": 1.1396, "step": 1524 }, { "epoch": 0.30483995902151373, "grad_norm": 2.015625, "learning_rate": 9.776189560779963e-06, "loss": 0.9904, "step": 1525 }, { "epoch": 0.3050398540766098, "grad_norm": 2.125, "learning_rate": 9.775877699484397e-06, "loss": 1.0014, "step": 1526 }, { "epoch": 0.3052397491317059, "grad_norm": 2.296875, "learning_rate": 9.775565626044856e-06, "loss": 1.0889, "step": 1527 }, { "epoch": 0.3054396441868019, "grad_norm": 2.109375, "learning_rate": 9.775253340475199e-06, "loss": 1.0932, "step": 1528 }, { "epoch": 0.305639539241898, "grad_norm": 2.125, "learning_rate": 9.774940842789298e-06, "loss": 1.0992, "step": 1529 }, { "epoch": 0.30583943429699406, "grad_norm": 2.09375, "learning_rate": 9.774628133001037e-06, "loss": 1.0882, "step": 1530 }, { "epoch": 0.30603932935209016, "grad_norm": 2.203125, "learning_rate": 9.7743152111243e-06, "loss": 1.1073, "step": 1531 }, { "epoch": 0.3062392244071862, "grad_norm": 2.09375, "learning_rate": 9.774002077172994e-06, "loss": 1.0974, "step": 1532 }, { "epoch": 0.3064391194622823, "grad_norm": 2.109375, "learning_rate": 9.773688731161027e-06, "loss": 1.029, "step": 1533 }, { "epoch": 0.30663901451737835, "grad_norm": 2.09375, "learning_rate": 9.773375173102315e-06, "loss": 1.1065, "step": 1534 }, { "epoch": 0.30683890957247445, "grad_norm": 2.171875, "learning_rate": 9.773061403010786e-06, "loss": 1.0404, "step": 1535 }, { "epoch": 0.30703880462757055, "grad_norm": 2.1875, "learning_rate": 9.772747420900381e-06, "loss": 1.0275, "step": 1536 }, { "epoch": 0.3072386996826666, "grad_norm": 2.21875, "learning_rate": 9.772433226785045e-06, "loss": 1.0852, "step": 1537 }, { "epoch": 0.3074385947377627, "grad_norm": 2.125, "learning_rate": 9.772118820678735e-06, "loss": 1.0726, "step": 1538 }, { "epoch": 0.30763848979285874, "grad_norm": 2.15625, "learning_rate": 9.771804202595417e-06, "loss": 1.1003, "step": 1539 }, { "epoch": 0.30783838484795484, "grad_norm": 2.28125, "learning_rate": 9.771489372549064e-06, "loss": 1.0986, "step": 1540 }, { "epoch": 0.3080382799030509, "grad_norm": 2.109375, "learning_rate": 9.771174330553665e-06, "loss": 1.0746, "step": 1541 }, { "epoch": 0.308238174958147, "grad_norm": 2.125, "learning_rate": 9.770859076623211e-06, "loss": 0.9782, "step": 1542 }, { "epoch": 0.308438070013243, "grad_norm": 2.140625, "learning_rate": 9.770543610771706e-06, "loss": 1.1435, "step": 1543 }, { "epoch": 0.3086379650683391, "grad_norm": 2.046875, "learning_rate": 9.770227933013163e-06, "loss": 1.0373, "step": 1544 }, { "epoch": 0.30883786012343517, "grad_norm": 2.046875, "learning_rate": 9.769912043361606e-06, "loss": 1.0527, "step": 1545 }, { "epoch": 0.30903775517853127, "grad_norm": 2.28125, "learning_rate": 9.769595941831066e-06, "loss": 1.185, "step": 1546 }, { "epoch": 0.30923765023362737, "grad_norm": 2.078125, "learning_rate": 9.76927962843558e-06, "loss": 1.0581, "step": 1547 }, { "epoch": 0.3094375452887234, "grad_norm": 2.15625, "learning_rate": 9.768963103189206e-06, "loss": 1.1129, "step": 1548 }, { "epoch": 0.3096374403438195, "grad_norm": 2.125, "learning_rate": 9.768646366105997e-06, "loss": 1.1202, "step": 1549 }, { "epoch": 0.30983733539891556, "grad_norm": 2.125, "learning_rate": 9.768329417200029e-06, "loss": 1.1176, "step": 1550 }, { "epoch": 0.31003723045401166, "grad_norm": 2.078125, "learning_rate": 9.768012256485376e-06, "loss": 1.0759, "step": 1551 }, { "epoch": 0.3102371255091077, "grad_norm": 2.234375, "learning_rate": 9.767694883976128e-06, "loss": 1.1666, "step": 1552 }, { "epoch": 0.3104370205642038, "grad_norm": 1.921875, "learning_rate": 9.767377299686382e-06, "loss": 1.0544, "step": 1553 }, { "epoch": 0.31063691561929985, "grad_norm": 2.1875, "learning_rate": 9.767059503630247e-06, "loss": 1.1891, "step": 1554 }, { "epoch": 0.31083681067439595, "grad_norm": 2.234375, "learning_rate": 9.766741495821838e-06, "loss": 1.1982, "step": 1555 }, { "epoch": 0.311036705729492, "grad_norm": 2.21875, "learning_rate": 9.76642327627528e-06, "loss": 1.0769, "step": 1556 }, { "epoch": 0.3112366007845881, "grad_norm": 2.203125, "learning_rate": 9.766104845004709e-06, "loss": 1.1212, "step": 1557 }, { "epoch": 0.3114364958396842, "grad_norm": 2.296875, "learning_rate": 9.76578620202427e-06, "loss": 1.0862, "step": 1558 }, { "epoch": 0.31163639089478024, "grad_norm": 2.03125, "learning_rate": 9.765467347348116e-06, "loss": 1.0631, "step": 1559 }, { "epoch": 0.31183628594987634, "grad_norm": 2.046875, "learning_rate": 9.765148280990412e-06, "loss": 1.0448, "step": 1560 }, { "epoch": 0.3120361810049724, "grad_norm": 2.109375, "learning_rate": 9.76482900296533e-06, "loss": 1.0488, "step": 1561 }, { "epoch": 0.3122360760600685, "grad_norm": 2.171875, "learning_rate": 9.764509513287054e-06, "loss": 1.0261, "step": 1562 }, { "epoch": 0.3124359711151645, "grad_norm": 2.125, "learning_rate": 9.764189811969773e-06, "loss": 1.0469, "step": 1563 }, { "epoch": 0.3126358661702606, "grad_norm": 2.15625, "learning_rate": 9.763869899027689e-06, "loss": 1.0626, "step": 1564 }, { "epoch": 0.31283576122535667, "grad_norm": 2.03125, "learning_rate": 9.763549774475014e-06, "loss": 1.0377, "step": 1565 }, { "epoch": 0.31303565628045277, "grad_norm": 2.0625, "learning_rate": 9.763229438325968e-06, "loss": 0.9987, "step": 1566 }, { "epoch": 0.3132355513355488, "grad_norm": 2.125, "learning_rate": 9.762908890594777e-06, "loss": 1.0444, "step": 1567 }, { "epoch": 0.3134354463906449, "grad_norm": 2.109375, "learning_rate": 9.762588131295681e-06, "loss": 1.1318, "step": 1568 }, { "epoch": 0.313635341445741, "grad_norm": 2.140625, "learning_rate": 9.76226716044293e-06, "loss": 1.0587, "step": 1569 }, { "epoch": 0.31383523650083706, "grad_norm": 2.234375, "learning_rate": 9.761945978050782e-06, "loss": 1.1694, "step": 1570 }, { "epoch": 0.31403513155593316, "grad_norm": 2.09375, "learning_rate": 9.7616245841335e-06, "loss": 1.0964, "step": 1571 }, { "epoch": 0.3142350266110292, "grad_norm": 2.046875, "learning_rate": 9.761302978705364e-06, "loss": 1.0968, "step": 1572 }, { "epoch": 0.3144349216661253, "grad_norm": 2.1875, "learning_rate": 9.760981161780657e-06, "loss": 1.0919, "step": 1573 }, { "epoch": 0.31463481672122134, "grad_norm": 2.109375, "learning_rate": 9.760659133373675e-06, "loss": 1.0793, "step": 1574 }, { "epoch": 0.31483471177631744, "grad_norm": 2.0625, "learning_rate": 9.760336893498724e-06, "loss": 1.1369, "step": 1575 }, { "epoch": 0.3150346068314135, "grad_norm": 2.21875, "learning_rate": 9.760014442170116e-06, "loss": 1.2367, "step": 1576 }, { "epoch": 0.3152345018865096, "grad_norm": 2.109375, "learning_rate": 9.759691779402175e-06, "loss": 1.0322, "step": 1577 }, { "epoch": 0.31543439694160563, "grad_norm": 2.40625, "learning_rate": 9.759368905209234e-06, "loss": 1.2766, "step": 1578 }, { "epoch": 0.31563429199670173, "grad_norm": 2.0625, "learning_rate": 9.759045819605635e-06, "loss": 1.1224, "step": 1579 }, { "epoch": 0.31583418705179783, "grad_norm": 2.125, "learning_rate": 9.758722522605727e-06, "loss": 1.1049, "step": 1580 }, { "epoch": 0.3160340821068939, "grad_norm": 2.1875, "learning_rate": 9.758399014223874e-06, "loss": 1.1412, "step": 1581 }, { "epoch": 0.31623397716199, "grad_norm": 2.078125, "learning_rate": 9.758075294474445e-06, "loss": 1.0146, "step": 1582 }, { "epoch": 0.316433872217086, "grad_norm": 2.125, "learning_rate": 9.75775136337182e-06, "loss": 1.0551, "step": 1583 }, { "epoch": 0.3166337672721821, "grad_norm": 2.3125, "learning_rate": 9.757427220930387e-06, "loss": 1.1061, "step": 1584 }, { "epoch": 0.31683366232727816, "grad_norm": 2.09375, "learning_rate": 9.757102867164544e-06, "loss": 1.0484, "step": 1585 }, { "epoch": 0.31703355738237426, "grad_norm": 2.625, "learning_rate": 9.756778302088701e-06, "loss": 1.0879, "step": 1586 }, { "epoch": 0.3172334524374703, "grad_norm": 2.09375, "learning_rate": 9.756453525717274e-06, "loss": 1.0491, "step": 1587 }, { "epoch": 0.3174333474925664, "grad_norm": 2.125, "learning_rate": 9.75612853806469e-06, "loss": 1.049, "step": 1588 }, { "epoch": 0.31763324254766245, "grad_norm": 2.078125, "learning_rate": 9.755803339145382e-06, "loss": 1.1043, "step": 1589 }, { "epoch": 0.31783313760275855, "grad_norm": 2.03125, "learning_rate": 9.755477928973797e-06, "loss": 1.0747, "step": 1590 }, { "epoch": 0.31803303265785465, "grad_norm": 2.171875, "learning_rate": 9.755152307564393e-06, "loss": 1.031, "step": 1591 }, { "epoch": 0.3182329277129507, "grad_norm": 2.171875, "learning_rate": 9.75482647493163e-06, "loss": 1.1383, "step": 1592 }, { "epoch": 0.3184328227680468, "grad_norm": 2.1875, "learning_rate": 9.754500431089984e-06, "loss": 1.1015, "step": 1593 }, { "epoch": 0.31863271782314284, "grad_norm": 2.09375, "learning_rate": 9.754174176053936e-06, "loss": 1.083, "step": 1594 }, { "epoch": 0.31883261287823894, "grad_norm": 2.140625, "learning_rate": 9.75384770983798e-06, "loss": 1.004, "step": 1595 }, { "epoch": 0.319032507933335, "grad_norm": 1.984375, "learning_rate": 9.753521032456615e-06, "loss": 1.0074, "step": 1596 }, { "epoch": 0.3192324029884311, "grad_norm": 2.0625, "learning_rate": 9.753194143924354e-06, "loss": 1.1477, "step": 1597 }, { "epoch": 0.31943229804352713, "grad_norm": 2.1875, "learning_rate": 9.752867044255716e-06, "loss": 1.0784, "step": 1598 }, { "epoch": 0.31963219309862323, "grad_norm": 2.1875, "learning_rate": 9.752539733465231e-06, "loss": 1.0531, "step": 1599 }, { "epoch": 0.3198320881537193, "grad_norm": 2.09375, "learning_rate": 9.75221221156744e-06, "loss": 1.08, "step": 1600 }, { "epoch": 0.3200319832088154, "grad_norm": 2.09375, "learning_rate": 9.75188447857689e-06, "loss": 1.093, "step": 1601 }, { "epoch": 0.3202318782639115, "grad_norm": 2.078125, "learning_rate": 9.75155653450814e-06, "loss": 1.1182, "step": 1602 }, { "epoch": 0.3204317733190075, "grad_norm": 2.125, "learning_rate": 9.751228379375754e-06, "loss": 1.058, "step": 1603 }, { "epoch": 0.3206316683741036, "grad_norm": 2.046875, "learning_rate": 9.750900013194312e-06, "loss": 1.0517, "step": 1604 }, { "epoch": 0.32083156342919966, "grad_norm": 2.015625, "learning_rate": 9.750571435978399e-06, "loss": 0.9927, "step": 1605 }, { "epoch": 0.32103145848429576, "grad_norm": 2.140625, "learning_rate": 9.750242647742609e-06, "loss": 1.1342, "step": 1606 }, { "epoch": 0.3212313535393918, "grad_norm": 2.234375, "learning_rate": 9.74991364850155e-06, "loss": 1.0087, "step": 1607 }, { "epoch": 0.3214312485944879, "grad_norm": 2.078125, "learning_rate": 9.749584438269833e-06, "loss": 1.1029, "step": 1608 }, { "epoch": 0.32163114364958395, "grad_norm": 2.1875, "learning_rate": 9.749255017062081e-06, "loss": 1.1581, "step": 1609 }, { "epoch": 0.32183103870468005, "grad_norm": 2.109375, "learning_rate": 9.74892538489293e-06, "loss": 1.0755, "step": 1610 }, { "epoch": 0.3220309337597761, "grad_norm": 2.203125, "learning_rate": 9.748595541777021e-06, "loss": 1.1912, "step": 1611 }, { "epoch": 0.3222308288148722, "grad_norm": 2.203125, "learning_rate": 9.748265487729003e-06, "loss": 1.165, "step": 1612 }, { "epoch": 0.3224307238699683, "grad_norm": 2.078125, "learning_rate": 9.747935222763542e-06, "loss": 0.9882, "step": 1613 }, { "epoch": 0.32263061892506434, "grad_norm": 2.09375, "learning_rate": 9.747604746895303e-06, "loss": 1.0114, "step": 1614 }, { "epoch": 0.32283051398016044, "grad_norm": 2.125, "learning_rate": 9.747274060138971e-06, "loss": 1.0804, "step": 1615 }, { "epoch": 0.3230304090352565, "grad_norm": 2.015625, "learning_rate": 9.74694316250923e-06, "loss": 0.9537, "step": 1616 }, { "epoch": 0.3232303040903526, "grad_norm": 2.140625, "learning_rate": 9.74661205402078e-06, "loss": 1.1196, "step": 1617 }, { "epoch": 0.3234301991454486, "grad_norm": 2.09375, "learning_rate": 9.746280734688332e-06, "loss": 1.01, "step": 1618 }, { "epoch": 0.3236300942005447, "grad_norm": 2.03125, "learning_rate": 9.7459492045266e-06, "loss": 1.1002, "step": 1619 }, { "epoch": 0.32382998925564077, "grad_norm": 2.09375, "learning_rate": 9.74561746355031e-06, "loss": 1.0775, "step": 1620 }, { "epoch": 0.32402988431073687, "grad_norm": 2.21875, "learning_rate": 9.7452855117742e-06, "loss": 1.1096, "step": 1621 }, { "epoch": 0.3242297793658329, "grad_norm": 2.15625, "learning_rate": 9.744953349213016e-06, "loss": 1.0339, "step": 1622 }, { "epoch": 0.324429674420929, "grad_norm": 2.125, "learning_rate": 9.74462097588151e-06, "loss": 1.0775, "step": 1623 }, { "epoch": 0.32462956947602506, "grad_norm": 2.046875, "learning_rate": 9.744288391794446e-06, "loss": 1.091, "step": 1624 }, { "epoch": 0.32482946453112116, "grad_norm": 2.09375, "learning_rate": 9.743955596966597e-06, "loss": 1.037, "step": 1625 }, { "epoch": 0.32502935958621726, "grad_norm": 2.125, "learning_rate": 9.743622591412749e-06, "loss": 1.0736, "step": 1626 }, { "epoch": 0.3252292546413133, "grad_norm": 2.046875, "learning_rate": 9.743289375147693e-06, "loss": 1.0584, "step": 1627 }, { "epoch": 0.3254291496964094, "grad_norm": 2.125, "learning_rate": 9.742955948186228e-06, "loss": 1.0357, "step": 1628 }, { "epoch": 0.32562904475150545, "grad_norm": 2.109375, "learning_rate": 9.742622310543165e-06, "loss": 1.0085, "step": 1629 }, { "epoch": 0.32582893980660155, "grad_norm": 2.15625, "learning_rate": 9.742288462233329e-06, "loss": 1.0853, "step": 1630 }, { "epoch": 0.3260288348616976, "grad_norm": 2.078125, "learning_rate": 9.741954403271543e-06, "loss": 1.0205, "step": 1631 }, { "epoch": 0.3262287299167937, "grad_norm": 2.109375, "learning_rate": 9.741620133672651e-06, "loss": 1.0081, "step": 1632 }, { "epoch": 0.32642862497188974, "grad_norm": 2.140625, "learning_rate": 9.741285653451497e-06, "loss": 1.0474, "step": 1633 }, { "epoch": 0.32662852002698584, "grad_norm": 2.1875, "learning_rate": 9.740950962622943e-06, "loss": 1.1232, "step": 1634 }, { "epoch": 0.3268284150820819, "grad_norm": 2.125, "learning_rate": 9.740616061201852e-06, "loss": 1.0578, "step": 1635 }, { "epoch": 0.327028310137178, "grad_norm": 2.1875, "learning_rate": 9.740280949203102e-06, "loss": 1.1024, "step": 1636 }, { "epoch": 0.3272282051922741, "grad_norm": 2.078125, "learning_rate": 9.739945626641579e-06, "loss": 1.1667, "step": 1637 }, { "epoch": 0.3274281002473701, "grad_norm": 2.140625, "learning_rate": 9.739610093532176e-06, "loss": 1.0927, "step": 1638 }, { "epoch": 0.3276279953024662, "grad_norm": 2.125, "learning_rate": 9.739274349889802e-06, "loss": 1.0491, "step": 1639 }, { "epoch": 0.32782789035756227, "grad_norm": 2.15625, "learning_rate": 9.738938395729364e-06, "loss": 0.9977, "step": 1640 }, { "epoch": 0.32802778541265837, "grad_norm": 2.03125, "learning_rate": 9.738602231065793e-06, "loss": 0.9795, "step": 1641 }, { "epoch": 0.3282276804677544, "grad_norm": 2.203125, "learning_rate": 9.738265855914014e-06, "loss": 1.1082, "step": 1642 }, { "epoch": 0.3284275755228505, "grad_norm": 2.265625, "learning_rate": 9.73792927028897e-06, "loss": 1.046, "step": 1643 }, { "epoch": 0.32862747057794656, "grad_norm": 2.03125, "learning_rate": 9.737592474205617e-06, "loss": 1.0181, "step": 1644 }, { "epoch": 0.32882736563304266, "grad_norm": 2.125, "learning_rate": 9.73725546767891e-06, "loss": 1.0592, "step": 1645 }, { "epoch": 0.3290272606881387, "grad_norm": 2.09375, "learning_rate": 9.736918250723823e-06, "loss": 1.0517, "step": 1646 }, { "epoch": 0.3292271557432348, "grad_norm": 2.21875, "learning_rate": 9.736580823355333e-06, "loss": 1.0778, "step": 1647 }, { "epoch": 0.3294270507983309, "grad_norm": 2.109375, "learning_rate": 9.736243185588428e-06, "loss": 1.038, "step": 1648 }, { "epoch": 0.32962694585342694, "grad_norm": 2.078125, "learning_rate": 9.735905337438107e-06, "loss": 1.0171, "step": 1649 }, { "epoch": 0.32982684090852304, "grad_norm": 2.1875, "learning_rate": 9.735567278919376e-06, "loss": 1.1386, "step": 1650 }, { "epoch": 0.3300267359636191, "grad_norm": 2.15625, "learning_rate": 9.735229010047253e-06, "loss": 1.0989, "step": 1651 }, { "epoch": 0.3302266310187152, "grad_norm": 2.03125, "learning_rate": 9.734890530836763e-06, "loss": 1.0239, "step": 1652 }, { "epoch": 0.33042652607381123, "grad_norm": 2.0625, "learning_rate": 9.734551841302941e-06, "loss": 1.0697, "step": 1653 }, { "epoch": 0.33062642112890733, "grad_norm": 2.15625, "learning_rate": 9.734212941460833e-06, "loss": 1.1375, "step": 1654 }, { "epoch": 0.3308263161840034, "grad_norm": 2.15625, "learning_rate": 9.73387383132549e-06, "loss": 0.9819, "step": 1655 }, { "epoch": 0.3310262112390995, "grad_norm": 1.9375, "learning_rate": 9.733534510911977e-06, "loss": 0.9949, "step": 1656 }, { "epoch": 0.3312261062941955, "grad_norm": 2.03125, "learning_rate": 9.733194980235367e-06, "loss": 1.0075, "step": 1657 }, { "epoch": 0.3314260013492916, "grad_norm": 2.390625, "learning_rate": 9.732855239310743e-06, "loss": 1.1145, "step": 1658 }, { "epoch": 0.3316258964043877, "grad_norm": 2.25, "learning_rate": 9.732515288153193e-06, "loss": 1.0824, "step": 1659 }, { "epoch": 0.33182579145948377, "grad_norm": 2.265625, "learning_rate": 9.732175126777821e-06, "loss": 1.1481, "step": 1660 }, { "epoch": 0.33202568651457987, "grad_norm": 2.03125, "learning_rate": 9.731834755199734e-06, "loss": 0.9968, "step": 1661 }, { "epoch": 0.3322255815696759, "grad_norm": 2.1875, "learning_rate": 9.731494173434053e-06, "loss": 1.053, "step": 1662 }, { "epoch": 0.332425476624772, "grad_norm": 1.9453125, "learning_rate": 9.731153381495905e-06, "loss": 1.0219, "step": 1663 }, { "epoch": 0.33262537167986805, "grad_norm": 2.09375, "learning_rate": 9.730812379400432e-06, "loss": 1.0924, "step": 1664 }, { "epoch": 0.33282526673496415, "grad_norm": 2.203125, "learning_rate": 9.730471167162776e-06, "loss": 1.0849, "step": 1665 }, { "epoch": 0.3330251617900602, "grad_norm": 2.125, "learning_rate": 9.730129744798096e-06, "loss": 1.2105, "step": 1666 }, { "epoch": 0.3332250568451563, "grad_norm": 2.046875, "learning_rate": 9.729788112321558e-06, "loss": 1.0258, "step": 1667 }, { "epoch": 0.33342495190025234, "grad_norm": 2.109375, "learning_rate": 9.729446269748338e-06, "loss": 1.1197, "step": 1668 }, { "epoch": 0.33362484695534844, "grad_norm": 2.09375, "learning_rate": 9.729104217093618e-06, "loss": 1.0676, "step": 1669 }, { "epoch": 0.33382474201044454, "grad_norm": 2.09375, "learning_rate": 9.728761954372597e-06, "loss": 1.0351, "step": 1670 }, { "epoch": 0.3340246370655406, "grad_norm": 2.25, "learning_rate": 9.728419481600472e-06, "loss": 1.1705, "step": 1671 }, { "epoch": 0.3342245321206367, "grad_norm": 2.09375, "learning_rate": 9.728076798792461e-06, "loss": 1.1647, "step": 1672 }, { "epoch": 0.33442442717573273, "grad_norm": 2.078125, "learning_rate": 9.727733905963783e-06, "loss": 1.006, "step": 1673 }, { "epoch": 0.33462432223082883, "grad_norm": 2.109375, "learning_rate": 9.727390803129668e-06, "loss": 1.0953, "step": 1674 }, { "epoch": 0.3348242172859249, "grad_norm": 2.171875, "learning_rate": 9.72704749030536e-06, "loss": 1.0583, "step": 1675 }, { "epoch": 0.335024112341021, "grad_norm": 2.1875, "learning_rate": 9.726703967506107e-06, "loss": 1.0887, "step": 1676 }, { "epoch": 0.335224007396117, "grad_norm": 2.125, "learning_rate": 9.726360234747168e-06, "loss": 0.9653, "step": 1677 }, { "epoch": 0.3354239024512131, "grad_norm": 2.125, "learning_rate": 9.726016292043814e-06, "loss": 0.9459, "step": 1678 }, { "epoch": 0.33562379750630916, "grad_norm": 2.140625, "learning_rate": 9.725672139411319e-06, "loss": 1.0816, "step": 1679 }, { "epoch": 0.33582369256140526, "grad_norm": 2.109375, "learning_rate": 9.725327776864974e-06, "loss": 1.0573, "step": 1680 }, { "epoch": 0.33602358761650136, "grad_norm": 2.25, "learning_rate": 9.724983204420073e-06, "loss": 1.0713, "step": 1681 }, { "epoch": 0.3362234826715974, "grad_norm": 2.140625, "learning_rate": 9.724638422091922e-06, "loss": 1.0342, "step": 1682 }, { "epoch": 0.3364233777266935, "grad_norm": 2.109375, "learning_rate": 9.724293429895836e-06, "loss": 1.111, "step": 1683 }, { "epoch": 0.33662327278178955, "grad_norm": 2.15625, "learning_rate": 9.723948227847145e-06, "loss": 0.9647, "step": 1684 }, { "epoch": 0.33682316783688565, "grad_norm": 2.28125, "learning_rate": 9.723602815961173e-06, "loss": 1.0749, "step": 1685 }, { "epoch": 0.3370230628919817, "grad_norm": 2.125, "learning_rate": 9.723257194253272e-06, "loss": 1.0911, "step": 1686 }, { "epoch": 0.3372229579470778, "grad_norm": 2.09375, "learning_rate": 9.72291136273879e-06, "loss": 1.0592, "step": 1687 }, { "epoch": 0.33742285300217384, "grad_norm": 2.03125, "learning_rate": 9.722565321433088e-06, "loss": 1.043, "step": 1688 }, { "epoch": 0.33762274805726994, "grad_norm": 2.03125, "learning_rate": 9.722219070351541e-06, "loss": 1.1058, "step": 1689 }, { "epoch": 0.337822643112366, "grad_norm": 2.15625, "learning_rate": 9.721872609509526e-06, "loss": 1.1402, "step": 1690 }, { "epoch": 0.3380225381674621, "grad_norm": 2.140625, "learning_rate": 9.721525938922434e-06, "loss": 1.1047, "step": 1691 }, { "epoch": 0.3382224332225582, "grad_norm": 2.09375, "learning_rate": 9.721179058605664e-06, "loss": 1.0193, "step": 1692 }, { "epoch": 0.3384223282776542, "grad_norm": 2.140625, "learning_rate": 9.720831968574625e-06, "loss": 1.0501, "step": 1693 }, { "epoch": 0.3386222233327503, "grad_norm": 2.28125, "learning_rate": 9.720484668844734e-06, "loss": 1.0406, "step": 1694 }, { "epoch": 0.33882211838784637, "grad_norm": 2.0625, "learning_rate": 9.720137159431418e-06, "loss": 1.0744, "step": 1695 }, { "epoch": 0.33902201344294247, "grad_norm": 2.125, "learning_rate": 9.719789440350113e-06, "loss": 1.0918, "step": 1696 }, { "epoch": 0.3392219084980385, "grad_norm": 2.15625, "learning_rate": 9.719441511616266e-06, "loss": 1.0955, "step": 1697 }, { "epoch": 0.3394218035531346, "grad_norm": 2.0625, "learning_rate": 9.719093373245331e-06, "loss": 1.0195, "step": 1698 }, { "epoch": 0.33962169860823066, "grad_norm": 2.171875, "learning_rate": 9.718745025252773e-06, "loss": 1.0618, "step": 1699 }, { "epoch": 0.33982159366332676, "grad_norm": 2.140625, "learning_rate": 9.718396467654064e-06, "loss": 1.1286, "step": 1700 }, { "epoch": 0.3400214887184228, "grad_norm": 2.03125, "learning_rate": 9.718047700464688e-06, "loss": 1.0813, "step": 1701 }, { "epoch": 0.3402213837735189, "grad_norm": 2.125, "learning_rate": 9.717698723700137e-06, "loss": 1.0753, "step": 1702 }, { "epoch": 0.340421278828615, "grad_norm": 2.21875, "learning_rate": 9.717349537375913e-06, "loss": 1.1358, "step": 1703 }, { "epoch": 0.34062117388371105, "grad_norm": 2.171875, "learning_rate": 9.717000141507527e-06, "loss": 0.9364, "step": 1704 }, { "epoch": 0.34082106893880715, "grad_norm": 2.078125, "learning_rate": 9.716650536110496e-06, "loss": 1.0648, "step": 1705 }, { "epoch": 0.3410209639939032, "grad_norm": 2.140625, "learning_rate": 9.716300721200354e-06, "loss": 1.0959, "step": 1706 }, { "epoch": 0.3412208590489993, "grad_norm": 2.1875, "learning_rate": 9.715950696792638e-06, "loss": 1.009, "step": 1707 }, { "epoch": 0.34142075410409534, "grad_norm": 2.390625, "learning_rate": 9.715600462902895e-06, "loss": 1.051, "step": 1708 }, { "epoch": 0.34162064915919144, "grad_norm": 2.109375, "learning_rate": 9.715250019546683e-06, "loss": 0.999, "step": 1709 }, { "epoch": 0.3418205442142875, "grad_norm": 1.9921875, "learning_rate": 9.714899366739569e-06, "loss": 0.9646, "step": 1710 }, { "epoch": 0.3420204392693836, "grad_norm": 2.0625, "learning_rate": 9.714548504497128e-06, "loss": 1.0364, "step": 1711 }, { "epoch": 0.3422203343244796, "grad_norm": 2.09375, "learning_rate": 9.714197432834947e-06, "loss": 1.0647, "step": 1712 }, { "epoch": 0.3424202293795757, "grad_norm": 2.0625, "learning_rate": 9.713846151768618e-06, "loss": 1.12, "step": 1713 }, { "epoch": 0.3426201244346718, "grad_norm": 1.9921875, "learning_rate": 9.713494661313746e-06, "loss": 1.033, "step": 1714 }, { "epoch": 0.34282001948976787, "grad_norm": 2.109375, "learning_rate": 9.713142961485947e-06, "loss": 1.0881, "step": 1715 }, { "epoch": 0.34301991454486397, "grad_norm": 2.21875, "learning_rate": 9.71279105230084e-06, "loss": 1.1303, "step": 1716 }, { "epoch": 0.34321980959996, "grad_norm": 2.109375, "learning_rate": 9.712438933774057e-06, "loss": 1.1371, "step": 1717 }, { "epoch": 0.3434197046550561, "grad_norm": 2.0625, "learning_rate": 9.71208660592124e-06, "loss": 0.968, "step": 1718 }, { "epoch": 0.34361959971015216, "grad_norm": 2.1875, "learning_rate": 9.71173406875804e-06, "loss": 1.0274, "step": 1719 }, { "epoch": 0.34381949476524826, "grad_norm": 2.25, "learning_rate": 9.711381322300117e-06, "loss": 0.9893, "step": 1720 }, { "epoch": 0.3440193898203443, "grad_norm": 2.296875, "learning_rate": 9.711028366563138e-06, "loss": 1.171, "step": 1721 }, { "epoch": 0.3442192848754404, "grad_norm": 2.140625, "learning_rate": 9.71067520156278e-06, "loss": 1.1283, "step": 1722 }, { "epoch": 0.34441917993053645, "grad_norm": 2.078125, "learning_rate": 9.710321827314735e-06, "loss": 1.0001, "step": 1723 }, { "epoch": 0.34461907498563255, "grad_norm": 2.40625, "learning_rate": 9.709968243834698e-06, "loss": 1.0845, "step": 1724 }, { "epoch": 0.34481897004072865, "grad_norm": 2.125, "learning_rate": 9.709614451138373e-06, "loss": 1.0823, "step": 1725 }, { "epoch": 0.3450188650958247, "grad_norm": 2.015625, "learning_rate": 9.709260449241478e-06, "loss": 1.058, "step": 1726 }, { "epoch": 0.3452187601509208, "grad_norm": 2.15625, "learning_rate": 9.708906238159736e-06, "loss": 1.1112, "step": 1727 }, { "epoch": 0.34541865520601683, "grad_norm": 2.140625, "learning_rate": 9.708551817908883e-06, "loss": 1.1158, "step": 1728 }, { "epoch": 0.34561855026111293, "grad_norm": 2.21875, "learning_rate": 9.70819718850466e-06, "loss": 1.0908, "step": 1729 }, { "epoch": 0.345818445316209, "grad_norm": 2.3125, "learning_rate": 9.707842349962821e-06, "loss": 1.1913, "step": 1730 }, { "epoch": 0.3460183403713051, "grad_norm": 2.09375, "learning_rate": 9.707487302299128e-06, "loss": 1.0317, "step": 1731 }, { "epoch": 0.3462182354264011, "grad_norm": 2.203125, "learning_rate": 9.707132045529352e-06, "loss": 1.1217, "step": 1732 }, { "epoch": 0.3464181304814972, "grad_norm": 2.015625, "learning_rate": 9.706776579669272e-06, "loss": 1.0175, "step": 1733 }, { "epoch": 0.34661802553659327, "grad_norm": 2.140625, "learning_rate": 9.70642090473468e-06, "loss": 1.1629, "step": 1734 }, { "epoch": 0.34681792059168937, "grad_norm": 2.09375, "learning_rate": 9.706065020741375e-06, "loss": 1.0368, "step": 1735 }, { "epoch": 0.3470178156467854, "grad_norm": 2.078125, "learning_rate": 9.705708927705163e-06, "loss": 1.0294, "step": 1736 }, { "epoch": 0.3472177107018815, "grad_norm": 2.140625, "learning_rate": 9.705352625641863e-06, "loss": 1.0309, "step": 1737 }, { "epoch": 0.3474176057569776, "grad_norm": 2.171875, "learning_rate": 9.704996114567302e-06, "loss": 1.079, "step": 1738 }, { "epoch": 0.34761750081207365, "grad_norm": 2.03125, "learning_rate": 9.704639394497317e-06, "loss": 1.0315, "step": 1739 }, { "epoch": 0.34781739586716975, "grad_norm": 2.390625, "learning_rate": 9.704282465447752e-06, "loss": 1.1318, "step": 1740 }, { "epoch": 0.3480172909222658, "grad_norm": 2.140625, "learning_rate": 9.703925327434462e-06, "loss": 1.0676, "step": 1741 }, { "epoch": 0.3482171859773619, "grad_norm": 2.140625, "learning_rate": 9.703567980473311e-06, "loss": 1.0814, "step": 1742 }, { "epoch": 0.34841708103245794, "grad_norm": 2.171875, "learning_rate": 9.703210424580174e-06, "loss": 1.0813, "step": 1743 }, { "epoch": 0.34861697608755404, "grad_norm": 2.203125, "learning_rate": 9.702852659770932e-06, "loss": 1.1354, "step": 1744 }, { "epoch": 0.3488168711426501, "grad_norm": 2.125, "learning_rate": 9.702494686061477e-06, "loss": 0.9704, "step": 1745 }, { "epoch": 0.3490167661977462, "grad_norm": 2.125, "learning_rate": 9.70213650346771e-06, "loss": 1.0792, "step": 1746 }, { "epoch": 0.34921666125284223, "grad_norm": 2.1875, "learning_rate": 9.70177811200554e-06, "loss": 1.0346, "step": 1747 }, { "epoch": 0.34941655630793833, "grad_norm": 2.203125, "learning_rate": 9.701419511690891e-06, "loss": 1.1517, "step": 1748 }, { "epoch": 0.34961645136303443, "grad_norm": 2.09375, "learning_rate": 9.701060702539689e-06, "loss": 1.1122, "step": 1749 }, { "epoch": 0.3498163464181305, "grad_norm": 2.296875, "learning_rate": 9.700701684567872e-06, "loss": 0.995, "step": 1750 }, { "epoch": 0.3500162414732266, "grad_norm": 2.03125, "learning_rate": 9.700342457791387e-06, "loss": 1.0151, "step": 1751 }, { "epoch": 0.3502161365283226, "grad_norm": 2.15625, "learning_rate": 9.699983022226194e-06, "loss": 1.1168, "step": 1752 }, { "epoch": 0.3504160315834187, "grad_norm": 2.046875, "learning_rate": 9.699623377888256e-06, "loss": 1.0673, "step": 1753 }, { "epoch": 0.35061592663851476, "grad_norm": 1.984375, "learning_rate": 9.699263524793551e-06, "loss": 1.0326, "step": 1754 }, { "epoch": 0.35081582169361086, "grad_norm": 2.15625, "learning_rate": 9.69890346295806e-06, "loss": 1.1526, "step": 1755 }, { "epoch": 0.3510157167487069, "grad_norm": 2.15625, "learning_rate": 9.69854319239778e-06, "loss": 1.101, "step": 1756 }, { "epoch": 0.351215611803803, "grad_norm": 2.171875, "learning_rate": 9.698182713128714e-06, "loss": 1.1748, "step": 1757 }, { "epoch": 0.35141550685889905, "grad_norm": 2.234375, "learning_rate": 9.697822025166874e-06, "loss": 1.1522, "step": 1758 }, { "epoch": 0.35161540191399515, "grad_norm": 2.25, "learning_rate": 9.697461128528281e-06, "loss": 1.0798, "step": 1759 }, { "epoch": 0.35181529696909125, "grad_norm": 2.140625, "learning_rate": 9.697100023228966e-06, "loss": 1.0741, "step": 1760 }, { "epoch": 0.3520151920241873, "grad_norm": 2.03125, "learning_rate": 9.69673870928497e-06, "loss": 1.0234, "step": 1761 }, { "epoch": 0.3522150870792834, "grad_norm": 2.140625, "learning_rate": 9.69637718671234e-06, "loss": 1.1072, "step": 1762 }, { "epoch": 0.35241498213437944, "grad_norm": 2.140625, "learning_rate": 9.696015455527139e-06, "loss": 1.0557, "step": 1763 }, { "epoch": 0.35261487718947554, "grad_norm": 2.203125, "learning_rate": 9.695653515745433e-06, "loss": 1.0192, "step": 1764 }, { "epoch": 0.3528147722445716, "grad_norm": 2.1875, "learning_rate": 9.6952913673833e-06, "loss": 1.0337, "step": 1765 }, { "epoch": 0.3530146672996677, "grad_norm": 2.0625, "learning_rate": 9.694929010456824e-06, "loss": 1.0215, "step": 1766 }, { "epoch": 0.35321456235476373, "grad_norm": 2.140625, "learning_rate": 9.694566444982104e-06, "loss": 1.1584, "step": 1767 }, { "epoch": 0.35341445740985983, "grad_norm": 2.03125, "learning_rate": 9.694203670975244e-06, "loss": 1.0854, "step": 1768 }, { "epoch": 0.3536143524649559, "grad_norm": 2.15625, "learning_rate": 9.693840688452358e-06, "loss": 1.1391, "step": 1769 }, { "epoch": 0.35381424752005197, "grad_norm": 2.25, "learning_rate": 9.69347749742957e-06, "loss": 0.9615, "step": 1770 }, { "epoch": 0.35401414257514807, "grad_norm": 2.265625, "learning_rate": 9.693114097923013e-06, "loss": 1.0806, "step": 1771 }, { "epoch": 0.3542140376302441, "grad_norm": 2.046875, "learning_rate": 9.692750489948829e-06, "loss": 1.1137, "step": 1772 }, { "epoch": 0.3544139326853402, "grad_norm": 2.09375, "learning_rate": 9.692386673523168e-06, "loss": 1.004, "step": 1773 }, { "epoch": 0.35461382774043626, "grad_norm": 2.203125, "learning_rate": 9.692022648662193e-06, "loss": 1.0561, "step": 1774 }, { "epoch": 0.35481372279553236, "grad_norm": 2.171875, "learning_rate": 9.691658415382073e-06, "loss": 1.003, "step": 1775 }, { "epoch": 0.3550136178506284, "grad_norm": 2.015625, "learning_rate": 9.691293973698988e-06, "loss": 1.0291, "step": 1776 }, { "epoch": 0.3552135129057245, "grad_norm": 2.046875, "learning_rate": 9.690929323629126e-06, "loss": 1.0943, "step": 1777 }, { "epoch": 0.35541340796082055, "grad_norm": 2.125, "learning_rate": 9.690564465188684e-06, "loss": 1.1177, "step": 1778 }, { "epoch": 0.35561330301591665, "grad_norm": 2.09375, "learning_rate": 9.69019939839387e-06, "loss": 1.0692, "step": 1779 }, { "epoch": 0.3558131980710127, "grad_norm": 2.09375, "learning_rate": 9.689834123260896e-06, "loss": 1.1017, "step": 1780 }, { "epoch": 0.3560130931261088, "grad_norm": 2.25, "learning_rate": 9.689468639805996e-06, "loss": 1.0774, "step": 1781 }, { "epoch": 0.3562129881812049, "grad_norm": 2.03125, "learning_rate": 9.689102948045398e-06, "loss": 1.0658, "step": 1782 }, { "epoch": 0.35641288323630094, "grad_norm": 2.078125, "learning_rate": 9.688737047995349e-06, "loss": 1.0039, "step": 1783 }, { "epoch": 0.35661277829139704, "grad_norm": 2.078125, "learning_rate": 9.688370939672099e-06, "loss": 1.0924, "step": 1784 }, { "epoch": 0.3568126733464931, "grad_norm": 2.15625, "learning_rate": 9.688004623091914e-06, "loss": 1.1315, "step": 1785 }, { "epoch": 0.3570125684015892, "grad_norm": 2.25, "learning_rate": 9.687638098271064e-06, "loss": 1.1243, "step": 1786 }, { "epoch": 0.3572124634566852, "grad_norm": 2.140625, "learning_rate": 9.68727136522583e-06, "loss": 1.0848, "step": 1787 }, { "epoch": 0.3574123585117813, "grad_norm": 2.203125, "learning_rate": 9.686904423972502e-06, "loss": 1.0546, "step": 1788 }, { "epoch": 0.35761225356687737, "grad_norm": 2.140625, "learning_rate": 9.68653727452738e-06, "loss": 1.0223, "step": 1789 }, { "epoch": 0.35781214862197347, "grad_norm": 2.046875, "learning_rate": 9.686169916906773e-06, "loss": 1.0758, "step": 1790 }, { "epoch": 0.3580120436770695, "grad_norm": 2.078125, "learning_rate": 9.685802351127e-06, "loss": 1.0396, "step": 1791 }, { "epoch": 0.3582119387321656, "grad_norm": 2.21875, "learning_rate": 9.685434577204387e-06, "loss": 1.0373, "step": 1792 }, { "epoch": 0.3584118337872617, "grad_norm": 2.03125, "learning_rate": 9.68506659515527e-06, "loss": 1.0745, "step": 1793 }, { "epoch": 0.35861172884235776, "grad_norm": 2.25, "learning_rate": 9.684698404995995e-06, "loss": 1.0851, "step": 1794 }, { "epoch": 0.35881162389745386, "grad_norm": 2.171875, "learning_rate": 9.684330006742916e-06, "loss": 1.0869, "step": 1795 }, { "epoch": 0.3590115189525499, "grad_norm": 2.328125, "learning_rate": 9.6839614004124e-06, "loss": 1.1189, "step": 1796 }, { "epoch": 0.359211414007646, "grad_norm": 2.09375, "learning_rate": 9.68359258602082e-06, "loss": 1.0061, "step": 1797 }, { "epoch": 0.35941130906274205, "grad_norm": 2.3125, "learning_rate": 9.683223563584556e-06, "loss": 1.0797, "step": 1798 }, { "epoch": 0.35961120411783815, "grad_norm": 2.046875, "learning_rate": 9.682854333120002e-06, "loss": 1.0724, "step": 1799 }, { "epoch": 0.3598110991729342, "grad_norm": 2.046875, "learning_rate": 9.682484894643558e-06, "loss": 1.0145, "step": 1800 }, { "epoch": 0.3600109942280303, "grad_norm": 2.359375, "learning_rate": 9.682115248171635e-06, "loss": 1.1822, "step": 1801 }, { "epoch": 0.36021088928312633, "grad_norm": 2.203125, "learning_rate": 9.681745393720654e-06, "loss": 1.1737, "step": 1802 }, { "epoch": 0.36041078433822243, "grad_norm": 2.203125, "learning_rate": 9.68137533130704e-06, "loss": 1.1308, "step": 1803 }, { "epoch": 0.36061067939331853, "grad_norm": 2.140625, "learning_rate": 9.681005060947237e-06, "loss": 0.9999, "step": 1804 }, { "epoch": 0.3608105744484146, "grad_norm": 1.984375, "learning_rate": 9.680634582657688e-06, "loss": 1.0311, "step": 1805 }, { "epoch": 0.3610104695035107, "grad_norm": 2.234375, "learning_rate": 9.68026389645485e-06, "loss": 1.0794, "step": 1806 }, { "epoch": 0.3612103645586067, "grad_norm": 2.203125, "learning_rate": 9.67989300235519e-06, "loss": 1.0, "step": 1807 }, { "epoch": 0.3614102596137028, "grad_norm": 2.0625, "learning_rate": 9.679521900375183e-06, "loss": 1.0749, "step": 1808 }, { "epoch": 0.36161015466879887, "grad_norm": 2.109375, "learning_rate": 9.679150590531312e-06, "loss": 1.1228, "step": 1809 }, { "epoch": 0.36181004972389497, "grad_norm": 2.125, "learning_rate": 9.678779072840072e-06, "loss": 1.0755, "step": 1810 }, { "epoch": 0.362009944778991, "grad_norm": 2.234375, "learning_rate": 9.678407347317967e-06, "loss": 1.1757, "step": 1811 }, { "epoch": 0.3622098398340871, "grad_norm": 2.09375, "learning_rate": 9.678035413981505e-06, "loss": 0.97, "step": 1812 }, { "epoch": 0.36240973488918315, "grad_norm": 2.21875, "learning_rate": 9.677663272847211e-06, "loss": 0.9848, "step": 1813 }, { "epoch": 0.36260962994427925, "grad_norm": 2.125, "learning_rate": 9.677290923931613e-06, "loss": 1.1265, "step": 1814 }, { "epoch": 0.36280952499937535, "grad_norm": 2.21875, "learning_rate": 9.676918367251251e-06, "loss": 1.0995, "step": 1815 }, { "epoch": 0.3630094200544714, "grad_norm": 2.0625, "learning_rate": 9.676545602822675e-06, "loss": 1.0715, "step": 1816 }, { "epoch": 0.3632093151095675, "grad_norm": 2.125, "learning_rate": 9.676172630662446e-06, "loss": 1.0607, "step": 1817 }, { "epoch": 0.36340921016466354, "grad_norm": 2.046875, "learning_rate": 9.675799450787125e-06, "loss": 1.0595, "step": 1818 }, { "epoch": 0.36360910521975964, "grad_norm": 2.140625, "learning_rate": 9.675426063213291e-06, "loss": 1.0687, "step": 1819 }, { "epoch": 0.3638090002748557, "grad_norm": 2.109375, "learning_rate": 9.675052467957533e-06, "loss": 1.0903, "step": 1820 }, { "epoch": 0.3640088953299518, "grad_norm": 2.171875, "learning_rate": 9.674678665036443e-06, "loss": 1.1392, "step": 1821 }, { "epoch": 0.36420879038504783, "grad_norm": 2.40625, "learning_rate": 9.674304654466625e-06, "loss": 1.0846, "step": 1822 }, { "epoch": 0.36440868544014393, "grad_norm": 2.296875, "learning_rate": 9.673930436264695e-06, "loss": 1.1428, "step": 1823 }, { "epoch": 0.36460858049524, "grad_norm": 2.265625, "learning_rate": 9.673556010447274e-06, "loss": 1.0965, "step": 1824 }, { "epoch": 0.3648084755503361, "grad_norm": 2.046875, "learning_rate": 9.673181377030993e-06, "loss": 1.0701, "step": 1825 }, { "epoch": 0.3650083706054322, "grad_norm": 2.03125, "learning_rate": 9.672806536032495e-06, "loss": 1.0332, "step": 1826 }, { "epoch": 0.3652082656605282, "grad_norm": 1.9765625, "learning_rate": 9.67243148746843e-06, "loss": 1.0372, "step": 1827 }, { "epoch": 0.3654081607156243, "grad_norm": 2.234375, "learning_rate": 9.672056231355455e-06, "loss": 1.1678, "step": 1828 }, { "epoch": 0.36560805577072036, "grad_norm": 2.078125, "learning_rate": 9.671680767710244e-06, "loss": 1.0088, "step": 1829 }, { "epoch": 0.36580795082581646, "grad_norm": 2.21875, "learning_rate": 9.671305096549473e-06, "loss": 1.1343, "step": 1830 }, { "epoch": 0.3660078458809125, "grad_norm": 2.234375, "learning_rate": 9.670929217889825e-06, "loss": 0.9761, "step": 1831 }, { "epoch": 0.3662077409360086, "grad_norm": 2.125, "learning_rate": 9.670553131748003e-06, "loss": 1.0866, "step": 1832 }, { "epoch": 0.36640763599110465, "grad_norm": 2.25, "learning_rate": 9.670176838140708e-06, "loss": 1.1274, "step": 1833 }, { "epoch": 0.36660753104620075, "grad_norm": 2.09375, "learning_rate": 9.669800337084658e-06, "loss": 1.1767, "step": 1834 }, { "epoch": 0.3668074261012968, "grad_norm": 2.25, "learning_rate": 9.669423628596574e-06, "loss": 1.0194, "step": 1835 }, { "epoch": 0.3670073211563929, "grad_norm": 2.0625, "learning_rate": 9.669046712693192e-06, "loss": 0.9397, "step": 1836 }, { "epoch": 0.367207216211489, "grad_norm": 2.1875, "learning_rate": 9.668669589391256e-06, "loss": 1.0062, "step": 1837 }, { "epoch": 0.36740711126658504, "grad_norm": 2.078125, "learning_rate": 9.668292258707513e-06, "loss": 1.0522, "step": 1838 }, { "epoch": 0.36760700632168114, "grad_norm": 2.15625, "learning_rate": 9.667914720658726e-06, "loss": 1.0841, "step": 1839 }, { "epoch": 0.3678069013767772, "grad_norm": 2.078125, "learning_rate": 9.667536975261667e-06, "loss": 1.1204, "step": 1840 }, { "epoch": 0.3680067964318733, "grad_norm": 2.265625, "learning_rate": 9.667159022533115e-06, "loss": 1.0638, "step": 1841 }, { "epoch": 0.36820669148696933, "grad_norm": 2.234375, "learning_rate": 9.666780862489856e-06, "loss": 1.035, "step": 1842 }, { "epoch": 0.36840658654206543, "grad_norm": 2.125, "learning_rate": 9.66640249514869e-06, "loss": 1.2266, "step": 1843 }, { "epoch": 0.3686064815971615, "grad_norm": 2.171875, "learning_rate": 9.666023920526423e-06, "loss": 1.0598, "step": 1844 }, { "epoch": 0.3688063766522576, "grad_norm": 2.125, "learning_rate": 9.665645138639872e-06, "loss": 1.0532, "step": 1845 }, { "epoch": 0.3690062717073536, "grad_norm": 2.1875, "learning_rate": 9.665266149505863e-06, "loss": 1.0488, "step": 1846 }, { "epoch": 0.3692061667624497, "grad_norm": 2.078125, "learning_rate": 9.664886953141228e-06, "loss": 1.0693, "step": 1847 }, { "epoch": 0.36940606181754576, "grad_norm": 2.125, "learning_rate": 9.664507549562814e-06, "loss": 1.016, "step": 1848 }, { "epoch": 0.36960595687264186, "grad_norm": 2.140625, "learning_rate": 9.664127938787473e-06, "loss": 1.0602, "step": 1849 }, { "epoch": 0.36980585192773796, "grad_norm": 2.265625, "learning_rate": 9.663748120832069e-06, "loss": 1.1527, "step": 1850 }, { "epoch": 0.370005746982834, "grad_norm": 2.125, "learning_rate": 9.663368095713468e-06, "loss": 0.9813, "step": 1851 }, { "epoch": 0.3702056420379301, "grad_norm": 2.09375, "learning_rate": 9.662987863448556e-06, "loss": 1.065, "step": 1852 }, { "epoch": 0.37040553709302615, "grad_norm": 2.296875, "learning_rate": 9.662607424054221e-06, "loss": 1.0928, "step": 1853 }, { "epoch": 0.37060543214812225, "grad_norm": 2.125, "learning_rate": 9.662226777547363e-06, "loss": 1.0619, "step": 1854 }, { "epoch": 0.3708053272032183, "grad_norm": 2.125, "learning_rate": 9.661845923944888e-06, "loss": 1.1729, "step": 1855 }, { "epoch": 0.3710052222583144, "grad_norm": 2.15625, "learning_rate": 9.661464863263717e-06, "loss": 1.0977, "step": 1856 }, { "epoch": 0.37120511731341044, "grad_norm": 2.25, "learning_rate": 9.661083595520772e-06, "loss": 1.0552, "step": 1857 }, { "epoch": 0.37140501236850654, "grad_norm": 2.15625, "learning_rate": 9.660702120732992e-06, "loss": 1.1044, "step": 1858 }, { "epoch": 0.3716049074236026, "grad_norm": 2.09375, "learning_rate": 9.660320438917323e-06, "loss": 1.0601, "step": 1859 }, { "epoch": 0.3718048024786987, "grad_norm": 2.09375, "learning_rate": 9.659938550090717e-06, "loss": 1.0558, "step": 1860 }, { "epoch": 0.3720046975337948, "grad_norm": 2.140625, "learning_rate": 9.659556454270139e-06, "loss": 1.0663, "step": 1861 }, { "epoch": 0.3722045925888908, "grad_norm": 2.0625, "learning_rate": 9.659174151472562e-06, "loss": 1.0182, "step": 1862 }, { "epoch": 0.3724044876439869, "grad_norm": 2.015625, "learning_rate": 9.658791641714965e-06, "loss": 1.0283, "step": 1863 }, { "epoch": 0.37260438269908297, "grad_norm": 2.125, "learning_rate": 9.65840892501434e-06, "loss": 1.0972, "step": 1864 }, { "epoch": 0.37280427775417907, "grad_norm": 2.171875, "learning_rate": 9.65802600138769e-06, "loss": 1.0751, "step": 1865 }, { "epoch": 0.3730041728092751, "grad_norm": 2.046875, "learning_rate": 9.65764287085202e-06, "loss": 1.076, "step": 1866 }, { "epoch": 0.3732040678643712, "grad_norm": 2.0625, "learning_rate": 9.657259533424355e-06, "loss": 1.0158, "step": 1867 }, { "epoch": 0.37340396291946726, "grad_norm": 2.125, "learning_rate": 9.656875989121715e-06, "loss": 1.0534, "step": 1868 }, { "epoch": 0.37360385797456336, "grad_norm": 1.9609375, "learning_rate": 9.656492237961143e-06, "loss": 1.062, "step": 1869 }, { "epoch": 0.3738037530296594, "grad_norm": 2.140625, "learning_rate": 9.656108279959684e-06, "loss": 1.111, "step": 1870 }, { "epoch": 0.3740036480847555, "grad_norm": 2.1875, "learning_rate": 9.65572411513439e-06, "loss": 1.1052, "step": 1871 }, { "epoch": 0.3742035431398516, "grad_norm": 2.390625, "learning_rate": 9.65533974350233e-06, "loss": 1.0701, "step": 1872 }, { "epoch": 0.37440343819494765, "grad_norm": 2.09375, "learning_rate": 9.654955165080575e-06, "loss": 1.0559, "step": 1873 }, { "epoch": 0.37460333325004375, "grad_norm": 1.890625, "learning_rate": 9.65457037988621e-06, "loss": 0.9771, "step": 1874 }, { "epoch": 0.3748032283051398, "grad_norm": 2.28125, "learning_rate": 9.654185387936323e-06, "loss": 1.0589, "step": 1875 }, { "epoch": 0.3750031233602359, "grad_norm": 2.171875, "learning_rate": 9.653800189248022e-06, "loss": 1.0885, "step": 1876 }, { "epoch": 0.37520301841533193, "grad_norm": 2.03125, "learning_rate": 9.65341478383841e-06, "loss": 0.9952, "step": 1877 }, { "epoch": 0.37540291347042803, "grad_norm": 2.171875, "learning_rate": 9.653029171724612e-06, "loss": 1.0742, "step": 1878 }, { "epoch": 0.3756028085255241, "grad_norm": 2.046875, "learning_rate": 9.652643352923756e-06, "loss": 1.0288, "step": 1879 }, { "epoch": 0.3758027035806202, "grad_norm": 2.078125, "learning_rate": 9.652257327452978e-06, "loss": 1.1885, "step": 1880 }, { "epoch": 0.3760025986357162, "grad_norm": 2.109375, "learning_rate": 9.651871095329425e-06, "loss": 1.1295, "step": 1881 }, { "epoch": 0.3762024936908123, "grad_norm": 2.203125, "learning_rate": 9.651484656570257e-06, "loss": 1.0479, "step": 1882 }, { "epoch": 0.3764023887459084, "grad_norm": 2.09375, "learning_rate": 9.651098011192638e-06, "loss": 1.0396, "step": 1883 }, { "epoch": 0.37660228380100447, "grad_norm": 2.21875, "learning_rate": 9.65071115921374e-06, "loss": 1.1473, "step": 1884 }, { "epoch": 0.37680217885610057, "grad_norm": 2.109375, "learning_rate": 9.65032410065075e-06, "loss": 1.0292, "step": 1885 }, { "epoch": 0.3770020739111966, "grad_norm": 2.109375, "learning_rate": 9.64993683552086e-06, "loss": 1.1086, "step": 1886 }, { "epoch": 0.3772019689662927, "grad_norm": 2.109375, "learning_rate": 9.649549363841273e-06, "loss": 0.9923, "step": 1887 }, { "epoch": 0.37740186402138876, "grad_norm": 2.140625, "learning_rate": 9.649161685629199e-06, "loss": 1.0775, "step": 1888 }, { "epoch": 0.37760175907648486, "grad_norm": 2.21875, "learning_rate": 9.64877380090186e-06, "loss": 1.1234, "step": 1889 }, { "epoch": 0.3778016541315809, "grad_norm": 2.0, "learning_rate": 9.648385709676485e-06, "loss": 1.0188, "step": 1890 }, { "epoch": 0.378001549186677, "grad_norm": 2.140625, "learning_rate": 9.647997411970313e-06, "loss": 0.9725, "step": 1891 }, { "epoch": 0.37820144424177304, "grad_norm": 2.171875, "learning_rate": 9.647608907800593e-06, "loss": 1.0811, "step": 1892 }, { "epoch": 0.37840133929686914, "grad_norm": 2.109375, "learning_rate": 9.647220197184582e-06, "loss": 0.9877, "step": 1893 }, { "epoch": 0.37860123435196524, "grad_norm": 2.140625, "learning_rate": 9.646831280139544e-06, "loss": 1.1433, "step": 1894 }, { "epoch": 0.3788011294070613, "grad_norm": 2.140625, "learning_rate": 9.646442156682758e-06, "loss": 1.1211, "step": 1895 }, { "epoch": 0.3790010244621574, "grad_norm": 2.140625, "learning_rate": 9.646052826831509e-06, "loss": 0.996, "step": 1896 }, { "epoch": 0.37920091951725343, "grad_norm": 2.109375, "learning_rate": 9.64566329060309e-06, "loss": 1.088, "step": 1897 }, { "epoch": 0.37940081457234953, "grad_norm": 2.171875, "learning_rate": 9.6452735480148e-06, "loss": 1.0102, "step": 1898 }, { "epoch": 0.3796007096274456, "grad_norm": 2.140625, "learning_rate": 9.644883599083959e-06, "loss": 1.072, "step": 1899 }, { "epoch": 0.3798006046825417, "grad_norm": 2.078125, "learning_rate": 9.644493443827883e-06, "loss": 1.113, "step": 1900 }, { "epoch": 0.3800004997376377, "grad_norm": 2.28125, "learning_rate": 9.644103082263904e-06, "loss": 1.0365, "step": 1901 }, { "epoch": 0.3802003947927338, "grad_norm": 2.171875, "learning_rate": 9.643712514409362e-06, "loss": 1.1374, "step": 1902 }, { "epoch": 0.38040028984782986, "grad_norm": 2.03125, "learning_rate": 9.643321740281606e-06, "loss": 1.091, "step": 1903 }, { "epoch": 0.38060018490292596, "grad_norm": 2.171875, "learning_rate": 9.642930759897995e-06, "loss": 1.1503, "step": 1904 }, { "epoch": 0.38080007995802206, "grad_norm": 2.1875, "learning_rate": 9.642539573275895e-06, "loss": 1.1326, "step": 1905 }, { "epoch": 0.3809999750131181, "grad_norm": 1.96875, "learning_rate": 9.642148180432685e-06, "loss": 1.0329, "step": 1906 }, { "epoch": 0.3811998700682142, "grad_norm": 2.109375, "learning_rate": 9.641756581385745e-06, "loss": 1.1499, "step": 1907 }, { "epoch": 0.38139976512331025, "grad_norm": 2.265625, "learning_rate": 9.641364776152477e-06, "loss": 1.1574, "step": 1908 }, { "epoch": 0.38159966017840635, "grad_norm": 2.078125, "learning_rate": 9.64097276475028e-06, "loss": 1.0513, "step": 1909 }, { "epoch": 0.3817995552335024, "grad_norm": 2.109375, "learning_rate": 9.640580547196568e-06, "loss": 1.0772, "step": 1910 }, { "epoch": 0.3819994502885985, "grad_norm": 2.15625, "learning_rate": 9.640188123508764e-06, "loss": 1.0826, "step": 1911 }, { "epoch": 0.38219934534369454, "grad_norm": 2.171875, "learning_rate": 9.639795493704299e-06, "loss": 1.1318, "step": 1912 }, { "epoch": 0.38239924039879064, "grad_norm": 2.109375, "learning_rate": 9.639402657800613e-06, "loss": 1.0901, "step": 1913 }, { "epoch": 0.3825991354538867, "grad_norm": 2.109375, "learning_rate": 9.639009615815158e-06, "loss": 1.07, "step": 1914 }, { "epoch": 0.3827990305089828, "grad_norm": 2.0, "learning_rate": 9.63861636776539e-06, "loss": 1.103, "step": 1915 }, { "epoch": 0.3829989255640789, "grad_norm": 2.15625, "learning_rate": 9.63822291366878e-06, "loss": 1.1312, "step": 1916 }, { "epoch": 0.38319882061917493, "grad_norm": 2.078125, "learning_rate": 9.637829253542801e-06, "loss": 1.0867, "step": 1917 }, { "epoch": 0.38339871567427103, "grad_norm": 2.0625, "learning_rate": 9.637435387404943e-06, "loss": 1.1388, "step": 1918 }, { "epoch": 0.3835986107293671, "grad_norm": 2.15625, "learning_rate": 9.637041315272702e-06, "loss": 1.1109, "step": 1919 }, { "epoch": 0.3837985057844632, "grad_norm": 2.015625, "learning_rate": 9.63664703716358e-06, "loss": 0.9584, "step": 1920 }, { "epoch": 0.3839984008395592, "grad_norm": 2.078125, "learning_rate": 9.636252553095091e-06, "loss": 1.1159, "step": 1921 }, { "epoch": 0.3841982958946553, "grad_norm": 2.171875, "learning_rate": 9.635857863084758e-06, "loss": 1.0773, "step": 1922 }, { "epoch": 0.38439819094975136, "grad_norm": 2.15625, "learning_rate": 9.635462967150116e-06, "loss": 1.0794, "step": 1923 }, { "epoch": 0.38459808600484746, "grad_norm": 2.109375, "learning_rate": 9.635067865308703e-06, "loss": 1.0855, "step": 1924 }, { "epoch": 0.3847979810599435, "grad_norm": 2.140625, "learning_rate": 9.634672557578072e-06, "loss": 1.0506, "step": 1925 }, { "epoch": 0.3849978761150396, "grad_norm": 2.125, "learning_rate": 9.63427704397578e-06, "loss": 0.9756, "step": 1926 }, { "epoch": 0.3851977711701357, "grad_norm": 2.140625, "learning_rate": 9.633881324519397e-06, "loss": 1.0076, "step": 1927 }, { "epoch": 0.38539766622523175, "grad_norm": 2.296875, "learning_rate": 9.633485399226499e-06, "loss": 1.0761, "step": 1928 }, { "epoch": 0.38559756128032785, "grad_norm": 2.078125, "learning_rate": 9.633089268114675e-06, "loss": 1.0176, "step": 1929 }, { "epoch": 0.3857974563354239, "grad_norm": 2.078125, "learning_rate": 9.632692931201522e-06, "loss": 1.0506, "step": 1930 }, { "epoch": 0.38599735139052, "grad_norm": 2.046875, "learning_rate": 9.632296388504641e-06, "loss": 1.0752, "step": 1931 }, { "epoch": 0.38619724644561604, "grad_norm": 2.1875, "learning_rate": 9.631899640041652e-06, "loss": 1.1649, "step": 1932 }, { "epoch": 0.38639714150071214, "grad_norm": 2.328125, "learning_rate": 9.631502685830175e-06, "loss": 1.0854, "step": 1933 }, { "epoch": 0.3865970365558082, "grad_norm": 2.078125, "learning_rate": 9.631105525887841e-06, "loss": 1.0688, "step": 1934 }, { "epoch": 0.3867969316109043, "grad_norm": 2.09375, "learning_rate": 9.630708160232297e-06, "loss": 1.0149, "step": 1935 }, { "epoch": 0.3869968266660003, "grad_norm": 2.21875, "learning_rate": 9.63031058888119e-06, "loss": 1.0706, "step": 1936 }, { "epoch": 0.3871967217210964, "grad_norm": 2.0625, "learning_rate": 9.629912811852181e-06, "loss": 0.984, "step": 1937 }, { "epoch": 0.3873966167761925, "grad_norm": 2.140625, "learning_rate": 9.62951482916294e-06, "loss": 1.07, "step": 1938 }, { "epoch": 0.38759651183128857, "grad_norm": 2.265625, "learning_rate": 9.629116640831144e-06, "loss": 1.1219, "step": 1939 }, { "epoch": 0.38779640688638467, "grad_norm": 2.09375, "learning_rate": 9.628718246874482e-06, "loss": 1.1367, "step": 1940 }, { "epoch": 0.3879963019414807, "grad_norm": 2.078125, "learning_rate": 9.628319647310648e-06, "loss": 1.0883, "step": 1941 }, { "epoch": 0.3881961969965768, "grad_norm": 2.171875, "learning_rate": 9.627920842157352e-06, "loss": 1.1226, "step": 1942 }, { "epoch": 0.38839609205167286, "grad_norm": 2.21875, "learning_rate": 9.627521831432308e-06, "loss": 1.1547, "step": 1943 }, { "epoch": 0.38859598710676896, "grad_norm": 1.9453125, "learning_rate": 9.627122615153234e-06, "loss": 1.0724, "step": 1944 }, { "epoch": 0.388795882161865, "grad_norm": 2.1875, "learning_rate": 9.62672319333787e-06, "loss": 1.1552, "step": 1945 }, { "epoch": 0.3889957772169611, "grad_norm": 2.046875, "learning_rate": 9.626323566003955e-06, "loss": 1.1128, "step": 1946 }, { "epoch": 0.38919567227205715, "grad_norm": 2.265625, "learning_rate": 9.625923733169242e-06, "loss": 1.1447, "step": 1947 }, { "epoch": 0.38939556732715325, "grad_norm": 2.03125, "learning_rate": 9.62552369485149e-06, "loss": 1.0015, "step": 1948 }, { "epoch": 0.38959546238224935, "grad_norm": 2.21875, "learning_rate": 9.625123451068472e-06, "loss": 1.267, "step": 1949 }, { "epoch": 0.3897953574373454, "grad_norm": 2.234375, "learning_rate": 9.62472300183796e-06, "loss": 1.0929, "step": 1950 }, { "epoch": 0.3899952524924415, "grad_norm": 2.046875, "learning_rate": 9.62432234717775e-06, "loss": 0.9937, "step": 1951 }, { "epoch": 0.39019514754753754, "grad_norm": 2.078125, "learning_rate": 9.623921487105634e-06, "loss": 1.1026, "step": 1952 }, { "epoch": 0.39039504260263364, "grad_norm": 2.203125, "learning_rate": 9.62352042163942e-06, "loss": 1.0733, "step": 1953 }, { "epoch": 0.3905949376577297, "grad_norm": 2.171875, "learning_rate": 9.623119150796923e-06, "loss": 1.0681, "step": 1954 }, { "epoch": 0.3907948327128258, "grad_norm": 2.078125, "learning_rate": 9.622717674595965e-06, "loss": 1.0314, "step": 1955 }, { "epoch": 0.3909947277679218, "grad_norm": 2.234375, "learning_rate": 9.622315993054384e-06, "loss": 1.1474, "step": 1956 }, { "epoch": 0.3911946228230179, "grad_norm": 2.046875, "learning_rate": 9.621914106190019e-06, "loss": 1.0736, "step": 1957 }, { "epoch": 0.39139451787811397, "grad_norm": 2.125, "learning_rate": 9.621512014020722e-06, "loss": 1.0334, "step": 1958 }, { "epoch": 0.39159441293321007, "grad_norm": 2.15625, "learning_rate": 9.621109716564357e-06, "loss": 1.1237, "step": 1959 }, { "epoch": 0.3917943079883061, "grad_norm": 2.09375, "learning_rate": 9.620707213838789e-06, "loss": 1.0922, "step": 1960 }, { "epoch": 0.3919942030434022, "grad_norm": 2.109375, "learning_rate": 9.620304505861902e-06, "loss": 1.0834, "step": 1961 }, { "epoch": 0.3921940980984983, "grad_norm": 2.015625, "learning_rate": 9.619901592651582e-06, "loss": 1.0662, "step": 1962 }, { "epoch": 0.39239399315359436, "grad_norm": 2.0625, "learning_rate": 9.619498474225729e-06, "loss": 1.0044, "step": 1963 }, { "epoch": 0.39259388820869046, "grad_norm": 2.171875, "learning_rate": 9.619095150602243e-06, "loss": 1.0423, "step": 1964 }, { "epoch": 0.3927937832637865, "grad_norm": 2.15625, "learning_rate": 9.618691621799047e-06, "loss": 1.0499, "step": 1965 }, { "epoch": 0.3929936783188826, "grad_norm": 2.09375, "learning_rate": 9.618287887834062e-06, "loss": 0.9927, "step": 1966 }, { "epoch": 0.39319357337397864, "grad_norm": 2.109375, "learning_rate": 9.617883948725219e-06, "loss": 0.9855, "step": 1967 }, { "epoch": 0.39339346842907474, "grad_norm": 2.25, "learning_rate": 9.617479804490468e-06, "loss": 1.0272, "step": 1968 }, { "epoch": 0.3935933634841708, "grad_norm": 2.171875, "learning_rate": 9.617075455147757e-06, "loss": 1.1507, "step": 1969 }, { "epoch": 0.3937932585392669, "grad_norm": 2.0625, "learning_rate": 9.616670900715046e-06, "loss": 1.0953, "step": 1970 }, { "epoch": 0.39399315359436293, "grad_norm": 2.109375, "learning_rate": 9.616266141210306e-06, "loss": 1.0569, "step": 1971 }, { "epoch": 0.39419304864945903, "grad_norm": 2.125, "learning_rate": 9.615861176651519e-06, "loss": 1.1121, "step": 1972 }, { "epoch": 0.39439294370455513, "grad_norm": 2.078125, "learning_rate": 9.61545600705667e-06, "loss": 1.1301, "step": 1973 }, { "epoch": 0.3945928387596512, "grad_norm": 2.203125, "learning_rate": 9.615050632443759e-06, "loss": 1.0539, "step": 1974 }, { "epoch": 0.3947927338147473, "grad_norm": 2.0625, "learning_rate": 9.614645052830791e-06, "loss": 1.0629, "step": 1975 }, { "epoch": 0.3949926288698433, "grad_norm": 2.015625, "learning_rate": 9.614239268235783e-06, "loss": 1.0353, "step": 1976 }, { "epoch": 0.3951925239249394, "grad_norm": 2.34375, "learning_rate": 9.613833278676762e-06, "loss": 1.1782, "step": 1977 }, { "epoch": 0.39539241898003546, "grad_norm": 2.25, "learning_rate": 9.613427084171755e-06, "loss": 1.0323, "step": 1978 }, { "epoch": 0.39559231403513156, "grad_norm": 2.109375, "learning_rate": 9.613020684738813e-06, "loss": 1.146, "step": 1979 }, { "epoch": 0.3957922090902276, "grad_norm": 2.046875, "learning_rate": 9.612614080395983e-06, "loss": 1.0603, "step": 1980 }, { "epoch": 0.3959921041453237, "grad_norm": 2.046875, "learning_rate": 9.612207271161328e-06, "loss": 1.0065, "step": 1981 }, { "epoch": 0.39619199920041975, "grad_norm": 2.21875, "learning_rate": 9.61180025705292e-06, "loss": 1.0248, "step": 1982 }, { "epoch": 0.39639189425551585, "grad_norm": 2.234375, "learning_rate": 9.611393038088839e-06, "loss": 1.0946, "step": 1983 }, { "epoch": 0.39659178931061195, "grad_norm": 2.25, "learning_rate": 9.610985614287168e-06, "loss": 1.049, "step": 1984 }, { "epoch": 0.396791684365708, "grad_norm": 2.09375, "learning_rate": 9.61057798566601e-06, "loss": 1.0451, "step": 1985 }, { "epoch": 0.3969915794208041, "grad_norm": 2.1875, "learning_rate": 9.61017015224347e-06, "loss": 1.1273, "step": 1986 }, { "epoch": 0.39719147447590014, "grad_norm": 2.109375, "learning_rate": 9.609762114037665e-06, "loss": 1.1022, "step": 1987 }, { "epoch": 0.39739136953099624, "grad_norm": 2.234375, "learning_rate": 9.609353871066719e-06, "loss": 0.9584, "step": 1988 }, { "epoch": 0.3975912645860923, "grad_norm": 2.25, "learning_rate": 9.608945423348766e-06, "loss": 1.1338, "step": 1989 }, { "epoch": 0.3977911596411884, "grad_norm": 2.265625, "learning_rate": 9.60853677090195e-06, "loss": 1.0808, "step": 1990 }, { "epoch": 0.39799105469628443, "grad_norm": 2.171875, "learning_rate": 9.608127913744421e-06, "loss": 1.0717, "step": 1991 }, { "epoch": 0.39819094975138053, "grad_norm": 2.046875, "learning_rate": 9.607718851894346e-06, "loss": 1.066, "step": 1992 }, { "epoch": 0.3983908448064766, "grad_norm": 2.171875, "learning_rate": 9.607309585369889e-06, "loss": 1.1569, "step": 1993 }, { "epoch": 0.3985907398615727, "grad_norm": 2.171875, "learning_rate": 9.606900114189231e-06, "loss": 1.0631, "step": 1994 }, { "epoch": 0.3987906349166688, "grad_norm": 2.09375, "learning_rate": 9.606490438370563e-06, "loss": 1.0811, "step": 1995 }, { "epoch": 0.3989905299717648, "grad_norm": 2.203125, "learning_rate": 9.606080557932084e-06, "loss": 1.15, "step": 1996 }, { "epoch": 0.3991904250268609, "grad_norm": 2.140625, "learning_rate": 9.605670472891998e-06, "loss": 1.0575, "step": 1997 }, { "epoch": 0.39939032008195696, "grad_norm": 2.0625, "learning_rate": 9.605260183268519e-06, "loss": 0.9726, "step": 1998 }, { "epoch": 0.39959021513705306, "grad_norm": 2.8125, "learning_rate": 9.604849689079875e-06, "loss": 1.0419, "step": 1999 }, { "epoch": 0.3997901101921491, "grad_norm": 2.109375, "learning_rate": 9.604438990344303e-06, "loss": 1.0341, "step": 2000 }, { "epoch": 0.3999900052472452, "grad_norm": 2.21875, "learning_rate": 9.60402808708004e-06, "loss": 1.0606, "step": 2001 }, { "epoch": 0.40018990030234125, "grad_norm": 2.0625, "learning_rate": 9.60361697930534e-06, "loss": 1.011, "step": 2002 }, { "epoch": 0.40038979535743735, "grad_norm": 2.09375, "learning_rate": 9.603205667038468e-06, "loss": 1.0795, "step": 2003 }, { "epoch": 0.4005896904125334, "grad_norm": 2.171875, "learning_rate": 9.60279415029769e-06, "loss": 0.9848, "step": 2004 }, { "epoch": 0.4007895854676295, "grad_norm": 1.96875, "learning_rate": 9.60238242910129e-06, "loss": 0.9808, "step": 2005 }, { "epoch": 0.4009894805227256, "grad_norm": 2.03125, "learning_rate": 9.601970503467551e-06, "loss": 1.0222, "step": 2006 }, { "epoch": 0.40118937557782164, "grad_norm": 2.15625, "learning_rate": 9.601558373414776e-06, "loss": 1.0831, "step": 2007 }, { "epoch": 0.40138927063291774, "grad_norm": 2.015625, "learning_rate": 9.601146038961267e-06, "loss": 0.9672, "step": 2008 }, { "epoch": 0.4015891656880138, "grad_norm": 2.0625, "learning_rate": 9.600733500125345e-06, "loss": 1.0311, "step": 2009 }, { "epoch": 0.4017890607431099, "grad_norm": 2.140625, "learning_rate": 9.600320756925332e-06, "loss": 1.1057, "step": 2010 }, { "epoch": 0.4019889557982059, "grad_norm": 2.296875, "learning_rate": 9.59990780937956e-06, "loss": 1.1822, "step": 2011 }, { "epoch": 0.402188850853302, "grad_norm": 2.1875, "learning_rate": 9.599494657506376e-06, "loss": 1.1193, "step": 2012 }, { "epoch": 0.40238874590839807, "grad_norm": 2.078125, "learning_rate": 9.599081301324132e-06, "loss": 1.1015, "step": 2013 }, { "epoch": 0.40258864096349417, "grad_norm": 1.9921875, "learning_rate": 9.598667740851187e-06, "loss": 1.0014, "step": 2014 }, { "epoch": 0.4027885360185902, "grad_norm": 2.125, "learning_rate": 9.59825397610591e-06, "loss": 1.0768, "step": 2015 }, { "epoch": 0.4029884310736863, "grad_norm": 3.21875, "learning_rate": 9.597840007106685e-06, "loss": 1.0659, "step": 2016 }, { "epoch": 0.4031883261287824, "grad_norm": 2.109375, "learning_rate": 9.597425833871896e-06, "loss": 1.0412, "step": 2017 }, { "epoch": 0.40338822118387846, "grad_norm": 2.0625, "learning_rate": 9.597011456419943e-06, "loss": 1.0398, "step": 2018 }, { "epoch": 0.40358811623897456, "grad_norm": 2.1875, "learning_rate": 9.596596874769232e-06, "loss": 1.1117, "step": 2019 }, { "epoch": 0.4037880112940706, "grad_norm": 2.0, "learning_rate": 9.59618208893818e-06, "loss": 1.0276, "step": 2020 }, { "epoch": 0.4039879063491667, "grad_norm": 2.171875, "learning_rate": 9.59576709894521e-06, "loss": 1.1292, "step": 2021 }, { "epoch": 0.40418780140426275, "grad_norm": 2.015625, "learning_rate": 9.595351904808757e-06, "loss": 1.0813, "step": 2022 }, { "epoch": 0.40438769645935885, "grad_norm": 2.234375, "learning_rate": 9.594936506547262e-06, "loss": 1.1305, "step": 2023 }, { "epoch": 0.4045875915144549, "grad_norm": 2.1875, "learning_rate": 9.594520904179176e-06, "loss": 1.1371, "step": 2024 }, { "epoch": 0.404787486569551, "grad_norm": 2.03125, "learning_rate": 9.594105097722966e-06, "loss": 0.9341, "step": 2025 }, { "epoch": 0.40498738162464704, "grad_norm": 2.09375, "learning_rate": 9.593689087197096e-06, "loss": 1.1288, "step": 2026 }, { "epoch": 0.40518727667974314, "grad_norm": 2.203125, "learning_rate": 9.59327287262005e-06, "loss": 1.0997, "step": 2027 }, { "epoch": 0.40538717173483924, "grad_norm": 2.09375, "learning_rate": 9.592856454010309e-06, "loss": 1.0629, "step": 2028 }, { "epoch": 0.4055870667899353, "grad_norm": 2.0625, "learning_rate": 9.592439831386378e-06, "loss": 1.0659, "step": 2029 }, { "epoch": 0.4057869618450314, "grad_norm": 2.1875, "learning_rate": 9.59202300476676e-06, "loss": 1.1291, "step": 2030 }, { "epoch": 0.4059868569001274, "grad_norm": 2.046875, "learning_rate": 9.59160597416997e-06, "loss": 1.0006, "step": 2031 }, { "epoch": 0.4061867519552235, "grad_norm": 2.15625, "learning_rate": 9.591188739614534e-06, "loss": 1.1197, "step": 2032 }, { "epoch": 0.40638664701031957, "grad_norm": 2.21875, "learning_rate": 9.590771301118983e-06, "loss": 1.1276, "step": 2033 }, { "epoch": 0.40658654206541567, "grad_norm": 2.140625, "learning_rate": 9.590353658701863e-06, "loss": 1.0585, "step": 2034 }, { "epoch": 0.4067864371205117, "grad_norm": 2.125, "learning_rate": 9.589935812381722e-06, "loss": 1.0337, "step": 2035 }, { "epoch": 0.4069863321756078, "grad_norm": 2.078125, "learning_rate": 9.589517762177122e-06, "loss": 1.0947, "step": 2036 }, { "epoch": 0.40718622723070386, "grad_norm": 2.296875, "learning_rate": 9.589099508106637e-06, "loss": 1.167, "step": 2037 }, { "epoch": 0.40738612228579996, "grad_norm": 2.21875, "learning_rate": 9.588681050188837e-06, "loss": 1.1224, "step": 2038 }, { "epoch": 0.40758601734089606, "grad_norm": 2.125, "learning_rate": 9.588262388442317e-06, "loss": 1.0462, "step": 2039 }, { "epoch": 0.4077859123959921, "grad_norm": 2.265625, "learning_rate": 9.587843522885674e-06, "loss": 1.0847, "step": 2040 }, { "epoch": 0.4079858074510882, "grad_norm": 2.140625, "learning_rate": 9.587424453537508e-06, "loss": 1.0729, "step": 2041 }, { "epoch": 0.40818570250618424, "grad_norm": 2.125, "learning_rate": 9.587005180416439e-06, "loss": 1.0238, "step": 2042 }, { "epoch": 0.40838559756128034, "grad_norm": 2.09375, "learning_rate": 9.586585703541092e-06, "loss": 1.0113, "step": 2043 }, { "epoch": 0.4085854926163764, "grad_norm": 2.125, "learning_rate": 9.586166022930095e-06, "loss": 1.0278, "step": 2044 }, { "epoch": 0.4087853876714725, "grad_norm": 2.109375, "learning_rate": 9.585746138602095e-06, "loss": 1.0631, "step": 2045 }, { "epoch": 0.40898528272656853, "grad_norm": 2.28125, "learning_rate": 9.58532605057574e-06, "loss": 1.1036, "step": 2046 }, { "epoch": 0.40918517778166463, "grad_norm": 2.234375, "learning_rate": 9.584905758869691e-06, "loss": 1.1368, "step": 2047 }, { "epoch": 0.4093850728367607, "grad_norm": 2.265625, "learning_rate": 9.584485263502619e-06, "loss": 1.1605, "step": 2048 }, { "epoch": 0.4095849678918568, "grad_norm": 2.203125, "learning_rate": 9.5840645644932e-06, "loss": 1.0755, "step": 2049 }, { "epoch": 0.4097848629469529, "grad_norm": 2.15625, "learning_rate": 9.583643661860125e-06, "loss": 1.0774, "step": 2050 }, { "epoch": 0.4099847580020489, "grad_norm": 2.0625, "learning_rate": 9.583222555622087e-06, "loss": 0.9607, "step": 2051 }, { "epoch": 0.410184653057145, "grad_norm": 2.125, "learning_rate": 9.582801245797793e-06, "loss": 1.1346, "step": 2052 }, { "epoch": 0.41038454811224107, "grad_norm": 2.078125, "learning_rate": 9.582379732405955e-06, "loss": 1.0721, "step": 2053 }, { "epoch": 0.41058444316733717, "grad_norm": 2.171875, "learning_rate": 9.5819580154653e-06, "loss": 1.1277, "step": 2054 }, { "epoch": 0.4107843382224332, "grad_norm": 2.15625, "learning_rate": 9.581536094994562e-06, "loss": 1.0082, "step": 2055 }, { "epoch": 0.4109842332775293, "grad_norm": 2.125, "learning_rate": 9.581113971012476e-06, "loss": 1.0861, "step": 2056 }, { "epoch": 0.41118412833262535, "grad_norm": 2.15625, "learning_rate": 9.580691643537798e-06, "loss": 1.1013, "step": 2057 }, { "epoch": 0.41138402338772145, "grad_norm": 2.125, "learning_rate": 9.580269112589287e-06, "loss": 1.0299, "step": 2058 }, { "epoch": 0.4115839184428175, "grad_norm": 2.203125, "learning_rate": 9.579846378185712e-06, "loss": 1.0958, "step": 2059 }, { "epoch": 0.4117838134979136, "grad_norm": 2.09375, "learning_rate": 9.579423440345847e-06, "loss": 1.1165, "step": 2060 }, { "epoch": 0.4119837085530097, "grad_norm": 2.1875, "learning_rate": 9.579000299088485e-06, "loss": 1.0798, "step": 2061 }, { "epoch": 0.41218360360810574, "grad_norm": 2.140625, "learning_rate": 9.578576954432416e-06, "loss": 1.0372, "step": 2062 }, { "epoch": 0.41238349866320184, "grad_norm": 2.15625, "learning_rate": 9.57815340639645e-06, "loss": 1.0837, "step": 2063 }, { "epoch": 0.4125833937182979, "grad_norm": 2.046875, "learning_rate": 9.577729654999396e-06, "loss": 1.0542, "step": 2064 }, { "epoch": 0.412783288773394, "grad_norm": 2.046875, "learning_rate": 9.577305700260084e-06, "loss": 1.1162, "step": 2065 }, { "epoch": 0.41298318382849003, "grad_norm": 2.09375, "learning_rate": 9.576881542197337e-06, "loss": 1.0897, "step": 2066 }, { "epoch": 0.41318307888358613, "grad_norm": 2.234375, "learning_rate": 9.576457180830004e-06, "loss": 1.16, "step": 2067 }, { "epoch": 0.4133829739386822, "grad_norm": 2.15625, "learning_rate": 9.576032616176932e-06, "loss": 1.0592, "step": 2068 }, { "epoch": 0.4135828689937783, "grad_norm": 2.0625, "learning_rate": 9.575607848256979e-06, "loss": 1.0421, "step": 2069 }, { "epoch": 0.4137827640488743, "grad_norm": 2.078125, "learning_rate": 9.575182877089014e-06, "loss": 1.0748, "step": 2070 }, { "epoch": 0.4139826591039704, "grad_norm": 2.21875, "learning_rate": 9.574757702691912e-06, "loss": 1.1479, "step": 2071 }, { "epoch": 0.41418255415906646, "grad_norm": 2.0625, "learning_rate": 9.574332325084564e-06, "loss": 0.9352, "step": 2072 }, { "epoch": 0.41438244921416256, "grad_norm": 2.109375, "learning_rate": 9.573906744285862e-06, "loss": 1.0908, "step": 2073 }, { "epoch": 0.41458234426925866, "grad_norm": 2.28125, "learning_rate": 9.573480960314711e-06, "loss": 1.0818, "step": 2074 }, { "epoch": 0.4147822393243547, "grad_norm": 2.140625, "learning_rate": 9.573054973190023e-06, "loss": 1.0848, "step": 2075 }, { "epoch": 0.4149821343794508, "grad_norm": 2.125, "learning_rate": 9.572628782930724e-06, "loss": 1.0214, "step": 2076 }, { "epoch": 0.41518202943454685, "grad_norm": 2.078125, "learning_rate": 9.572202389555741e-06, "loss": 1.1551, "step": 2077 }, { "epoch": 0.41538192448964295, "grad_norm": 2.34375, "learning_rate": 9.571775793084017e-06, "loss": 1.0506, "step": 2078 }, { "epoch": 0.415581819544739, "grad_norm": 2.078125, "learning_rate": 9.5713489935345e-06, "loss": 1.0266, "step": 2079 }, { "epoch": 0.4157817145998351, "grad_norm": 2.078125, "learning_rate": 9.57092199092615e-06, "loss": 1.0646, "step": 2080 }, { "epoch": 0.41598160965493114, "grad_norm": 2.15625, "learning_rate": 9.570494785277931e-06, "loss": 1.0513, "step": 2081 }, { "epoch": 0.41618150471002724, "grad_norm": 2.078125, "learning_rate": 9.570067376608826e-06, "loss": 1.0574, "step": 2082 }, { "epoch": 0.4163813997651233, "grad_norm": 2.140625, "learning_rate": 9.569639764937813e-06, "loss": 1.0695, "step": 2083 }, { "epoch": 0.4165812948202194, "grad_norm": 2.171875, "learning_rate": 9.56921195028389e-06, "loss": 1.1231, "step": 2084 }, { "epoch": 0.4167811898753155, "grad_norm": 2.21875, "learning_rate": 9.56878393266606e-06, "loss": 1.135, "step": 2085 }, { "epoch": 0.4169810849304115, "grad_norm": 2.140625, "learning_rate": 9.568355712103336e-06, "loss": 1.1205, "step": 2086 }, { "epoch": 0.4171809799855076, "grad_norm": 2.21875, "learning_rate": 9.56792728861474e-06, "loss": 1.1319, "step": 2087 }, { "epoch": 0.41738087504060367, "grad_norm": 2.109375, "learning_rate": 9.5674986622193e-06, "loss": 1.0088, "step": 2088 }, { "epoch": 0.41758077009569977, "grad_norm": 2.046875, "learning_rate": 9.567069832936058e-06, "loss": 1.076, "step": 2089 }, { "epoch": 0.4177806651507958, "grad_norm": 2.21875, "learning_rate": 9.566640800784061e-06, "loss": 1.1271, "step": 2090 }, { "epoch": 0.4179805602058919, "grad_norm": 2.09375, "learning_rate": 9.56621156578237e-06, "loss": 1.0794, "step": 2091 }, { "epoch": 0.41818045526098796, "grad_norm": 2.03125, "learning_rate": 9.565782127950047e-06, "loss": 1.0835, "step": 2092 }, { "epoch": 0.41838035031608406, "grad_norm": 2.28125, "learning_rate": 9.56535248730617e-06, "loss": 1.1816, "step": 2093 }, { "epoch": 0.4185802453711801, "grad_norm": 2.171875, "learning_rate": 9.564922643869822e-06, "loss": 1.0854, "step": 2094 }, { "epoch": 0.4187801404262762, "grad_norm": 2.203125, "learning_rate": 9.5644925976601e-06, "loss": 1.0796, "step": 2095 }, { "epoch": 0.4189800354813723, "grad_norm": 2.328125, "learning_rate": 9.564062348696103e-06, "loss": 1.1448, "step": 2096 }, { "epoch": 0.41917993053646835, "grad_norm": 2.265625, "learning_rate": 9.563631896996943e-06, "loss": 1.0599, "step": 2097 }, { "epoch": 0.41937982559156445, "grad_norm": 2.078125, "learning_rate": 9.563201242581743e-06, "loss": 1.0835, "step": 2098 }, { "epoch": 0.4195797206466605, "grad_norm": 2.09375, "learning_rate": 9.562770385469631e-06, "loss": 1.1198, "step": 2099 }, { "epoch": 0.4197796157017566, "grad_norm": 2.125, "learning_rate": 9.562339325679747e-06, "loss": 1.0479, "step": 2100 }, { "epoch": 0.41997951075685264, "grad_norm": 2.125, "learning_rate": 9.561908063231234e-06, "loss": 1.1437, "step": 2101 }, { "epoch": 0.42017940581194874, "grad_norm": 2.171875, "learning_rate": 9.561476598143255e-06, "loss": 1.0515, "step": 2102 }, { "epoch": 0.4203793008670448, "grad_norm": 2.28125, "learning_rate": 9.56104493043497e-06, "loss": 1.0693, "step": 2103 }, { "epoch": 0.4205791959221409, "grad_norm": 2.125, "learning_rate": 9.560613060125558e-06, "loss": 1.1833, "step": 2104 }, { "epoch": 0.4207790909772369, "grad_norm": 1.9453125, "learning_rate": 9.560180987234202e-06, "loss": 1.058, "step": 2105 }, { "epoch": 0.420978986032333, "grad_norm": 2.234375, "learning_rate": 9.559748711780092e-06, "loss": 1.0376, "step": 2106 }, { "epoch": 0.4211788810874291, "grad_norm": 2.171875, "learning_rate": 9.559316233782432e-06, "loss": 1.096, "step": 2107 }, { "epoch": 0.42137877614252517, "grad_norm": 2.015625, "learning_rate": 9.558883553260431e-06, "loss": 1.059, "step": 2108 }, { "epoch": 0.42157867119762127, "grad_norm": 2.0625, "learning_rate": 9.55845067023331e-06, "loss": 1.0785, "step": 2109 }, { "epoch": 0.4217785662527173, "grad_norm": 2.0, "learning_rate": 9.558017584720298e-06, "loss": 1.014, "step": 2110 }, { "epoch": 0.4219784613078134, "grad_norm": 2.25, "learning_rate": 9.55758429674063e-06, "loss": 1.0571, "step": 2111 }, { "epoch": 0.42217835636290946, "grad_norm": 2.140625, "learning_rate": 9.557150806313555e-06, "loss": 1.0677, "step": 2112 }, { "epoch": 0.42237825141800556, "grad_norm": 2.0625, "learning_rate": 9.55671711345833e-06, "loss": 1.0337, "step": 2113 }, { "epoch": 0.4225781464731016, "grad_norm": 2.078125, "learning_rate": 9.556283218194214e-06, "loss": 1.0861, "step": 2114 }, { "epoch": 0.4227780415281977, "grad_norm": 2.171875, "learning_rate": 9.555849120540486e-06, "loss": 1.0313, "step": 2115 }, { "epoch": 0.42297793658329375, "grad_norm": 2.015625, "learning_rate": 9.555414820516427e-06, "loss": 0.9798, "step": 2116 }, { "epoch": 0.42317783163838985, "grad_norm": 2.03125, "learning_rate": 9.554980318141327e-06, "loss": 1.1204, "step": 2117 }, { "epoch": 0.42337772669348595, "grad_norm": 2.203125, "learning_rate": 9.554545613434489e-06, "loss": 1.0616, "step": 2118 }, { "epoch": 0.423577621748582, "grad_norm": 2.015625, "learning_rate": 9.55411070641522e-06, "loss": 1.0907, "step": 2119 }, { "epoch": 0.4237775168036781, "grad_norm": 2.390625, "learning_rate": 9.55367559710284e-06, "loss": 1.1348, "step": 2120 }, { "epoch": 0.42397741185877413, "grad_norm": 2.078125, "learning_rate": 9.553240285516676e-06, "loss": 0.9642, "step": 2121 }, { "epoch": 0.42417730691387023, "grad_norm": 2.171875, "learning_rate": 9.552804771676067e-06, "loss": 1.1451, "step": 2122 }, { "epoch": 0.4243772019689663, "grad_norm": 2.140625, "learning_rate": 9.552369055600354e-06, "loss": 1.0617, "step": 2123 }, { "epoch": 0.4245770970240624, "grad_norm": 2.109375, "learning_rate": 9.551933137308895e-06, "loss": 1.111, "step": 2124 }, { "epoch": 0.4247769920791584, "grad_norm": 2.09375, "learning_rate": 9.551497016821051e-06, "loss": 1.0553, "step": 2125 }, { "epoch": 0.4249768871342545, "grad_norm": 2.0625, "learning_rate": 9.551060694156197e-06, "loss": 1.1344, "step": 2126 }, { "epoch": 0.42517678218935057, "grad_norm": 2.140625, "learning_rate": 9.550624169333713e-06, "loss": 0.9748, "step": 2127 }, { "epoch": 0.42537667724444667, "grad_norm": 2.140625, "learning_rate": 9.550187442372987e-06, "loss": 1.0298, "step": 2128 }, { "epoch": 0.42557657229954277, "grad_norm": 2.015625, "learning_rate": 9.549750513293422e-06, "loss": 1.0584, "step": 2129 }, { "epoch": 0.4257764673546388, "grad_norm": 2.125, "learning_rate": 9.549313382114427e-06, "loss": 1.0891, "step": 2130 }, { "epoch": 0.4259763624097349, "grad_norm": 2.421875, "learning_rate": 9.548876048855417e-06, "loss": 1.1657, "step": 2131 }, { "epoch": 0.42617625746483095, "grad_norm": 2.171875, "learning_rate": 9.548438513535819e-06, "loss": 1.0, "step": 2132 }, { "epoch": 0.42637615251992705, "grad_norm": 2.234375, "learning_rate": 9.548000776175066e-06, "loss": 1.2088, "step": 2133 }, { "epoch": 0.4265760475750231, "grad_norm": 2.1875, "learning_rate": 9.547562836792606e-06, "loss": 1.1251, "step": 2134 }, { "epoch": 0.4267759426301192, "grad_norm": 2.171875, "learning_rate": 9.547124695407888e-06, "loss": 1.1375, "step": 2135 }, { "epoch": 0.42697583768521524, "grad_norm": 2.171875, "learning_rate": 9.546686352040379e-06, "loss": 1.0466, "step": 2136 }, { "epoch": 0.42717573274031134, "grad_norm": 2.203125, "learning_rate": 9.546247806709548e-06, "loss": 1.0535, "step": 2137 }, { "epoch": 0.4273756277954074, "grad_norm": 2.140625, "learning_rate": 9.545809059434874e-06, "loss": 1.0398, "step": 2138 }, { "epoch": 0.4275755228505035, "grad_norm": 2.234375, "learning_rate": 9.545370110235847e-06, "loss": 1.0722, "step": 2139 }, { "epoch": 0.4277754179055996, "grad_norm": 2.09375, "learning_rate": 9.544930959131967e-06, "loss": 1.0896, "step": 2140 }, { "epoch": 0.42797531296069563, "grad_norm": 2.09375, "learning_rate": 9.544491606142737e-06, "loss": 1.0248, "step": 2141 }, { "epoch": 0.42817520801579173, "grad_norm": 2.03125, "learning_rate": 9.544052051287675e-06, "loss": 1.0491, "step": 2142 }, { "epoch": 0.4283751030708878, "grad_norm": 2.125, "learning_rate": 9.543612294586306e-06, "loss": 1.0873, "step": 2143 }, { "epoch": 0.4285749981259839, "grad_norm": 2.140625, "learning_rate": 9.543172336058166e-06, "loss": 1.0051, "step": 2144 }, { "epoch": 0.4287748931810799, "grad_norm": 2.234375, "learning_rate": 9.542732175722796e-06, "loss": 1.1224, "step": 2145 }, { "epoch": 0.428974788236176, "grad_norm": 2.171875, "learning_rate": 9.542291813599746e-06, "loss": 1.1181, "step": 2146 }, { "epoch": 0.42917468329127206, "grad_norm": 2.03125, "learning_rate": 9.541851249708581e-06, "loss": 1.0761, "step": 2147 }, { "epoch": 0.42937457834636816, "grad_norm": 2.03125, "learning_rate": 9.541410484068867e-06, "loss": 1.0671, "step": 2148 }, { "epoch": 0.4295744734014642, "grad_norm": 2.234375, "learning_rate": 9.540969516700185e-06, "loss": 1.1324, "step": 2149 }, { "epoch": 0.4297743684565603, "grad_norm": 2.21875, "learning_rate": 9.540528347622123e-06, "loss": 1.0483, "step": 2150 }, { "epoch": 0.4299742635116564, "grad_norm": 2.15625, "learning_rate": 9.540086976854274e-06, "loss": 1.0794, "step": 2151 }, { "epoch": 0.43017415856675245, "grad_norm": 2.1875, "learning_rate": 9.539645404416249e-06, "loss": 1.108, "step": 2152 }, { "epoch": 0.43037405362184855, "grad_norm": 2.03125, "learning_rate": 9.53920363032766e-06, "loss": 0.9816, "step": 2153 }, { "epoch": 0.4305739486769446, "grad_norm": 2.078125, "learning_rate": 9.538761654608128e-06, "loss": 1.0624, "step": 2154 }, { "epoch": 0.4307738437320407, "grad_norm": 2.0, "learning_rate": 9.53831947727729e-06, "loss": 1.0004, "step": 2155 }, { "epoch": 0.43097373878713674, "grad_norm": 2.109375, "learning_rate": 9.537877098354787e-06, "loss": 1.0317, "step": 2156 }, { "epoch": 0.43117363384223284, "grad_norm": 2.1875, "learning_rate": 9.537434517860265e-06, "loss": 1.058, "step": 2157 }, { "epoch": 0.4313735288973289, "grad_norm": 2.078125, "learning_rate": 9.536991735813388e-06, "loss": 1.0414, "step": 2158 }, { "epoch": 0.431573423952425, "grad_norm": 2.1875, "learning_rate": 9.536548752233822e-06, "loss": 1.1674, "step": 2159 }, { "epoch": 0.43177331900752103, "grad_norm": 2.109375, "learning_rate": 9.536105567141246e-06, "loss": 1.0639, "step": 2160 }, { "epoch": 0.43197321406261713, "grad_norm": 2.15625, "learning_rate": 9.535662180555342e-06, "loss": 1.0177, "step": 2161 }, { "epoch": 0.43217310911771323, "grad_norm": 2.171875, "learning_rate": 9.535218592495812e-06, "loss": 1.145, "step": 2162 }, { "epoch": 0.43237300417280927, "grad_norm": 2.15625, "learning_rate": 9.534774802982356e-06, "loss": 1.0808, "step": 2163 }, { "epoch": 0.43257289922790537, "grad_norm": 2.140625, "learning_rate": 9.534330812034686e-06, "loss": 1.0745, "step": 2164 }, { "epoch": 0.4327727942830014, "grad_norm": 2.046875, "learning_rate": 9.533886619672527e-06, "loss": 0.9411, "step": 2165 }, { "epoch": 0.4329726893380975, "grad_norm": 2.078125, "learning_rate": 9.533442225915607e-06, "loss": 1.0548, "step": 2166 }, { "epoch": 0.43317258439319356, "grad_norm": 2.0625, "learning_rate": 9.532997630783669e-06, "loss": 1.0255, "step": 2167 }, { "epoch": 0.43337247944828966, "grad_norm": 2.25, "learning_rate": 9.53255283429646e-06, "loss": 1.0143, "step": 2168 }, { "epoch": 0.4335723745033857, "grad_norm": 2.21875, "learning_rate": 9.532107836473739e-06, "loss": 1.0807, "step": 2169 }, { "epoch": 0.4337722695584818, "grad_norm": 2.203125, "learning_rate": 9.53166263733527e-06, "loss": 1.1443, "step": 2170 }, { "epoch": 0.43397216461357785, "grad_norm": 2.25, "learning_rate": 9.531217236900832e-06, "loss": 1.1225, "step": 2171 }, { "epoch": 0.43417205966867395, "grad_norm": 2.171875, "learning_rate": 9.530771635190208e-06, "loss": 1.0479, "step": 2172 }, { "epoch": 0.43437195472377005, "grad_norm": 2.078125, "learning_rate": 9.53032583222319e-06, "loss": 1.0765, "step": 2173 }, { "epoch": 0.4345718497788661, "grad_norm": 2.203125, "learning_rate": 9.529879828019586e-06, "loss": 1.1234, "step": 2174 }, { "epoch": 0.4347717448339622, "grad_norm": 2.125, "learning_rate": 9.5294336225992e-06, "loss": 1.088, "step": 2175 }, { "epoch": 0.43497163988905824, "grad_norm": 2.09375, "learning_rate": 9.528987215981859e-06, "loss": 1.065, "step": 2176 }, { "epoch": 0.43517153494415434, "grad_norm": 2.109375, "learning_rate": 9.52854060818739e-06, "loss": 1.0823, "step": 2177 }, { "epoch": 0.4353714299992504, "grad_norm": 2.109375, "learning_rate": 9.528093799235629e-06, "loss": 1.0597, "step": 2178 }, { "epoch": 0.4355713250543465, "grad_norm": 2.140625, "learning_rate": 9.527646789146427e-06, "loss": 1.0214, "step": 2179 }, { "epoch": 0.4357712201094425, "grad_norm": 2.171875, "learning_rate": 9.527199577939636e-06, "loss": 1.0689, "step": 2180 }, { "epoch": 0.4359711151645386, "grad_norm": 2.078125, "learning_rate": 9.526752165635125e-06, "loss": 1.0881, "step": 2181 }, { "epoch": 0.43617101021963467, "grad_norm": 1.9765625, "learning_rate": 9.526304552252766e-06, "loss": 0.9681, "step": 2182 }, { "epoch": 0.43637090527473077, "grad_norm": 2.1875, "learning_rate": 9.52585673781244e-06, "loss": 1.0983, "step": 2183 }, { "epoch": 0.4365708003298268, "grad_norm": 2.0625, "learning_rate": 9.525408722334045e-06, "loss": 1.1078, "step": 2184 }, { "epoch": 0.4367706953849229, "grad_norm": 2.015625, "learning_rate": 9.524960505837475e-06, "loss": 0.992, "step": 2185 }, { "epoch": 0.436970590440019, "grad_norm": 1.96875, "learning_rate": 9.524512088342642e-06, "loss": 1.0459, "step": 2186 }, { "epoch": 0.43717048549511506, "grad_norm": 2.078125, "learning_rate": 9.524063469869467e-06, "loss": 1.0761, "step": 2187 }, { "epoch": 0.43737038055021116, "grad_norm": 2.171875, "learning_rate": 9.523614650437876e-06, "loss": 1.1383, "step": 2188 }, { "epoch": 0.4375702756053072, "grad_norm": 2.171875, "learning_rate": 9.523165630067804e-06, "loss": 1.0518, "step": 2189 }, { "epoch": 0.4377701706604033, "grad_norm": 2.109375, "learning_rate": 9.522716408779198e-06, "loss": 1.0729, "step": 2190 }, { "epoch": 0.43797006571549935, "grad_norm": 2.15625, "learning_rate": 9.522266986592012e-06, "loss": 1.0924, "step": 2191 }, { "epoch": 0.43816996077059545, "grad_norm": 2.25, "learning_rate": 9.521817363526211e-06, "loss": 1.0138, "step": 2192 }, { "epoch": 0.4383698558256915, "grad_norm": 2.0625, "learning_rate": 9.521367539601766e-06, "loss": 1.0787, "step": 2193 }, { "epoch": 0.4385697508807876, "grad_norm": 1.9921875, "learning_rate": 9.520917514838657e-06, "loss": 0.9883, "step": 2194 }, { "epoch": 0.43876964593588363, "grad_norm": 2.03125, "learning_rate": 9.520467289256874e-06, "loss": 0.9786, "step": 2195 }, { "epoch": 0.43896954099097973, "grad_norm": 2.109375, "learning_rate": 9.520016862876416e-06, "loss": 1.03, "step": 2196 }, { "epoch": 0.43916943604607583, "grad_norm": 2.140625, "learning_rate": 9.519566235717295e-06, "loss": 1.0743, "step": 2197 }, { "epoch": 0.4393693311011719, "grad_norm": 2.09375, "learning_rate": 9.519115407799523e-06, "loss": 1.0433, "step": 2198 }, { "epoch": 0.439569226156268, "grad_norm": 2.140625, "learning_rate": 9.518664379143125e-06, "loss": 1.0033, "step": 2199 }, { "epoch": 0.439769121211364, "grad_norm": 2.0, "learning_rate": 9.518213149768141e-06, "loss": 1.0444, "step": 2200 }, { "epoch": 0.4399690162664601, "grad_norm": 2.046875, "learning_rate": 9.517761719694609e-06, "loss": 1.0285, "step": 2201 }, { "epoch": 0.44016891132155617, "grad_norm": 2.28125, "learning_rate": 9.517310088942585e-06, "loss": 1.1743, "step": 2202 }, { "epoch": 0.44036880637665227, "grad_norm": 1.9921875, "learning_rate": 9.516858257532128e-06, "loss": 0.973, "step": 2203 }, { "epoch": 0.4405687014317483, "grad_norm": 2.234375, "learning_rate": 9.516406225483312e-06, "loss": 1.074, "step": 2204 }, { "epoch": 0.4407685964868444, "grad_norm": 2.046875, "learning_rate": 9.515953992816215e-06, "loss": 1.0519, "step": 2205 }, { "epoch": 0.44096849154194045, "grad_norm": 2.125, "learning_rate": 9.515501559550919e-06, "loss": 1.0592, "step": 2206 }, { "epoch": 0.44116838659703655, "grad_norm": 2.09375, "learning_rate": 9.51504892570753e-06, "loss": 1.0736, "step": 2207 }, { "epoch": 0.44136828165213265, "grad_norm": 2.078125, "learning_rate": 9.514596091306148e-06, "loss": 1.1223, "step": 2208 }, { "epoch": 0.4415681767072287, "grad_norm": 2.09375, "learning_rate": 9.514143056366892e-06, "loss": 1.0031, "step": 2209 }, { "epoch": 0.4417680717623248, "grad_norm": 2.0, "learning_rate": 9.513689820909882e-06, "loss": 1.0427, "step": 2210 }, { "epoch": 0.44196796681742084, "grad_norm": 2.078125, "learning_rate": 9.51323638495525e-06, "loss": 1.0908, "step": 2211 }, { "epoch": 0.44216786187251694, "grad_norm": 2.0, "learning_rate": 9.512782748523143e-06, "loss": 0.9979, "step": 2212 }, { "epoch": 0.442367756927613, "grad_norm": 2.140625, "learning_rate": 9.512328911633708e-06, "loss": 1.104, "step": 2213 }, { "epoch": 0.4425676519827091, "grad_norm": 2.171875, "learning_rate": 9.511874874307103e-06, "loss": 1.0095, "step": 2214 }, { "epoch": 0.44276754703780513, "grad_norm": 2.09375, "learning_rate": 9.5114206365635e-06, "loss": 1.1195, "step": 2215 }, { "epoch": 0.44296744209290123, "grad_norm": 2.109375, "learning_rate": 9.510966198423073e-06, "loss": 1.0611, "step": 2216 }, { "epoch": 0.4431673371479973, "grad_norm": 2.140625, "learning_rate": 9.51051155990601e-06, "loss": 1.0874, "step": 2217 }, { "epoch": 0.4433672322030934, "grad_norm": 2.109375, "learning_rate": 9.510056721032504e-06, "loss": 1.1258, "step": 2218 }, { "epoch": 0.4435671272581895, "grad_norm": 2.0625, "learning_rate": 9.509601681822761e-06, "loss": 1.0456, "step": 2219 }, { "epoch": 0.4437670223132855, "grad_norm": 2.125, "learning_rate": 9.509146442296992e-06, "loss": 1.048, "step": 2220 }, { "epoch": 0.4439669173683816, "grad_norm": 2.109375, "learning_rate": 9.508691002475421e-06, "loss": 1.1, "step": 2221 }, { "epoch": 0.44416681242347766, "grad_norm": 2.140625, "learning_rate": 9.508235362378278e-06, "loss": 1.1121, "step": 2222 }, { "epoch": 0.44436670747857376, "grad_norm": 2.125, "learning_rate": 9.507779522025799e-06, "loss": 0.9907, "step": 2223 }, { "epoch": 0.4445666025336698, "grad_norm": 2.265625, "learning_rate": 9.507323481438236e-06, "loss": 1.071, "step": 2224 }, { "epoch": 0.4447664975887659, "grad_norm": 2.09375, "learning_rate": 9.506867240635847e-06, "loss": 1.0096, "step": 2225 }, { "epoch": 0.44496639264386195, "grad_norm": 2.125, "learning_rate": 9.506410799638894e-06, "loss": 1.1819, "step": 2226 }, { "epoch": 0.44516628769895805, "grad_norm": 2.09375, "learning_rate": 9.505954158467656e-06, "loss": 1.1682, "step": 2227 }, { "epoch": 0.4453661827540541, "grad_norm": 2.046875, "learning_rate": 9.505497317142416e-06, "loss": 1.0369, "step": 2228 }, { "epoch": 0.4455660778091502, "grad_norm": 2.09375, "learning_rate": 9.505040275683465e-06, "loss": 1.1164, "step": 2229 }, { "epoch": 0.4457659728642463, "grad_norm": 2.21875, "learning_rate": 9.504583034111108e-06, "loss": 1.1338, "step": 2230 }, { "epoch": 0.44596586791934234, "grad_norm": 2.09375, "learning_rate": 9.504125592445653e-06, "loss": 1.074, "step": 2231 }, { "epoch": 0.44616576297443844, "grad_norm": 2.140625, "learning_rate": 9.50366795070742e-06, "loss": 1.0827, "step": 2232 }, { "epoch": 0.4463656580295345, "grad_norm": 2.125, "learning_rate": 9.503210108916736e-06, "loss": 0.9933, "step": 2233 }, { "epoch": 0.4465655530846306, "grad_norm": 2.140625, "learning_rate": 9.502752067093942e-06, "loss": 1.0505, "step": 2234 }, { "epoch": 0.44676544813972663, "grad_norm": 2.09375, "learning_rate": 9.50229382525938e-06, "loss": 1.0627, "step": 2235 }, { "epoch": 0.44696534319482273, "grad_norm": 2.15625, "learning_rate": 9.50183538343341e-06, "loss": 1.0896, "step": 2236 }, { "epoch": 0.4471652382499188, "grad_norm": 2.21875, "learning_rate": 9.501376741636392e-06, "loss": 1.0012, "step": 2237 }, { "epoch": 0.4473651333050149, "grad_norm": 2.109375, "learning_rate": 9.5009178998887e-06, "loss": 1.1222, "step": 2238 }, { "epoch": 0.4475650283601109, "grad_norm": 2.234375, "learning_rate": 9.500458858210714e-06, "loss": 1.1579, "step": 2239 }, { "epoch": 0.447764923415207, "grad_norm": 2.21875, "learning_rate": 9.499999616622828e-06, "loss": 1.105, "step": 2240 }, { "epoch": 0.4479648184703031, "grad_norm": 2.109375, "learning_rate": 9.499540175145438e-06, "loss": 1.0628, "step": 2241 }, { "epoch": 0.44816471352539916, "grad_norm": 2.015625, "learning_rate": 9.499080533798956e-06, "loss": 0.9828, "step": 2242 }, { "epoch": 0.44836460858049526, "grad_norm": 2.171875, "learning_rate": 9.498620692603797e-06, "loss": 1.0454, "step": 2243 }, { "epoch": 0.4485645036355913, "grad_norm": 2.21875, "learning_rate": 9.498160651580387e-06, "loss": 1.063, "step": 2244 }, { "epoch": 0.4487643986906874, "grad_norm": 2.0, "learning_rate": 9.49770041074916e-06, "loss": 1.0674, "step": 2245 }, { "epoch": 0.44896429374578345, "grad_norm": 2.15625, "learning_rate": 9.497239970130561e-06, "loss": 0.9574, "step": 2246 }, { "epoch": 0.44916418880087955, "grad_norm": 2.171875, "learning_rate": 9.496779329745045e-06, "loss": 1.0144, "step": 2247 }, { "epoch": 0.4493640838559756, "grad_norm": 2.25, "learning_rate": 9.49631848961307e-06, "loss": 1.1661, "step": 2248 }, { "epoch": 0.4495639789110717, "grad_norm": 2.109375, "learning_rate": 9.495857449755109e-06, "loss": 0.9927, "step": 2249 }, { "epoch": 0.44976387396616774, "grad_norm": 2.125, "learning_rate": 9.495396210191639e-06, "loss": 1.0636, "step": 2250 }, { "epoch": 0.44996376902126384, "grad_norm": 2.109375, "learning_rate": 9.49493477094315e-06, "loss": 1.0666, "step": 2251 }, { "epoch": 0.45016366407635994, "grad_norm": 2.09375, "learning_rate": 9.494473132030137e-06, "loss": 1.0508, "step": 2252 }, { "epoch": 0.450363559131456, "grad_norm": 2.125, "learning_rate": 9.494011293473109e-06, "loss": 1.0578, "step": 2253 }, { "epoch": 0.4505634541865521, "grad_norm": 2.15625, "learning_rate": 9.49354925529258e-06, "loss": 1.0574, "step": 2254 }, { "epoch": 0.4507633492416481, "grad_norm": 2.140625, "learning_rate": 9.493087017509072e-06, "loss": 1.1154, "step": 2255 }, { "epoch": 0.4509632442967442, "grad_norm": 2.1875, "learning_rate": 9.49262458014312e-06, "loss": 1.1417, "step": 2256 }, { "epoch": 0.45116313935184027, "grad_norm": 2.046875, "learning_rate": 9.492161943215262e-06, "loss": 1.1611, "step": 2257 }, { "epoch": 0.45136303440693637, "grad_norm": 2.03125, "learning_rate": 9.491699106746051e-06, "loss": 1.0836, "step": 2258 }, { "epoch": 0.4515629294620324, "grad_norm": 2.125, "learning_rate": 9.491236070756045e-06, "loss": 1.1178, "step": 2259 }, { "epoch": 0.4517628245171285, "grad_norm": 2.109375, "learning_rate": 9.490772835265814e-06, "loss": 1.0072, "step": 2260 }, { "epoch": 0.45196271957222456, "grad_norm": 2.171875, "learning_rate": 9.490309400295932e-06, "loss": 1.0635, "step": 2261 }, { "epoch": 0.45216261462732066, "grad_norm": 2.1875, "learning_rate": 9.489845765866986e-06, "loss": 0.9716, "step": 2262 }, { "epoch": 0.45236250968241676, "grad_norm": 2.09375, "learning_rate": 9.48938193199957e-06, "loss": 1.0294, "step": 2263 }, { "epoch": 0.4525624047375128, "grad_norm": 2.28125, "learning_rate": 9.48891789871429e-06, "loss": 1.0198, "step": 2264 }, { "epoch": 0.4527622997926089, "grad_norm": 2.046875, "learning_rate": 9.488453666031755e-06, "loss": 1.0776, "step": 2265 }, { "epoch": 0.45296219484770495, "grad_norm": 2.0, "learning_rate": 9.487989233972587e-06, "loss": 1.0584, "step": 2266 }, { "epoch": 0.45316208990280105, "grad_norm": 2.109375, "learning_rate": 9.487524602557417e-06, "loss": 1.0575, "step": 2267 }, { "epoch": 0.4533619849578971, "grad_norm": 2.125, "learning_rate": 9.487059771806883e-06, "loss": 1.0741, "step": 2268 }, { "epoch": 0.4535618800129932, "grad_norm": 2.15625, "learning_rate": 9.486594741741634e-06, "loss": 1.0547, "step": 2269 }, { "epoch": 0.45376177506808923, "grad_norm": 2.21875, "learning_rate": 9.486129512382327e-06, "loss": 1.0859, "step": 2270 }, { "epoch": 0.45396167012318533, "grad_norm": 2.03125, "learning_rate": 9.485664083749623e-06, "loss": 1.1005, "step": 2271 }, { "epoch": 0.4541615651782814, "grad_norm": 2.21875, "learning_rate": 9.485198455864203e-06, "loss": 1.2026, "step": 2272 }, { "epoch": 0.4543614602333775, "grad_norm": 2.203125, "learning_rate": 9.484732628746744e-06, "loss": 1.0519, "step": 2273 }, { "epoch": 0.4545613552884736, "grad_norm": 2.21875, "learning_rate": 9.484266602417942e-06, "loss": 1.0333, "step": 2274 }, { "epoch": 0.4547612503435696, "grad_norm": 2.15625, "learning_rate": 9.483800376898496e-06, "loss": 1.1083, "step": 2275 }, { "epoch": 0.4549611453986657, "grad_norm": 2.125, "learning_rate": 9.483333952209118e-06, "loss": 1.071, "step": 2276 }, { "epoch": 0.45516104045376177, "grad_norm": 2.03125, "learning_rate": 9.482867328370521e-06, "loss": 0.9757, "step": 2277 }, { "epoch": 0.45536093550885787, "grad_norm": 2.265625, "learning_rate": 9.482400505403439e-06, "loss": 1.1338, "step": 2278 }, { "epoch": 0.4555608305639539, "grad_norm": 2.15625, "learning_rate": 9.481933483328604e-06, "loss": 1.1481, "step": 2279 }, { "epoch": 0.45576072561905, "grad_norm": 2.125, "learning_rate": 9.481466262166763e-06, "loss": 0.9722, "step": 2280 }, { "epoch": 0.45596062067414606, "grad_norm": 2.09375, "learning_rate": 9.480998841938668e-06, "loss": 1.0343, "step": 2281 }, { "epoch": 0.45616051572924216, "grad_norm": 2.171875, "learning_rate": 9.480531222665084e-06, "loss": 1.089, "step": 2282 }, { "epoch": 0.4563604107843382, "grad_norm": 2.03125, "learning_rate": 9.480063404366781e-06, "loss": 1.1038, "step": 2283 }, { "epoch": 0.4565603058394343, "grad_norm": 2.15625, "learning_rate": 9.479595387064542e-06, "loss": 1.0458, "step": 2284 }, { "epoch": 0.4567602008945304, "grad_norm": 2.3125, "learning_rate": 9.479127170779151e-06, "loss": 1.1301, "step": 2285 }, { "epoch": 0.45696009594962644, "grad_norm": 2.203125, "learning_rate": 9.478658755531413e-06, "loss": 1.1147, "step": 2286 }, { "epoch": 0.45715999100472254, "grad_norm": 2.15625, "learning_rate": 9.47819014134213e-06, "loss": 1.0767, "step": 2287 }, { "epoch": 0.4573598860598186, "grad_norm": 2.171875, "learning_rate": 9.47772132823212e-06, "loss": 1.1587, "step": 2288 }, { "epoch": 0.4575597811149147, "grad_norm": 2.09375, "learning_rate": 9.477252316222204e-06, "loss": 1.1138, "step": 2289 }, { "epoch": 0.45775967617001073, "grad_norm": 2.125, "learning_rate": 9.47678310533322e-06, "loss": 0.9616, "step": 2290 }, { "epoch": 0.45795957122510683, "grad_norm": 2.015625, "learning_rate": 9.47631369558601e-06, "loss": 1.029, "step": 2291 }, { "epoch": 0.4581594662802029, "grad_norm": 2.265625, "learning_rate": 9.475844087001423e-06, "loss": 1.0689, "step": 2292 }, { "epoch": 0.458359361335299, "grad_norm": 2.25, "learning_rate": 9.475374279600317e-06, "loss": 1.1476, "step": 2293 }, { "epoch": 0.458559256390395, "grad_norm": 2.21875, "learning_rate": 9.474904273403567e-06, "loss": 1.0776, "step": 2294 }, { "epoch": 0.4587591514454911, "grad_norm": 2.15625, "learning_rate": 9.474434068432046e-06, "loss": 1.0983, "step": 2295 }, { "epoch": 0.45895904650058716, "grad_norm": 2.234375, "learning_rate": 9.47396366470664e-06, "loss": 1.0944, "step": 2296 }, { "epoch": 0.45915894155568326, "grad_norm": 2.109375, "learning_rate": 9.473493062248247e-06, "loss": 1.0984, "step": 2297 }, { "epoch": 0.45935883661077936, "grad_norm": 2.09375, "learning_rate": 9.473022261077771e-06, "loss": 0.881, "step": 2298 }, { "epoch": 0.4595587316658754, "grad_norm": 1.8984375, "learning_rate": 9.472551261216124e-06, "loss": 0.9679, "step": 2299 }, { "epoch": 0.4597586267209715, "grad_norm": 2.265625, "learning_rate": 9.472080062684225e-06, "loss": 1.0331, "step": 2300 }, { "epoch": 0.45995852177606755, "grad_norm": 2.171875, "learning_rate": 9.471608665503008e-06, "loss": 1.1127, "step": 2301 }, { "epoch": 0.46015841683116365, "grad_norm": 2.03125, "learning_rate": 9.471137069693415e-06, "loss": 1.1487, "step": 2302 }, { "epoch": 0.4603583118862597, "grad_norm": 2.09375, "learning_rate": 9.470665275276387e-06, "loss": 1.0939, "step": 2303 }, { "epoch": 0.4605582069413558, "grad_norm": 2.171875, "learning_rate": 9.470193282272886e-06, "loss": 1.0531, "step": 2304 }, { "epoch": 0.46075810199645184, "grad_norm": 2.15625, "learning_rate": 9.469721090703879e-06, "loss": 1.0568, "step": 2305 }, { "epoch": 0.46095799705154794, "grad_norm": 2.15625, "learning_rate": 9.469248700590336e-06, "loss": 1.0505, "step": 2306 }, { "epoch": 0.461157892106644, "grad_norm": 2.09375, "learning_rate": 9.468776111953243e-06, "loss": 1.0995, "step": 2307 }, { "epoch": 0.4613577871617401, "grad_norm": 2.1875, "learning_rate": 9.468303324813595e-06, "loss": 1.1443, "step": 2308 }, { "epoch": 0.4615576822168362, "grad_norm": 2.15625, "learning_rate": 9.467830339192387e-06, "loss": 1.101, "step": 2309 }, { "epoch": 0.46175757727193223, "grad_norm": 2.09375, "learning_rate": 9.467357155110636e-06, "loss": 1.08, "step": 2310 }, { "epoch": 0.46195747232702833, "grad_norm": 2.234375, "learning_rate": 9.466883772589355e-06, "loss": 1.0662, "step": 2311 }, { "epoch": 0.4621573673821244, "grad_norm": 2.171875, "learning_rate": 9.466410191649575e-06, "loss": 1.0828, "step": 2312 }, { "epoch": 0.4623572624372205, "grad_norm": 2.125, "learning_rate": 9.46593641231233e-06, "loss": 1.0318, "step": 2313 }, { "epoch": 0.4625571574923165, "grad_norm": 2.109375, "learning_rate": 9.465462434598669e-06, "loss": 1.0425, "step": 2314 }, { "epoch": 0.4627570525474126, "grad_norm": 2.03125, "learning_rate": 9.464988258529642e-06, "loss": 1.0506, "step": 2315 }, { "epoch": 0.46295694760250866, "grad_norm": 2.125, "learning_rate": 9.464513884126312e-06, "loss": 1.1342, "step": 2316 }, { "epoch": 0.46315684265760476, "grad_norm": 2.015625, "learning_rate": 9.464039311409753e-06, "loss": 1.0277, "step": 2317 }, { "epoch": 0.4633567377127008, "grad_norm": 2.0625, "learning_rate": 9.463564540401046e-06, "loss": 1.0571, "step": 2318 }, { "epoch": 0.4635566327677969, "grad_norm": 2.15625, "learning_rate": 9.463089571121278e-06, "loss": 1.1149, "step": 2319 }, { "epoch": 0.463756527822893, "grad_norm": 2.078125, "learning_rate": 9.462614403591548e-06, "loss": 1.0607, "step": 2320 }, { "epoch": 0.46395642287798905, "grad_norm": 2.0625, "learning_rate": 9.462139037832963e-06, "loss": 1.0649, "step": 2321 }, { "epoch": 0.46415631793308515, "grad_norm": 2.375, "learning_rate": 9.461663473866638e-06, "loss": 1.1467, "step": 2322 }, { "epoch": 0.4643562129881812, "grad_norm": 2.140625, "learning_rate": 9.461187711713697e-06, "loss": 1.1511, "step": 2323 }, { "epoch": 0.4645561080432773, "grad_norm": 2.1875, "learning_rate": 9.460711751395276e-06, "loss": 1.0512, "step": 2324 }, { "epoch": 0.46475600309837334, "grad_norm": 2.125, "learning_rate": 9.460235592932515e-06, "loss": 1.0769, "step": 2325 }, { "epoch": 0.46495589815346944, "grad_norm": 2.140625, "learning_rate": 9.459759236346565e-06, "loss": 1.1492, "step": 2326 }, { "epoch": 0.4651557932085655, "grad_norm": 2.140625, "learning_rate": 9.459282681658585e-06, "loss": 1.0587, "step": 2327 }, { "epoch": 0.4653556882636616, "grad_norm": 2.140625, "learning_rate": 9.458805928889747e-06, "loss": 1.0467, "step": 2328 }, { "epoch": 0.4655555833187576, "grad_norm": 2.296875, "learning_rate": 9.458328978061225e-06, "loss": 1.0781, "step": 2329 }, { "epoch": 0.4657554783738537, "grad_norm": 2.21875, "learning_rate": 9.457851829194205e-06, "loss": 1.0948, "step": 2330 }, { "epoch": 0.4659553734289498, "grad_norm": 2.171875, "learning_rate": 9.457374482309885e-06, "loss": 1.141, "step": 2331 }, { "epoch": 0.46615526848404587, "grad_norm": 2.15625, "learning_rate": 9.456896937429465e-06, "loss": 0.9686, "step": 2332 }, { "epoch": 0.46635516353914197, "grad_norm": 2.125, "learning_rate": 9.456419194574158e-06, "loss": 1.0933, "step": 2333 }, { "epoch": 0.466555058594238, "grad_norm": 2.125, "learning_rate": 9.455941253765188e-06, "loss": 1.048, "step": 2334 }, { "epoch": 0.4667549536493341, "grad_norm": 2.140625, "learning_rate": 9.455463115023783e-06, "loss": 1.105, "step": 2335 }, { "epoch": 0.46695484870443016, "grad_norm": 1.9453125, "learning_rate": 9.45498477837118e-06, "loss": 1.0048, "step": 2336 }, { "epoch": 0.46715474375952626, "grad_norm": 2.171875, "learning_rate": 9.454506243828633e-06, "loss": 0.9989, "step": 2337 }, { "epoch": 0.4673546388146223, "grad_norm": 2.25, "learning_rate": 9.454027511417392e-06, "loss": 1.0726, "step": 2338 }, { "epoch": 0.4675545338697184, "grad_norm": 2.046875, "learning_rate": 9.453548581158726e-06, "loss": 1.1318, "step": 2339 }, { "epoch": 0.46775442892481445, "grad_norm": 2.046875, "learning_rate": 9.453069453073906e-06, "loss": 1.064, "step": 2340 }, { "epoch": 0.46795432397991055, "grad_norm": 2.015625, "learning_rate": 9.452590127184217e-06, "loss": 1.0699, "step": 2341 }, { "epoch": 0.46815421903500665, "grad_norm": 2.09375, "learning_rate": 9.45211060351095e-06, "loss": 1.0564, "step": 2342 }, { "epoch": 0.4683541140901027, "grad_norm": 2.1875, "learning_rate": 9.451630882075407e-06, "loss": 1.086, "step": 2343 }, { "epoch": 0.4685540091451988, "grad_norm": 2.21875, "learning_rate": 9.451150962898894e-06, "loss": 1.1009, "step": 2344 }, { "epoch": 0.46875390420029484, "grad_norm": 2.21875, "learning_rate": 9.450670846002732e-06, "loss": 1.1218, "step": 2345 }, { "epoch": 0.46895379925539094, "grad_norm": 2.203125, "learning_rate": 9.450190531408245e-06, "loss": 1.067, "step": 2346 }, { "epoch": 0.469153694310487, "grad_norm": 2.0625, "learning_rate": 9.44971001913677e-06, "loss": 1.0177, "step": 2347 }, { "epoch": 0.4693535893655831, "grad_norm": 2.03125, "learning_rate": 9.449229309209654e-06, "loss": 0.9842, "step": 2348 }, { "epoch": 0.4695534844206791, "grad_norm": 2.25, "learning_rate": 9.448748401648244e-06, "loss": 1.0315, "step": 2349 }, { "epoch": 0.4697533794757752, "grad_norm": 2.234375, "learning_rate": 9.448267296473905e-06, "loss": 1.1507, "step": 2350 }, { "epoch": 0.46995327453087127, "grad_norm": 1.9609375, "learning_rate": 9.44778599370801e-06, "loss": 1.0273, "step": 2351 }, { "epoch": 0.47015316958596737, "grad_norm": 2.125, "learning_rate": 9.447304493371934e-06, "loss": 1.0988, "step": 2352 }, { "epoch": 0.47035306464106347, "grad_norm": 2.03125, "learning_rate": 9.44682279548707e-06, "loss": 1.1306, "step": 2353 }, { "epoch": 0.4705529596961595, "grad_norm": 2.015625, "learning_rate": 9.44634090007481e-06, "loss": 0.9867, "step": 2354 }, { "epoch": 0.4707528547512556, "grad_norm": 2.046875, "learning_rate": 9.445858807156563e-06, "loss": 1.0614, "step": 2355 }, { "epoch": 0.47095274980635166, "grad_norm": 2.015625, "learning_rate": 9.445376516753743e-06, "loss": 0.9955, "step": 2356 }, { "epoch": 0.47115264486144776, "grad_norm": 2.1875, "learning_rate": 9.444894028887773e-06, "loss": 1.0919, "step": 2357 }, { "epoch": 0.4713525399165438, "grad_norm": 2.078125, "learning_rate": 9.444411343580083e-06, "loss": 0.9668, "step": 2358 }, { "epoch": 0.4715524349716399, "grad_norm": 2.015625, "learning_rate": 9.443928460852118e-06, "loss": 1.089, "step": 2359 }, { "epoch": 0.47175233002673594, "grad_norm": 2.375, "learning_rate": 9.443445380725324e-06, "loss": 1.1675, "step": 2360 }, { "epoch": 0.47195222508183204, "grad_norm": 2.203125, "learning_rate": 9.442962103221161e-06, "loss": 1.0798, "step": 2361 }, { "epoch": 0.4721521201369281, "grad_norm": 2.109375, "learning_rate": 9.442478628361098e-06, "loss": 1.0142, "step": 2362 }, { "epoch": 0.4723520151920242, "grad_norm": 2.09375, "learning_rate": 9.441994956166607e-06, "loss": 1.0302, "step": 2363 }, { "epoch": 0.4725519102471203, "grad_norm": 2.03125, "learning_rate": 9.441511086659175e-06, "loss": 1.0224, "step": 2364 }, { "epoch": 0.47275180530221633, "grad_norm": 2.1875, "learning_rate": 9.441027019860294e-06, "loss": 1.013, "step": 2365 }, { "epoch": 0.47295170035731243, "grad_norm": 2.234375, "learning_rate": 9.440542755791467e-06, "loss": 1.1531, "step": 2366 }, { "epoch": 0.4731515954124085, "grad_norm": 2.140625, "learning_rate": 9.440058294474206e-06, "loss": 1.1263, "step": 2367 }, { "epoch": 0.4733514904675046, "grad_norm": 2.171875, "learning_rate": 9.439573635930029e-06, "loss": 1.2652, "step": 2368 }, { "epoch": 0.4735513855226006, "grad_norm": 2.03125, "learning_rate": 9.439088780180465e-06, "loss": 1.0479, "step": 2369 }, { "epoch": 0.4737512805776967, "grad_norm": 2.015625, "learning_rate": 9.438603727247053e-06, "loss": 0.9835, "step": 2370 }, { "epoch": 0.47395117563279276, "grad_norm": 2.15625, "learning_rate": 9.438118477151336e-06, "loss": 1.105, "step": 2371 }, { "epoch": 0.47415107068788886, "grad_norm": 2.109375, "learning_rate": 9.43763302991487e-06, "loss": 1.094, "step": 2372 }, { "epoch": 0.4743509657429849, "grad_norm": 2.1875, "learning_rate": 9.43714738555922e-06, "loss": 1.0631, "step": 2373 }, { "epoch": 0.474550860798081, "grad_norm": 2.0625, "learning_rate": 9.436661544105958e-06, "loss": 1.0402, "step": 2374 }, { "epoch": 0.4747507558531771, "grad_norm": 2.109375, "learning_rate": 9.436175505576663e-06, "loss": 1.0733, "step": 2375 }, { "epoch": 0.47495065090827315, "grad_norm": 2.046875, "learning_rate": 9.435689269992924e-06, "loss": 0.9708, "step": 2376 }, { "epoch": 0.47515054596336925, "grad_norm": 2.21875, "learning_rate": 9.435202837376344e-06, "loss": 1.0567, "step": 2377 }, { "epoch": 0.4753504410184653, "grad_norm": 2.078125, "learning_rate": 9.434716207748527e-06, "loss": 1.0168, "step": 2378 }, { "epoch": 0.4755503360735614, "grad_norm": 2.140625, "learning_rate": 9.434229381131088e-06, "loss": 1.0316, "step": 2379 }, { "epoch": 0.47575023112865744, "grad_norm": 2.078125, "learning_rate": 9.433742357545655e-06, "loss": 1.0635, "step": 2380 }, { "epoch": 0.47595012618375354, "grad_norm": 2.0625, "learning_rate": 9.433255137013861e-06, "loss": 1.0482, "step": 2381 }, { "epoch": 0.4761500212388496, "grad_norm": 2.15625, "learning_rate": 9.432767719557345e-06, "loss": 1.0387, "step": 2382 }, { "epoch": 0.4763499162939457, "grad_norm": 2.140625, "learning_rate": 9.432280105197761e-06, "loss": 1.0403, "step": 2383 }, { "epoch": 0.47654981134904173, "grad_norm": 2.328125, "learning_rate": 9.43179229395677e-06, "loss": 1.186, "step": 2384 }, { "epoch": 0.47674970640413783, "grad_norm": 2.125, "learning_rate": 9.431304285856037e-06, "loss": 1.0996, "step": 2385 }, { "epoch": 0.47694960145923393, "grad_norm": 2.15625, "learning_rate": 9.43081608091724e-06, "loss": 1.0019, "step": 2386 }, { "epoch": 0.47714949651433, "grad_norm": 2.0625, "learning_rate": 9.430327679162068e-06, "loss": 1.0486, "step": 2387 }, { "epoch": 0.4773493915694261, "grad_norm": 2.03125, "learning_rate": 9.429839080612213e-06, "loss": 1.0941, "step": 2388 }, { "epoch": 0.4775492866245221, "grad_norm": 2.0625, "learning_rate": 9.429350285289378e-06, "loss": 1.1118, "step": 2389 }, { "epoch": 0.4777491816796182, "grad_norm": 2.21875, "learning_rate": 9.428861293215278e-06, "loss": 1.0182, "step": 2390 }, { "epoch": 0.47794907673471426, "grad_norm": 2.234375, "learning_rate": 9.428372104411632e-06, "loss": 1.0864, "step": 2391 }, { "epoch": 0.47814897178981036, "grad_norm": 2.09375, "learning_rate": 9.427882718900168e-06, "loss": 1.0834, "step": 2392 }, { "epoch": 0.4783488668449064, "grad_norm": 2.078125, "learning_rate": 9.42739313670263e-06, "loss": 1.0204, "step": 2393 }, { "epoch": 0.4785487619000025, "grad_norm": 2.0625, "learning_rate": 9.42690335784076e-06, "loss": 0.9799, "step": 2394 }, { "epoch": 0.47874865695509855, "grad_norm": 2.25, "learning_rate": 9.426413382336314e-06, "loss": 1.16, "step": 2395 }, { "epoch": 0.47894855201019465, "grad_norm": 2.1875, "learning_rate": 9.42592321021106e-06, "loss": 1.1539, "step": 2396 }, { "epoch": 0.4791484470652907, "grad_norm": 2.078125, "learning_rate": 9.42543284148677e-06, "loss": 1.108, "step": 2397 }, { "epoch": 0.4793483421203868, "grad_norm": 2.09375, "learning_rate": 9.424942276185226e-06, "loss": 1.0172, "step": 2398 }, { "epoch": 0.4795482371754829, "grad_norm": 2.109375, "learning_rate": 9.424451514328218e-06, "loss": 1.0551, "step": 2399 }, { "epoch": 0.47974813223057894, "grad_norm": 2.171875, "learning_rate": 9.423960555937546e-06, "loss": 1.054, "step": 2400 }, { "epoch": 0.47994802728567504, "grad_norm": 1.953125, "learning_rate": 9.423469401035019e-06, "loss": 0.9714, "step": 2401 }, { "epoch": 0.4801479223407711, "grad_norm": 2.15625, "learning_rate": 9.422978049642456e-06, "loss": 1.084, "step": 2402 }, { "epoch": 0.4803478173958672, "grad_norm": 2.171875, "learning_rate": 9.422486501781676e-06, "loss": 1.0797, "step": 2403 }, { "epoch": 0.4805477124509632, "grad_norm": 2.078125, "learning_rate": 9.421994757474522e-06, "loss": 1.0522, "step": 2404 }, { "epoch": 0.4807476075060593, "grad_norm": 2.171875, "learning_rate": 9.421502816742829e-06, "loss": 1.1385, "step": 2405 }, { "epoch": 0.48094750256115537, "grad_norm": 2.0625, "learning_rate": 9.421010679608455e-06, "loss": 1.0394, "step": 2406 }, { "epoch": 0.48114739761625147, "grad_norm": 2.09375, "learning_rate": 9.42051834609326e-06, "loss": 1.0059, "step": 2407 }, { "epoch": 0.4813472926713475, "grad_norm": 2.078125, "learning_rate": 9.420025816219111e-06, "loss": 1.1555, "step": 2408 }, { "epoch": 0.4815471877264436, "grad_norm": 2.140625, "learning_rate": 9.419533090007888e-06, "loss": 1.0863, "step": 2409 }, { "epoch": 0.4817470827815397, "grad_norm": 2.1875, "learning_rate": 9.419040167481477e-06, "loss": 1.1842, "step": 2410 }, { "epoch": 0.48194697783663576, "grad_norm": 2.15625, "learning_rate": 9.418547048661772e-06, "loss": 1.1178, "step": 2411 }, { "epoch": 0.48214687289173186, "grad_norm": 2.109375, "learning_rate": 9.418053733570682e-06, "loss": 1.0866, "step": 2412 }, { "epoch": 0.4823467679468279, "grad_norm": 2.078125, "learning_rate": 9.417560222230115e-06, "loss": 1.1088, "step": 2413 }, { "epoch": 0.482546663001924, "grad_norm": 2.15625, "learning_rate": 9.417066514661995e-06, "loss": 1.0889, "step": 2414 }, { "epoch": 0.48274655805702005, "grad_norm": 2.125, "learning_rate": 9.416572610888253e-06, "loss": 1.0295, "step": 2415 }, { "epoch": 0.48294645311211615, "grad_norm": 2.0, "learning_rate": 9.416078510930827e-06, "loss": 1.0452, "step": 2416 }, { "epoch": 0.4831463481672122, "grad_norm": 2.171875, "learning_rate": 9.415584214811665e-06, "loss": 1.0287, "step": 2417 }, { "epoch": 0.4833462432223083, "grad_norm": 2.109375, "learning_rate": 9.415089722552723e-06, "loss": 1.0204, "step": 2418 }, { "epoch": 0.48354613827740434, "grad_norm": 2.0625, "learning_rate": 9.414595034175968e-06, "loss": 1.031, "step": 2419 }, { "epoch": 0.48374603333250044, "grad_norm": 2.1875, "learning_rate": 9.414100149703373e-06, "loss": 1.1251, "step": 2420 }, { "epoch": 0.48394592838759654, "grad_norm": 1.9921875, "learning_rate": 9.413605069156921e-06, "loss": 1.0836, "step": 2421 }, { "epoch": 0.4841458234426926, "grad_norm": 2.015625, "learning_rate": 9.413109792558603e-06, "loss": 1.0684, "step": 2422 }, { "epoch": 0.4843457184977887, "grad_norm": 2.109375, "learning_rate": 9.41261431993042e-06, "loss": 1.091, "step": 2423 }, { "epoch": 0.4845456135528847, "grad_norm": 2.234375, "learning_rate": 9.41211865129438e-06, "loss": 1.1022, "step": 2424 }, { "epoch": 0.4847455086079808, "grad_norm": 2.1875, "learning_rate": 9.411622786672499e-06, "loss": 1.1228, "step": 2425 }, { "epoch": 0.48494540366307687, "grad_norm": 2.171875, "learning_rate": 9.411126726086807e-06, "loss": 1.1744, "step": 2426 }, { "epoch": 0.48514529871817297, "grad_norm": 2.03125, "learning_rate": 9.410630469559336e-06, "loss": 1.0278, "step": 2427 }, { "epoch": 0.485345193773269, "grad_norm": 1.96875, "learning_rate": 9.41013401711213e-06, "loss": 1.0577, "step": 2428 }, { "epoch": 0.4855450888283651, "grad_norm": 2.109375, "learning_rate": 9.409637368767244e-06, "loss": 1.0304, "step": 2429 }, { "epoch": 0.48574498388346116, "grad_norm": 2.09375, "learning_rate": 9.409140524546736e-06, "loss": 0.9207, "step": 2430 }, { "epoch": 0.48594487893855726, "grad_norm": 2.109375, "learning_rate": 9.408643484472676e-06, "loss": 1.0671, "step": 2431 }, { "epoch": 0.48614477399365336, "grad_norm": 2.09375, "learning_rate": 9.408146248567143e-06, "loss": 1.044, "step": 2432 }, { "epoch": 0.4863446690487494, "grad_norm": 2.25, "learning_rate": 9.407648816852226e-06, "loss": 1.0707, "step": 2433 }, { "epoch": 0.4865445641038455, "grad_norm": 2.015625, "learning_rate": 9.407151189350019e-06, "loss": 1.0925, "step": 2434 }, { "epoch": 0.48674445915894154, "grad_norm": 2.125, "learning_rate": 9.406653366082626e-06, "loss": 1.0222, "step": 2435 }, { "epoch": 0.48694435421403764, "grad_norm": 2.140625, "learning_rate": 9.406155347072162e-06, "loss": 1.0815, "step": 2436 }, { "epoch": 0.4871442492691337, "grad_norm": 2.25, "learning_rate": 9.405657132340746e-06, "loss": 1.0399, "step": 2437 }, { "epoch": 0.4873441443242298, "grad_norm": 2.125, "learning_rate": 9.405158721910514e-06, "loss": 1.0846, "step": 2438 }, { "epoch": 0.48754403937932583, "grad_norm": 2.09375, "learning_rate": 9.4046601158036e-06, "loss": 1.0979, "step": 2439 }, { "epoch": 0.48774393443442193, "grad_norm": 2.28125, "learning_rate": 9.404161314042155e-06, "loss": 1.0644, "step": 2440 }, { "epoch": 0.487943829489518, "grad_norm": 2.03125, "learning_rate": 9.403662316648335e-06, "loss": 1.0271, "step": 2441 }, { "epoch": 0.4881437245446141, "grad_norm": 2.109375, "learning_rate": 9.403163123644303e-06, "loss": 1.0608, "step": 2442 }, { "epoch": 0.4883436195997102, "grad_norm": 2.125, "learning_rate": 9.402663735052238e-06, "loss": 1.0577, "step": 2443 }, { "epoch": 0.4885435146548062, "grad_norm": 2.15625, "learning_rate": 9.402164150894318e-06, "loss": 1.1031, "step": 2444 }, { "epoch": 0.4887434097099023, "grad_norm": 2.296875, "learning_rate": 9.401664371192738e-06, "loss": 0.9917, "step": 2445 }, { "epoch": 0.48894330476499837, "grad_norm": 2.234375, "learning_rate": 9.401164395969697e-06, "loss": 1.1124, "step": 2446 }, { "epoch": 0.48914319982009447, "grad_norm": 2.09375, "learning_rate": 9.400664225247402e-06, "loss": 1.1387, "step": 2447 }, { "epoch": 0.4893430948751905, "grad_norm": 2.09375, "learning_rate": 9.400163859048073e-06, "loss": 1.0916, "step": 2448 }, { "epoch": 0.4895429899302866, "grad_norm": 2.0625, "learning_rate": 9.399663297393937e-06, "loss": 1.0525, "step": 2449 }, { "epoch": 0.48974288498538265, "grad_norm": 2.203125, "learning_rate": 9.399162540307225e-06, "loss": 1.0592, "step": 2450 }, { "epoch": 0.48994278004047875, "grad_norm": 2.046875, "learning_rate": 9.398661587810183e-06, "loss": 1.0823, "step": 2451 }, { "epoch": 0.4901426750955748, "grad_norm": 2.140625, "learning_rate": 9.398160439925064e-06, "loss": 0.9808, "step": 2452 }, { "epoch": 0.4903425701506709, "grad_norm": 2.25, "learning_rate": 9.397659096674128e-06, "loss": 1.1753, "step": 2453 }, { "epoch": 0.490542465205767, "grad_norm": 2.15625, "learning_rate": 9.397157558079644e-06, "loss": 1.1111, "step": 2454 }, { "epoch": 0.49074236026086304, "grad_norm": 2.15625, "learning_rate": 9.39665582416389e-06, "loss": 0.9791, "step": 2455 }, { "epoch": 0.49094225531595914, "grad_norm": 2.046875, "learning_rate": 9.396153894949155e-06, "loss": 1.0467, "step": 2456 }, { "epoch": 0.4911421503710552, "grad_norm": 2.125, "learning_rate": 9.395651770457735e-06, "loss": 1.0843, "step": 2457 }, { "epoch": 0.4913420454261513, "grad_norm": 2.140625, "learning_rate": 9.39514945071193e-06, "loss": 1.0924, "step": 2458 }, { "epoch": 0.49154194048124733, "grad_norm": 2.265625, "learning_rate": 9.394646935734057e-06, "loss": 1.1448, "step": 2459 }, { "epoch": 0.49174183553634343, "grad_norm": 2.25, "learning_rate": 9.394144225546436e-06, "loss": 1.0852, "step": 2460 }, { "epoch": 0.4919417305914395, "grad_norm": 2.25, "learning_rate": 9.393641320171398e-06, "loss": 1.1342, "step": 2461 }, { "epoch": 0.4921416256465356, "grad_norm": 2.1875, "learning_rate": 9.393138219631283e-06, "loss": 1.0038, "step": 2462 }, { "epoch": 0.4923415207016316, "grad_norm": 2.125, "learning_rate": 9.392634923948437e-06, "loss": 0.9803, "step": 2463 }, { "epoch": 0.4925414157567277, "grad_norm": 2.125, "learning_rate": 9.392131433145216e-06, "loss": 1.0426, "step": 2464 }, { "epoch": 0.4927413108118238, "grad_norm": 2.109375, "learning_rate": 9.391627747243986e-06, "loss": 1.0738, "step": 2465 }, { "epoch": 0.49294120586691986, "grad_norm": 2.0, "learning_rate": 9.391123866267121e-06, "loss": 0.9984, "step": 2466 }, { "epoch": 0.49314110092201596, "grad_norm": 2.125, "learning_rate": 9.390619790237003e-06, "loss": 1.0881, "step": 2467 }, { "epoch": 0.493340995977112, "grad_norm": 2.078125, "learning_rate": 9.390115519176022e-06, "loss": 1.0613, "step": 2468 }, { "epoch": 0.4935408910322081, "grad_norm": 2.109375, "learning_rate": 9.38961105310658e-06, "loss": 1.0991, "step": 2469 }, { "epoch": 0.49374078608730415, "grad_norm": 2.1875, "learning_rate": 9.389106392051083e-06, "loss": 1.0358, "step": 2470 }, { "epoch": 0.49394068114240025, "grad_norm": 2.125, "learning_rate": 9.388601536031949e-06, "loss": 1.0046, "step": 2471 }, { "epoch": 0.4941405761974963, "grad_norm": 2.171875, "learning_rate": 9.388096485071603e-06, "loss": 1.0651, "step": 2472 }, { "epoch": 0.4943404712525924, "grad_norm": 2.046875, "learning_rate": 9.387591239192479e-06, "loss": 1.0863, "step": 2473 }, { "epoch": 0.49454036630768844, "grad_norm": 2.265625, "learning_rate": 9.387085798417021e-06, "loss": 1.1749, "step": 2474 }, { "epoch": 0.49474026136278454, "grad_norm": 1.9765625, "learning_rate": 9.386580162767682e-06, "loss": 1.025, "step": 2475 }, { "epoch": 0.49494015641788064, "grad_norm": 2.0625, "learning_rate": 9.38607433226692e-06, "loss": 1.0639, "step": 2476 }, { "epoch": 0.4951400514729767, "grad_norm": 2.265625, "learning_rate": 9.385568306937204e-06, "loss": 1.1536, "step": 2477 }, { "epoch": 0.4953399465280728, "grad_norm": 2.03125, "learning_rate": 9.385062086801013e-06, "loss": 1.0838, "step": 2478 }, { "epoch": 0.4955398415831688, "grad_norm": 1.9765625, "learning_rate": 9.384555671880834e-06, "loss": 1.0367, "step": 2479 }, { "epoch": 0.4957397366382649, "grad_norm": 1.984375, "learning_rate": 9.384049062199157e-06, "loss": 1.0263, "step": 2480 }, { "epoch": 0.49593963169336097, "grad_norm": 1.9375, "learning_rate": 9.383542257778491e-06, "loss": 0.9039, "step": 2481 }, { "epoch": 0.49613952674845707, "grad_norm": 2.15625, "learning_rate": 9.383035258641345e-06, "loss": 1.0273, "step": 2482 }, { "epoch": 0.4963394218035531, "grad_norm": 2.109375, "learning_rate": 9.382528064810242e-06, "loss": 1.0644, "step": 2483 }, { "epoch": 0.4965393168586492, "grad_norm": 2.09375, "learning_rate": 9.382020676307708e-06, "loss": 1.1447, "step": 2484 }, { "epoch": 0.49673921191374526, "grad_norm": 2.25, "learning_rate": 9.381513093156286e-06, "loss": 1.0842, "step": 2485 }, { "epoch": 0.49693910696884136, "grad_norm": 2.265625, "learning_rate": 9.381005315378519e-06, "loss": 1.1256, "step": 2486 }, { "epoch": 0.49713900202393746, "grad_norm": 2.09375, "learning_rate": 9.380497342996966e-06, "loss": 1.0525, "step": 2487 }, { "epoch": 0.4973388970790335, "grad_norm": 2.046875, "learning_rate": 9.379989176034187e-06, "loss": 1.0753, "step": 2488 }, { "epoch": 0.4975387921341296, "grad_norm": 2.140625, "learning_rate": 9.379480814512756e-06, "loss": 1.0823, "step": 2489 }, { "epoch": 0.49773868718922565, "grad_norm": 2.125, "learning_rate": 9.378972258455256e-06, "loss": 1.0258, "step": 2490 }, { "epoch": 0.49793858224432175, "grad_norm": 2.09375, "learning_rate": 9.378463507884276e-06, "loss": 1.1388, "step": 2491 }, { "epoch": 0.4981384772994178, "grad_norm": 2.140625, "learning_rate": 9.377954562822416e-06, "loss": 1.1564, "step": 2492 }, { "epoch": 0.4983383723545139, "grad_norm": 2.21875, "learning_rate": 9.37744542329228e-06, "loss": 1.1278, "step": 2493 }, { "epoch": 0.49853826740960994, "grad_norm": 2.046875, "learning_rate": 9.376936089316487e-06, "loss": 1.0647, "step": 2494 }, { "epoch": 0.49873816246470604, "grad_norm": 2.1875, "learning_rate": 9.376426560917659e-06, "loss": 1.1887, "step": 2495 }, { "epoch": 0.4989380575198021, "grad_norm": 2.03125, "learning_rate": 9.37591683811843e-06, "loss": 1.0494, "step": 2496 }, { "epoch": 0.4991379525748982, "grad_norm": 2.078125, "learning_rate": 9.375406920941444e-06, "loss": 1.09, "step": 2497 }, { "epoch": 0.4993378476299943, "grad_norm": 2.03125, "learning_rate": 9.37489680940935e-06, "loss": 1.0159, "step": 2498 }, { "epoch": 0.4995377426850903, "grad_norm": 2.09375, "learning_rate": 9.374386503544805e-06, "loss": 1.0739, "step": 2499 }, { "epoch": 0.4997376377401864, "grad_norm": 2.03125, "learning_rate": 9.37387600337048e-06, "loss": 1.0217, "step": 2500 }, { "epoch": 0.49993753279528247, "grad_norm": 2.109375, "learning_rate": 9.373365308909052e-06, "loss": 1.0345, "step": 2501 }, { "epoch": 0.5001374278503785, "grad_norm": 2.1875, "learning_rate": 9.372854420183201e-06, "loss": 1.0266, "step": 2502 }, { "epoch": 0.5003373229054746, "grad_norm": 2.265625, "learning_rate": 9.372343337215627e-06, "loss": 1.1541, "step": 2503 }, { "epoch": 0.5005372179605707, "grad_norm": 2.109375, "learning_rate": 9.371832060029027e-06, "loss": 1.0033, "step": 2504 }, { "epoch": 0.5007371130156668, "grad_norm": 2.03125, "learning_rate": 9.371320588646113e-06, "loss": 1.0522, "step": 2505 }, { "epoch": 0.5009370080707628, "grad_norm": 2.1875, "learning_rate": 9.370808923089606e-06, "loss": 1.1124, "step": 2506 }, { "epoch": 0.5011369031258589, "grad_norm": 1.953125, "learning_rate": 9.370297063382235e-06, "loss": 1.0216, "step": 2507 }, { "epoch": 0.501336798180955, "grad_norm": 2.046875, "learning_rate": 9.369785009546732e-06, "loss": 1.0626, "step": 2508 }, { "epoch": 0.5015366932360511, "grad_norm": 2.140625, "learning_rate": 9.369272761605848e-06, "loss": 1.076, "step": 2509 }, { "epoch": 0.5017365882911472, "grad_norm": 2.15625, "learning_rate": 9.368760319582334e-06, "loss": 1.0161, "step": 2510 }, { "epoch": 0.5019364833462432, "grad_norm": 2.140625, "learning_rate": 9.368247683498952e-06, "loss": 1.1571, "step": 2511 }, { "epoch": 0.5021363784013393, "grad_norm": 2.34375, "learning_rate": 9.367734853378476e-06, "loss": 1.0536, "step": 2512 }, { "epoch": 0.5023362734564354, "grad_norm": 2.015625, "learning_rate": 9.367221829243685e-06, "loss": 0.9218, "step": 2513 }, { "epoch": 0.5025361685115315, "grad_norm": 2.40625, "learning_rate": 9.366708611117366e-06, "loss": 0.9943, "step": 2514 }, { "epoch": 0.5027360635666275, "grad_norm": 2.0625, "learning_rate": 9.366195199022315e-06, "loss": 1.0951, "step": 2515 }, { "epoch": 0.5029359586217236, "grad_norm": 2.15625, "learning_rate": 9.365681592981341e-06, "loss": 1.0603, "step": 2516 }, { "epoch": 0.5031358536768197, "grad_norm": 2.203125, "learning_rate": 9.365167793017258e-06, "loss": 1.0842, "step": 2517 }, { "epoch": 0.5033357487319158, "grad_norm": 2.140625, "learning_rate": 9.364653799152887e-06, "loss": 1.1236, "step": 2518 }, { "epoch": 0.5035356437870118, "grad_norm": 2.078125, "learning_rate": 9.36413961141106e-06, "loss": 0.9942, "step": 2519 }, { "epoch": 0.5037355388421079, "grad_norm": 2.203125, "learning_rate": 9.363625229814617e-06, "loss": 1.0299, "step": 2520 }, { "epoch": 0.503935433897204, "grad_norm": 2.0625, "learning_rate": 9.363110654386409e-06, "loss": 0.9862, "step": 2521 }, { "epoch": 0.5041353289523001, "grad_norm": 2.03125, "learning_rate": 9.36259588514929e-06, "loss": 1.0143, "step": 2522 }, { "epoch": 0.5043352240073962, "grad_norm": 2.09375, "learning_rate": 9.36208092212613e-06, "loss": 1.0184, "step": 2523 }, { "epoch": 0.5045351190624922, "grad_norm": 2.015625, "learning_rate": 9.361565765339799e-06, "loss": 1.0772, "step": 2524 }, { "epoch": 0.5047350141175883, "grad_norm": 2.125, "learning_rate": 9.361050414813184e-06, "loss": 1.0264, "step": 2525 }, { "epoch": 0.5049349091726844, "grad_norm": 2.15625, "learning_rate": 9.360534870569175e-06, "loss": 1.0159, "step": 2526 }, { "epoch": 0.5051348042277805, "grad_norm": 2.21875, "learning_rate": 9.360019132630672e-06, "loss": 1.1623, "step": 2527 }, { "epoch": 0.5053346992828764, "grad_norm": 2.140625, "learning_rate": 9.359503201020587e-06, "loss": 0.9668, "step": 2528 }, { "epoch": 0.5055345943379725, "grad_norm": 2.015625, "learning_rate": 9.358987075761834e-06, "loss": 1.0134, "step": 2529 }, { "epoch": 0.5057344893930686, "grad_norm": 2.203125, "learning_rate": 9.35847075687734e-06, "loss": 1.116, "step": 2530 }, { "epoch": 0.5059343844481647, "grad_norm": 1.96875, "learning_rate": 9.35795424439004e-06, "loss": 1.011, "step": 2531 }, { "epoch": 0.5061342795032608, "grad_norm": 2.1875, "learning_rate": 9.35743753832288e-06, "loss": 1.0904, "step": 2532 }, { "epoch": 0.5063341745583568, "grad_norm": 2.125, "learning_rate": 9.356920638698809e-06, "loss": 1.0521, "step": 2533 }, { "epoch": 0.5065340696134529, "grad_norm": 2.046875, "learning_rate": 9.35640354554079e-06, "loss": 0.9193, "step": 2534 }, { "epoch": 0.506733964668549, "grad_norm": 2.078125, "learning_rate": 9.355886258871786e-06, "loss": 1.0674, "step": 2535 }, { "epoch": 0.5069338597236451, "grad_norm": 2.0, "learning_rate": 9.355368778714784e-06, "loss": 1.0886, "step": 2536 }, { "epoch": 0.5071337547787411, "grad_norm": 2.078125, "learning_rate": 9.354851105092765e-06, "loss": 1.099, "step": 2537 }, { "epoch": 0.5073336498338372, "grad_norm": 2.109375, "learning_rate": 9.354333238028726e-06, "loss": 1.1305, "step": 2538 }, { "epoch": 0.5075335448889333, "grad_norm": 2.0625, "learning_rate": 9.353815177545666e-06, "loss": 1.0836, "step": 2539 }, { "epoch": 0.5077334399440294, "grad_norm": 2.140625, "learning_rate": 9.353296923666605e-06, "loss": 1.0657, "step": 2540 }, { "epoch": 0.5079333349991254, "grad_norm": 1.96875, "learning_rate": 9.352778476414556e-06, "loss": 1.0458, "step": 2541 }, { "epoch": 0.5081332300542215, "grad_norm": 2.046875, "learning_rate": 9.352259835812556e-06, "loss": 1.059, "step": 2542 }, { "epoch": 0.5083331251093176, "grad_norm": 2.140625, "learning_rate": 9.351741001883636e-06, "loss": 1.1243, "step": 2543 }, { "epoch": 0.5085330201644137, "grad_norm": 2.296875, "learning_rate": 9.351221974650846e-06, "loss": 1.1725, "step": 2544 }, { "epoch": 0.5087329152195098, "grad_norm": 2.0, "learning_rate": 9.350702754137242e-06, "loss": 1.0211, "step": 2545 }, { "epoch": 0.5089328102746058, "grad_norm": 2.34375, "learning_rate": 9.350183340365884e-06, "loss": 1.0711, "step": 2546 }, { "epoch": 0.5091327053297019, "grad_norm": 1.984375, "learning_rate": 9.349663733359848e-06, "loss": 1.0382, "step": 2547 }, { "epoch": 0.509332600384798, "grad_norm": 2.1875, "learning_rate": 9.349143933142214e-06, "loss": 1.1329, "step": 2548 }, { "epoch": 0.5095324954398941, "grad_norm": 2.328125, "learning_rate": 9.34862393973607e-06, "loss": 1.0075, "step": 2549 }, { "epoch": 0.5097323904949901, "grad_norm": 2.21875, "learning_rate": 9.348103753164515e-06, "loss": 1.0336, "step": 2550 }, { "epoch": 0.5099322855500862, "grad_norm": 2.171875, "learning_rate": 9.347583373450657e-06, "loss": 1.1463, "step": 2551 }, { "epoch": 0.5101321806051823, "grad_norm": 2.078125, "learning_rate": 9.347062800617609e-06, "loss": 1.0115, "step": 2552 }, { "epoch": 0.5103320756602784, "grad_norm": 2.09375, "learning_rate": 9.346542034688495e-06, "loss": 1.1991, "step": 2553 }, { "epoch": 0.5105319707153745, "grad_norm": 2.046875, "learning_rate": 9.346021075686448e-06, "loss": 0.978, "step": 2554 }, { "epoch": 0.5107318657704705, "grad_norm": 2.1875, "learning_rate": 9.345499923634612e-06, "loss": 1.0984, "step": 2555 }, { "epoch": 0.5109317608255666, "grad_norm": 1.9921875, "learning_rate": 9.34497857855613e-06, "loss": 1.0057, "step": 2556 }, { "epoch": 0.5111316558806627, "grad_norm": 2.15625, "learning_rate": 9.344457040474164e-06, "loss": 1.1259, "step": 2557 }, { "epoch": 0.5113315509357588, "grad_norm": 1.9609375, "learning_rate": 9.343935309411882e-06, "loss": 0.9573, "step": 2558 }, { "epoch": 0.5115314459908548, "grad_norm": 2.015625, "learning_rate": 9.343413385392457e-06, "loss": 1.0034, "step": 2559 }, { "epoch": 0.5117313410459509, "grad_norm": 2.15625, "learning_rate": 9.342891268439071e-06, "loss": 1.0915, "step": 2560 }, { "epoch": 0.511931236101047, "grad_norm": 2.015625, "learning_rate": 9.342368958574921e-06, "loss": 1.0353, "step": 2561 }, { "epoch": 0.5121311311561431, "grad_norm": 2.125, "learning_rate": 9.341846455823206e-06, "loss": 1.1109, "step": 2562 }, { "epoch": 0.512331026211239, "grad_norm": 2.171875, "learning_rate": 9.341323760207135e-06, "loss": 1.1072, "step": 2563 }, { "epoch": 0.5125309212663351, "grad_norm": 2.109375, "learning_rate": 9.340800871749925e-06, "loss": 1.0226, "step": 2564 }, { "epoch": 0.5127308163214312, "grad_norm": 2.046875, "learning_rate": 9.340277790474804e-06, "loss": 1.0575, "step": 2565 }, { "epoch": 0.5129307113765273, "grad_norm": 2.171875, "learning_rate": 9.33975451640501e-06, "loss": 1.0841, "step": 2566 }, { "epoch": 0.5131306064316234, "grad_norm": 2.125, "learning_rate": 9.339231049563779e-06, "loss": 1.141, "step": 2567 }, { "epoch": 0.5133305014867194, "grad_norm": 2.25, "learning_rate": 9.33870738997437e-06, "loss": 1.1355, "step": 2568 }, { "epoch": 0.5135303965418155, "grad_norm": 2.171875, "learning_rate": 9.338183537660043e-06, "loss": 1.0538, "step": 2569 }, { "epoch": 0.5137302915969116, "grad_norm": 2.234375, "learning_rate": 9.337659492644067e-06, "loss": 1.1484, "step": 2570 }, { "epoch": 0.5139301866520077, "grad_norm": 2.03125, "learning_rate": 9.337135254949719e-06, "loss": 1.0157, "step": 2571 }, { "epoch": 0.5141300817071037, "grad_norm": 2.203125, "learning_rate": 9.336610824600288e-06, "loss": 1.0814, "step": 2572 }, { "epoch": 0.5143299767621998, "grad_norm": 2.15625, "learning_rate": 9.336086201619065e-06, "loss": 0.9951, "step": 2573 }, { "epoch": 0.5145298718172959, "grad_norm": 2.109375, "learning_rate": 9.335561386029356e-06, "loss": 1.0877, "step": 2574 }, { "epoch": 0.514729766872392, "grad_norm": 2.15625, "learning_rate": 9.335036377854474e-06, "loss": 1.0248, "step": 2575 }, { "epoch": 0.514929661927488, "grad_norm": 2.046875, "learning_rate": 9.334511177117739e-06, "loss": 1.0022, "step": 2576 }, { "epoch": 0.5151295569825841, "grad_norm": 2.0625, "learning_rate": 9.333985783842482e-06, "loss": 1.0916, "step": 2577 }, { "epoch": 0.5153294520376802, "grad_norm": 1.9296875, "learning_rate": 9.333460198052036e-06, "loss": 1.0654, "step": 2578 }, { "epoch": 0.5155293470927763, "grad_norm": 2.171875, "learning_rate": 9.332934419769752e-06, "loss": 1.1317, "step": 2579 }, { "epoch": 0.5157292421478724, "grad_norm": 2.0625, "learning_rate": 9.332408449018987e-06, "loss": 1.0809, "step": 2580 }, { "epoch": 0.5159291372029684, "grad_norm": 2.03125, "learning_rate": 9.331882285823098e-06, "loss": 1.1266, "step": 2581 }, { "epoch": 0.5161290322580645, "grad_norm": 1.921875, "learning_rate": 9.33135593020546e-06, "loss": 1.0306, "step": 2582 }, { "epoch": 0.5163289273131606, "grad_norm": 2.140625, "learning_rate": 9.330829382189456e-06, "loss": 1.1622, "step": 2583 }, { "epoch": 0.5165288223682567, "grad_norm": 2.0625, "learning_rate": 9.330302641798473e-06, "loss": 1.014, "step": 2584 }, { "epoch": 0.5167287174233527, "grad_norm": 2.015625, "learning_rate": 9.32977570905591e-06, "loss": 1.0604, "step": 2585 }, { "epoch": 0.5169286124784488, "grad_norm": 2.015625, "learning_rate": 9.329248583985171e-06, "loss": 1.0563, "step": 2586 }, { "epoch": 0.5171285075335449, "grad_norm": 2.109375, "learning_rate": 9.328721266609673e-06, "loss": 1.1403, "step": 2587 }, { "epoch": 0.517328402588641, "grad_norm": 1.9921875, "learning_rate": 9.328193756952837e-06, "loss": 1.0036, "step": 2588 }, { "epoch": 0.5175282976437371, "grad_norm": 2.046875, "learning_rate": 9.3276660550381e-06, "loss": 0.8862, "step": 2589 }, { "epoch": 0.5177281926988331, "grad_norm": 2.140625, "learning_rate": 9.327138160888897e-06, "loss": 1.2041, "step": 2590 }, { "epoch": 0.5179280877539292, "grad_norm": 2.078125, "learning_rate": 9.32661007452868e-06, "loss": 1.0425, "step": 2591 }, { "epoch": 0.5181279828090253, "grad_norm": 2.109375, "learning_rate": 9.326081795980903e-06, "loss": 1.0795, "step": 2592 }, { "epoch": 0.5183278778641214, "grad_norm": 2.421875, "learning_rate": 9.325553325269036e-06, "loss": 1.0518, "step": 2593 }, { "epoch": 0.5185277729192174, "grad_norm": 2.140625, "learning_rate": 9.325024662416553e-06, "loss": 0.9939, "step": 2594 }, { "epoch": 0.5187276679743135, "grad_norm": 2.171875, "learning_rate": 9.324495807446935e-06, "loss": 0.9843, "step": 2595 }, { "epoch": 0.5189275630294096, "grad_norm": 2.1875, "learning_rate": 9.323966760383679e-06, "loss": 1.183, "step": 2596 }, { "epoch": 0.5191274580845057, "grad_norm": 2.15625, "learning_rate": 9.323437521250278e-06, "loss": 1.0339, "step": 2597 }, { "epoch": 0.5193273531396017, "grad_norm": 2.375, "learning_rate": 9.322908090070243e-06, "loss": 1.0489, "step": 2598 }, { "epoch": 0.5195272481946978, "grad_norm": 2.09375, "learning_rate": 9.322378466867095e-06, "loss": 1.2156, "step": 2599 }, { "epoch": 0.5197271432497939, "grad_norm": 2.046875, "learning_rate": 9.321848651664357e-06, "loss": 1.0942, "step": 2600 }, { "epoch": 0.51992703830489, "grad_norm": 2.140625, "learning_rate": 9.321318644485561e-06, "loss": 0.9874, "step": 2601 }, { "epoch": 0.520126933359986, "grad_norm": 2.015625, "learning_rate": 9.320788445354255e-06, "loss": 0.9747, "step": 2602 }, { "epoch": 0.520326828415082, "grad_norm": 2.078125, "learning_rate": 9.320258054293987e-06, "loss": 1.0756, "step": 2603 }, { "epoch": 0.5205267234701781, "grad_norm": 2.234375, "learning_rate": 9.319727471328318e-06, "loss": 1.1056, "step": 2604 }, { "epoch": 0.5207266185252742, "grad_norm": 2.125, "learning_rate": 9.319196696480814e-06, "loss": 1.0712, "step": 2605 }, { "epoch": 0.5209265135803703, "grad_norm": 2.15625, "learning_rate": 9.318665729775056e-06, "loss": 1.1167, "step": 2606 }, { "epoch": 0.5211264086354663, "grad_norm": 2.09375, "learning_rate": 9.318134571234626e-06, "loss": 1.0241, "step": 2607 }, { "epoch": 0.5213263036905624, "grad_norm": 2.046875, "learning_rate": 9.317603220883121e-06, "loss": 1.0953, "step": 2608 }, { "epoch": 0.5215261987456585, "grad_norm": 1.9609375, "learning_rate": 9.317071678744143e-06, "loss": 0.9876, "step": 2609 }, { "epoch": 0.5217260938007546, "grad_norm": 2.03125, "learning_rate": 9.3165399448413e-06, "loss": 1.016, "step": 2610 }, { "epoch": 0.5219259888558507, "grad_norm": 2.078125, "learning_rate": 9.316008019198216e-06, "loss": 0.9751, "step": 2611 }, { "epoch": 0.5221258839109467, "grad_norm": 2.171875, "learning_rate": 9.315475901838514e-06, "loss": 1.0808, "step": 2612 }, { "epoch": 0.5223257789660428, "grad_norm": 2.109375, "learning_rate": 9.314943592785834e-06, "loss": 1.0874, "step": 2613 }, { "epoch": 0.5225256740211389, "grad_norm": 2.171875, "learning_rate": 9.314411092063822e-06, "loss": 1.1452, "step": 2614 }, { "epoch": 0.522725569076235, "grad_norm": 2.046875, "learning_rate": 9.313878399696127e-06, "loss": 1.0408, "step": 2615 }, { "epoch": 0.522925464131331, "grad_norm": 2.078125, "learning_rate": 9.313345515706417e-06, "loss": 0.955, "step": 2616 }, { "epoch": 0.5231253591864271, "grad_norm": 2.015625, "learning_rate": 9.31281244011836e-06, "loss": 0.9659, "step": 2617 }, { "epoch": 0.5233252542415232, "grad_norm": 2.234375, "learning_rate": 9.312279172955634e-06, "loss": 1.0329, "step": 2618 }, { "epoch": 0.5235251492966193, "grad_norm": 2.046875, "learning_rate": 9.311745714241926e-06, "loss": 1.0165, "step": 2619 }, { "epoch": 0.5237250443517153, "grad_norm": 2.203125, "learning_rate": 9.311212064000936e-06, "loss": 1.134, "step": 2620 }, { "epoch": 0.5239249394068114, "grad_norm": 2.171875, "learning_rate": 9.310678222256367e-06, "loss": 1.1248, "step": 2621 }, { "epoch": 0.5241248344619075, "grad_norm": 2.203125, "learning_rate": 9.31014418903193e-06, "loss": 1.0595, "step": 2622 }, { "epoch": 0.5243247295170036, "grad_norm": 2.234375, "learning_rate": 9.30960996435135e-06, "loss": 1.0898, "step": 2623 }, { "epoch": 0.5245246245720997, "grad_norm": 2.21875, "learning_rate": 9.309075548238355e-06, "loss": 1.1028, "step": 2624 }, { "epoch": 0.5247245196271957, "grad_norm": 2.078125, "learning_rate": 9.308540940716685e-06, "loss": 1.0883, "step": 2625 }, { "epoch": 0.5249244146822918, "grad_norm": 2.015625, "learning_rate": 9.308006141810086e-06, "loss": 0.973, "step": 2626 }, { "epoch": 0.5251243097373879, "grad_norm": 2.03125, "learning_rate": 9.307471151542315e-06, "loss": 1.081, "step": 2627 }, { "epoch": 0.525324204792484, "grad_norm": 2.078125, "learning_rate": 9.306935969937135e-06, "loss": 1.0576, "step": 2628 }, { "epoch": 0.52552409984758, "grad_norm": 2.125, "learning_rate": 9.30640059701832e-06, "loss": 1.1406, "step": 2629 }, { "epoch": 0.5257239949026761, "grad_norm": 2.21875, "learning_rate": 9.30586503280965e-06, "loss": 1.162, "step": 2630 }, { "epoch": 0.5259238899577722, "grad_norm": 2.09375, "learning_rate": 9.305329277334914e-06, "loss": 1.106, "step": 2631 }, { "epoch": 0.5261237850128683, "grad_norm": 2.109375, "learning_rate": 9.304793330617912e-06, "loss": 1.0658, "step": 2632 }, { "epoch": 0.5263236800679644, "grad_norm": 2.0625, "learning_rate": 9.304257192682449e-06, "loss": 1.0368, "step": 2633 }, { "epoch": 0.5265235751230604, "grad_norm": 1.984375, "learning_rate": 9.303720863552343e-06, "loss": 1.0142, "step": 2634 }, { "epoch": 0.5267234701781565, "grad_norm": 2.25, "learning_rate": 9.303184343251415e-06, "loss": 1.0655, "step": 2635 }, { "epoch": 0.5269233652332526, "grad_norm": 2.125, "learning_rate": 9.302647631803498e-06, "loss": 1.0741, "step": 2636 }, { "epoch": 0.5271232602883487, "grad_norm": 2.109375, "learning_rate": 9.302110729232432e-06, "loss": 1.0317, "step": 2637 }, { "epoch": 0.5273231553434446, "grad_norm": 2.140625, "learning_rate": 9.301573635562068e-06, "loss": 1.0812, "step": 2638 }, { "epoch": 0.5275230503985407, "grad_norm": 1.9921875, "learning_rate": 9.301036350816264e-06, "loss": 1.0591, "step": 2639 }, { "epoch": 0.5277229454536368, "grad_norm": 2.171875, "learning_rate": 9.300498875018882e-06, "loss": 1.0668, "step": 2640 }, { "epoch": 0.527922840508733, "grad_norm": 2.171875, "learning_rate": 9.299961208193801e-06, "loss": 1.1023, "step": 2641 }, { "epoch": 0.5281227355638289, "grad_norm": 2.09375, "learning_rate": 9.299423350364903e-06, "loss": 1.0111, "step": 2642 }, { "epoch": 0.528322630618925, "grad_norm": 2.078125, "learning_rate": 9.298885301556075e-06, "loss": 1.1069, "step": 2643 }, { "epoch": 0.5285225256740211, "grad_norm": 1.96875, "learning_rate": 9.298347061791224e-06, "loss": 0.9699, "step": 2644 }, { "epoch": 0.5287224207291172, "grad_norm": 2.109375, "learning_rate": 9.297808631094257e-06, "loss": 1.0813, "step": 2645 }, { "epoch": 0.5289223157842133, "grad_norm": 2.0625, "learning_rate": 9.297270009489088e-06, "loss": 0.9616, "step": 2646 }, { "epoch": 0.5291222108393093, "grad_norm": 2.0625, "learning_rate": 9.296731196999643e-06, "loss": 1.0316, "step": 2647 }, { "epoch": 0.5293221058944054, "grad_norm": 1.9609375, "learning_rate": 9.296192193649857e-06, "loss": 0.9889, "step": 2648 }, { "epoch": 0.5295220009495015, "grad_norm": 2.09375, "learning_rate": 9.295652999463675e-06, "loss": 0.9873, "step": 2649 }, { "epoch": 0.5297218960045976, "grad_norm": 2.125, "learning_rate": 9.295113614465045e-06, "loss": 1.1143, "step": 2650 }, { "epoch": 0.5299217910596936, "grad_norm": 2.0625, "learning_rate": 9.294574038677926e-06, "loss": 1.0498, "step": 2651 }, { "epoch": 0.5301216861147897, "grad_norm": 2.140625, "learning_rate": 9.294034272126286e-06, "loss": 1.0765, "step": 2652 }, { "epoch": 0.5303215811698858, "grad_norm": 2.140625, "learning_rate": 9.293494314834105e-06, "loss": 1.0264, "step": 2653 }, { "epoch": 0.5305214762249819, "grad_norm": 2.046875, "learning_rate": 9.292954166825363e-06, "loss": 1.0044, "step": 2654 }, { "epoch": 0.530721371280078, "grad_norm": 2.03125, "learning_rate": 9.292413828124056e-06, "loss": 1.0188, "step": 2655 }, { "epoch": 0.530921266335174, "grad_norm": 1.984375, "learning_rate": 9.291873298754187e-06, "loss": 1.0454, "step": 2656 }, { "epoch": 0.5311211613902701, "grad_norm": 2.25, "learning_rate": 9.291332578739762e-06, "loss": 1.067, "step": 2657 }, { "epoch": 0.5313210564453662, "grad_norm": 2.140625, "learning_rate": 9.290791668104802e-06, "loss": 1.0037, "step": 2658 }, { "epoch": 0.5315209515004623, "grad_norm": 2.125, "learning_rate": 9.290250566873335e-06, "loss": 1.0557, "step": 2659 }, { "epoch": 0.5317208465555583, "grad_norm": 2.0625, "learning_rate": 9.289709275069396e-06, "loss": 1.0282, "step": 2660 }, { "epoch": 0.5319207416106544, "grad_norm": 2.109375, "learning_rate": 9.28916779271703e-06, "loss": 1.0812, "step": 2661 }, { "epoch": 0.5321206366657505, "grad_norm": 2.0625, "learning_rate": 9.288626119840287e-06, "loss": 1.0249, "step": 2662 }, { "epoch": 0.5323205317208466, "grad_norm": 2.125, "learning_rate": 9.288084256463233e-06, "loss": 1.0339, "step": 2663 }, { "epoch": 0.5325204267759426, "grad_norm": 1.9765625, "learning_rate": 9.287542202609932e-06, "loss": 1.0646, "step": 2664 }, { "epoch": 0.5327203218310387, "grad_norm": 2.234375, "learning_rate": 9.286999958304464e-06, "loss": 1.0251, "step": 2665 }, { "epoch": 0.5329202168861348, "grad_norm": 2.09375, "learning_rate": 9.286457523570915e-06, "loss": 1.1246, "step": 2666 }, { "epoch": 0.5331201119412309, "grad_norm": 2.109375, "learning_rate": 9.285914898433384e-06, "loss": 1.0116, "step": 2667 }, { "epoch": 0.533320006996327, "grad_norm": 2.078125, "learning_rate": 9.285372082915968e-06, "loss": 1.0878, "step": 2668 }, { "epoch": 0.533519902051423, "grad_norm": 2.234375, "learning_rate": 9.284829077042784e-06, "loss": 1.0951, "step": 2669 }, { "epoch": 0.5337197971065191, "grad_norm": 2.09375, "learning_rate": 9.284285880837947e-06, "loss": 1.1074, "step": 2670 }, { "epoch": 0.5339196921616152, "grad_norm": 2.15625, "learning_rate": 9.283742494325591e-06, "loss": 1.0042, "step": 2671 }, { "epoch": 0.5341195872167113, "grad_norm": 2.140625, "learning_rate": 9.28319891752985e-06, "loss": 1.064, "step": 2672 }, { "epoch": 0.5343194822718073, "grad_norm": 2.1875, "learning_rate": 9.282655150474871e-06, "loss": 0.9645, "step": 2673 }, { "epoch": 0.5345193773269034, "grad_norm": 2.140625, "learning_rate": 9.282111193184806e-06, "loss": 1.0087, "step": 2674 }, { "epoch": 0.5347192723819995, "grad_norm": 2.140625, "learning_rate": 9.281567045683822e-06, "loss": 1.1043, "step": 2675 }, { "epoch": 0.5349191674370956, "grad_norm": 2.15625, "learning_rate": 9.281022707996085e-06, "loss": 1.0959, "step": 2676 }, { "epoch": 0.5351190624921915, "grad_norm": 2.1875, "learning_rate": 9.280478180145778e-06, "loss": 1.1068, "step": 2677 }, { "epoch": 0.5353189575472876, "grad_norm": 2.21875, "learning_rate": 9.279933462157088e-06, "loss": 1.129, "step": 2678 }, { "epoch": 0.5355188526023837, "grad_norm": 2.34375, "learning_rate": 9.279388554054207e-06, "loss": 0.9898, "step": 2679 }, { "epoch": 0.5357187476574798, "grad_norm": 2.15625, "learning_rate": 9.278843455861346e-06, "loss": 1.0103, "step": 2680 }, { "epoch": 0.5359186427125759, "grad_norm": 2.5, "learning_rate": 9.278298167602716e-06, "loss": 1.1809, "step": 2681 }, { "epoch": 0.5361185377676719, "grad_norm": 2.078125, "learning_rate": 9.277752689302537e-06, "loss": 0.9956, "step": 2682 }, { "epoch": 0.536318432822768, "grad_norm": 2.21875, "learning_rate": 9.277207020985042e-06, "loss": 1.0903, "step": 2683 }, { "epoch": 0.5365183278778641, "grad_norm": 1.9921875, "learning_rate": 9.276661162674467e-06, "loss": 0.9674, "step": 2684 }, { "epoch": 0.5367182229329602, "grad_norm": 2.09375, "learning_rate": 9.27611511439506e-06, "loss": 1.0355, "step": 2685 }, { "epoch": 0.5369181179880562, "grad_norm": 2.0625, "learning_rate": 9.275568876171078e-06, "loss": 0.9648, "step": 2686 }, { "epoch": 0.5371180130431523, "grad_norm": 2.15625, "learning_rate": 9.275022448026782e-06, "loss": 0.9565, "step": 2687 }, { "epoch": 0.5373179080982484, "grad_norm": 2.140625, "learning_rate": 9.274475829986444e-06, "loss": 1.0516, "step": 2688 }, { "epoch": 0.5375178031533445, "grad_norm": 2.09375, "learning_rate": 9.273929022074348e-06, "loss": 1.0077, "step": 2689 }, { "epoch": 0.5377176982084406, "grad_norm": 2.0625, "learning_rate": 9.273382024314781e-06, "loss": 1.0389, "step": 2690 }, { "epoch": 0.5379175932635366, "grad_norm": 2.140625, "learning_rate": 9.272834836732039e-06, "loss": 0.9891, "step": 2691 }, { "epoch": 0.5381174883186327, "grad_norm": 2.0625, "learning_rate": 9.272287459350432e-06, "loss": 1.0165, "step": 2692 }, { "epoch": 0.5383173833737288, "grad_norm": 2.03125, "learning_rate": 9.271739892194272e-06, "loss": 0.9821, "step": 2693 }, { "epoch": 0.5385172784288249, "grad_norm": 2.0625, "learning_rate": 9.271192135287882e-06, "loss": 1.102, "step": 2694 }, { "epoch": 0.5387171734839209, "grad_norm": 1.984375, "learning_rate": 9.270644188655594e-06, "loss": 0.9087, "step": 2695 }, { "epoch": 0.538917068539017, "grad_norm": 2.078125, "learning_rate": 9.270096052321747e-06, "loss": 1.1452, "step": 2696 }, { "epoch": 0.5391169635941131, "grad_norm": 2.140625, "learning_rate": 9.269547726310688e-06, "loss": 1.0612, "step": 2697 }, { "epoch": 0.5393168586492092, "grad_norm": 2.140625, "learning_rate": 9.268999210646777e-06, "loss": 0.9784, "step": 2698 }, { "epoch": 0.5395167537043052, "grad_norm": 2.09375, "learning_rate": 9.268450505354375e-06, "loss": 0.9654, "step": 2699 }, { "epoch": 0.5397166487594013, "grad_norm": 2.171875, "learning_rate": 9.267901610457859e-06, "loss": 1.121, "step": 2700 }, { "epoch": 0.5399165438144974, "grad_norm": 2.109375, "learning_rate": 9.26735252598161e-06, "loss": 1.073, "step": 2701 }, { "epoch": 0.5401164388695935, "grad_norm": 2.265625, "learning_rate": 9.266803251950017e-06, "loss": 1.082, "step": 2702 }, { "epoch": 0.5403163339246896, "grad_norm": 2.09375, "learning_rate": 9.266253788387479e-06, "loss": 1.0063, "step": 2703 }, { "epoch": 0.5405162289797856, "grad_norm": 2.21875, "learning_rate": 9.2657041353184e-06, "loss": 1.1217, "step": 2704 }, { "epoch": 0.5407161240348817, "grad_norm": 2.1875, "learning_rate": 9.265154292767204e-06, "loss": 0.9752, "step": 2705 }, { "epoch": 0.5409160190899778, "grad_norm": 2.015625, "learning_rate": 9.264604260758307e-06, "loss": 1.02, "step": 2706 }, { "epoch": 0.5411159141450739, "grad_norm": 1.9609375, "learning_rate": 9.264054039316146e-06, "loss": 1.0283, "step": 2707 }, { "epoch": 0.5413158092001699, "grad_norm": 2.1875, "learning_rate": 9.263503628465159e-06, "loss": 1.1819, "step": 2708 }, { "epoch": 0.541515704255266, "grad_norm": 2.015625, "learning_rate": 9.262953028229794e-06, "loss": 1.0463, "step": 2709 }, { "epoch": 0.5417155993103621, "grad_norm": 2.03125, "learning_rate": 9.262402238634514e-06, "loss": 1.2011, "step": 2710 }, { "epoch": 0.5419154943654582, "grad_norm": 2.0, "learning_rate": 9.261851259703782e-06, "loss": 0.9946, "step": 2711 }, { "epoch": 0.5421153894205543, "grad_norm": 2.03125, "learning_rate": 9.261300091462071e-06, "loss": 1.0925, "step": 2712 }, { "epoch": 0.5423152844756502, "grad_norm": 2.0625, "learning_rate": 9.260748733933865e-06, "loss": 1.0614, "step": 2713 }, { "epoch": 0.5425151795307463, "grad_norm": 2.09375, "learning_rate": 9.260197187143656e-06, "loss": 1.0966, "step": 2714 }, { "epoch": 0.5427150745858424, "grad_norm": 2.125, "learning_rate": 9.259645451115941e-06, "loss": 1.1616, "step": 2715 }, { "epoch": 0.5429149696409385, "grad_norm": 2.109375, "learning_rate": 9.259093525875232e-06, "loss": 1.1168, "step": 2716 }, { "epoch": 0.5431148646960345, "grad_norm": 1.9375, "learning_rate": 9.258541411446042e-06, "loss": 1.0294, "step": 2717 }, { "epoch": 0.5433147597511306, "grad_norm": 1.96875, "learning_rate": 9.257989107852898e-06, "loss": 1.0106, "step": 2718 }, { "epoch": 0.5435146548062267, "grad_norm": 2.078125, "learning_rate": 9.257436615120335e-06, "loss": 0.9188, "step": 2719 }, { "epoch": 0.5437145498613228, "grad_norm": 2.140625, "learning_rate": 9.256883933272888e-06, "loss": 1.0516, "step": 2720 }, { "epoch": 0.5439144449164188, "grad_norm": 2.0625, "learning_rate": 9.256331062335114e-06, "loss": 1.1339, "step": 2721 }, { "epoch": 0.5441143399715149, "grad_norm": 2.0625, "learning_rate": 9.255778002331569e-06, "loss": 1.0622, "step": 2722 }, { "epoch": 0.544314235026611, "grad_norm": 2.21875, "learning_rate": 9.255224753286818e-06, "loss": 1.0338, "step": 2723 }, { "epoch": 0.5445141300817071, "grad_norm": 2.0, "learning_rate": 9.25467131522544e-06, "loss": 1.0663, "step": 2724 }, { "epoch": 0.5447140251368032, "grad_norm": 2.046875, "learning_rate": 9.254117688172014e-06, "loss": 1.0436, "step": 2725 }, { "epoch": 0.5449139201918992, "grad_norm": 2.125, "learning_rate": 9.253563872151136e-06, "loss": 1.0577, "step": 2726 }, { "epoch": 0.5451138152469953, "grad_norm": 2.09375, "learning_rate": 9.253009867187406e-06, "loss": 1.0489, "step": 2727 }, { "epoch": 0.5453137103020914, "grad_norm": 2.046875, "learning_rate": 9.252455673305431e-06, "loss": 0.9571, "step": 2728 }, { "epoch": 0.5455136053571875, "grad_norm": 2.109375, "learning_rate": 9.251901290529829e-06, "loss": 1.1165, "step": 2729 }, { "epoch": 0.5457135004122835, "grad_norm": 2.109375, "learning_rate": 9.251346718885226e-06, "loss": 1.0281, "step": 2730 }, { "epoch": 0.5459133954673796, "grad_norm": 2.015625, "learning_rate": 9.250791958396255e-06, "loss": 1.0421, "step": 2731 }, { "epoch": 0.5461132905224757, "grad_norm": 2.109375, "learning_rate": 9.25023700908756e-06, "loss": 1.0919, "step": 2732 }, { "epoch": 0.5463131855775718, "grad_norm": 2.015625, "learning_rate": 9.249681870983792e-06, "loss": 1.0162, "step": 2733 }, { "epoch": 0.5465130806326679, "grad_norm": 2.0625, "learning_rate": 9.249126544109608e-06, "loss": 1.0894, "step": 2734 }, { "epoch": 0.5467129756877639, "grad_norm": 2.0625, "learning_rate": 9.248571028489678e-06, "loss": 1.039, "step": 2735 }, { "epoch": 0.54691287074286, "grad_norm": 2.1875, "learning_rate": 9.248015324148673e-06, "loss": 1.1551, "step": 2736 }, { "epoch": 0.5471127657979561, "grad_norm": 2.09375, "learning_rate": 9.247459431111285e-06, "loss": 1.0632, "step": 2737 }, { "epoch": 0.5473126608530522, "grad_norm": 2.109375, "learning_rate": 9.246903349402201e-06, "loss": 1.1491, "step": 2738 }, { "epoch": 0.5475125559081482, "grad_norm": 2.296875, "learning_rate": 9.246347079046124e-06, "loss": 1.0886, "step": 2739 }, { "epoch": 0.5477124509632443, "grad_norm": 2.109375, "learning_rate": 9.245790620067763e-06, "loss": 1.0163, "step": 2740 }, { "epoch": 0.5479123460183404, "grad_norm": 2.140625, "learning_rate": 9.245233972491836e-06, "loss": 1.1689, "step": 2741 }, { "epoch": 0.5481122410734365, "grad_norm": 1.9609375, "learning_rate": 9.24467713634307e-06, "loss": 0.9898, "step": 2742 }, { "epoch": 0.5483121361285325, "grad_norm": 2.046875, "learning_rate": 9.2441201116462e-06, "loss": 1.0118, "step": 2743 }, { "epoch": 0.5485120311836286, "grad_norm": 2.03125, "learning_rate": 9.243562898425967e-06, "loss": 1.0321, "step": 2744 }, { "epoch": 0.5487119262387247, "grad_norm": 2.21875, "learning_rate": 9.243005496707123e-06, "loss": 1.1106, "step": 2745 }, { "epoch": 0.5489118212938208, "grad_norm": 2.203125, "learning_rate": 9.242447906514428e-06, "loss": 1.123, "step": 2746 }, { "epoch": 0.5491117163489169, "grad_norm": 2.171875, "learning_rate": 9.24189012787265e-06, "loss": 1.0215, "step": 2747 }, { "epoch": 0.5493116114040129, "grad_norm": 2.046875, "learning_rate": 9.241332160806566e-06, "loss": 1.0798, "step": 2748 }, { "epoch": 0.549511506459109, "grad_norm": 2.0625, "learning_rate": 9.24077400534096e-06, "loss": 1.0293, "step": 2749 }, { "epoch": 0.5497114015142051, "grad_norm": 2.09375, "learning_rate": 9.240215661500625e-06, "loss": 1.129, "step": 2750 }, { "epoch": 0.5499112965693012, "grad_norm": 2.453125, "learning_rate": 9.239657129310364e-06, "loss": 1.1994, "step": 2751 }, { "epoch": 0.5501111916243971, "grad_norm": 2.1875, "learning_rate": 9.239098408794986e-06, "loss": 1.0474, "step": 2752 }, { "epoch": 0.5503110866794932, "grad_norm": 2.015625, "learning_rate": 9.23853949997931e-06, "loss": 1.0514, "step": 2753 }, { "epoch": 0.5505109817345893, "grad_norm": 2.140625, "learning_rate": 9.23798040288816e-06, "loss": 1.0656, "step": 2754 }, { "epoch": 0.5507108767896854, "grad_norm": 2.125, "learning_rate": 9.237421117546375e-06, "loss": 1.0498, "step": 2755 }, { "epoch": 0.5509107718447815, "grad_norm": 2.015625, "learning_rate": 9.236861643978797e-06, "loss": 0.9487, "step": 2756 }, { "epoch": 0.5511106668998775, "grad_norm": 2.15625, "learning_rate": 9.236301982210276e-06, "loss": 1.0643, "step": 2757 }, { "epoch": 0.5513105619549736, "grad_norm": 2.078125, "learning_rate": 9.235742132265676e-06, "loss": 1.0824, "step": 2758 }, { "epoch": 0.5515104570100697, "grad_norm": 2.078125, "learning_rate": 9.235182094169859e-06, "loss": 1.0396, "step": 2759 }, { "epoch": 0.5517103520651658, "grad_norm": 2.15625, "learning_rate": 9.234621867947709e-06, "loss": 1.1565, "step": 2760 }, { "epoch": 0.5519102471202618, "grad_norm": 2.0, "learning_rate": 9.234061453624106e-06, "loss": 0.9588, "step": 2761 }, { "epoch": 0.5521101421753579, "grad_norm": 2.34375, "learning_rate": 9.233500851223945e-06, "loss": 1.0023, "step": 2762 }, { "epoch": 0.552310037230454, "grad_norm": 2.125, "learning_rate": 9.23294006077213e-06, "loss": 1.1064, "step": 2763 }, { "epoch": 0.5525099322855501, "grad_norm": 2.125, "learning_rate": 9.232379082293568e-06, "loss": 1.1066, "step": 2764 }, { "epoch": 0.5527098273406461, "grad_norm": 2.078125, "learning_rate": 9.23181791581318e-06, "loss": 1.0331, "step": 2765 }, { "epoch": 0.5529097223957422, "grad_norm": 2.125, "learning_rate": 9.231256561355892e-06, "loss": 0.9992, "step": 2766 }, { "epoch": 0.5531096174508383, "grad_norm": 2.171875, "learning_rate": 9.23069501894664e-06, "loss": 1.0246, "step": 2767 }, { "epoch": 0.5533095125059344, "grad_norm": 2.171875, "learning_rate": 9.230133288610366e-06, "loss": 0.978, "step": 2768 }, { "epoch": 0.5535094075610305, "grad_norm": 2.078125, "learning_rate": 9.229571370372023e-06, "loss": 1.0375, "step": 2769 }, { "epoch": 0.5537093026161265, "grad_norm": 2.09375, "learning_rate": 9.229009264256574e-06, "loss": 1.0868, "step": 2770 }, { "epoch": 0.5539091976712226, "grad_norm": 2.140625, "learning_rate": 9.228446970288983e-06, "loss": 1.1305, "step": 2771 }, { "epoch": 0.5541090927263187, "grad_norm": 2.078125, "learning_rate": 9.22788448849423e-06, "loss": 1.0305, "step": 2772 }, { "epoch": 0.5543089877814148, "grad_norm": 2.0625, "learning_rate": 9.2273218188973e-06, "loss": 1.1207, "step": 2773 }, { "epoch": 0.5545088828365108, "grad_norm": 2.125, "learning_rate": 9.226758961523185e-06, "loss": 1.0737, "step": 2774 }, { "epoch": 0.5547087778916069, "grad_norm": 2.03125, "learning_rate": 9.226195916396888e-06, "loss": 1.1121, "step": 2775 }, { "epoch": 0.554908672946703, "grad_norm": 2.140625, "learning_rate": 9.225632683543421e-06, "loss": 1.1163, "step": 2776 }, { "epoch": 0.5551085680017991, "grad_norm": 2.140625, "learning_rate": 9.2250692629878e-06, "loss": 1.1029, "step": 2777 }, { "epoch": 0.5553084630568952, "grad_norm": 2.0625, "learning_rate": 9.224505654755055e-06, "loss": 1.0431, "step": 2778 }, { "epoch": 0.5555083581119912, "grad_norm": 2.046875, "learning_rate": 9.22394185887022e-06, "loss": 1.0452, "step": 2779 }, { "epoch": 0.5557082531670873, "grad_norm": 2.078125, "learning_rate": 9.22337787535834e-06, "loss": 0.9932, "step": 2780 }, { "epoch": 0.5559081482221834, "grad_norm": 2.15625, "learning_rate": 9.222813704244464e-06, "loss": 1.145, "step": 2781 }, { "epoch": 0.5561080432772795, "grad_norm": 2.140625, "learning_rate": 9.222249345553654e-06, "loss": 1.0276, "step": 2782 }, { "epoch": 0.5563079383323755, "grad_norm": 2.046875, "learning_rate": 9.221684799310979e-06, "loss": 0.9823, "step": 2783 }, { "epoch": 0.5565078333874716, "grad_norm": 2.109375, "learning_rate": 9.221120065541517e-06, "loss": 1.0601, "step": 2784 }, { "epoch": 0.5567077284425677, "grad_norm": 2.078125, "learning_rate": 9.220555144270353e-06, "loss": 1.0849, "step": 2785 }, { "epoch": 0.5569076234976638, "grad_norm": 2.03125, "learning_rate": 9.219990035522579e-06, "loss": 0.9661, "step": 2786 }, { "epoch": 0.5571075185527598, "grad_norm": 2.09375, "learning_rate": 9.2194247393233e-06, "loss": 1.128, "step": 2787 }, { "epoch": 0.5573074136078559, "grad_norm": 1.9609375, "learning_rate": 9.218859255697622e-06, "loss": 1.0929, "step": 2788 }, { "epoch": 0.557507308662952, "grad_norm": 2.25, "learning_rate": 9.218293584670667e-06, "loss": 0.9534, "step": 2789 }, { "epoch": 0.557707203718048, "grad_norm": 2.078125, "learning_rate": 9.217727726267564e-06, "loss": 1.1144, "step": 2790 }, { "epoch": 0.5579070987731441, "grad_norm": 2.125, "learning_rate": 9.217161680513445e-06, "loss": 1.0807, "step": 2791 }, { "epoch": 0.5581069938282401, "grad_norm": 2.171875, "learning_rate": 9.216595447433454e-06, "loss": 1.1086, "step": 2792 }, { "epoch": 0.5583068888833362, "grad_norm": 2.109375, "learning_rate": 9.216029027052744e-06, "loss": 1.0106, "step": 2793 }, { "epoch": 0.5585067839384323, "grad_norm": 2.328125, "learning_rate": 9.215462419396474e-06, "loss": 1.0926, "step": 2794 }, { "epoch": 0.5587066789935284, "grad_norm": 2.078125, "learning_rate": 9.214895624489813e-06, "loss": 1.0694, "step": 2795 }, { "epoch": 0.5589065740486244, "grad_norm": 2.0625, "learning_rate": 9.21432864235794e-06, "loss": 1.0344, "step": 2796 }, { "epoch": 0.5591064691037205, "grad_norm": 2.125, "learning_rate": 9.213761473026039e-06, "loss": 1.1513, "step": 2797 }, { "epoch": 0.5593063641588166, "grad_norm": 2.015625, "learning_rate": 9.213194116519301e-06, "loss": 1.0874, "step": 2798 }, { "epoch": 0.5595062592139127, "grad_norm": 2.21875, "learning_rate": 9.212626572862933e-06, "loss": 1.0074, "step": 2799 }, { "epoch": 0.5597061542690087, "grad_norm": 2.15625, "learning_rate": 9.212058842082141e-06, "loss": 1.0354, "step": 2800 }, { "epoch": 0.5599060493241048, "grad_norm": 2.09375, "learning_rate": 9.211490924202147e-06, "loss": 0.9803, "step": 2801 }, { "epoch": 0.5601059443792009, "grad_norm": 2.015625, "learning_rate": 9.210922819248173e-06, "loss": 1.0824, "step": 2802 }, { "epoch": 0.560305839434297, "grad_norm": 1.9921875, "learning_rate": 9.21035452724546e-06, "loss": 1.1036, "step": 2803 }, { "epoch": 0.5605057344893931, "grad_norm": 2.140625, "learning_rate": 9.209786048219247e-06, "loss": 1.1111, "step": 2804 }, { "epoch": 0.5607056295444891, "grad_norm": 2.09375, "learning_rate": 9.209217382194788e-06, "loss": 1.1009, "step": 2805 }, { "epoch": 0.5609055245995852, "grad_norm": 2.71875, "learning_rate": 9.208648529197342e-06, "loss": 1.2159, "step": 2806 }, { "epoch": 0.5611054196546813, "grad_norm": 2.15625, "learning_rate": 9.208079489252178e-06, "loss": 1.0909, "step": 2807 }, { "epoch": 0.5613053147097774, "grad_norm": 2.125, "learning_rate": 9.207510262384573e-06, "loss": 1.0151, "step": 2808 }, { "epoch": 0.5615052097648734, "grad_norm": 1.9921875, "learning_rate": 9.20694084861981e-06, "loss": 1.0352, "step": 2809 }, { "epoch": 0.5617051048199695, "grad_norm": 2.09375, "learning_rate": 9.206371247983186e-06, "loss": 1.0656, "step": 2810 }, { "epoch": 0.5619049998750656, "grad_norm": 2.203125, "learning_rate": 9.205801460499999e-06, "loss": 1.1071, "step": 2811 }, { "epoch": 0.5621048949301617, "grad_norm": 2.03125, "learning_rate": 9.205231486195561e-06, "loss": 1.0241, "step": 2812 }, { "epoch": 0.5623047899852578, "grad_norm": 2.140625, "learning_rate": 9.204661325095189e-06, "loss": 1.1143, "step": 2813 }, { "epoch": 0.5625046850403538, "grad_norm": 2.078125, "learning_rate": 9.20409097722421e-06, "loss": 1.1146, "step": 2814 }, { "epoch": 0.5627045800954499, "grad_norm": 2.0, "learning_rate": 9.203520442607958e-06, "loss": 1.01, "step": 2815 }, { "epoch": 0.562904475150546, "grad_norm": 1.9609375, "learning_rate": 9.202949721271779e-06, "loss": 0.963, "step": 2816 }, { "epoch": 0.5631043702056421, "grad_norm": 2.046875, "learning_rate": 9.20237881324102e-06, "loss": 1.0704, "step": 2817 }, { "epoch": 0.5633042652607381, "grad_norm": 2.140625, "learning_rate": 9.201807718541046e-06, "loss": 1.0934, "step": 2818 }, { "epoch": 0.5635041603158342, "grad_norm": 2.109375, "learning_rate": 9.201236437197219e-06, "loss": 1.1211, "step": 2819 }, { "epoch": 0.5637040553709303, "grad_norm": 2.09375, "learning_rate": 9.200664969234919e-06, "loss": 1.0596, "step": 2820 }, { "epoch": 0.5639039504260264, "grad_norm": 2.109375, "learning_rate": 9.20009331467953e-06, "loss": 1.0909, "step": 2821 }, { "epoch": 0.5641038454811224, "grad_norm": 2.171875, "learning_rate": 9.199521473556444e-06, "loss": 1.0574, "step": 2822 }, { "epoch": 0.5643037405362185, "grad_norm": 2.015625, "learning_rate": 9.198949445891064e-06, "loss": 1.0427, "step": 2823 }, { "epoch": 0.5645036355913146, "grad_norm": 1.984375, "learning_rate": 9.198377231708795e-06, "loss": 1.0195, "step": 2824 }, { "epoch": 0.5647035306464107, "grad_norm": 2.25, "learning_rate": 9.197804831035061e-06, "loss": 1.1618, "step": 2825 }, { "epoch": 0.5649034257015068, "grad_norm": 2.15625, "learning_rate": 9.197232243895285e-06, "loss": 1.0371, "step": 2826 }, { "epoch": 0.5651033207566027, "grad_norm": 2.140625, "learning_rate": 9.196659470314899e-06, "loss": 1.0595, "step": 2827 }, { "epoch": 0.5653032158116988, "grad_norm": 2.0625, "learning_rate": 9.196086510319348e-06, "loss": 1.014, "step": 2828 }, { "epoch": 0.565503110866795, "grad_norm": 2.234375, "learning_rate": 9.195513363934083e-06, "loss": 1.1328, "step": 2829 }, { "epoch": 0.565703005921891, "grad_norm": 2.09375, "learning_rate": 9.194940031184562e-06, "loss": 1.0148, "step": 2830 }, { "epoch": 0.565902900976987, "grad_norm": 2.15625, "learning_rate": 9.194366512096252e-06, "loss": 1.1294, "step": 2831 }, { "epoch": 0.5661027960320831, "grad_norm": 2.203125, "learning_rate": 9.193792806694631e-06, "loss": 1.0612, "step": 2832 }, { "epoch": 0.5663026910871792, "grad_norm": 2.015625, "learning_rate": 9.19321891500518e-06, "loss": 1.0499, "step": 2833 }, { "epoch": 0.5665025861422753, "grad_norm": 2.140625, "learning_rate": 9.192644837053394e-06, "loss": 1.1479, "step": 2834 }, { "epoch": 0.5667024811973714, "grad_norm": 2.125, "learning_rate": 9.192070572864772e-06, "loss": 1.0428, "step": 2835 }, { "epoch": 0.5669023762524674, "grad_norm": 2.03125, "learning_rate": 9.191496122464822e-06, "loss": 1.0941, "step": 2836 }, { "epoch": 0.5671022713075635, "grad_norm": 2.046875, "learning_rate": 9.190921485879064e-06, "loss": 1.0604, "step": 2837 }, { "epoch": 0.5673021663626596, "grad_norm": 2.109375, "learning_rate": 9.19034666313302e-06, "loss": 1.1028, "step": 2838 }, { "epoch": 0.5675020614177557, "grad_norm": 2.09375, "learning_rate": 9.189771654252221e-06, "loss": 1.0805, "step": 2839 }, { "epoch": 0.5677019564728517, "grad_norm": 2.234375, "learning_rate": 9.189196459262217e-06, "loss": 1.1518, "step": 2840 }, { "epoch": 0.5679018515279478, "grad_norm": 2.3125, "learning_rate": 9.188621078188554e-06, "loss": 1.1624, "step": 2841 }, { "epoch": 0.5681017465830439, "grad_norm": 2.140625, "learning_rate": 9.188045511056787e-06, "loss": 1.0701, "step": 2842 }, { "epoch": 0.56830164163814, "grad_norm": 2.21875, "learning_rate": 9.187469757892489e-06, "loss": 1.1878, "step": 2843 }, { "epoch": 0.568501536693236, "grad_norm": 2.03125, "learning_rate": 9.18689381872123e-06, "loss": 1.0141, "step": 2844 }, { "epoch": 0.5687014317483321, "grad_norm": 1.9453125, "learning_rate": 9.186317693568594e-06, "loss": 0.999, "step": 2845 }, { "epoch": 0.5689013268034282, "grad_norm": 2.140625, "learning_rate": 9.185741382460173e-06, "loss": 1.0444, "step": 2846 }, { "epoch": 0.5691012218585243, "grad_norm": 2.125, "learning_rate": 9.18516488542157e-06, "loss": 1.078, "step": 2847 }, { "epoch": 0.5693011169136204, "grad_norm": 2.15625, "learning_rate": 9.184588202478387e-06, "loss": 1.1302, "step": 2848 }, { "epoch": 0.5695010119687164, "grad_norm": 2.03125, "learning_rate": 9.184011333656243e-06, "loss": 1.0853, "step": 2849 }, { "epoch": 0.5697009070238125, "grad_norm": 2.09375, "learning_rate": 9.183434278980763e-06, "loss": 1.0982, "step": 2850 }, { "epoch": 0.5699008020789086, "grad_norm": 2.140625, "learning_rate": 9.18285703847758e-06, "loss": 1.1812, "step": 2851 }, { "epoch": 0.5701006971340047, "grad_norm": 2.09375, "learning_rate": 9.182279612172332e-06, "loss": 1.0367, "step": 2852 }, { "epoch": 0.5703005921891007, "grad_norm": 2.03125, "learning_rate": 9.18170200009067e-06, "loss": 1.0112, "step": 2853 }, { "epoch": 0.5705004872441968, "grad_norm": 2.171875, "learning_rate": 9.181124202258254e-06, "loss": 0.9669, "step": 2854 }, { "epoch": 0.5707003822992929, "grad_norm": 1.9921875, "learning_rate": 9.180546218700748e-06, "loss": 0.9886, "step": 2855 }, { "epoch": 0.570900277354389, "grad_norm": 2.078125, "learning_rate": 9.179968049443823e-06, "loss": 1.0388, "step": 2856 }, { "epoch": 0.5711001724094851, "grad_norm": 2.015625, "learning_rate": 9.179389694513166e-06, "loss": 1.0461, "step": 2857 }, { "epoch": 0.5713000674645811, "grad_norm": 2.15625, "learning_rate": 9.178811153934463e-06, "loss": 1.0419, "step": 2858 }, { "epoch": 0.5714999625196772, "grad_norm": 2.125, "learning_rate": 9.178232427733418e-06, "loss": 1.0207, "step": 2859 }, { "epoch": 0.5716998575747733, "grad_norm": 2.046875, "learning_rate": 9.177653515935732e-06, "loss": 1.052, "step": 2860 }, { "epoch": 0.5718997526298694, "grad_norm": 2.078125, "learning_rate": 9.177074418567124e-06, "loss": 1.0429, "step": 2861 }, { "epoch": 0.5720996476849654, "grad_norm": 2.015625, "learning_rate": 9.176495135653315e-06, "loss": 0.9709, "step": 2862 }, { "epoch": 0.5722995427400615, "grad_norm": 2.234375, "learning_rate": 9.175915667220039e-06, "loss": 1.084, "step": 2863 }, { "epoch": 0.5724994377951576, "grad_norm": 1.9609375, "learning_rate": 9.175336013293037e-06, "loss": 1.029, "step": 2864 }, { "epoch": 0.5726993328502537, "grad_norm": 2.0, "learning_rate": 9.174756173898053e-06, "loss": 1.0011, "step": 2865 }, { "epoch": 0.5728992279053496, "grad_norm": 2.125, "learning_rate": 9.174176149060847e-06, "loss": 1.1444, "step": 2866 }, { "epoch": 0.5730991229604457, "grad_norm": 2.0625, "learning_rate": 9.173595938807181e-06, "loss": 1.0194, "step": 2867 }, { "epoch": 0.5732990180155418, "grad_norm": 2.140625, "learning_rate": 9.173015543162828e-06, "loss": 1.0937, "step": 2868 }, { "epoch": 0.5734989130706379, "grad_norm": 2.109375, "learning_rate": 9.172434962153573e-06, "loss": 1.0366, "step": 2869 }, { "epoch": 0.573698808125734, "grad_norm": 2.171875, "learning_rate": 9.171854195805202e-06, "loss": 1.0427, "step": 2870 }, { "epoch": 0.57389870318083, "grad_norm": 2.234375, "learning_rate": 9.171273244143512e-06, "loss": 1.0721, "step": 2871 }, { "epoch": 0.5740985982359261, "grad_norm": 2.046875, "learning_rate": 9.170692107194311e-06, "loss": 1.0075, "step": 2872 }, { "epoch": 0.5742984932910222, "grad_norm": 2.09375, "learning_rate": 9.170110784983413e-06, "loss": 1.0556, "step": 2873 }, { "epoch": 0.5744983883461183, "grad_norm": 2.0625, "learning_rate": 9.169529277536637e-06, "loss": 1.0713, "step": 2874 }, { "epoch": 0.5746982834012143, "grad_norm": 2.03125, "learning_rate": 9.168947584879818e-06, "loss": 0.9023, "step": 2875 }, { "epoch": 0.5748981784563104, "grad_norm": 2.15625, "learning_rate": 9.16836570703879e-06, "loss": 1.1496, "step": 2876 }, { "epoch": 0.5750980735114065, "grad_norm": 2.09375, "learning_rate": 9.167783644039405e-06, "loss": 1.145, "step": 2877 }, { "epoch": 0.5752979685665026, "grad_norm": 2.078125, "learning_rate": 9.167201395907515e-06, "loss": 0.9597, "step": 2878 }, { "epoch": 0.5754978636215987, "grad_norm": 2.109375, "learning_rate": 9.166618962668983e-06, "loss": 1.0683, "step": 2879 }, { "epoch": 0.5756977586766947, "grad_norm": 1.9296875, "learning_rate": 9.166036344349683e-06, "loss": 0.936, "step": 2880 }, { "epoch": 0.5758976537317908, "grad_norm": 2.0625, "learning_rate": 9.165453540975492e-06, "loss": 1.0168, "step": 2881 }, { "epoch": 0.5760975487868869, "grad_norm": 2.015625, "learning_rate": 9.1648705525723e-06, "loss": 1.0268, "step": 2882 }, { "epoch": 0.576297443841983, "grad_norm": 2.09375, "learning_rate": 9.164287379166004e-06, "loss": 1.0488, "step": 2883 }, { "epoch": 0.576497338897079, "grad_norm": 2.09375, "learning_rate": 9.163704020782507e-06, "loss": 1.0319, "step": 2884 }, { "epoch": 0.5766972339521751, "grad_norm": 2.03125, "learning_rate": 9.163120477447722e-06, "loss": 1.0859, "step": 2885 }, { "epoch": 0.5768971290072712, "grad_norm": 2.140625, "learning_rate": 9.16253674918757e-06, "loss": 1.0748, "step": 2886 }, { "epoch": 0.5770970240623673, "grad_norm": 2.03125, "learning_rate": 9.161952836027978e-06, "loss": 1.0619, "step": 2887 }, { "epoch": 0.5772969191174633, "grad_norm": 2.046875, "learning_rate": 9.16136873799489e-06, "loss": 1.0363, "step": 2888 }, { "epoch": 0.5774968141725594, "grad_norm": 1.9453125, "learning_rate": 9.160784455114245e-06, "loss": 0.9707, "step": 2889 }, { "epoch": 0.5776967092276555, "grad_norm": 2.078125, "learning_rate": 9.160199987411997e-06, "loss": 1.1169, "step": 2890 }, { "epoch": 0.5778966042827516, "grad_norm": 2.09375, "learning_rate": 9.159615334914112e-06, "loss": 1.0517, "step": 2891 }, { "epoch": 0.5780964993378477, "grad_norm": 2.1875, "learning_rate": 9.159030497646558e-06, "loss": 1.1246, "step": 2892 }, { "epoch": 0.5782963943929437, "grad_norm": 2.015625, "learning_rate": 9.158445475635312e-06, "loss": 0.9835, "step": 2893 }, { "epoch": 0.5784962894480398, "grad_norm": 2.0625, "learning_rate": 9.157860268906364e-06, "loss": 0.9527, "step": 2894 }, { "epoch": 0.5786961845031359, "grad_norm": 2.171875, "learning_rate": 9.157274877485706e-06, "loss": 1.0521, "step": 2895 }, { "epoch": 0.578896079558232, "grad_norm": 2.109375, "learning_rate": 9.156689301399342e-06, "loss": 1.0526, "step": 2896 }, { "epoch": 0.579095974613328, "grad_norm": 2.078125, "learning_rate": 9.156103540673282e-06, "loss": 1.0322, "step": 2897 }, { "epoch": 0.5792958696684241, "grad_norm": 2.234375, "learning_rate": 9.15551759533355e-06, "loss": 1.0069, "step": 2898 }, { "epoch": 0.5794957647235202, "grad_norm": 2.296875, "learning_rate": 9.154931465406167e-06, "loss": 1.1428, "step": 2899 }, { "epoch": 0.5796956597786163, "grad_norm": 2.609375, "learning_rate": 9.154345150917173e-06, "loss": 1.1547, "step": 2900 }, { "epoch": 0.5798955548337122, "grad_norm": 2.0625, "learning_rate": 9.15375865189261e-06, "loss": 1.1787, "step": 2901 }, { "epoch": 0.5800954498888083, "grad_norm": 2.046875, "learning_rate": 9.153171968358534e-06, "loss": 1.0503, "step": 2902 }, { "epoch": 0.5802953449439044, "grad_norm": 2.03125, "learning_rate": 9.152585100341e-06, "loss": 1.0329, "step": 2903 }, { "epoch": 0.5804952399990005, "grad_norm": 2.078125, "learning_rate": 9.151998047866082e-06, "loss": 1.0738, "step": 2904 }, { "epoch": 0.5806951350540966, "grad_norm": 2.109375, "learning_rate": 9.151410810959853e-06, "loss": 1.0141, "step": 2905 }, { "epoch": 0.5808950301091926, "grad_norm": 2.0625, "learning_rate": 9.150823389648398e-06, "loss": 1.1399, "step": 2906 }, { "epoch": 0.5810949251642887, "grad_norm": 2.046875, "learning_rate": 9.150235783957812e-06, "loss": 1.0416, "step": 2907 }, { "epoch": 0.5812948202193848, "grad_norm": 2.046875, "learning_rate": 9.149647993914196e-06, "loss": 1.0442, "step": 2908 }, { "epoch": 0.5814947152744809, "grad_norm": 2.140625, "learning_rate": 9.149060019543661e-06, "loss": 1.0853, "step": 2909 }, { "epoch": 0.5816946103295769, "grad_norm": 2.046875, "learning_rate": 9.148471860872322e-06, "loss": 0.9844, "step": 2910 }, { "epoch": 0.581894505384673, "grad_norm": 2.1875, "learning_rate": 9.147883517926303e-06, "loss": 1.0472, "step": 2911 }, { "epoch": 0.5820944004397691, "grad_norm": 2.171875, "learning_rate": 9.147294990731746e-06, "loss": 1.1004, "step": 2912 }, { "epoch": 0.5822942954948652, "grad_norm": 2.140625, "learning_rate": 9.146706279314786e-06, "loss": 1.075, "step": 2913 }, { "epoch": 0.5824941905499613, "grad_norm": 2.21875, "learning_rate": 9.146117383701575e-06, "loss": 0.9788, "step": 2914 }, { "epoch": 0.5826940856050573, "grad_norm": 2.125, "learning_rate": 9.145528303918273e-06, "loss": 1.0786, "step": 2915 }, { "epoch": 0.5828939806601534, "grad_norm": 2.171875, "learning_rate": 9.144939039991047e-06, "loss": 1.0987, "step": 2916 }, { "epoch": 0.5830938757152495, "grad_norm": 2.0625, "learning_rate": 9.144349591946072e-06, "loss": 1.0612, "step": 2917 }, { "epoch": 0.5832937707703456, "grad_norm": 2.09375, "learning_rate": 9.143759959809531e-06, "loss": 1.1392, "step": 2918 }, { "epoch": 0.5834936658254416, "grad_norm": 2.0, "learning_rate": 9.143170143607613e-06, "loss": 1.0307, "step": 2919 }, { "epoch": 0.5836935608805377, "grad_norm": 2.203125, "learning_rate": 9.142580143366523e-06, "loss": 1.1968, "step": 2920 }, { "epoch": 0.5838934559356338, "grad_norm": 2.15625, "learning_rate": 9.141989959112464e-06, "loss": 1.0975, "step": 2921 }, { "epoch": 0.5840933509907299, "grad_norm": 2.078125, "learning_rate": 9.141399590871652e-06, "loss": 1.0722, "step": 2922 }, { "epoch": 0.5842932460458259, "grad_norm": 2.171875, "learning_rate": 9.140809038670314e-06, "loss": 1.0535, "step": 2923 }, { "epoch": 0.584493141100922, "grad_norm": 2.078125, "learning_rate": 9.14021830253468e-06, "loss": 1.1045, "step": 2924 }, { "epoch": 0.5846930361560181, "grad_norm": 2.109375, "learning_rate": 9.139627382490992e-06, "loss": 1.0238, "step": 2925 }, { "epoch": 0.5848929312111142, "grad_norm": 2.15625, "learning_rate": 9.1390362785655e-06, "loss": 1.1183, "step": 2926 }, { "epoch": 0.5850928262662103, "grad_norm": 2.109375, "learning_rate": 9.138444990784455e-06, "loss": 1.0808, "step": 2927 }, { "epoch": 0.5852927213213063, "grad_norm": 2.1875, "learning_rate": 9.137853519174124e-06, "loss": 1.0787, "step": 2928 }, { "epoch": 0.5854926163764024, "grad_norm": 2.046875, "learning_rate": 9.137261863760784e-06, "loss": 1.0622, "step": 2929 }, { "epoch": 0.5856925114314985, "grad_norm": 2.1875, "learning_rate": 9.136670024570715e-06, "loss": 1.0653, "step": 2930 }, { "epoch": 0.5858924064865946, "grad_norm": 2.125, "learning_rate": 9.136078001630205e-06, "loss": 1.1184, "step": 2931 }, { "epoch": 0.5860923015416906, "grad_norm": 1.9375, "learning_rate": 9.13548579496555e-06, "loss": 0.9863, "step": 2932 }, { "epoch": 0.5862921965967867, "grad_norm": 2.09375, "learning_rate": 9.13489340460306e-06, "loss": 1.1144, "step": 2933 }, { "epoch": 0.5864920916518828, "grad_norm": 2.15625, "learning_rate": 9.134300830569046e-06, "loss": 1.1486, "step": 2934 }, { "epoch": 0.5866919867069789, "grad_norm": 2.203125, "learning_rate": 9.133708072889828e-06, "loss": 1.0694, "step": 2935 }, { "epoch": 0.586891881762075, "grad_norm": 2.078125, "learning_rate": 9.133115131591744e-06, "loss": 1.1313, "step": 2936 }, { "epoch": 0.587091776817171, "grad_norm": 2.0625, "learning_rate": 9.132522006701123e-06, "loss": 0.9837, "step": 2937 }, { "epoch": 0.587291671872267, "grad_norm": 2.265625, "learning_rate": 9.131928698244317e-06, "loss": 1.2114, "step": 2938 }, { "epoch": 0.5874915669273632, "grad_norm": 2.125, "learning_rate": 9.13133520624768e-06, "loss": 0.9378, "step": 2939 }, { "epoch": 0.5876914619824593, "grad_norm": 2.03125, "learning_rate": 9.130741530737573e-06, "loss": 1.0948, "step": 2940 }, { "epoch": 0.5878913570375552, "grad_norm": 2.046875, "learning_rate": 9.130147671740371e-06, "loss": 1.0223, "step": 2941 }, { "epoch": 0.5880912520926513, "grad_norm": 2.078125, "learning_rate": 9.129553629282448e-06, "loss": 1.1147, "step": 2942 }, { "epoch": 0.5882911471477474, "grad_norm": 2.109375, "learning_rate": 9.128959403390195e-06, "loss": 1.0553, "step": 2943 }, { "epoch": 0.5884910422028435, "grad_norm": 2.0625, "learning_rate": 9.128364994090007e-06, "loss": 1.0579, "step": 2944 }, { "epoch": 0.5886909372579395, "grad_norm": 2.1875, "learning_rate": 9.127770401408284e-06, "loss": 1.097, "step": 2945 }, { "epoch": 0.5888908323130356, "grad_norm": 2.125, "learning_rate": 9.127175625371443e-06, "loss": 1.0519, "step": 2946 }, { "epoch": 0.5890907273681317, "grad_norm": 2.15625, "learning_rate": 9.126580666005901e-06, "loss": 1.0373, "step": 2947 }, { "epoch": 0.5892906224232278, "grad_norm": 2.125, "learning_rate": 9.125985523338088e-06, "loss": 1.0299, "step": 2948 }, { "epoch": 0.5894905174783239, "grad_norm": 2.046875, "learning_rate": 9.125390197394437e-06, "loss": 1.0801, "step": 2949 }, { "epoch": 0.5896904125334199, "grad_norm": 2.078125, "learning_rate": 9.124794688201394e-06, "loss": 1.1661, "step": 2950 }, { "epoch": 0.589890307588516, "grad_norm": 2.015625, "learning_rate": 9.124198995785414e-06, "loss": 1.1123, "step": 2951 }, { "epoch": 0.5900902026436121, "grad_norm": 2.0, "learning_rate": 9.123603120172952e-06, "loss": 0.9848, "step": 2952 }, { "epoch": 0.5902900976987082, "grad_norm": 2.171875, "learning_rate": 9.123007061390481e-06, "loss": 0.9996, "step": 2953 }, { "epoch": 0.5904899927538042, "grad_norm": 2.0625, "learning_rate": 9.122410819464476e-06, "loss": 1.1073, "step": 2954 }, { "epoch": 0.5906898878089003, "grad_norm": 1.984375, "learning_rate": 9.121814394421423e-06, "loss": 0.9456, "step": 2955 }, { "epoch": 0.5908897828639964, "grad_norm": 2.046875, "learning_rate": 9.121217786287815e-06, "loss": 1.0559, "step": 2956 }, { "epoch": 0.5910896779190925, "grad_norm": 2.09375, "learning_rate": 9.120620995090154e-06, "loss": 0.9933, "step": 2957 }, { "epoch": 0.5912895729741886, "grad_norm": 1.9765625, "learning_rate": 9.120024020854947e-06, "loss": 1.0785, "step": 2958 }, { "epoch": 0.5914894680292846, "grad_norm": 2.140625, "learning_rate": 9.119426863608713e-06, "loss": 1.1198, "step": 2959 }, { "epoch": 0.5916893630843807, "grad_norm": 1.984375, "learning_rate": 9.11882952337798e-06, "loss": 0.9539, "step": 2960 }, { "epoch": 0.5918892581394768, "grad_norm": 1.8671875, "learning_rate": 9.118232000189277e-06, "loss": 0.9188, "step": 2961 }, { "epoch": 0.5920891531945729, "grad_norm": 2.109375, "learning_rate": 9.117634294069148e-06, "loss": 1.1255, "step": 2962 }, { "epoch": 0.5922890482496689, "grad_norm": 2.046875, "learning_rate": 9.117036405044146e-06, "loss": 1.1125, "step": 2963 }, { "epoch": 0.592488943304765, "grad_norm": 2.078125, "learning_rate": 9.116438333140825e-06, "loss": 1.0321, "step": 2964 }, { "epoch": 0.5926888383598611, "grad_norm": 2.171875, "learning_rate": 9.115840078385753e-06, "loss": 1.0814, "step": 2965 }, { "epoch": 0.5928887334149572, "grad_norm": 2.078125, "learning_rate": 9.115241640805506e-06, "loss": 1.0344, "step": 2966 }, { "epoch": 0.5930886284700532, "grad_norm": 2.109375, "learning_rate": 9.114643020426662e-06, "loss": 1.0181, "step": 2967 }, { "epoch": 0.5932885235251493, "grad_norm": 2.109375, "learning_rate": 9.114044217275816e-06, "loss": 1.0138, "step": 2968 }, { "epoch": 0.5934884185802454, "grad_norm": 2.25, "learning_rate": 9.113445231379565e-06, "loss": 1.1072, "step": 2969 }, { "epoch": 0.5936883136353415, "grad_norm": 2.078125, "learning_rate": 9.112846062764516e-06, "loss": 0.9869, "step": 2970 }, { "epoch": 0.5938882086904376, "grad_norm": 2.109375, "learning_rate": 9.112246711457284e-06, "loss": 0.998, "step": 2971 }, { "epoch": 0.5940881037455336, "grad_norm": 2.203125, "learning_rate": 9.111647177484493e-06, "loss": 1.0892, "step": 2972 }, { "epoch": 0.5942879988006297, "grad_norm": 2.3125, "learning_rate": 9.111047460872773e-06, "loss": 1.1074, "step": 2973 }, { "epoch": 0.5944878938557258, "grad_norm": 2.078125, "learning_rate": 9.110447561648766e-06, "loss": 1.0628, "step": 2974 }, { "epoch": 0.5946877889108219, "grad_norm": 2.0625, "learning_rate": 9.109847479839114e-06, "loss": 1.0733, "step": 2975 }, { "epoch": 0.5948876839659178, "grad_norm": 1.984375, "learning_rate": 9.109247215470478e-06, "loss": 1.0749, "step": 2976 }, { "epoch": 0.595087579021014, "grad_norm": 2.078125, "learning_rate": 9.108646768569518e-06, "loss": 0.9976, "step": 2977 }, { "epoch": 0.59528747407611, "grad_norm": 2.125, "learning_rate": 9.108046139162908e-06, "loss": 1.0781, "step": 2978 }, { "epoch": 0.5954873691312061, "grad_norm": 2.046875, "learning_rate": 9.107445327277327e-06, "loss": 1.1034, "step": 2979 }, { "epoch": 0.5956872641863022, "grad_norm": 1.953125, "learning_rate": 9.106844332939464e-06, "loss": 0.968, "step": 2980 }, { "epoch": 0.5958871592413982, "grad_norm": 2.09375, "learning_rate": 9.106243156176015e-06, "loss": 1.0788, "step": 2981 }, { "epoch": 0.5960870542964943, "grad_norm": 1.96875, "learning_rate": 9.105641797013682e-06, "loss": 1.058, "step": 2982 }, { "epoch": 0.5962869493515904, "grad_norm": 1.96875, "learning_rate": 9.10504025547918e-06, "loss": 1.0087, "step": 2983 }, { "epoch": 0.5964868444066865, "grad_norm": 2.21875, "learning_rate": 9.104438531599227e-06, "loss": 1.013, "step": 2984 }, { "epoch": 0.5966867394617825, "grad_norm": 2.046875, "learning_rate": 9.103836625400554e-06, "loss": 0.9962, "step": 2985 }, { "epoch": 0.5968866345168786, "grad_norm": 2.15625, "learning_rate": 9.103234536909895e-06, "loss": 1.0629, "step": 2986 }, { "epoch": 0.5970865295719747, "grad_norm": 2.09375, "learning_rate": 9.102632266153997e-06, "loss": 1.1041, "step": 2987 }, { "epoch": 0.5972864246270708, "grad_norm": 2.015625, "learning_rate": 9.102029813159613e-06, "loss": 0.9685, "step": 2988 }, { "epoch": 0.5974863196821668, "grad_norm": 2.03125, "learning_rate": 9.101427177953502e-06, "loss": 0.9442, "step": 2989 }, { "epoch": 0.5976862147372629, "grad_norm": 2.171875, "learning_rate": 9.100824360562432e-06, "loss": 0.9826, "step": 2990 }, { "epoch": 0.597886109792359, "grad_norm": 2.03125, "learning_rate": 9.100221361013185e-06, "loss": 0.9846, "step": 2991 }, { "epoch": 0.5980860048474551, "grad_norm": 2.140625, "learning_rate": 9.099618179332541e-06, "loss": 1.1631, "step": 2992 }, { "epoch": 0.5982858999025512, "grad_norm": 2.171875, "learning_rate": 9.099014815547296e-06, "loss": 1.1305, "step": 2993 }, { "epoch": 0.5984857949576472, "grad_norm": 2.0625, "learning_rate": 9.098411269684251e-06, "loss": 0.9717, "step": 2994 }, { "epoch": 0.5986856900127433, "grad_norm": 2.21875, "learning_rate": 9.097807541770214e-06, "loss": 1.1103, "step": 2995 }, { "epoch": 0.5988855850678394, "grad_norm": 2.046875, "learning_rate": 9.097203631832006e-06, "loss": 1.0348, "step": 2996 }, { "epoch": 0.5990854801229355, "grad_norm": 2.265625, "learning_rate": 9.096599539896447e-06, "loss": 1.1009, "step": 2997 }, { "epoch": 0.5992853751780315, "grad_norm": 2.109375, "learning_rate": 9.095995265990375e-06, "loss": 1.0759, "step": 2998 }, { "epoch": 0.5994852702331276, "grad_norm": 2.15625, "learning_rate": 9.095390810140633e-06, "loss": 1.0881, "step": 2999 }, { "epoch": 0.5996851652882237, "grad_norm": 2.125, "learning_rate": 9.094786172374066e-06, "loss": 1.0333, "step": 3000 }, { "epoch": 0.5996851652882237, "eval_loss": 0.9266427159309387, "eval_runtime": 594.7722, "eval_samples_per_second": 3.595, "eval_steps_per_second": 3.595, "step": 3000 }, { "epoch": 0.5998850603433198, "grad_norm": 1.9921875, "learning_rate": 9.094181352717535e-06, "loss": 1.0592, "step": 3001 }, { "epoch": 0.6000849553984159, "grad_norm": 2.234375, "learning_rate": 9.093576351197907e-06, "loss": 1.1619, "step": 3002 }, { "epoch": 0.6002848504535119, "grad_norm": 2.15625, "learning_rate": 9.092971167842053e-06, "loss": 1.0548, "step": 3003 }, { "epoch": 0.600484745508608, "grad_norm": 2.328125, "learning_rate": 9.092365802676858e-06, "loss": 1.1491, "step": 3004 }, { "epoch": 0.6006846405637041, "grad_norm": 2.140625, "learning_rate": 9.091760255729212e-06, "loss": 1.0809, "step": 3005 }, { "epoch": 0.6008845356188002, "grad_norm": 2.078125, "learning_rate": 9.09115452702601e-06, "loss": 1.0501, "step": 3006 }, { "epoch": 0.6010844306738962, "grad_norm": 2.234375, "learning_rate": 9.090548616594163e-06, "loss": 1.0777, "step": 3007 }, { "epoch": 0.6012843257289923, "grad_norm": 2.1875, "learning_rate": 9.089942524460582e-06, "loss": 1.0544, "step": 3008 }, { "epoch": 0.6014842207840884, "grad_norm": 2.0625, "learning_rate": 9.08933625065219e-06, "loss": 1.0639, "step": 3009 }, { "epoch": 0.6016841158391845, "grad_norm": 2.046875, "learning_rate": 9.088729795195921e-06, "loss": 1.0678, "step": 3010 }, { "epoch": 0.6018840108942805, "grad_norm": 2.140625, "learning_rate": 9.08812315811871e-06, "loss": 1.0587, "step": 3011 }, { "epoch": 0.6020839059493766, "grad_norm": 2.0625, "learning_rate": 9.087516339447504e-06, "loss": 1.0833, "step": 3012 }, { "epoch": 0.6022838010044727, "grad_norm": 2.28125, "learning_rate": 9.08690933920926e-06, "loss": 1.1271, "step": 3013 }, { "epoch": 0.6024836960595688, "grad_norm": 1.9453125, "learning_rate": 9.08630215743094e-06, "loss": 0.9931, "step": 3014 }, { "epoch": 0.6026835911146649, "grad_norm": 2.25, "learning_rate": 9.085694794139514e-06, "loss": 1.0981, "step": 3015 }, { "epoch": 0.6028834861697608, "grad_norm": 2.015625, "learning_rate": 9.085087249361961e-06, "loss": 1.0819, "step": 3016 }, { "epoch": 0.6030833812248569, "grad_norm": 2.03125, "learning_rate": 9.08447952312527e-06, "loss": 0.9643, "step": 3017 }, { "epoch": 0.603283276279953, "grad_norm": 2.15625, "learning_rate": 9.083871615456433e-06, "loss": 1.0773, "step": 3018 }, { "epoch": 0.6034831713350491, "grad_norm": 2.046875, "learning_rate": 9.083263526382457e-06, "loss": 0.9463, "step": 3019 }, { "epoch": 0.6036830663901451, "grad_norm": 2.21875, "learning_rate": 9.08265525593035e-06, "loss": 1.0375, "step": 3020 }, { "epoch": 0.6038829614452412, "grad_norm": 1.9765625, "learning_rate": 9.082046804127133e-06, "loss": 0.9687, "step": 3021 }, { "epoch": 0.6040828565003373, "grad_norm": 2.140625, "learning_rate": 9.081438170999833e-06, "loss": 1.1131, "step": 3022 }, { "epoch": 0.6042827515554334, "grad_norm": 2.0625, "learning_rate": 9.080829356575484e-06, "loss": 0.963, "step": 3023 }, { "epoch": 0.6044826466105294, "grad_norm": 2.140625, "learning_rate": 9.080220360881133e-06, "loss": 1.1095, "step": 3024 }, { "epoch": 0.6046825416656255, "grad_norm": 1.9921875, "learning_rate": 9.079611183943828e-06, "loss": 1.0107, "step": 3025 }, { "epoch": 0.6048824367207216, "grad_norm": 2.171875, "learning_rate": 9.079001825790632e-06, "loss": 1.0581, "step": 3026 }, { "epoch": 0.6050823317758177, "grad_norm": 2.203125, "learning_rate": 9.078392286448607e-06, "loss": 1.1935, "step": 3027 }, { "epoch": 0.6052822268309138, "grad_norm": 2.09375, "learning_rate": 9.077782565944836e-06, "loss": 1.0421, "step": 3028 }, { "epoch": 0.6054821218860098, "grad_norm": 2.25, "learning_rate": 9.077172664306396e-06, "loss": 1.1599, "step": 3029 }, { "epoch": 0.6056820169411059, "grad_norm": 2.234375, "learning_rate": 9.076562581560384e-06, "loss": 1.0622, "step": 3030 }, { "epoch": 0.605881911996202, "grad_norm": 2.203125, "learning_rate": 9.075952317733894e-06, "loss": 1.0438, "step": 3031 }, { "epoch": 0.6060818070512981, "grad_norm": 2.0625, "learning_rate": 9.07534187285404e-06, "loss": 0.9847, "step": 3032 }, { "epoch": 0.6062817021063941, "grad_norm": 2.140625, "learning_rate": 9.074731246947936e-06, "loss": 1.0763, "step": 3033 }, { "epoch": 0.6064815971614902, "grad_norm": 2.0, "learning_rate": 9.074120440042705e-06, "loss": 0.954, "step": 3034 }, { "epoch": 0.6066814922165863, "grad_norm": 2.0625, "learning_rate": 9.073509452165476e-06, "loss": 1.0371, "step": 3035 }, { "epoch": 0.6068813872716824, "grad_norm": 2.078125, "learning_rate": 9.072898283343395e-06, "loss": 1.0441, "step": 3036 }, { "epoch": 0.6070812823267785, "grad_norm": 2.328125, "learning_rate": 9.072286933603607e-06, "loss": 1.1834, "step": 3037 }, { "epoch": 0.6072811773818745, "grad_norm": 2.015625, "learning_rate": 9.071675402973268e-06, "loss": 1.1353, "step": 3038 }, { "epoch": 0.6074810724369706, "grad_norm": 2.09375, "learning_rate": 9.071063691479542e-06, "loss": 1.0175, "step": 3039 }, { "epoch": 0.6076809674920667, "grad_norm": 2.046875, "learning_rate": 9.070451799149604e-06, "loss": 1.003, "step": 3040 }, { "epoch": 0.6078808625471628, "grad_norm": 2.421875, "learning_rate": 9.069839726010629e-06, "loss": 1.2073, "step": 3041 }, { "epoch": 0.6080807576022588, "grad_norm": 2.265625, "learning_rate": 9.06922747208981e-06, "loss": 1.0623, "step": 3042 }, { "epoch": 0.6082806526573549, "grad_norm": 2.046875, "learning_rate": 9.068615037414339e-06, "loss": 1.0947, "step": 3043 }, { "epoch": 0.608480547712451, "grad_norm": 2.03125, "learning_rate": 9.068002422011426e-06, "loss": 0.8832, "step": 3044 }, { "epoch": 0.6086804427675471, "grad_norm": 2.0625, "learning_rate": 9.067389625908277e-06, "loss": 1.0155, "step": 3045 }, { "epoch": 0.6088803378226431, "grad_norm": 2.265625, "learning_rate": 9.066776649132116e-06, "loss": 1.1535, "step": 3046 }, { "epoch": 0.6090802328777392, "grad_norm": 2.171875, "learning_rate": 9.06616349171017e-06, "loss": 1.0368, "step": 3047 }, { "epoch": 0.6092801279328353, "grad_norm": 2.234375, "learning_rate": 9.065550153669676e-06, "loss": 1.1035, "step": 3048 }, { "epoch": 0.6094800229879314, "grad_norm": 2.0, "learning_rate": 9.06493663503788e-06, "loss": 1.0604, "step": 3049 }, { "epoch": 0.6096799180430275, "grad_norm": 2.03125, "learning_rate": 9.064322935842032e-06, "loss": 0.999, "step": 3050 }, { "epoch": 0.6098798130981234, "grad_norm": 2.046875, "learning_rate": 9.063709056109393e-06, "loss": 1.0718, "step": 3051 }, { "epoch": 0.6100797081532195, "grad_norm": 2.03125, "learning_rate": 9.063094995867232e-06, "loss": 1.0119, "step": 3052 }, { "epoch": 0.6102796032083156, "grad_norm": 2.0625, "learning_rate": 9.062480755142824e-06, "loss": 0.9728, "step": 3053 }, { "epoch": 0.6104794982634117, "grad_norm": 2.1875, "learning_rate": 9.061866333963455e-06, "loss": 1.0766, "step": 3054 }, { "epoch": 0.6106793933185077, "grad_norm": 2.109375, "learning_rate": 9.06125173235642e-06, "loss": 1.0861, "step": 3055 }, { "epoch": 0.6108792883736038, "grad_norm": 2.0, "learning_rate": 9.060636950349015e-06, "loss": 1.0806, "step": 3056 }, { "epoch": 0.6110791834286999, "grad_norm": 2.0, "learning_rate": 9.06002198796855e-06, "loss": 1.0579, "step": 3057 }, { "epoch": 0.611279078483796, "grad_norm": 2.1875, "learning_rate": 9.059406845242343e-06, "loss": 1.0949, "step": 3058 }, { "epoch": 0.6114789735388921, "grad_norm": 2.0625, "learning_rate": 9.058791522197717e-06, "loss": 0.9956, "step": 3059 }, { "epoch": 0.6116788685939881, "grad_norm": 1.9921875, "learning_rate": 9.058176018862004e-06, "loss": 1.0662, "step": 3060 }, { "epoch": 0.6118787636490842, "grad_norm": 1.984375, "learning_rate": 9.057560335262546e-06, "loss": 1.0139, "step": 3061 }, { "epoch": 0.6120786587041803, "grad_norm": 2.078125, "learning_rate": 9.056944471426692e-06, "loss": 1.0547, "step": 3062 }, { "epoch": 0.6122785537592764, "grad_norm": 2.09375, "learning_rate": 9.056328427381798e-06, "loss": 1.1331, "step": 3063 }, { "epoch": 0.6124784488143724, "grad_norm": 2.203125, "learning_rate": 9.055712203155226e-06, "loss": 1.0904, "step": 3064 }, { "epoch": 0.6126783438694685, "grad_norm": 2.09375, "learning_rate": 9.055095798774353e-06, "loss": 1.0879, "step": 3065 }, { "epoch": 0.6128782389245646, "grad_norm": 2.0, "learning_rate": 9.05447921426656e-06, "loss": 1.0473, "step": 3066 }, { "epoch": 0.6130781339796607, "grad_norm": 1.9453125, "learning_rate": 9.05386244965923e-06, "loss": 1.0045, "step": 3067 }, { "epoch": 0.6132780290347567, "grad_norm": 2.078125, "learning_rate": 9.053245504979764e-06, "loss": 0.9839, "step": 3068 }, { "epoch": 0.6134779240898528, "grad_norm": 2.09375, "learning_rate": 9.052628380255565e-06, "loss": 1.0137, "step": 3069 }, { "epoch": 0.6136778191449489, "grad_norm": 1.9375, "learning_rate": 9.052011075514049e-06, "loss": 0.985, "step": 3070 }, { "epoch": 0.613877714200045, "grad_norm": 2.0625, "learning_rate": 9.051393590782631e-06, "loss": 0.9909, "step": 3071 }, { "epoch": 0.6140776092551411, "grad_norm": 1.9609375, "learning_rate": 9.050775926088743e-06, "loss": 1.0162, "step": 3072 }, { "epoch": 0.6142775043102371, "grad_norm": 2.046875, "learning_rate": 9.050158081459821e-06, "loss": 1.0008, "step": 3073 }, { "epoch": 0.6144773993653332, "grad_norm": 1.953125, "learning_rate": 9.049540056923309e-06, "loss": 1.0254, "step": 3074 }, { "epoch": 0.6146772944204293, "grad_norm": 2.078125, "learning_rate": 9.048921852506662e-06, "loss": 1.0035, "step": 3075 }, { "epoch": 0.6148771894755254, "grad_norm": 2.09375, "learning_rate": 9.048303468237337e-06, "loss": 1.0047, "step": 3076 }, { "epoch": 0.6150770845306214, "grad_norm": 2.25, "learning_rate": 9.047684904142806e-06, "loss": 1.0216, "step": 3077 }, { "epoch": 0.6152769795857175, "grad_norm": 2.203125, "learning_rate": 9.047066160250542e-06, "loss": 1.0792, "step": 3078 }, { "epoch": 0.6154768746408136, "grad_norm": 1.96875, "learning_rate": 9.046447236588032e-06, "loss": 0.9194, "step": 3079 }, { "epoch": 0.6156767696959097, "grad_norm": 2.21875, "learning_rate": 9.045828133182769e-06, "loss": 1.0588, "step": 3080 }, { "epoch": 0.6158766647510058, "grad_norm": 2.03125, "learning_rate": 9.045208850062252e-06, "loss": 0.926, "step": 3081 }, { "epoch": 0.6160765598061018, "grad_norm": 3.609375, "learning_rate": 9.044589387253988e-06, "loss": 1.0148, "step": 3082 }, { "epoch": 0.6162764548611979, "grad_norm": 2.203125, "learning_rate": 9.043969744785498e-06, "loss": 1.0506, "step": 3083 }, { "epoch": 0.616476349916294, "grad_norm": 2.015625, "learning_rate": 9.043349922684302e-06, "loss": 1.0651, "step": 3084 }, { "epoch": 0.6166762449713901, "grad_norm": 2.234375, "learning_rate": 9.042729920977936e-06, "loss": 1.0557, "step": 3085 }, { "epoch": 0.616876140026486, "grad_norm": 2.09375, "learning_rate": 9.042109739693938e-06, "loss": 1.0763, "step": 3086 }, { "epoch": 0.6170760350815822, "grad_norm": 2.09375, "learning_rate": 9.041489378859856e-06, "loss": 1.0005, "step": 3087 }, { "epoch": 0.6172759301366783, "grad_norm": 2.109375, "learning_rate": 9.040868838503247e-06, "loss": 1.0142, "step": 3088 }, { "epoch": 0.6174758251917744, "grad_norm": 2.046875, "learning_rate": 9.040248118651677e-06, "loss": 1.0612, "step": 3089 }, { "epoch": 0.6176757202468703, "grad_norm": 2.046875, "learning_rate": 9.039627219332718e-06, "loss": 1.0389, "step": 3090 }, { "epoch": 0.6178756153019664, "grad_norm": 2.0, "learning_rate": 9.03900614057395e-06, "loss": 0.9776, "step": 3091 }, { "epoch": 0.6180755103570625, "grad_norm": 2.125, "learning_rate": 9.038384882402957e-06, "loss": 0.9351, "step": 3092 }, { "epoch": 0.6182754054121586, "grad_norm": 2.09375, "learning_rate": 9.037763444847342e-06, "loss": 1.0853, "step": 3093 }, { "epoch": 0.6184753004672547, "grad_norm": 2.15625, "learning_rate": 9.037141827934705e-06, "loss": 1.0996, "step": 3094 }, { "epoch": 0.6186751955223507, "grad_norm": 2.140625, "learning_rate": 9.036520031692658e-06, "loss": 0.9808, "step": 3095 }, { "epoch": 0.6188750905774468, "grad_norm": 2.15625, "learning_rate": 9.035898056148824e-06, "loss": 1.0767, "step": 3096 }, { "epoch": 0.6190749856325429, "grad_norm": 2.125, "learning_rate": 9.035275901330828e-06, "loss": 0.9893, "step": 3097 }, { "epoch": 0.619274880687639, "grad_norm": 2.71875, "learning_rate": 9.03465356726631e-06, "loss": 1.0129, "step": 3098 }, { "epoch": 0.619474775742735, "grad_norm": 2.015625, "learning_rate": 9.034031053982909e-06, "loss": 1.0106, "step": 3099 }, { "epoch": 0.6196746707978311, "grad_norm": 2.046875, "learning_rate": 9.03340836150828e-06, "loss": 0.9799, "step": 3100 }, { "epoch": 0.6198745658529272, "grad_norm": 2.03125, "learning_rate": 9.032785489870084e-06, "loss": 0.9461, "step": 3101 }, { "epoch": 0.6200744609080233, "grad_norm": 2.109375, "learning_rate": 9.032162439095984e-06, "loss": 1.1415, "step": 3102 }, { "epoch": 0.6202743559631194, "grad_norm": 2.21875, "learning_rate": 9.031539209213662e-06, "loss": 1.1099, "step": 3103 }, { "epoch": 0.6204742510182154, "grad_norm": 2.09375, "learning_rate": 9.030915800250797e-06, "loss": 1.03, "step": 3104 }, { "epoch": 0.6206741460733115, "grad_norm": 2.09375, "learning_rate": 9.030292212235083e-06, "loss": 1.0174, "step": 3105 }, { "epoch": 0.6208740411284076, "grad_norm": 2.171875, "learning_rate": 9.02966844519422e-06, "loss": 1.0381, "step": 3106 }, { "epoch": 0.6210739361835037, "grad_norm": 2.09375, "learning_rate": 9.029044499155914e-06, "loss": 0.9533, "step": 3107 }, { "epoch": 0.6212738312385997, "grad_norm": 2.125, "learning_rate": 9.028420374147885e-06, "loss": 0.9208, "step": 3108 }, { "epoch": 0.6214737262936958, "grad_norm": 2.140625, "learning_rate": 9.02779607019785e-06, "loss": 1.0589, "step": 3109 }, { "epoch": 0.6216736213487919, "grad_norm": 2.09375, "learning_rate": 9.027171587333543e-06, "loss": 1.0516, "step": 3110 }, { "epoch": 0.621873516403888, "grad_norm": 2.03125, "learning_rate": 9.026546925582707e-06, "loss": 1.038, "step": 3111 }, { "epoch": 0.622073411458984, "grad_norm": 2.046875, "learning_rate": 9.025922084973084e-06, "loss": 1.0842, "step": 3112 }, { "epoch": 0.6222733065140801, "grad_norm": 2.171875, "learning_rate": 9.025297065532435e-06, "loss": 1.1177, "step": 3113 }, { "epoch": 0.6224732015691762, "grad_norm": 2.25, "learning_rate": 9.024671867288518e-06, "loss": 1.1309, "step": 3114 }, { "epoch": 0.6226730966242723, "grad_norm": 2.203125, "learning_rate": 9.024046490269107e-06, "loss": 1.1373, "step": 3115 }, { "epoch": 0.6228729916793684, "grad_norm": 2.078125, "learning_rate": 9.023420934501981e-06, "loss": 1.0819, "step": 3116 }, { "epoch": 0.6230728867344644, "grad_norm": 2.09375, "learning_rate": 9.022795200014927e-06, "loss": 0.9935, "step": 3117 }, { "epoch": 0.6232727817895605, "grad_norm": 2.015625, "learning_rate": 9.022169286835737e-06, "loss": 0.9575, "step": 3118 }, { "epoch": 0.6234726768446566, "grad_norm": 2.125, "learning_rate": 9.02154319499222e-06, "loss": 1.0777, "step": 3119 }, { "epoch": 0.6236725718997527, "grad_norm": 2.109375, "learning_rate": 9.020916924512183e-06, "loss": 1.0401, "step": 3120 }, { "epoch": 0.6238724669548487, "grad_norm": 2.046875, "learning_rate": 9.020290475423447e-06, "loss": 0.9766, "step": 3121 }, { "epoch": 0.6240723620099448, "grad_norm": 2.09375, "learning_rate": 9.019663847753837e-06, "loss": 1.0157, "step": 3122 }, { "epoch": 0.6242722570650409, "grad_norm": 2.265625, "learning_rate": 9.019037041531187e-06, "loss": 0.9897, "step": 3123 }, { "epoch": 0.624472152120137, "grad_norm": 2.109375, "learning_rate": 9.01841005678334e-06, "loss": 0.974, "step": 3124 }, { "epoch": 0.624672047175233, "grad_norm": 2.078125, "learning_rate": 9.017782893538149e-06, "loss": 1.0121, "step": 3125 }, { "epoch": 0.624871942230329, "grad_norm": 2.046875, "learning_rate": 9.01715555182347e-06, "loss": 1.0159, "step": 3126 }, { "epoch": 0.6250718372854251, "grad_norm": 2.09375, "learning_rate": 9.016528031667173e-06, "loss": 1.0378, "step": 3127 }, { "epoch": 0.6252717323405212, "grad_norm": 2.0625, "learning_rate": 9.015900333097127e-06, "loss": 1.0614, "step": 3128 }, { "epoch": 0.6254716273956173, "grad_norm": 2.09375, "learning_rate": 9.015272456141218e-06, "loss": 1.041, "step": 3129 }, { "epoch": 0.6256715224507133, "grad_norm": 2.015625, "learning_rate": 9.014644400827336e-06, "loss": 1.0148, "step": 3130 }, { "epoch": 0.6258714175058094, "grad_norm": 2.171875, "learning_rate": 9.014016167183378e-06, "loss": 1.0268, "step": 3131 }, { "epoch": 0.6260713125609055, "grad_norm": 2.1875, "learning_rate": 9.013387755237251e-06, "loss": 0.9141, "step": 3132 }, { "epoch": 0.6262712076160016, "grad_norm": 2.203125, "learning_rate": 9.012759165016867e-06, "loss": 1.0393, "step": 3133 }, { "epoch": 0.6264711026710976, "grad_norm": 2.140625, "learning_rate": 9.01213039655015e-06, "loss": 1.0668, "step": 3134 }, { "epoch": 0.6266709977261937, "grad_norm": 2.09375, "learning_rate": 9.01150144986503e-06, "loss": 1.0435, "step": 3135 }, { "epoch": 0.6268708927812898, "grad_norm": 2.21875, "learning_rate": 9.010872324989444e-06, "loss": 1.0377, "step": 3136 }, { "epoch": 0.6270707878363859, "grad_norm": 2.171875, "learning_rate": 9.010243021951338e-06, "loss": 1.021, "step": 3137 }, { "epoch": 0.627270682891482, "grad_norm": 2.0, "learning_rate": 9.009613540778666e-06, "loss": 0.9202, "step": 3138 }, { "epoch": 0.627470577946578, "grad_norm": 2.21875, "learning_rate": 9.008983881499387e-06, "loss": 1.0746, "step": 3139 }, { "epoch": 0.6276704730016741, "grad_norm": 1.9609375, "learning_rate": 9.008354044141471e-06, "loss": 0.9922, "step": 3140 }, { "epoch": 0.6278703680567702, "grad_norm": 2.15625, "learning_rate": 9.0077240287329e-06, "loss": 1.1474, "step": 3141 }, { "epoch": 0.6280702631118663, "grad_norm": 2.0, "learning_rate": 9.007093835301652e-06, "loss": 1.0599, "step": 3142 }, { "epoch": 0.6282701581669623, "grad_norm": 2.0625, "learning_rate": 9.006463463875728e-06, "loss": 1.044, "step": 3143 }, { "epoch": 0.6284700532220584, "grad_norm": 2.0, "learning_rate": 9.005832914483121e-06, "loss": 0.971, "step": 3144 }, { "epoch": 0.6286699482771545, "grad_norm": 1.9765625, "learning_rate": 9.005202187151845e-06, "loss": 1.0, "step": 3145 }, { "epoch": 0.6288698433322506, "grad_norm": 2.0, "learning_rate": 9.004571281909918e-06, "loss": 1.0634, "step": 3146 }, { "epoch": 0.6290697383873466, "grad_norm": 2.109375, "learning_rate": 9.00394019878536e-06, "loss": 0.9944, "step": 3147 }, { "epoch": 0.6292696334424427, "grad_norm": 2.1875, "learning_rate": 9.003308937806206e-06, "loss": 1.077, "step": 3148 }, { "epoch": 0.6294695284975388, "grad_norm": 2.140625, "learning_rate": 9.002677499000496e-06, "loss": 1.1407, "step": 3149 }, { "epoch": 0.6296694235526349, "grad_norm": 2.046875, "learning_rate": 9.002045882396279e-06, "loss": 1.0637, "step": 3150 }, { "epoch": 0.629869318607731, "grad_norm": 2.171875, "learning_rate": 9.001414088021612e-06, "loss": 1.0534, "step": 3151 }, { "epoch": 0.630069213662827, "grad_norm": 1.9296875, "learning_rate": 9.00078211590456e-06, "loss": 1.0054, "step": 3152 }, { "epoch": 0.6302691087179231, "grad_norm": 2.015625, "learning_rate": 9.000149966073192e-06, "loss": 0.9175, "step": 3153 }, { "epoch": 0.6304690037730192, "grad_norm": 2.125, "learning_rate": 8.99951763855559e-06, "loss": 1.1064, "step": 3154 }, { "epoch": 0.6306688988281153, "grad_norm": 2.0625, "learning_rate": 8.998885133379842e-06, "loss": 1.07, "step": 3155 }, { "epoch": 0.6308687938832113, "grad_norm": 2.125, "learning_rate": 8.998252450574044e-06, "loss": 1.0637, "step": 3156 }, { "epoch": 0.6310686889383074, "grad_norm": 2.078125, "learning_rate": 8.997619590166298e-06, "loss": 1.1576, "step": 3157 }, { "epoch": 0.6312685839934035, "grad_norm": 2.09375, "learning_rate": 8.996986552184716e-06, "loss": 1.0527, "step": 3158 }, { "epoch": 0.6314684790484996, "grad_norm": 2.03125, "learning_rate": 8.996353336657421e-06, "loss": 1.0808, "step": 3159 }, { "epoch": 0.6316683741035957, "grad_norm": 2.21875, "learning_rate": 8.995719943612535e-06, "loss": 1.2639, "step": 3160 }, { "epoch": 0.6318682691586917, "grad_norm": 1.9140625, "learning_rate": 8.995086373078197e-06, "loss": 0.948, "step": 3161 }, { "epoch": 0.6320681642137878, "grad_norm": 1.921875, "learning_rate": 8.99445262508255e-06, "loss": 0.9227, "step": 3162 }, { "epoch": 0.6322680592688839, "grad_norm": 2.03125, "learning_rate": 8.993818699653742e-06, "loss": 1.0668, "step": 3163 }, { "epoch": 0.63246795432398, "grad_norm": 2.203125, "learning_rate": 8.993184596819935e-06, "loss": 1.103, "step": 3164 }, { "epoch": 0.6326678493790759, "grad_norm": 2.125, "learning_rate": 8.992550316609294e-06, "loss": 1.0526, "step": 3165 }, { "epoch": 0.632867744434172, "grad_norm": 2.3125, "learning_rate": 8.991915859049996e-06, "loss": 1.105, "step": 3166 }, { "epoch": 0.6330676394892681, "grad_norm": 2.03125, "learning_rate": 8.99128122417022e-06, "loss": 0.9941, "step": 3167 }, { "epoch": 0.6332675345443642, "grad_norm": 2.1875, "learning_rate": 8.990646411998161e-06, "loss": 1.0805, "step": 3168 }, { "epoch": 0.6334674295994602, "grad_norm": 1.96875, "learning_rate": 8.990011422562012e-06, "loss": 0.986, "step": 3169 }, { "epoch": 0.6336673246545563, "grad_norm": 2.171875, "learning_rate": 8.989376255889982e-06, "loss": 1.0714, "step": 3170 }, { "epoch": 0.6338672197096524, "grad_norm": 2.125, "learning_rate": 8.988740912010285e-06, "loss": 1.1068, "step": 3171 }, { "epoch": 0.6340671147647485, "grad_norm": 2.109375, "learning_rate": 8.988105390951143e-06, "loss": 1.0174, "step": 3172 }, { "epoch": 0.6342670098198446, "grad_norm": 2.140625, "learning_rate": 8.987469692740787e-06, "loss": 1.1122, "step": 3173 }, { "epoch": 0.6344669048749406, "grad_norm": 2.03125, "learning_rate": 8.98683381740745e-06, "loss": 1.0848, "step": 3174 }, { "epoch": 0.6346667999300367, "grad_norm": 2.21875, "learning_rate": 8.986197764979382e-06, "loss": 1.025, "step": 3175 }, { "epoch": 0.6348666949851328, "grad_norm": 1.9609375, "learning_rate": 8.985561535484836e-06, "loss": 0.9516, "step": 3176 }, { "epoch": 0.6350665900402289, "grad_norm": 2.21875, "learning_rate": 8.984925128952072e-06, "loss": 1.1565, "step": 3177 }, { "epoch": 0.6352664850953249, "grad_norm": 2.171875, "learning_rate": 8.984288545409358e-06, "loss": 1.0739, "step": 3178 }, { "epoch": 0.635466380150421, "grad_norm": 2.078125, "learning_rate": 8.983651784884974e-06, "loss": 1.0221, "step": 3179 }, { "epoch": 0.6356662752055171, "grad_norm": 1.96875, "learning_rate": 8.983014847407202e-06, "loss": 1.0373, "step": 3180 }, { "epoch": 0.6358661702606132, "grad_norm": 2.171875, "learning_rate": 8.982377733004338e-06, "loss": 1.0651, "step": 3181 }, { "epoch": 0.6360660653157093, "grad_norm": 2.09375, "learning_rate": 8.981740441704677e-06, "loss": 1.1086, "step": 3182 }, { "epoch": 0.6362659603708053, "grad_norm": 2.234375, "learning_rate": 8.981102973536533e-06, "loss": 1.1332, "step": 3183 }, { "epoch": 0.6364658554259014, "grad_norm": 2.171875, "learning_rate": 8.98046532852822e-06, "loss": 1.0953, "step": 3184 }, { "epoch": 0.6366657504809975, "grad_norm": 2.171875, "learning_rate": 8.97982750670806e-06, "loss": 1.0265, "step": 3185 }, { "epoch": 0.6368656455360936, "grad_norm": 2.03125, "learning_rate": 8.979189508104391e-06, "loss": 1.089, "step": 3186 }, { "epoch": 0.6370655405911896, "grad_norm": 2.171875, "learning_rate": 8.978551332745546e-06, "loss": 1.0756, "step": 3187 }, { "epoch": 0.6372654356462857, "grad_norm": 2.171875, "learning_rate": 8.977912980659878e-06, "loss": 1.0064, "step": 3188 }, { "epoch": 0.6374653307013818, "grad_norm": 2.109375, "learning_rate": 8.97727445187574e-06, "loss": 1.0146, "step": 3189 }, { "epoch": 0.6376652257564779, "grad_norm": 1.953125, "learning_rate": 8.976635746421493e-06, "loss": 1.0238, "step": 3190 }, { "epoch": 0.6378651208115739, "grad_norm": 2.046875, "learning_rate": 8.975996864325514e-06, "loss": 0.9988, "step": 3191 }, { "epoch": 0.63806501586667, "grad_norm": 2.09375, "learning_rate": 8.975357805616176e-06, "loss": 1.0451, "step": 3192 }, { "epoch": 0.6382649109217661, "grad_norm": 1.984375, "learning_rate": 8.974718570321873e-06, "loss": 1.0624, "step": 3193 }, { "epoch": 0.6384648059768622, "grad_norm": 2.15625, "learning_rate": 8.974079158470991e-06, "loss": 0.9772, "step": 3194 }, { "epoch": 0.6386647010319583, "grad_norm": 2.125, "learning_rate": 8.97343957009194e-06, "loss": 1.0772, "step": 3195 }, { "epoch": 0.6388645960870543, "grad_norm": 1.953125, "learning_rate": 8.972799805213125e-06, "loss": 0.962, "step": 3196 }, { "epoch": 0.6390644911421504, "grad_norm": 2.1875, "learning_rate": 8.97215986386297e-06, "loss": 1.1486, "step": 3197 }, { "epoch": 0.6392643861972465, "grad_norm": 2.125, "learning_rate": 8.971519746069897e-06, "loss": 1.0017, "step": 3198 }, { "epoch": 0.6394642812523426, "grad_norm": 2.109375, "learning_rate": 8.970879451862341e-06, "loss": 1.0565, "step": 3199 }, { "epoch": 0.6396641763074385, "grad_norm": 2.109375, "learning_rate": 8.970238981268745e-06, "loss": 1.0777, "step": 3200 }, { "epoch": 0.6398640713625346, "grad_norm": 2.015625, "learning_rate": 8.969598334317556e-06, "loss": 0.9181, "step": 3201 }, { "epoch": 0.6400639664176307, "grad_norm": 2.125, "learning_rate": 8.968957511037233e-06, "loss": 1.0161, "step": 3202 }, { "epoch": 0.6402638614727268, "grad_norm": 2.09375, "learning_rate": 8.968316511456241e-06, "loss": 1.1003, "step": 3203 }, { "epoch": 0.640463756527823, "grad_norm": 2.03125, "learning_rate": 8.967675335603055e-06, "loss": 1.0365, "step": 3204 }, { "epoch": 0.6406636515829189, "grad_norm": 2.0625, "learning_rate": 8.967033983506153e-06, "loss": 0.9613, "step": 3205 }, { "epoch": 0.640863546638015, "grad_norm": 2.0, "learning_rate": 8.966392455194026e-06, "loss": 1.041, "step": 3206 }, { "epoch": 0.6410634416931111, "grad_norm": 2.171875, "learning_rate": 8.965750750695168e-06, "loss": 1.0774, "step": 3207 }, { "epoch": 0.6412633367482072, "grad_norm": 2.078125, "learning_rate": 8.965108870038088e-06, "loss": 1.0497, "step": 3208 }, { "epoch": 0.6414632318033032, "grad_norm": 2.125, "learning_rate": 8.964466813251294e-06, "loss": 1.1386, "step": 3209 }, { "epoch": 0.6416631268583993, "grad_norm": 2.03125, "learning_rate": 8.963824580363307e-06, "loss": 1.098, "step": 3210 }, { "epoch": 0.6418630219134954, "grad_norm": 2.15625, "learning_rate": 8.963182171402656e-06, "loss": 1.1158, "step": 3211 }, { "epoch": 0.6420629169685915, "grad_norm": 2.140625, "learning_rate": 8.962539586397876e-06, "loss": 1.1141, "step": 3212 }, { "epoch": 0.6422628120236875, "grad_norm": 1.9453125, "learning_rate": 8.961896825377512e-06, "loss": 1.0352, "step": 3213 }, { "epoch": 0.6424627070787836, "grad_norm": 2.09375, "learning_rate": 8.961253888370113e-06, "loss": 0.9783, "step": 3214 }, { "epoch": 0.6426626021338797, "grad_norm": 2.03125, "learning_rate": 8.960610775404239e-06, "loss": 0.9919, "step": 3215 }, { "epoch": 0.6428624971889758, "grad_norm": 2.046875, "learning_rate": 8.95996748650846e-06, "loss": 0.9799, "step": 3216 }, { "epoch": 0.6430623922440719, "grad_norm": 2.015625, "learning_rate": 8.959324021711345e-06, "loss": 0.9817, "step": 3217 }, { "epoch": 0.6432622872991679, "grad_norm": 2.109375, "learning_rate": 8.95868038104148e-06, "loss": 1.13, "step": 3218 }, { "epoch": 0.643462182354264, "grad_norm": 2.203125, "learning_rate": 8.958036564527457e-06, "loss": 1.1042, "step": 3219 }, { "epoch": 0.6436620774093601, "grad_norm": 2.109375, "learning_rate": 8.957392572197871e-06, "loss": 0.9991, "step": 3220 }, { "epoch": 0.6438619724644562, "grad_norm": 2.0625, "learning_rate": 8.95674840408133e-06, "loss": 0.9395, "step": 3221 }, { "epoch": 0.6440618675195522, "grad_norm": 2.140625, "learning_rate": 8.95610406020645e-06, "loss": 1.0837, "step": 3222 }, { "epoch": 0.6442617625746483, "grad_norm": 2.0625, "learning_rate": 8.955459540601847e-06, "loss": 1.0176, "step": 3223 }, { "epoch": 0.6444616576297444, "grad_norm": 2.0625, "learning_rate": 8.954814845296153e-06, "loss": 1.0846, "step": 3224 }, { "epoch": 0.6446615526848405, "grad_norm": 2.09375, "learning_rate": 8.95416997431801e-06, "loss": 0.9247, "step": 3225 }, { "epoch": 0.6448614477399366, "grad_norm": 2.03125, "learning_rate": 8.953524927696056e-06, "loss": 1.0291, "step": 3226 }, { "epoch": 0.6450613427950326, "grad_norm": 2.125, "learning_rate": 8.952879705458949e-06, "loss": 1.0674, "step": 3227 }, { "epoch": 0.6452612378501287, "grad_norm": 2.125, "learning_rate": 8.952234307635346e-06, "loss": 1.0894, "step": 3228 }, { "epoch": 0.6454611329052248, "grad_norm": 2.1875, "learning_rate": 8.951588734253917e-06, "loss": 1.023, "step": 3229 }, { "epoch": 0.6456610279603209, "grad_norm": 1.9609375, "learning_rate": 8.950942985343339e-06, "loss": 0.9906, "step": 3230 }, { "epoch": 0.6458609230154169, "grad_norm": 2.015625, "learning_rate": 8.950297060932294e-06, "loss": 0.9879, "step": 3231 }, { "epoch": 0.646060818070513, "grad_norm": 2.109375, "learning_rate": 8.949650961049479e-06, "loss": 1.0749, "step": 3232 }, { "epoch": 0.6462607131256091, "grad_norm": 2.125, "learning_rate": 8.949004685723587e-06, "loss": 1.0911, "step": 3233 }, { "epoch": 0.6464606081807052, "grad_norm": 2.140625, "learning_rate": 8.94835823498333e-06, "loss": 0.9759, "step": 3234 }, { "epoch": 0.6466605032358012, "grad_norm": 2.109375, "learning_rate": 8.94771160885742e-06, "loss": 1.0999, "step": 3235 }, { "epoch": 0.6468603982908973, "grad_norm": 1.953125, "learning_rate": 8.947064807374586e-06, "loss": 0.9683, "step": 3236 }, { "epoch": 0.6470602933459934, "grad_norm": 2.09375, "learning_rate": 8.946417830563551e-06, "loss": 1.0466, "step": 3237 }, { "epoch": 0.6472601884010895, "grad_norm": 2.09375, "learning_rate": 8.94577067845306e-06, "loss": 1.0536, "step": 3238 }, { "epoch": 0.6474600834561856, "grad_norm": 2.203125, "learning_rate": 8.945123351071856e-06, "loss": 1.0053, "step": 3239 }, { "epoch": 0.6476599785112815, "grad_norm": 2.109375, "learning_rate": 8.944475848448692e-06, "loss": 1.0653, "step": 3240 }, { "epoch": 0.6478598735663776, "grad_norm": 2.15625, "learning_rate": 8.943828170612335e-06, "loss": 1.0131, "step": 3241 }, { "epoch": 0.6480597686214737, "grad_norm": 2.109375, "learning_rate": 8.94318031759155e-06, "loss": 1.125, "step": 3242 }, { "epoch": 0.6482596636765698, "grad_norm": 2.0, "learning_rate": 8.942532289415117e-06, "loss": 1.025, "step": 3243 }, { "epoch": 0.6484595587316658, "grad_norm": 2.109375, "learning_rate": 8.941884086111824e-06, "loss": 1.0884, "step": 3244 }, { "epoch": 0.6486594537867619, "grad_norm": 2.140625, "learning_rate": 8.941235707710457e-06, "loss": 1.0583, "step": 3245 }, { "epoch": 0.648859348841858, "grad_norm": 2.078125, "learning_rate": 8.940587154239822e-06, "loss": 1.0255, "step": 3246 }, { "epoch": 0.6490592438969541, "grad_norm": 2.125, "learning_rate": 8.939938425728725e-06, "loss": 1.1508, "step": 3247 }, { "epoch": 0.6492591389520501, "grad_norm": 2.0625, "learning_rate": 8.939289522205986e-06, "loss": 1.1196, "step": 3248 }, { "epoch": 0.6494590340071462, "grad_norm": 2.03125, "learning_rate": 8.938640443700426e-06, "loss": 0.9891, "step": 3249 }, { "epoch": 0.6496589290622423, "grad_norm": 2.125, "learning_rate": 8.937991190240878e-06, "loss": 1.0844, "step": 3250 }, { "epoch": 0.6498588241173384, "grad_norm": 2.140625, "learning_rate": 8.937341761856184e-06, "loss": 1.0354, "step": 3251 }, { "epoch": 0.6500587191724345, "grad_norm": 2.03125, "learning_rate": 8.936692158575186e-06, "loss": 1.0588, "step": 3252 }, { "epoch": 0.6502586142275305, "grad_norm": 2.078125, "learning_rate": 8.936042380426746e-06, "loss": 0.9619, "step": 3253 }, { "epoch": 0.6504585092826266, "grad_norm": 1.9765625, "learning_rate": 8.935392427439723e-06, "loss": 0.9899, "step": 3254 }, { "epoch": 0.6506584043377227, "grad_norm": 2.21875, "learning_rate": 8.934742299642987e-06, "loss": 1.1772, "step": 3255 }, { "epoch": 0.6508582993928188, "grad_norm": 2.171875, "learning_rate": 8.93409199706542e-06, "loss": 1.025, "step": 3256 }, { "epoch": 0.6510581944479148, "grad_norm": 2.09375, "learning_rate": 8.933441519735907e-06, "loss": 1.0423, "step": 3257 }, { "epoch": 0.6512580895030109, "grad_norm": 2.125, "learning_rate": 8.932790867683339e-06, "loss": 1.0719, "step": 3258 }, { "epoch": 0.651457984558107, "grad_norm": 2.109375, "learning_rate": 8.932140040936623e-06, "loss": 1.0208, "step": 3259 }, { "epoch": 0.6516578796132031, "grad_norm": 2.078125, "learning_rate": 8.931489039524667e-06, "loss": 1.0917, "step": 3260 }, { "epoch": 0.6518577746682992, "grad_norm": 2.109375, "learning_rate": 8.930837863476386e-06, "loss": 1.0357, "step": 3261 }, { "epoch": 0.6520576697233952, "grad_norm": 2.0625, "learning_rate": 8.930186512820707e-06, "loss": 1.02, "step": 3262 }, { "epoch": 0.6522575647784913, "grad_norm": 2.109375, "learning_rate": 8.929534987586565e-06, "loss": 1.1055, "step": 3263 }, { "epoch": 0.6524574598335874, "grad_norm": 2.046875, "learning_rate": 8.928883287802897e-06, "loss": 1.0013, "step": 3264 }, { "epoch": 0.6526573548886835, "grad_norm": 1.9921875, "learning_rate": 8.928231413498652e-06, "loss": 0.9495, "step": 3265 }, { "epoch": 0.6528572499437795, "grad_norm": 1.9375, "learning_rate": 8.92757936470279e-06, "loss": 0.9701, "step": 3266 }, { "epoch": 0.6530571449988756, "grad_norm": 2.046875, "learning_rate": 8.92692714144427e-06, "loss": 1.0784, "step": 3267 }, { "epoch": 0.6532570400539717, "grad_norm": 2.140625, "learning_rate": 8.926274743752065e-06, "loss": 1.0928, "step": 3268 }, { "epoch": 0.6534569351090678, "grad_norm": 2.0625, "learning_rate": 8.925622171655157e-06, "loss": 1.0559, "step": 3269 }, { "epoch": 0.6536568301641638, "grad_norm": 2.15625, "learning_rate": 8.92496942518253e-06, "loss": 0.9591, "step": 3270 }, { "epoch": 0.6538567252192599, "grad_norm": 2.0625, "learning_rate": 8.924316504363182e-06, "loss": 1.0161, "step": 3271 }, { "epoch": 0.654056620274356, "grad_norm": 2.09375, "learning_rate": 8.923663409226112e-06, "loss": 1.0892, "step": 3272 }, { "epoch": 0.6542565153294521, "grad_norm": 2.046875, "learning_rate": 8.923010139800335e-06, "loss": 1.0097, "step": 3273 }, { "epoch": 0.6544564103845482, "grad_norm": 2.328125, "learning_rate": 8.922356696114865e-06, "loss": 1.0528, "step": 3274 }, { "epoch": 0.6546563054396441, "grad_norm": 2.15625, "learning_rate": 8.921703078198728e-06, "loss": 1.0536, "step": 3275 }, { "epoch": 0.6548562004947402, "grad_norm": 2.15625, "learning_rate": 8.92104928608096e-06, "loss": 1.0184, "step": 3276 }, { "epoch": 0.6550560955498363, "grad_norm": 2.1875, "learning_rate": 8.920395319790604e-06, "loss": 0.9432, "step": 3277 }, { "epoch": 0.6552559906049324, "grad_norm": 2.265625, "learning_rate": 8.919741179356705e-06, "loss": 1.126, "step": 3278 }, { "epoch": 0.6554558856600284, "grad_norm": 2.171875, "learning_rate": 8.919086864808319e-06, "loss": 1.0793, "step": 3279 }, { "epoch": 0.6556557807151245, "grad_norm": 2.125, "learning_rate": 8.918432376174516e-06, "loss": 1.0164, "step": 3280 }, { "epoch": 0.6558556757702206, "grad_norm": 2.109375, "learning_rate": 8.917777713484366e-06, "loss": 1.0069, "step": 3281 }, { "epoch": 0.6560555708253167, "grad_norm": 2.0, "learning_rate": 8.917122876766946e-06, "loss": 1.0158, "step": 3282 }, { "epoch": 0.6562554658804128, "grad_norm": 2.078125, "learning_rate": 8.916467866051347e-06, "loss": 1.0721, "step": 3283 }, { "epoch": 0.6564553609355088, "grad_norm": 2.125, "learning_rate": 8.915812681366665e-06, "loss": 1.0576, "step": 3284 }, { "epoch": 0.6566552559906049, "grad_norm": 2.28125, "learning_rate": 8.915157322742e-06, "loss": 1.0276, "step": 3285 }, { "epoch": 0.656855151045701, "grad_norm": 2.09375, "learning_rate": 8.914501790206466e-06, "loss": 0.9683, "step": 3286 }, { "epoch": 0.6570550461007971, "grad_norm": 2.078125, "learning_rate": 8.913846083789179e-06, "loss": 0.9412, "step": 3287 }, { "epoch": 0.6572549411558931, "grad_norm": 2.0625, "learning_rate": 8.91319020351927e-06, "loss": 1.0381, "step": 3288 }, { "epoch": 0.6574548362109892, "grad_norm": 2.09375, "learning_rate": 8.912534149425868e-06, "loss": 1.049, "step": 3289 }, { "epoch": 0.6576547312660853, "grad_norm": 2.15625, "learning_rate": 8.911877921538117e-06, "loss": 1.0618, "step": 3290 }, { "epoch": 0.6578546263211814, "grad_norm": 2.0625, "learning_rate": 8.911221519885167e-06, "loss": 1.0587, "step": 3291 }, { "epoch": 0.6580545213762774, "grad_norm": 2.09375, "learning_rate": 8.910564944496174e-06, "loss": 0.9946, "step": 3292 }, { "epoch": 0.6582544164313735, "grad_norm": 2.109375, "learning_rate": 8.909908195400305e-06, "loss": 1.0538, "step": 3293 }, { "epoch": 0.6584543114864696, "grad_norm": 2.0625, "learning_rate": 8.909251272626731e-06, "loss": 1.0824, "step": 3294 }, { "epoch": 0.6586542065415657, "grad_norm": 2.046875, "learning_rate": 8.908594176204632e-06, "loss": 1.0578, "step": 3295 }, { "epoch": 0.6588541015966618, "grad_norm": 2.125, "learning_rate": 8.9079369061632e-06, "loss": 1.0767, "step": 3296 }, { "epoch": 0.6590539966517578, "grad_norm": 2.0625, "learning_rate": 8.907279462531625e-06, "loss": 1.0766, "step": 3297 }, { "epoch": 0.6592538917068539, "grad_norm": 2.09375, "learning_rate": 8.906621845339115e-06, "loss": 0.9306, "step": 3298 }, { "epoch": 0.65945378676195, "grad_norm": 2.0, "learning_rate": 8.90596405461488e-06, "loss": 0.8914, "step": 3299 }, { "epoch": 0.6596536818170461, "grad_norm": 2.21875, "learning_rate": 8.905306090388137e-06, "loss": 1.0862, "step": 3300 }, { "epoch": 0.6598535768721421, "grad_norm": 2.0, "learning_rate": 8.904647952688117e-06, "loss": 0.9762, "step": 3301 }, { "epoch": 0.6600534719272382, "grad_norm": 2.09375, "learning_rate": 8.903989641544052e-06, "loss": 1.0451, "step": 3302 }, { "epoch": 0.6602533669823343, "grad_norm": 2.125, "learning_rate": 8.903331156985181e-06, "loss": 1.044, "step": 3303 }, { "epoch": 0.6604532620374304, "grad_norm": 2.078125, "learning_rate": 8.902672499040759e-06, "loss": 1.0062, "step": 3304 }, { "epoch": 0.6606531570925265, "grad_norm": 2.078125, "learning_rate": 8.902013667740043e-06, "loss": 1.0294, "step": 3305 }, { "epoch": 0.6608530521476225, "grad_norm": 2.125, "learning_rate": 8.901354663112294e-06, "loss": 1.0725, "step": 3306 }, { "epoch": 0.6610529472027186, "grad_norm": 2.171875, "learning_rate": 8.900695485186788e-06, "loss": 1.0517, "step": 3307 }, { "epoch": 0.6612528422578147, "grad_norm": 2.03125, "learning_rate": 8.900036133992807e-06, "loss": 0.9954, "step": 3308 }, { "epoch": 0.6614527373129108, "grad_norm": 1.9921875, "learning_rate": 8.899376609559636e-06, "loss": 1.0294, "step": 3309 }, { "epoch": 0.6616526323680068, "grad_norm": 2.21875, "learning_rate": 8.898716911916571e-06, "loss": 1.0569, "step": 3310 }, { "epoch": 0.6618525274231029, "grad_norm": 2.09375, "learning_rate": 8.89805704109292e-06, "loss": 0.9953, "step": 3311 }, { "epoch": 0.662052422478199, "grad_norm": 2.0, "learning_rate": 8.897396997117991e-06, "loss": 1.005, "step": 3312 }, { "epoch": 0.662252317533295, "grad_norm": 2.15625, "learning_rate": 8.896736780021102e-06, "loss": 1.0816, "step": 3313 }, { "epoch": 0.662452212588391, "grad_norm": 1.9921875, "learning_rate": 8.896076389831583e-06, "loss": 0.9675, "step": 3314 }, { "epoch": 0.6626521076434871, "grad_norm": 2.234375, "learning_rate": 8.895415826578766e-06, "loss": 1.1988, "step": 3315 }, { "epoch": 0.6628520026985832, "grad_norm": 2.140625, "learning_rate": 8.894755090291996e-06, "loss": 1.1341, "step": 3316 }, { "epoch": 0.6630518977536793, "grad_norm": 2.109375, "learning_rate": 8.89409418100062e-06, "loss": 1.0202, "step": 3317 }, { "epoch": 0.6632517928087754, "grad_norm": 2.265625, "learning_rate": 8.893433098733995e-06, "loss": 1.0561, "step": 3318 }, { "epoch": 0.6634516878638714, "grad_norm": 2.203125, "learning_rate": 8.892771843521487e-06, "loss": 1.1008, "step": 3319 }, { "epoch": 0.6636515829189675, "grad_norm": 2.109375, "learning_rate": 8.89211041539247e-06, "loss": 1.0711, "step": 3320 }, { "epoch": 0.6638514779740636, "grad_norm": 2.078125, "learning_rate": 8.891448814376326e-06, "loss": 1.0149, "step": 3321 }, { "epoch": 0.6640513730291597, "grad_norm": 2.046875, "learning_rate": 8.89078704050244e-06, "loss": 0.9886, "step": 3322 }, { "epoch": 0.6642512680842557, "grad_norm": 2.140625, "learning_rate": 8.890125093800208e-06, "loss": 1.0175, "step": 3323 }, { "epoch": 0.6644511631393518, "grad_norm": 2.015625, "learning_rate": 8.889462974299037e-06, "loss": 1.0867, "step": 3324 }, { "epoch": 0.6646510581944479, "grad_norm": 2.0625, "learning_rate": 8.888800682028334e-06, "loss": 1.0379, "step": 3325 }, { "epoch": 0.664850953249544, "grad_norm": 2.09375, "learning_rate": 8.888138217017521e-06, "loss": 1.0689, "step": 3326 }, { "epoch": 0.6650508483046401, "grad_norm": 2.109375, "learning_rate": 8.887475579296025e-06, "loss": 1.0328, "step": 3327 }, { "epoch": 0.6652507433597361, "grad_norm": 1.9296875, "learning_rate": 8.886812768893277e-06, "loss": 1.0361, "step": 3328 }, { "epoch": 0.6654506384148322, "grad_norm": 2.15625, "learning_rate": 8.886149785838722e-06, "loss": 1.0431, "step": 3329 }, { "epoch": 0.6656505334699283, "grad_norm": 2.1875, "learning_rate": 8.885486630161808e-06, "loss": 1.0075, "step": 3330 }, { "epoch": 0.6658504285250244, "grad_norm": 2.09375, "learning_rate": 8.884823301891993e-06, "loss": 1.0585, "step": 3331 }, { "epoch": 0.6660503235801204, "grad_norm": 2.0625, "learning_rate": 8.884159801058743e-06, "loss": 1.0838, "step": 3332 }, { "epoch": 0.6662502186352165, "grad_norm": 2.140625, "learning_rate": 8.88349612769153e-06, "loss": 1.0646, "step": 3333 }, { "epoch": 0.6664501136903126, "grad_norm": 1.96875, "learning_rate": 8.88283228181983e-06, "loss": 0.8971, "step": 3334 }, { "epoch": 0.6666500087454087, "grad_norm": 2.109375, "learning_rate": 8.882168263473137e-06, "loss": 1.0699, "step": 3335 }, { "epoch": 0.6668499038005047, "grad_norm": 2.109375, "learning_rate": 8.881504072680945e-06, "loss": 0.9898, "step": 3336 }, { "epoch": 0.6670497988556008, "grad_norm": 2.09375, "learning_rate": 8.880839709472755e-06, "loss": 1.0643, "step": 3337 }, { "epoch": 0.6672496939106969, "grad_norm": 2.234375, "learning_rate": 8.88017517387808e-06, "loss": 1.1073, "step": 3338 }, { "epoch": 0.667449588965793, "grad_norm": 2.15625, "learning_rate": 8.87951046592644e-06, "loss": 1.0848, "step": 3339 }, { "epoch": 0.6676494840208891, "grad_norm": 2.078125, "learning_rate": 8.878845585647357e-06, "loss": 1.0101, "step": 3340 }, { "epoch": 0.6678493790759851, "grad_norm": 2.109375, "learning_rate": 8.87818053307037e-06, "loss": 1.055, "step": 3341 }, { "epoch": 0.6680492741310812, "grad_norm": 1.9375, "learning_rate": 8.877515308225015e-06, "loss": 0.9716, "step": 3342 }, { "epoch": 0.6682491691861773, "grad_norm": 2.03125, "learning_rate": 8.876849911140846e-06, "loss": 0.9588, "step": 3343 }, { "epoch": 0.6684490642412734, "grad_norm": 1.9609375, "learning_rate": 8.876184341847418e-06, "loss": 1.0359, "step": 3344 }, { "epoch": 0.6686489592963694, "grad_norm": 2.125, "learning_rate": 8.875518600374296e-06, "loss": 1.0397, "step": 3345 }, { "epoch": 0.6688488543514655, "grad_norm": 2.046875, "learning_rate": 8.874852686751051e-06, "loss": 1.0957, "step": 3346 }, { "epoch": 0.6690487494065616, "grad_norm": 2.078125, "learning_rate": 8.874186601007263e-06, "loss": 1.0045, "step": 3347 }, { "epoch": 0.6692486444616577, "grad_norm": 2.015625, "learning_rate": 8.87352034317252e-06, "loss": 1.0593, "step": 3348 }, { "epoch": 0.6694485395167536, "grad_norm": 2.21875, "learning_rate": 8.872853913276418e-06, "loss": 1.045, "step": 3349 }, { "epoch": 0.6696484345718497, "grad_norm": 2.109375, "learning_rate": 8.872187311348558e-06, "loss": 1.1227, "step": 3350 }, { "epoch": 0.6698483296269458, "grad_norm": 2.109375, "learning_rate": 8.871520537418552e-06, "loss": 0.9508, "step": 3351 }, { "epoch": 0.670048224682042, "grad_norm": 2.140625, "learning_rate": 8.870853591516016e-06, "loss": 0.9973, "step": 3352 }, { "epoch": 0.670248119737138, "grad_norm": 2.078125, "learning_rate": 8.870186473670577e-06, "loss": 1.0057, "step": 3353 }, { "epoch": 0.670448014792234, "grad_norm": 2.0625, "learning_rate": 8.86951918391187e-06, "loss": 1.0588, "step": 3354 }, { "epoch": 0.6706479098473301, "grad_norm": 2.03125, "learning_rate": 8.868851722269531e-06, "loss": 0.987, "step": 3355 }, { "epoch": 0.6708478049024262, "grad_norm": 2.203125, "learning_rate": 8.868184088773216e-06, "loss": 1.1464, "step": 3356 }, { "epoch": 0.6710476999575223, "grad_norm": 2.015625, "learning_rate": 8.867516283452572e-06, "loss": 1.0607, "step": 3357 }, { "epoch": 0.6712475950126183, "grad_norm": 2.078125, "learning_rate": 8.866848306337272e-06, "loss": 0.99, "step": 3358 }, { "epoch": 0.6714474900677144, "grad_norm": 2.078125, "learning_rate": 8.866180157456981e-06, "loss": 1.0594, "step": 3359 }, { "epoch": 0.6716473851228105, "grad_norm": 2.0625, "learning_rate": 8.865511836841381e-06, "loss": 1.0414, "step": 3360 }, { "epoch": 0.6718472801779066, "grad_norm": 2.140625, "learning_rate": 8.864843344520158e-06, "loss": 1.16, "step": 3361 }, { "epoch": 0.6720471752330027, "grad_norm": 2.171875, "learning_rate": 8.864174680523005e-06, "loss": 1.0834, "step": 3362 }, { "epoch": 0.6722470702880987, "grad_norm": 2.0625, "learning_rate": 8.863505844879628e-06, "loss": 1.1152, "step": 3363 }, { "epoch": 0.6724469653431948, "grad_norm": 2.171875, "learning_rate": 8.862836837619732e-06, "loss": 0.9614, "step": 3364 }, { "epoch": 0.6726468603982909, "grad_norm": 2.078125, "learning_rate": 8.862167658773037e-06, "loss": 0.9623, "step": 3365 }, { "epoch": 0.672846755453387, "grad_norm": 2.359375, "learning_rate": 8.861498308369267e-06, "loss": 1.1579, "step": 3366 }, { "epoch": 0.673046650508483, "grad_norm": 1.9765625, "learning_rate": 8.860828786438155e-06, "loss": 0.9972, "step": 3367 }, { "epoch": 0.6732465455635791, "grad_norm": 2.140625, "learning_rate": 8.86015909300944e-06, "loss": 1.0825, "step": 3368 }, { "epoch": 0.6734464406186752, "grad_norm": 2.0625, "learning_rate": 8.85948922811287e-06, "loss": 1.0788, "step": 3369 }, { "epoch": 0.6736463356737713, "grad_norm": 2.0625, "learning_rate": 8.858819191778201e-06, "loss": 1.1012, "step": 3370 }, { "epoch": 0.6738462307288673, "grad_norm": 2.046875, "learning_rate": 8.858148984035196e-06, "loss": 0.9939, "step": 3371 }, { "epoch": 0.6740461257839634, "grad_norm": 2.171875, "learning_rate": 8.857478604913625e-06, "loss": 1.1226, "step": 3372 }, { "epoch": 0.6742460208390595, "grad_norm": 2.03125, "learning_rate": 8.856808054443266e-06, "loss": 0.9636, "step": 3373 }, { "epoch": 0.6744459158941556, "grad_norm": 2.140625, "learning_rate": 8.856137332653907e-06, "loss": 0.9492, "step": 3374 }, { "epoch": 0.6746458109492517, "grad_norm": 1.9609375, "learning_rate": 8.855466439575338e-06, "loss": 1.0091, "step": 3375 }, { "epoch": 0.6748457060043477, "grad_norm": 2.03125, "learning_rate": 8.85479537523736e-06, "loss": 1.0131, "step": 3376 }, { "epoch": 0.6750456010594438, "grad_norm": 1.953125, "learning_rate": 8.854124139669786e-06, "loss": 1.013, "step": 3377 }, { "epoch": 0.6752454961145399, "grad_norm": 2.296875, "learning_rate": 8.853452732902428e-06, "loss": 1.0919, "step": 3378 }, { "epoch": 0.675445391169636, "grad_norm": 2.15625, "learning_rate": 8.85278115496511e-06, "loss": 1.0912, "step": 3379 }, { "epoch": 0.675645286224732, "grad_norm": 2.09375, "learning_rate": 8.852109405887667e-06, "loss": 1.1148, "step": 3380 }, { "epoch": 0.6758451812798281, "grad_norm": 1.984375, "learning_rate": 8.851437485699935e-06, "loss": 0.9866, "step": 3381 }, { "epoch": 0.6760450763349242, "grad_norm": 2.046875, "learning_rate": 8.85076539443176e-06, "loss": 0.9758, "step": 3382 }, { "epoch": 0.6762449713900203, "grad_norm": 2.015625, "learning_rate": 8.850093132112999e-06, "loss": 0.9487, "step": 3383 }, { "epoch": 0.6764448664451164, "grad_norm": 2.09375, "learning_rate": 8.849420698773513e-06, "loss": 1.0743, "step": 3384 }, { "epoch": 0.6766447615002124, "grad_norm": 2.109375, "learning_rate": 8.848748094443167e-06, "loss": 1.088, "step": 3385 }, { "epoch": 0.6768446565553085, "grad_norm": 2.03125, "learning_rate": 8.848075319151844e-06, "loss": 1.0013, "step": 3386 }, { "epoch": 0.6770445516104046, "grad_norm": 2.109375, "learning_rate": 8.847402372929426e-06, "loss": 1.1631, "step": 3387 }, { "epoch": 0.6772444466655007, "grad_norm": 2.03125, "learning_rate": 8.846729255805806e-06, "loss": 1.0131, "step": 3388 }, { "epoch": 0.6774443417205966, "grad_norm": 2.0625, "learning_rate": 8.846055967810882e-06, "loss": 1.0088, "step": 3389 }, { "epoch": 0.6776442367756927, "grad_norm": 2.078125, "learning_rate": 8.845382508974565e-06, "loss": 1.0365, "step": 3390 }, { "epoch": 0.6778441318307888, "grad_norm": 2.0, "learning_rate": 8.844708879326767e-06, "loss": 0.9699, "step": 3391 }, { "epoch": 0.6780440268858849, "grad_norm": 2.0625, "learning_rate": 8.84403507889741e-06, "loss": 0.9918, "step": 3392 }, { "epoch": 0.6782439219409809, "grad_norm": 2.140625, "learning_rate": 8.843361107716427e-06, "loss": 1.0832, "step": 3393 }, { "epoch": 0.678443816996077, "grad_norm": 2.109375, "learning_rate": 8.842686965813752e-06, "loss": 0.992, "step": 3394 }, { "epoch": 0.6786437120511731, "grad_norm": 2.078125, "learning_rate": 8.842012653219333e-06, "loss": 1.0853, "step": 3395 }, { "epoch": 0.6788436071062692, "grad_norm": 1.9765625, "learning_rate": 8.841338169963122e-06, "loss": 1.0101, "step": 3396 }, { "epoch": 0.6790435021613653, "grad_norm": 2.0625, "learning_rate": 8.840663516075081e-06, "loss": 0.9916, "step": 3397 }, { "epoch": 0.6792433972164613, "grad_norm": 2.1875, "learning_rate": 8.839988691585177e-06, "loss": 1.0407, "step": 3398 }, { "epoch": 0.6794432922715574, "grad_norm": 2.09375, "learning_rate": 8.839313696523384e-06, "loss": 1.0267, "step": 3399 }, { "epoch": 0.6796431873266535, "grad_norm": 2.046875, "learning_rate": 8.838638530919688e-06, "loss": 1.0569, "step": 3400 }, { "epoch": 0.6798430823817496, "grad_norm": 2.046875, "learning_rate": 8.837963194804077e-06, "loss": 1.0449, "step": 3401 }, { "epoch": 0.6800429774368456, "grad_norm": 2.1875, "learning_rate": 8.837287688206552e-06, "loss": 1.0707, "step": 3402 }, { "epoch": 0.6802428724919417, "grad_norm": 2.078125, "learning_rate": 8.836612011157117e-06, "loss": 1.0324, "step": 3403 }, { "epoch": 0.6804427675470378, "grad_norm": 2.0625, "learning_rate": 8.835936163685786e-06, "loss": 1.0641, "step": 3404 }, { "epoch": 0.6806426626021339, "grad_norm": 2.0625, "learning_rate": 8.835260145822582e-06, "loss": 1.0306, "step": 3405 }, { "epoch": 0.68084255765723, "grad_norm": 2.109375, "learning_rate": 8.83458395759753e-06, "loss": 1.1774, "step": 3406 }, { "epoch": 0.681042452712326, "grad_norm": 2.109375, "learning_rate": 8.833907599040668e-06, "loss": 1.0588, "step": 3407 }, { "epoch": 0.6812423477674221, "grad_norm": 2.171875, "learning_rate": 8.833231070182042e-06, "loss": 1.1366, "step": 3408 }, { "epoch": 0.6814422428225182, "grad_norm": 1.9765625, "learning_rate": 8.8325543710517e-06, "loss": 1.0346, "step": 3409 }, { "epoch": 0.6816421378776143, "grad_norm": 2.15625, "learning_rate": 8.831877501679701e-06, "loss": 1.1177, "step": 3410 }, { "epoch": 0.6818420329327103, "grad_norm": 2.125, "learning_rate": 8.831200462096115e-06, "loss": 1.0492, "step": 3411 }, { "epoch": 0.6820419279878064, "grad_norm": 2.0, "learning_rate": 8.83052325233101e-06, "loss": 1.0085, "step": 3412 }, { "epoch": 0.6822418230429025, "grad_norm": 2.03125, "learning_rate": 8.829845872414477e-06, "loss": 1.0229, "step": 3413 }, { "epoch": 0.6824417180979986, "grad_norm": 2.03125, "learning_rate": 8.829168322376595e-06, "loss": 0.9829, "step": 3414 }, { "epoch": 0.6826416131530946, "grad_norm": 2.03125, "learning_rate": 8.828490602247466e-06, "loss": 1.0217, "step": 3415 }, { "epoch": 0.6828415082081907, "grad_norm": 2.109375, "learning_rate": 8.827812712057195e-06, "loss": 1.0444, "step": 3416 }, { "epoch": 0.6830414032632868, "grad_norm": 2.109375, "learning_rate": 8.827134651835889e-06, "loss": 1.0498, "step": 3417 }, { "epoch": 0.6832412983183829, "grad_norm": 2.109375, "learning_rate": 8.826456421613674e-06, "loss": 1.0302, "step": 3418 }, { "epoch": 0.683441193373479, "grad_norm": 2.015625, "learning_rate": 8.82577802142067e-06, "loss": 1.0086, "step": 3419 }, { "epoch": 0.683641088428575, "grad_norm": 2.125, "learning_rate": 8.825099451287018e-06, "loss": 1.0617, "step": 3420 }, { "epoch": 0.6838409834836711, "grad_norm": 2.1875, "learning_rate": 8.824420711242855e-06, "loss": 1.027, "step": 3421 }, { "epoch": 0.6840408785387672, "grad_norm": 2.015625, "learning_rate": 8.823741801318332e-06, "loss": 1.1092, "step": 3422 }, { "epoch": 0.6842407735938633, "grad_norm": 2.03125, "learning_rate": 8.82306272154361e-06, "loss": 1.0552, "step": 3423 }, { "epoch": 0.6844406686489592, "grad_norm": 2.0625, "learning_rate": 8.822383471948846e-06, "loss": 0.9296, "step": 3424 }, { "epoch": 0.6846405637040553, "grad_norm": 2.0625, "learning_rate": 8.821704052564218e-06, "loss": 1.0257, "step": 3425 }, { "epoch": 0.6848404587591514, "grad_norm": 1.9609375, "learning_rate": 8.821024463419904e-06, "loss": 0.9842, "step": 3426 }, { "epoch": 0.6850403538142475, "grad_norm": 2.078125, "learning_rate": 8.820344704546093e-06, "loss": 1.0134, "step": 3427 }, { "epoch": 0.6852402488693436, "grad_norm": 2.15625, "learning_rate": 8.819664775972976e-06, "loss": 1.1109, "step": 3428 }, { "epoch": 0.6854401439244396, "grad_norm": 2.015625, "learning_rate": 8.818984677730759e-06, "loss": 1.0232, "step": 3429 }, { "epoch": 0.6856400389795357, "grad_norm": 2.0, "learning_rate": 8.81830440984965e-06, "loss": 1.0117, "step": 3430 }, { "epoch": 0.6858399340346318, "grad_norm": 2.0625, "learning_rate": 8.817623972359867e-06, "loss": 1.0454, "step": 3431 }, { "epoch": 0.6860398290897279, "grad_norm": 2.09375, "learning_rate": 8.816943365291635e-06, "loss": 1.0472, "step": 3432 }, { "epoch": 0.6862397241448239, "grad_norm": 2.234375, "learning_rate": 8.816262588675186e-06, "loss": 1.1059, "step": 3433 }, { "epoch": 0.68643961919992, "grad_norm": 2.03125, "learning_rate": 8.815581642540763e-06, "loss": 0.9828, "step": 3434 }, { "epoch": 0.6866395142550161, "grad_norm": 2.109375, "learning_rate": 8.814900526918608e-06, "loss": 1.0322, "step": 3435 }, { "epoch": 0.6868394093101122, "grad_norm": 2.59375, "learning_rate": 8.814219241838979e-06, "loss": 1.0228, "step": 3436 }, { "epoch": 0.6870393043652082, "grad_norm": 2.296875, "learning_rate": 8.81353778733214e-06, "loss": 1.0842, "step": 3437 }, { "epoch": 0.6872391994203043, "grad_norm": 2.015625, "learning_rate": 8.812856163428358e-06, "loss": 1.0629, "step": 3438 }, { "epoch": 0.6874390944754004, "grad_norm": 2.265625, "learning_rate": 8.812174370157915e-06, "loss": 1.0816, "step": 3439 }, { "epoch": 0.6876389895304965, "grad_norm": 2.09375, "learning_rate": 8.811492407551092e-06, "loss": 1.0512, "step": 3440 }, { "epoch": 0.6878388845855926, "grad_norm": 2.03125, "learning_rate": 8.810810275638183e-06, "loss": 1.0826, "step": 3441 }, { "epoch": 0.6880387796406886, "grad_norm": 2.03125, "learning_rate": 8.810127974449489e-06, "loss": 1.0436, "step": 3442 }, { "epoch": 0.6882386746957847, "grad_norm": 2.03125, "learning_rate": 8.809445504015318e-06, "loss": 0.9917, "step": 3443 }, { "epoch": 0.6884385697508808, "grad_norm": 2.296875, "learning_rate": 8.808762864365985e-06, "loss": 1.106, "step": 3444 }, { "epoch": 0.6886384648059769, "grad_norm": 2.015625, "learning_rate": 8.80808005553181e-06, "loss": 1.0241, "step": 3445 }, { "epoch": 0.6888383598610729, "grad_norm": 2.171875, "learning_rate": 8.807397077543127e-06, "loss": 1.1441, "step": 3446 }, { "epoch": 0.689038254916169, "grad_norm": 2.0625, "learning_rate": 8.806713930430273e-06, "loss": 1.0712, "step": 3447 }, { "epoch": 0.6892381499712651, "grad_norm": 2.03125, "learning_rate": 8.806030614223592e-06, "loss": 1.082, "step": 3448 }, { "epoch": 0.6894380450263612, "grad_norm": 2.03125, "learning_rate": 8.805347128953438e-06, "loss": 1.0949, "step": 3449 }, { "epoch": 0.6896379400814573, "grad_norm": 2.03125, "learning_rate": 8.80466347465017e-06, "loss": 0.9658, "step": 3450 }, { "epoch": 0.6898378351365533, "grad_norm": 2.0625, "learning_rate": 8.803979651344159e-06, "loss": 0.9569, "step": 3451 }, { "epoch": 0.6900377301916494, "grad_norm": 2.140625, "learning_rate": 8.803295659065776e-06, "loss": 1.0401, "step": 3452 }, { "epoch": 0.6902376252467455, "grad_norm": 2.046875, "learning_rate": 8.802611497845407e-06, "loss": 1.0324, "step": 3453 }, { "epoch": 0.6904375203018416, "grad_norm": 2.078125, "learning_rate": 8.801927167713442e-06, "loss": 1.0611, "step": 3454 }, { "epoch": 0.6906374153569376, "grad_norm": 2.296875, "learning_rate": 8.801242668700277e-06, "loss": 1.005, "step": 3455 }, { "epoch": 0.6908373104120337, "grad_norm": 2.09375, "learning_rate": 8.800558000836318e-06, "loss": 1.0715, "step": 3456 }, { "epoch": 0.6910372054671298, "grad_norm": 2.15625, "learning_rate": 8.799873164151981e-06, "loss": 1.0608, "step": 3457 }, { "epoch": 0.6912371005222259, "grad_norm": 2.09375, "learning_rate": 8.799188158677683e-06, "loss": 1.1111, "step": 3458 }, { "epoch": 0.6914369955773219, "grad_norm": 2.09375, "learning_rate": 8.79850298444385e-06, "loss": 1.0775, "step": 3459 }, { "epoch": 0.691636890632418, "grad_norm": 2.21875, "learning_rate": 8.797817641480923e-06, "loss": 1.0977, "step": 3460 }, { "epoch": 0.691836785687514, "grad_norm": 2.140625, "learning_rate": 8.797132129819343e-06, "loss": 1.0691, "step": 3461 }, { "epoch": 0.6920366807426102, "grad_norm": 2.109375, "learning_rate": 8.796446449489557e-06, "loss": 1.0476, "step": 3462 }, { "epoch": 0.6922365757977063, "grad_norm": 2.15625, "learning_rate": 8.795760600522025e-06, "loss": 1.0863, "step": 3463 }, { "epoch": 0.6924364708528022, "grad_norm": 1.9765625, "learning_rate": 8.795074582947214e-06, "loss": 1.073, "step": 3464 }, { "epoch": 0.6926363659078983, "grad_norm": 2.046875, "learning_rate": 8.794388396795595e-06, "loss": 1.0367, "step": 3465 }, { "epoch": 0.6928362609629944, "grad_norm": 2.046875, "learning_rate": 8.79370204209765e-06, "loss": 0.9947, "step": 3466 }, { "epoch": 0.6930361560180905, "grad_norm": 2.15625, "learning_rate": 8.793015518883862e-06, "loss": 1.1298, "step": 3467 }, { "epoch": 0.6932360510731865, "grad_norm": 2.03125, "learning_rate": 8.792328827184733e-06, "loss": 1.0406, "step": 3468 }, { "epoch": 0.6934359461282826, "grad_norm": 2.25, "learning_rate": 8.791641967030761e-06, "loss": 1.0598, "step": 3469 }, { "epoch": 0.6936358411833787, "grad_norm": 2.140625, "learning_rate": 8.790954938452458e-06, "loss": 1.0779, "step": 3470 }, { "epoch": 0.6938357362384748, "grad_norm": 2.1875, "learning_rate": 8.790267741480342e-06, "loss": 1.0949, "step": 3471 }, { "epoch": 0.6940356312935708, "grad_norm": 2.09375, "learning_rate": 8.789580376144938e-06, "loss": 1.0611, "step": 3472 }, { "epoch": 0.6942355263486669, "grad_norm": 2.125, "learning_rate": 8.788892842476777e-06, "loss": 1.1123, "step": 3473 }, { "epoch": 0.694435421403763, "grad_norm": 2.046875, "learning_rate": 8.7882051405064e-06, "loss": 1.0101, "step": 3474 }, { "epoch": 0.6946353164588591, "grad_norm": 1.9609375, "learning_rate": 8.78751727026436e-06, "loss": 1.0699, "step": 3475 }, { "epoch": 0.6948352115139552, "grad_norm": 2.03125, "learning_rate": 8.786829231781203e-06, "loss": 0.986, "step": 3476 }, { "epoch": 0.6950351065690512, "grad_norm": 2.15625, "learning_rate": 8.786141025087496e-06, "loss": 1.0022, "step": 3477 }, { "epoch": 0.6952350016241473, "grad_norm": 2.15625, "learning_rate": 8.78545265021381e-06, "loss": 1.0105, "step": 3478 }, { "epoch": 0.6954348966792434, "grad_norm": 2.203125, "learning_rate": 8.784764107190723e-06, "loss": 1.059, "step": 3479 }, { "epoch": 0.6956347917343395, "grad_norm": 2.015625, "learning_rate": 8.784075396048814e-06, "loss": 0.9632, "step": 3480 }, { "epoch": 0.6958346867894355, "grad_norm": 2.03125, "learning_rate": 8.783386516818684e-06, "loss": 1.001, "step": 3481 }, { "epoch": 0.6960345818445316, "grad_norm": 1.9765625, "learning_rate": 8.782697469530929e-06, "loss": 1.0463, "step": 3482 }, { "epoch": 0.6962344768996277, "grad_norm": 2.0625, "learning_rate": 8.782008254216155e-06, "loss": 1.0061, "step": 3483 }, { "epoch": 0.6964343719547238, "grad_norm": 2.109375, "learning_rate": 8.78131887090498e-06, "loss": 1.1041, "step": 3484 }, { "epoch": 0.6966342670098199, "grad_norm": 2.046875, "learning_rate": 8.780629319628023e-06, "loss": 0.9527, "step": 3485 }, { "epoch": 0.6968341620649159, "grad_norm": 2.171875, "learning_rate": 8.779939600415917e-06, "loss": 1.0341, "step": 3486 }, { "epoch": 0.697034057120012, "grad_norm": 2.046875, "learning_rate": 8.779249713299296e-06, "loss": 0.8984, "step": 3487 }, { "epoch": 0.6972339521751081, "grad_norm": 2.140625, "learning_rate": 8.778559658308806e-06, "loss": 1.0076, "step": 3488 }, { "epoch": 0.6974338472302042, "grad_norm": 2.125, "learning_rate": 8.777869435475101e-06, "loss": 1.0677, "step": 3489 }, { "epoch": 0.6976337422853002, "grad_norm": 2.1875, "learning_rate": 8.777179044828838e-06, "loss": 1.0401, "step": 3490 }, { "epoch": 0.6978336373403963, "grad_norm": 2.0, "learning_rate": 8.776488486400688e-06, "loss": 0.942, "step": 3491 }, { "epoch": 0.6980335323954924, "grad_norm": 2.203125, "learning_rate": 8.775797760221318e-06, "loss": 1.1208, "step": 3492 }, { "epoch": 0.6982334274505885, "grad_norm": 2.171875, "learning_rate": 8.775106866321419e-06, "loss": 1.0702, "step": 3493 }, { "epoch": 0.6984333225056845, "grad_norm": 2.25, "learning_rate": 8.774415804731674e-06, "loss": 1.153, "step": 3494 }, { "epoch": 0.6986332175607806, "grad_norm": 2.09375, "learning_rate": 8.773724575482783e-06, "loss": 1.0306, "step": 3495 }, { "epoch": 0.6988331126158767, "grad_norm": 2.0625, "learning_rate": 8.77303317860545e-06, "loss": 0.9907, "step": 3496 }, { "epoch": 0.6990330076709728, "grad_norm": 2.171875, "learning_rate": 8.772341614130384e-06, "loss": 1.0426, "step": 3497 }, { "epoch": 0.6992329027260689, "grad_norm": 1.9921875, "learning_rate": 8.771649882088309e-06, "loss": 1.0738, "step": 3498 }, { "epoch": 0.6994327977811649, "grad_norm": 2.046875, "learning_rate": 8.770957982509947e-06, "loss": 1.0714, "step": 3499 }, { "epoch": 0.699632692836261, "grad_norm": 2.015625, "learning_rate": 8.770265915426035e-06, "loss": 1.0174, "step": 3500 }, { "epoch": 0.699832587891357, "grad_norm": 2.125, "learning_rate": 8.769573680867314e-06, "loss": 0.9708, "step": 3501 }, { "epoch": 0.7000324829464531, "grad_norm": 2.0, "learning_rate": 8.768881278864532e-06, "loss": 1.025, "step": 3502 }, { "epoch": 0.7002323780015491, "grad_norm": 2.0625, "learning_rate": 8.768188709448446e-06, "loss": 1.0102, "step": 3503 }, { "epoch": 0.7004322730566452, "grad_norm": 2.203125, "learning_rate": 8.76749597264982e-06, "loss": 1.0685, "step": 3504 }, { "epoch": 0.7006321681117413, "grad_norm": 2.078125, "learning_rate": 8.766803068499426e-06, "loss": 1.0231, "step": 3505 }, { "epoch": 0.7008320631668374, "grad_norm": 2.109375, "learning_rate": 8.766109997028042e-06, "loss": 1.0792, "step": 3506 }, { "epoch": 0.7010319582219335, "grad_norm": 2.0625, "learning_rate": 8.765416758266454e-06, "loss": 1.0519, "step": 3507 }, { "epoch": 0.7012318532770295, "grad_norm": 2.109375, "learning_rate": 8.764723352245455e-06, "loss": 1.057, "step": 3508 }, { "epoch": 0.7014317483321256, "grad_norm": 2.0625, "learning_rate": 8.764029778995848e-06, "loss": 0.9859, "step": 3509 }, { "epoch": 0.7016316433872217, "grad_norm": 1.984375, "learning_rate": 8.76333603854844e-06, "loss": 1.0624, "step": 3510 }, { "epoch": 0.7018315384423178, "grad_norm": 2.0625, "learning_rate": 8.762642130934048e-06, "loss": 1.0676, "step": 3511 }, { "epoch": 0.7020314334974138, "grad_norm": 1.9921875, "learning_rate": 8.761948056183492e-06, "loss": 1.0841, "step": 3512 }, { "epoch": 0.7022313285525099, "grad_norm": 2.03125, "learning_rate": 8.761253814327606e-06, "loss": 1.0859, "step": 3513 }, { "epoch": 0.702431223607606, "grad_norm": 2.03125, "learning_rate": 8.760559405397228e-06, "loss": 1.0258, "step": 3514 }, { "epoch": 0.7026311186627021, "grad_norm": 1.90625, "learning_rate": 8.759864829423202e-06, "loss": 0.9817, "step": 3515 }, { "epoch": 0.7028310137177981, "grad_norm": 1.9921875, "learning_rate": 8.759170086436382e-06, "loss": 1.0133, "step": 3516 }, { "epoch": 0.7030309087728942, "grad_norm": 2.15625, "learning_rate": 8.75847517646763e-06, "loss": 1.1056, "step": 3517 }, { "epoch": 0.7032308038279903, "grad_norm": 2.03125, "learning_rate": 8.75778009954781e-06, "loss": 1.0127, "step": 3518 }, { "epoch": 0.7034306988830864, "grad_norm": 2.09375, "learning_rate": 8.757084855707799e-06, "loss": 1.1148, "step": 3519 }, { "epoch": 0.7036305939381825, "grad_norm": 2.15625, "learning_rate": 8.756389444978482e-06, "loss": 1.0803, "step": 3520 }, { "epoch": 0.7038304889932785, "grad_norm": 2.078125, "learning_rate": 8.755693867390746e-06, "loss": 1.0793, "step": 3521 }, { "epoch": 0.7040303840483746, "grad_norm": 2.09375, "learning_rate": 8.754998122975489e-06, "loss": 1.1372, "step": 3522 }, { "epoch": 0.7042302791034707, "grad_norm": 2.078125, "learning_rate": 8.754302211763616e-06, "loss": 1.0156, "step": 3523 }, { "epoch": 0.7044301741585668, "grad_norm": 1.9765625, "learning_rate": 8.753606133786042e-06, "loss": 0.9808, "step": 3524 }, { "epoch": 0.7046300692136628, "grad_norm": 2.09375, "learning_rate": 8.752909889073681e-06, "loss": 1.0435, "step": 3525 }, { "epoch": 0.7048299642687589, "grad_norm": 2.046875, "learning_rate": 8.752213477657467e-06, "loss": 1.017, "step": 3526 }, { "epoch": 0.705029859323855, "grad_norm": 2.109375, "learning_rate": 8.751516899568329e-06, "loss": 1.0034, "step": 3527 }, { "epoch": 0.7052297543789511, "grad_norm": 2.09375, "learning_rate": 8.750820154837213e-06, "loss": 1.0923, "step": 3528 }, { "epoch": 0.7054296494340472, "grad_norm": 2.234375, "learning_rate": 8.750123243495066e-06, "loss": 1.1833, "step": 3529 }, { "epoch": 0.7056295444891432, "grad_norm": 2.046875, "learning_rate": 8.749426165572843e-06, "loss": 1.0742, "step": 3530 }, { "epoch": 0.7058294395442393, "grad_norm": 2.0625, "learning_rate": 8.748728921101511e-06, "loss": 0.9526, "step": 3531 }, { "epoch": 0.7060293345993354, "grad_norm": 2.09375, "learning_rate": 8.748031510112041e-06, "loss": 1.0627, "step": 3532 }, { "epoch": 0.7062292296544315, "grad_norm": 2.15625, "learning_rate": 8.747333932635412e-06, "loss": 0.9998, "step": 3533 }, { "epoch": 0.7064291247095275, "grad_norm": 1.921875, "learning_rate": 8.746636188702609e-06, "loss": 0.9563, "step": 3534 }, { "epoch": 0.7066290197646236, "grad_norm": 2.125, "learning_rate": 8.745938278344628e-06, "loss": 1.1783, "step": 3535 }, { "epoch": 0.7068289148197197, "grad_norm": 2.0, "learning_rate": 8.745240201592466e-06, "loss": 1.0853, "step": 3536 }, { "epoch": 0.7070288098748158, "grad_norm": 2.109375, "learning_rate": 8.744541958477138e-06, "loss": 1.1799, "step": 3537 }, { "epoch": 0.7072287049299117, "grad_norm": 2.03125, "learning_rate": 8.743843549029653e-06, "loss": 0.994, "step": 3538 }, { "epoch": 0.7074285999850078, "grad_norm": 2.125, "learning_rate": 8.74314497328104e-06, "loss": 1.1054, "step": 3539 }, { "epoch": 0.7076284950401039, "grad_norm": 2.140625, "learning_rate": 8.742446231262324e-06, "loss": 1.0607, "step": 3540 }, { "epoch": 0.7078283900952, "grad_norm": 2.109375, "learning_rate": 8.741747323004549e-06, "loss": 1.0312, "step": 3541 }, { "epoch": 0.7080282851502961, "grad_norm": 2.109375, "learning_rate": 8.741048248538757e-06, "loss": 1.0552, "step": 3542 }, { "epoch": 0.7082281802053921, "grad_norm": 2.171875, "learning_rate": 8.740349007896001e-06, "loss": 1.0946, "step": 3543 }, { "epoch": 0.7084280752604882, "grad_norm": 2.109375, "learning_rate": 8.73964960110734e-06, "loss": 1.029, "step": 3544 }, { "epoch": 0.7086279703155843, "grad_norm": 2.15625, "learning_rate": 8.738950028203845e-06, "loss": 1.1373, "step": 3545 }, { "epoch": 0.7088278653706804, "grad_norm": 2.234375, "learning_rate": 8.738250289216588e-06, "loss": 1.1068, "step": 3546 }, { "epoch": 0.7090277604257764, "grad_norm": 2.046875, "learning_rate": 8.737550384176654e-06, "loss": 0.9384, "step": 3547 }, { "epoch": 0.7092276554808725, "grad_norm": 2.125, "learning_rate": 8.73685031311513e-06, "loss": 1.1025, "step": 3548 }, { "epoch": 0.7094275505359686, "grad_norm": 2.140625, "learning_rate": 8.736150076063114e-06, "loss": 1.1502, "step": 3549 }, { "epoch": 0.7096274455910647, "grad_norm": 2.09375, "learning_rate": 8.735449673051711e-06, "loss": 1.0809, "step": 3550 }, { "epoch": 0.7098273406461608, "grad_norm": 2.0625, "learning_rate": 8.734749104112032e-06, "loss": 1.0088, "step": 3551 }, { "epoch": 0.7100272357012568, "grad_norm": 2.140625, "learning_rate": 8.734048369275199e-06, "loss": 1.1559, "step": 3552 }, { "epoch": 0.7102271307563529, "grad_norm": 1.9765625, "learning_rate": 8.733347468572333e-06, "loss": 0.9297, "step": 3553 }, { "epoch": 0.710427025811449, "grad_norm": 2.015625, "learning_rate": 8.732646402034572e-06, "loss": 0.9721, "step": 3554 }, { "epoch": 0.7106269208665451, "grad_norm": 2.03125, "learning_rate": 8.731945169693058e-06, "loss": 1.0645, "step": 3555 }, { "epoch": 0.7108268159216411, "grad_norm": 1.984375, "learning_rate": 8.731243771578937e-06, "loss": 1.0229, "step": 3556 }, { "epoch": 0.7110267109767372, "grad_norm": 2.140625, "learning_rate": 8.730542207723367e-06, "loss": 1.0102, "step": 3557 }, { "epoch": 0.7112266060318333, "grad_norm": 2.015625, "learning_rate": 8.72984047815751e-06, "loss": 1.0751, "step": 3558 }, { "epoch": 0.7114265010869294, "grad_norm": 2.171875, "learning_rate": 8.729138582912538e-06, "loss": 1.0856, "step": 3559 }, { "epoch": 0.7116263961420254, "grad_norm": 1.96875, "learning_rate": 8.728436522019627e-06, "loss": 1.0536, "step": 3560 }, { "epoch": 0.7118262911971215, "grad_norm": 2.109375, "learning_rate": 8.727734295509964e-06, "loss": 1.0875, "step": 3561 }, { "epoch": 0.7120261862522176, "grad_norm": 2.140625, "learning_rate": 8.727031903414743e-06, "loss": 1.0707, "step": 3562 }, { "epoch": 0.7122260813073137, "grad_norm": 2.09375, "learning_rate": 8.72632934576516e-06, "loss": 1.0657, "step": 3563 }, { "epoch": 0.7124259763624098, "grad_norm": 2.03125, "learning_rate": 8.72562662259243e-06, "loss": 1.0626, "step": 3564 }, { "epoch": 0.7126258714175058, "grad_norm": 1.9765625, "learning_rate": 8.72492373392776e-06, "loss": 0.9411, "step": 3565 }, { "epoch": 0.7128257664726019, "grad_norm": 2.0625, "learning_rate": 8.724220679802377e-06, "loss": 1.157, "step": 3566 }, { "epoch": 0.713025661527698, "grad_norm": 1.9921875, "learning_rate": 8.723517460247509e-06, "loss": 1.0013, "step": 3567 }, { "epoch": 0.7132255565827941, "grad_norm": 2.0, "learning_rate": 8.722814075294392e-06, "loss": 1.0098, "step": 3568 }, { "epoch": 0.7134254516378901, "grad_norm": 2.03125, "learning_rate": 8.722110524974273e-06, "loss": 1.0333, "step": 3569 }, { "epoch": 0.7136253466929862, "grad_norm": 2.09375, "learning_rate": 8.7214068093184e-06, "loss": 1.0358, "step": 3570 }, { "epoch": 0.7138252417480823, "grad_norm": 2.15625, "learning_rate": 8.720702928358036e-06, "loss": 1.0748, "step": 3571 }, { "epoch": 0.7140251368031784, "grad_norm": 2.03125, "learning_rate": 8.719998882124446e-06, "loss": 1.0685, "step": 3572 }, { "epoch": 0.7142250318582744, "grad_norm": 1.9765625, "learning_rate": 8.7192946706489e-06, "loss": 1.0813, "step": 3573 }, { "epoch": 0.7144249269133705, "grad_norm": 2.09375, "learning_rate": 8.718590293962684e-06, "loss": 1.0034, "step": 3574 }, { "epoch": 0.7146248219684666, "grad_norm": 2.078125, "learning_rate": 8.717885752097084e-06, "loss": 1.0424, "step": 3575 }, { "epoch": 0.7148247170235627, "grad_norm": 2.109375, "learning_rate": 8.717181045083396e-06, "loss": 1.0401, "step": 3576 }, { "epoch": 0.7150246120786588, "grad_norm": 2.046875, "learning_rate": 8.716476172952921e-06, "loss": 1.0595, "step": 3577 }, { "epoch": 0.7152245071337547, "grad_norm": 2.140625, "learning_rate": 8.715771135736975e-06, "loss": 1.0123, "step": 3578 }, { "epoch": 0.7154244021888508, "grad_norm": 2.1875, "learning_rate": 8.715065933466869e-06, "loss": 1.1616, "step": 3579 }, { "epoch": 0.7156242972439469, "grad_norm": 2.078125, "learning_rate": 8.714360566173932e-06, "loss": 1.057, "step": 3580 }, { "epoch": 0.715824192299043, "grad_norm": 2.140625, "learning_rate": 8.713655033889495e-06, "loss": 1.0771, "step": 3581 }, { "epoch": 0.716024087354139, "grad_norm": 2.140625, "learning_rate": 8.712949336644898e-06, "loss": 1.0603, "step": 3582 }, { "epoch": 0.7162239824092351, "grad_norm": 2.09375, "learning_rate": 8.71224347447149e-06, "loss": 1.0811, "step": 3583 }, { "epoch": 0.7164238774643312, "grad_norm": 1.9765625, "learning_rate": 8.711537447400622e-06, "loss": 1.0414, "step": 3584 }, { "epoch": 0.7166237725194273, "grad_norm": 2.140625, "learning_rate": 8.710831255463656e-06, "loss": 1.0759, "step": 3585 }, { "epoch": 0.7168236675745234, "grad_norm": 2.1875, "learning_rate": 8.710124898691963e-06, "loss": 1.0057, "step": 3586 }, { "epoch": 0.7170235626296194, "grad_norm": 2.078125, "learning_rate": 8.709418377116918e-06, "loss": 1.0743, "step": 3587 }, { "epoch": 0.7172234576847155, "grad_norm": 2.234375, "learning_rate": 8.708711690769904e-06, "loss": 1.1095, "step": 3588 }, { "epoch": 0.7174233527398116, "grad_norm": 2.125, "learning_rate": 8.708004839682315e-06, "loss": 1.1079, "step": 3589 }, { "epoch": 0.7176232477949077, "grad_norm": 2.03125, "learning_rate": 8.707297823885545e-06, "loss": 1.0356, "step": 3590 }, { "epoch": 0.7178231428500037, "grad_norm": 2.03125, "learning_rate": 8.706590643411002e-06, "loss": 1.057, "step": 3591 }, { "epoch": 0.7180230379050998, "grad_norm": 2.03125, "learning_rate": 8.7058832982901e-06, "loss": 1.0344, "step": 3592 }, { "epoch": 0.7182229329601959, "grad_norm": 2.125, "learning_rate": 8.705175788554256e-06, "loss": 0.9713, "step": 3593 }, { "epoch": 0.718422828015292, "grad_norm": 2.15625, "learning_rate": 8.7044681142349e-06, "loss": 1.0609, "step": 3594 }, { "epoch": 0.718622723070388, "grad_norm": 2.1875, "learning_rate": 8.703760275363466e-06, "loss": 1.0574, "step": 3595 }, { "epoch": 0.7188226181254841, "grad_norm": 2.0, "learning_rate": 8.703052271971395e-06, "loss": 1.0209, "step": 3596 }, { "epoch": 0.7190225131805802, "grad_norm": 2.140625, "learning_rate": 8.702344104090139e-06, "loss": 0.9954, "step": 3597 }, { "epoch": 0.7192224082356763, "grad_norm": 2.125, "learning_rate": 8.701635771751153e-06, "loss": 0.9903, "step": 3598 }, { "epoch": 0.7194223032907724, "grad_norm": 2.109375, "learning_rate": 8.700927274985903e-06, "loss": 1.092, "step": 3599 }, { "epoch": 0.7196221983458684, "grad_norm": 2.109375, "learning_rate": 8.700218613825855e-06, "loss": 1.0449, "step": 3600 }, { "epoch": 0.7198220934009645, "grad_norm": 2.0, "learning_rate": 8.699509788302493e-06, "loss": 1.0659, "step": 3601 }, { "epoch": 0.7200219884560606, "grad_norm": 2.046875, "learning_rate": 8.698800798447302e-06, "loss": 0.9256, "step": 3602 }, { "epoch": 0.7202218835111567, "grad_norm": 1.9921875, "learning_rate": 8.698091644291774e-06, "loss": 1.0132, "step": 3603 }, { "epoch": 0.7204217785662527, "grad_norm": 2.09375, "learning_rate": 8.69738232586741e-06, "loss": 1.0542, "step": 3604 }, { "epoch": 0.7206216736213488, "grad_norm": 2.09375, "learning_rate": 8.696672843205718e-06, "loss": 1.046, "step": 3605 }, { "epoch": 0.7208215686764449, "grad_norm": 2.109375, "learning_rate": 8.695963196338214e-06, "loss": 0.9889, "step": 3606 }, { "epoch": 0.721021463731541, "grad_norm": 2.078125, "learning_rate": 8.69525338529642e-06, "loss": 1.0458, "step": 3607 }, { "epoch": 0.7212213587866371, "grad_norm": 2.015625, "learning_rate": 8.694543410111864e-06, "loss": 1.0359, "step": 3608 }, { "epoch": 0.7214212538417331, "grad_norm": 2.015625, "learning_rate": 8.693833270816083e-06, "loss": 1.0612, "step": 3609 }, { "epoch": 0.7216211488968292, "grad_norm": 2.15625, "learning_rate": 8.693122967440626e-06, "loss": 1.0821, "step": 3610 }, { "epoch": 0.7218210439519253, "grad_norm": 2.015625, "learning_rate": 8.69241250001704e-06, "loss": 1.0275, "step": 3611 }, { "epoch": 0.7220209390070214, "grad_norm": 2.03125, "learning_rate": 8.691701868576883e-06, "loss": 1.0511, "step": 3612 }, { "epoch": 0.7222208340621173, "grad_norm": 1.96875, "learning_rate": 8.690991073151724e-06, "loss": 0.9708, "step": 3613 }, { "epoch": 0.7224207291172134, "grad_norm": 2.03125, "learning_rate": 8.690280113773138e-06, "loss": 0.9171, "step": 3614 }, { "epoch": 0.7226206241723095, "grad_norm": 2.203125, "learning_rate": 8.689568990472701e-06, "loss": 1.1249, "step": 3615 }, { "epoch": 0.7228205192274056, "grad_norm": 2.15625, "learning_rate": 8.688857703282005e-06, "loss": 1.0374, "step": 3616 }, { "epoch": 0.7230204142825016, "grad_norm": 2.015625, "learning_rate": 8.688146252232644e-06, "loss": 0.9733, "step": 3617 }, { "epoch": 0.7232203093375977, "grad_norm": 2.0625, "learning_rate": 8.68743463735622e-06, "loss": 1.0975, "step": 3618 }, { "epoch": 0.7234202043926938, "grad_norm": 2.015625, "learning_rate": 8.686722858684342e-06, "loss": 1.0447, "step": 3619 }, { "epoch": 0.7236200994477899, "grad_norm": 2.078125, "learning_rate": 8.68601091624863e-06, "loss": 1.1732, "step": 3620 }, { "epoch": 0.723819994502886, "grad_norm": 2.03125, "learning_rate": 8.685298810080706e-06, "loss": 1.0777, "step": 3621 }, { "epoch": 0.724019889557982, "grad_norm": 2.15625, "learning_rate": 8.684586540212203e-06, "loss": 1.1034, "step": 3622 }, { "epoch": 0.7242197846130781, "grad_norm": 2.0, "learning_rate": 8.683874106674759e-06, "loss": 1.0943, "step": 3623 }, { "epoch": 0.7244196796681742, "grad_norm": 1.9921875, "learning_rate": 8.68316150950002e-06, "loss": 1.0627, "step": 3624 }, { "epoch": 0.7246195747232703, "grad_norm": 2.0, "learning_rate": 8.68244874871964e-06, "loss": 0.9761, "step": 3625 }, { "epoch": 0.7248194697783663, "grad_norm": 2.0625, "learning_rate": 8.681735824365281e-06, "loss": 0.9976, "step": 3626 }, { "epoch": 0.7250193648334624, "grad_norm": 2.015625, "learning_rate": 8.681022736468609e-06, "loss": 0.9816, "step": 3627 }, { "epoch": 0.7252192598885585, "grad_norm": 2.046875, "learning_rate": 8.680309485061302e-06, "loss": 1.0761, "step": 3628 }, { "epoch": 0.7254191549436546, "grad_norm": 2.09375, "learning_rate": 8.679596070175038e-06, "loss": 1.0507, "step": 3629 }, { "epoch": 0.7256190499987507, "grad_norm": 2.078125, "learning_rate": 8.678882491841512e-06, "loss": 1.0754, "step": 3630 }, { "epoch": 0.7258189450538467, "grad_norm": 2.015625, "learning_rate": 8.678168750092419e-06, "loss": 1.0151, "step": 3631 }, { "epoch": 0.7260188401089428, "grad_norm": 2.140625, "learning_rate": 8.67745484495946e-06, "loss": 1.0596, "step": 3632 }, { "epoch": 0.7262187351640389, "grad_norm": 2.03125, "learning_rate": 8.676740776474351e-06, "loss": 1.0023, "step": 3633 }, { "epoch": 0.726418630219135, "grad_norm": 2.171875, "learning_rate": 8.67602654466881e-06, "loss": 1.0468, "step": 3634 }, { "epoch": 0.726618525274231, "grad_norm": 2.046875, "learning_rate": 8.675312149574562e-06, "loss": 1.0281, "step": 3635 }, { "epoch": 0.7268184203293271, "grad_norm": 2.09375, "learning_rate": 8.67459759122334e-06, "loss": 1.0334, "step": 3636 }, { "epoch": 0.7270183153844232, "grad_norm": 2.109375, "learning_rate": 8.673882869646888e-06, "loss": 1.1007, "step": 3637 }, { "epoch": 0.7272182104395193, "grad_norm": 2.15625, "learning_rate": 8.67316798487695e-06, "loss": 1.1333, "step": 3638 }, { "epoch": 0.7274181054946153, "grad_norm": 2.09375, "learning_rate": 8.672452936945282e-06, "loss": 1.0483, "step": 3639 }, { "epoch": 0.7276180005497114, "grad_norm": 2.125, "learning_rate": 8.671737725883646e-06, "loss": 1.0026, "step": 3640 }, { "epoch": 0.7278178956048075, "grad_norm": 1.9921875, "learning_rate": 8.671022351723813e-06, "loss": 0.9676, "step": 3641 }, { "epoch": 0.7280177906599036, "grad_norm": 2.171875, "learning_rate": 8.67030681449756e-06, "loss": 1.0058, "step": 3642 }, { "epoch": 0.7282176857149997, "grad_norm": 2.25, "learning_rate": 8.66959111423667e-06, "loss": 1.0806, "step": 3643 }, { "epoch": 0.7284175807700957, "grad_norm": 2.03125, "learning_rate": 8.668875250972934e-06, "loss": 1.0737, "step": 3644 }, { "epoch": 0.7286174758251918, "grad_norm": 2.0625, "learning_rate": 8.668159224738153e-06, "loss": 1.0112, "step": 3645 }, { "epoch": 0.7288173708802879, "grad_norm": 2.15625, "learning_rate": 8.667443035564129e-06, "loss": 1.073, "step": 3646 }, { "epoch": 0.729017265935384, "grad_norm": 2.078125, "learning_rate": 8.666726683482678e-06, "loss": 1.1717, "step": 3647 }, { "epoch": 0.72921716099048, "grad_norm": 2.15625, "learning_rate": 8.666010168525618e-06, "loss": 1.072, "step": 3648 }, { "epoch": 0.729417056045576, "grad_norm": 2.09375, "learning_rate": 8.66529349072478e-06, "loss": 1.0895, "step": 3649 }, { "epoch": 0.7296169511006722, "grad_norm": 2.234375, "learning_rate": 8.664576650111995e-06, "loss": 1.0967, "step": 3650 }, { "epoch": 0.7298168461557683, "grad_norm": 2.171875, "learning_rate": 8.663859646719106e-06, "loss": 1.024, "step": 3651 }, { "epoch": 0.7300167412108644, "grad_norm": 2.109375, "learning_rate": 8.663142480577965e-06, "loss": 0.999, "step": 3652 }, { "epoch": 0.7302166362659603, "grad_norm": 2.140625, "learning_rate": 8.662425151720425e-06, "loss": 1.0337, "step": 3653 }, { "epoch": 0.7304165313210564, "grad_norm": 2.015625, "learning_rate": 8.661707660178351e-06, "loss": 1.0501, "step": 3654 }, { "epoch": 0.7306164263761525, "grad_norm": 2.046875, "learning_rate": 8.660990005983613e-06, "loss": 1.0929, "step": 3655 }, { "epoch": 0.7308163214312486, "grad_norm": 2.03125, "learning_rate": 8.660272189168093e-06, "loss": 1.002, "step": 3656 }, { "epoch": 0.7310162164863446, "grad_norm": 2.109375, "learning_rate": 8.659554209763669e-06, "loss": 1.0552, "step": 3657 }, { "epoch": 0.7312161115414407, "grad_norm": 2.0625, "learning_rate": 8.65883606780224e-06, "loss": 1.031, "step": 3658 }, { "epoch": 0.7314160065965368, "grad_norm": 2.015625, "learning_rate": 8.658117763315705e-06, "loss": 0.9722, "step": 3659 }, { "epoch": 0.7316159016516329, "grad_norm": 2.21875, "learning_rate": 8.657399296335967e-06, "loss": 1.0115, "step": 3660 }, { "epoch": 0.7318157967067289, "grad_norm": 2.125, "learning_rate": 8.656680666894945e-06, "loss": 0.9965, "step": 3661 }, { "epoch": 0.732015691761825, "grad_norm": 2.15625, "learning_rate": 8.655961875024557e-06, "loss": 1.065, "step": 3662 }, { "epoch": 0.7322155868169211, "grad_norm": 2.1875, "learning_rate": 8.655242920756733e-06, "loss": 1.1328, "step": 3663 }, { "epoch": 0.7324154818720172, "grad_norm": 2.09375, "learning_rate": 8.65452380412341e-06, "loss": 1.085, "step": 3664 }, { "epoch": 0.7326153769271133, "grad_norm": 2.09375, "learning_rate": 8.653804525156529e-06, "loss": 1.0886, "step": 3665 }, { "epoch": 0.7328152719822093, "grad_norm": 2.015625, "learning_rate": 8.653085083888042e-06, "loss": 1.0906, "step": 3666 }, { "epoch": 0.7330151670373054, "grad_norm": 2.046875, "learning_rate": 8.652365480349904e-06, "loss": 1.0874, "step": 3667 }, { "epoch": 0.7332150620924015, "grad_norm": 1.9375, "learning_rate": 8.651645714574082e-06, "loss": 0.971, "step": 3668 }, { "epoch": 0.7334149571474976, "grad_norm": 1.875, "learning_rate": 8.65092578659255e-06, "loss": 1.0021, "step": 3669 }, { "epoch": 0.7336148522025936, "grad_norm": 2.140625, "learning_rate": 8.650205696437282e-06, "loss": 0.9673, "step": 3670 }, { "epoch": 0.7338147472576897, "grad_norm": 2.0625, "learning_rate": 8.649485444140267e-06, "loss": 1.0164, "step": 3671 }, { "epoch": 0.7340146423127858, "grad_norm": 2.046875, "learning_rate": 8.6487650297335e-06, "loss": 1.0038, "step": 3672 }, { "epoch": 0.7342145373678819, "grad_norm": 2.078125, "learning_rate": 8.648044453248978e-06, "loss": 0.9634, "step": 3673 }, { "epoch": 0.734414432422978, "grad_norm": 2.03125, "learning_rate": 8.647323714718712e-06, "loss": 1.1127, "step": 3674 }, { "epoch": 0.734614327478074, "grad_norm": 2.015625, "learning_rate": 8.646602814174715e-06, "loss": 0.8807, "step": 3675 }, { "epoch": 0.7348142225331701, "grad_norm": 1.96875, "learning_rate": 8.645881751649012e-06, "loss": 1.0312, "step": 3676 }, { "epoch": 0.7350141175882662, "grad_norm": 1.8828125, "learning_rate": 8.64516052717363e-06, "loss": 1.0603, "step": 3677 }, { "epoch": 0.7352140126433623, "grad_norm": 2.1875, "learning_rate": 8.644439140780608e-06, "loss": 1.1072, "step": 3678 }, { "epoch": 0.7354139076984583, "grad_norm": 2.109375, "learning_rate": 8.643717592501988e-06, "loss": 1.0585, "step": 3679 }, { "epoch": 0.7356138027535544, "grad_norm": 2.0625, "learning_rate": 8.64299588236982e-06, "loss": 1.0328, "step": 3680 }, { "epoch": 0.7358136978086505, "grad_norm": 2.125, "learning_rate": 8.642274010416165e-06, "loss": 1.0931, "step": 3681 }, { "epoch": 0.7360135928637466, "grad_norm": 2.0625, "learning_rate": 8.641551976673088e-06, "loss": 1.0921, "step": 3682 }, { "epoch": 0.7362134879188426, "grad_norm": 2.046875, "learning_rate": 8.64082978117266e-06, "loss": 1.0605, "step": 3683 }, { "epoch": 0.7364133829739387, "grad_norm": 1.9296875, "learning_rate": 8.640107423946964e-06, "loss": 0.9517, "step": 3684 }, { "epoch": 0.7366132780290348, "grad_norm": 2.0625, "learning_rate": 8.639384905028084e-06, "loss": 1.0521, "step": 3685 }, { "epoch": 0.7368131730841309, "grad_norm": 2.046875, "learning_rate": 8.638662224448115e-06, "loss": 0.9472, "step": 3686 }, { "epoch": 0.737013068139227, "grad_norm": 2.0625, "learning_rate": 8.63793938223916e-06, "loss": 1.0221, "step": 3687 }, { "epoch": 0.737212963194323, "grad_norm": 2.171875, "learning_rate": 8.637216378433324e-06, "loss": 1.121, "step": 3688 }, { "epoch": 0.737412858249419, "grad_norm": 2.140625, "learning_rate": 8.636493213062725e-06, "loss": 1.0409, "step": 3689 }, { "epoch": 0.7376127533045151, "grad_norm": 2.1875, "learning_rate": 8.635769886159488e-06, "loss": 1.1295, "step": 3690 }, { "epoch": 0.7378126483596112, "grad_norm": 2.046875, "learning_rate": 8.63504639775574e-06, "loss": 1.0737, "step": 3691 }, { "epoch": 0.7380125434147072, "grad_norm": 2.046875, "learning_rate": 8.634322747883619e-06, "loss": 1.025, "step": 3692 }, { "epoch": 0.7382124384698033, "grad_norm": 2.03125, "learning_rate": 8.63359893657527e-06, "loss": 1.011, "step": 3693 }, { "epoch": 0.7384123335248994, "grad_norm": 2.1875, "learning_rate": 8.632874963862844e-06, "loss": 1.0802, "step": 3694 }, { "epoch": 0.7386122285799955, "grad_norm": 2.125, "learning_rate": 8.632150829778498e-06, "loss": 1.0525, "step": 3695 }, { "epoch": 0.7388121236350915, "grad_norm": 1.9765625, "learning_rate": 8.631426534354404e-06, "loss": 0.9998, "step": 3696 }, { "epoch": 0.7390120186901876, "grad_norm": 2.03125, "learning_rate": 8.630702077622728e-06, "loss": 1.0091, "step": 3697 }, { "epoch": 0.7392119137452837, "grad_norm": 2.046875, "learning_rate": 8.629977459615655e-06, "loss": 1.0374, "step": 3698 }, { "epoch": 0.7394118088003798, "grad_norm": 1.9765625, "learning_rate": 8.62925268036537e-06, "loss": 0.9763, "step": 3699 }, { "epoch": 0.7396117038554759, "grad_norm": 2.09375, "learning_rate": 8.62852773990407e-06, "loss": 1.0981, "step": 3700 }, { "epoch": 0.7398115989105719, "grad_norm": 1.96875, "learning_rate": 8.627802638263955e-06, "loss": 0.9995, "step": 3701 }, { "epoch": 0.740011493965668, "grad_norm": 2.0, "learning_rate": 8.627077375477233e-06, "loss": 1.0346, "step": 3702 }, { "epoch": 0.7402113890207641, "grad_norm": 2.171875, "learning_rate": 8.626351951576122e-06, "loss": 1.0345, "step": 3703 }, { "epoch": 0.7404112840758602, "grad_norm": 2.125, "learning_rate": 8.625626366592844e-06, "loss": 0.99, "step": 3704 }, { "epoch": 0.7406111791309562, "grad_norm": 2.03125, "learning_rate": 8.624900620559633e-06, "loss": 1.1051, "step": 3705 }, { "epoch": 0.7408110741860523, "grad_norm": 2.265625, "learning_rate": 8.624174713508722e-06, "loss": 1.1394, "step": 3706 }, { "epoch": 0.7410109692411484, "grad_norm": 2.078125, "learning_rate": 8.623448645472356e-06, "loss": 1.0432, "step": 3707 }, { "epoch": 0.7412108642962445, "grad_norm": 1.8515625, "learning_rate": 8.62272241648279e-06, "loss": 0.9488, "step": 3708 }, { "epoch": 0.7414107593513406, "grad_norm": 2.078125, "learning_rate": 8.62199602657228e-06, "loss": 1.0239, "step": 3709 }, { "epoch": 0.7416106544064366, "grad_norm": 2.234375, "learning_rate": 8.621269475773092e-06, "loss": 1.1023, "step": 3710 }, { "epoch": 0.7418105494615327, "grad_norm": 1.96875, "learning_rate": 8.620542764117503e-06, "loss": 0.9405, "step": 3711 }, { "epoch": 0.7420104445166288, "grad_norm": 2.078125, "learning_rate": 8.61981589163779e-06, "loss": 1.129, "step": 3712 }, { "epoch": 0.7422103395717249, "grad_norm": 2.140625, "learning_rate": 8.619088858366242e-06, "loss": 0.9819, "step": 3713 }, { "epoch": 0.7424102346268209, "grad_norm": 2.078125, "learning_rate": 8.618361664335153e-06, "loss": 1.0968, "step": 3714 }, { "epoch": 0.742610129681917, "grad_norm": 2.171875, "learning_rate": 8.617634309576827e-06, "loss": 1.0922, "step": 3715 }, { "epoch": 0.7428100247370131, "grad_norm": 2.046875, "learning_rate": 8.61690679412357e-06, "loss": 1.0316, "step": 3716 }, { "epoch": 0.7430099197921092, "grad_norm": 2.09375, "learning_rate": 8.6161791180077e-06, "loss": 1.0087, "step": 3717 }, { "epoch": 0.7432098148472052, "grad_norm": 1.96875, "learning_rate": 8.615451281261539e-06, "loss": 0.9273, "step": 3718 }, { "epoch": 0.7434097099023013, "grad_norm": 2.03125, "learning_rate": 8.614723283917418e-06, "loss": 0.9929, "step": 3719 }, { "epoch": 0.7436096049573974, "grad_norm": 2.046875, "learning_rate": 8.613995126007674e-06, "loss": 0.9644, "step": 3720 }, { "epoch": 0.7438095000124935, "grad_norm": 2.015625, "learning_rate": 8.613266807564656e-06, "loss": 1.0416, "step": 3721 }, { "epoch": 0.7440093950675896, "grad_norm": 2.015625, "learning_rate": 8.61253832862071e-06, "loss": 1.0855, "step": 3722 }, { "epoch": 0.7442092901226856, "grad_norm": 2.15625, "learning_rate": 8.611809689208197e-06, "loss": 1.073, "step": 3723 }, { "epoch": 0.7444091851777817, "grad_norm": 2.0, "learning_rate": 8.611080889359485e-06, "loss": 1.0305, "step": 3724 }, { "epoch": 0.7446090802328778, "grad_norm": 2.125, "learning_rate": 8.610351929106944e-06, "loss": 1.0055, "step": 3725 }, { "epoch": 0.7448089752879739, "grad_norm": 2.203125, "learning_rate": 8.609622808482956e-06, "loss": 1.0343, "step": 3726 }, { "epoch": 0.7450088703430698, "grad_norm": 2.140625, "learning_rate": 8.608893527519908e-06, "loss": 1.0539, "step": 3727 }, { "epoch": 0.7452087653981659, "grad_norm": 2.125, "learning_rate": 8.608164086250197e-06, "loss": 1.0643, "step": 3728 }, { "epoch": 0.745408660453262, "grad_norm": 1.953125, "learning_rate": 8.607434484706221e-06, "loss": 1.0237, "step": 3729 }, { "epoch": 0.7456085555083581, "grad_norm": 2.1875, "learning_rate": 8.60670472292039e-06, "loss": 1.0754, "step": 3730 }, { "epoch": 0.7458084505634542, "grad_norm": 2.140625, "learning_rate": 8.605974800925121e-06, "loss": 1.0677, "step": 3731 }, { "epoch": 0.7460083456185502, "grad_norm": 1.9921875, "learning_rate": 8.605244718752837e-06, "loss": 0.9748, "step": 3732 }, { "epoch": 0.7462082406736463, "grad_norm": 2.015625, "learning_rate": 8.604514476435969e-06, "loss": 0.9695, "step": 3733 }, { "epoch": 0.7464081357287424, "grad_norm": 2.015625, "learning_rate": 8.60378407400695e-06, "loss": 0.9913, "step": 3734 }, { "epoch": 0.7466080307838385, "grad_norm": 2.0625, "learning_rate": 8.603053511498228e-06, "loss": 1.0384, "step": 3735 }, { "epoch": 0.7468079258389345, "grad_norm": 2.078125, "learning_rate": 8.602322788942255e-06, "loss": 0.9894, "step": 3736 }, { "epoch": 0.7470078208940306, "grad_norm": 2.125, "learning_rate": 8.601591906371487e-06, "loss": 1.0653, "step": 3737 }, { "epoch": 0.7472077159491267, "grad_norm": 2.09375, "learning_rate": 8.600860863818392e-06, "loss": 1.0099, "step": 3738 }, { "epoch": 0.7474076110042228, "grad_norm": 2.109375, "learning_rate": 8.600129661315443e-06, "loss": 1.1134, "step": 3739 }, { "epoch": 0.7476075060593188, "grad_norm": 2.046875, "learning_rate": 8.599398298895117e-06, "loss": 1.0463, "step": 3740 }, { "epoch": 0.7478074011144149, "grad_norm": 2.0625, "learning_rate": 8.598666776589904e-06, "loss": 1.0142, "step": 3741 }, { "epoch": 0.748007296169511, "grad_norm": 2.0, "learning_rate": 8.597935094432298e-06, "loss": 1.0762, "step": 3742 }, { "epoch": 0.7482071912246071, "grad_norm": 2.09375, "learning_rate": 8.597203252454798e-06, "loss": 1.1143, "step": 3743 }, { "epoch": 0.7484070862797032, "grad_norm": 2.125, "learning_rate": 8.596471250689915e-06, "loss": 1.078, "step": 3744 }, { "epoch": 0.7486069813347992, "grad_norm": 2.25, "learning_rate": 8.595739089170162e-06, "loss": 1.0361, "step": 3745 }, { "epoch": 0.7488068763898953, "grad_norm": 2.140625, "learning_rate": 8.595006767928064e-06, "loss": 1.0813, "step": 3746 }, { "epoch": 0.7490067714449914, "grad_norm": 2.21875, "learning_rate": 8.594274286996147e-06, "loss": 1.1081, "step": 3747 }, { "epoch": 0.7492066665000875, "grad_norm": 2.03125, "learning_rate": 8.593541646406952e-06, "loss": 1.0592, "step": 3748 }, { "epoch": 0.7494065615551835, "grad_norm": 2.078125, "learning_rate": 8.59280884619302e-06, "loss": 0.9439, "step": 3749 }, { "epoch": 0.7496064566102796, "grad_norm": 2.078125, "learning_rate": 8.592075886386903e-06, "loss": 1.0597, "step": 3750 }, { "epoch": 0.7498063516653757, "grad_norm": 2.15625, "learning_rate": 8.59134276702116e-06, "loss": 0.9952, "step": 3751 }, { "epoch": 0.7500062467204718, "grad_norm": 2.21875, "learning_rate": 8.590609488128354e-06, "loss": 0.9731, "step": 3752 }, { "epoch": 0.7502061417755679, "grad_norm": 2.125, "learning_rate": 8.589876049741058e-06, "loss": 0.9752, "step": 3753 }, { "epoch": 0.7504060368306639, "grad_norm": 2.078125, "learning_rate": 8.589142451891849e-06, "loss": 1.0571, "step": 3754 }, { "epoch": 0.75060593188576, "grad_norm": 2.109375, "learning_rate": 8.58840869461332e-06, "loss": 1.0112, "step": 3755 }, { "epoch": 0.7508058269408561, "grad_norm": 2.109375, "learning_rate": 8.587674777938057e-06, "loss": 0.9903, "step": 3756 }, { "epoch": 0.7510057219959522, "grad_norm": 2.1875, "learning_rate": 8.586940701898665e-06, "loss": 1.1016, "step": 3757 }, { "epoch": 0.7512056170510482, "grad_norm": 2.09375, "learning_rate": 8.586206466527749e-06, "loss": 0.9849, "step": 3758 }, { "epoch": 0.7514055121061443, "grad_norm": 2.078125, "learning_rate": 8.585472071857924e-06, "loss": 1.0105, "step": 3759 }, { "epoch": 0.7516054071612404, "grad_norm": 1.9765625, "learning_rate": 8.584737517921815e-06, "loss": 0.9755, "step": 3760 }, { "epoch": 0.7518053022163365, "grad_norm": 2.109375, "learning_rate": 8.584002804752046e-06, "loss": 1.0461, "step": 3761 }, { "epoch": 0.7520051972714324, "grad_norm": 2.171875, "learning_rate": 8.583267932381257e-06, "loss": 1.1102, "step": 3762 }, { "epoch": 0.7522050923265285, "grad_norm": 2.109375, "learning_rate": 8.582532900842088e-06, "loss": 1.0866, "step": 3763 }, { "epoch": 0.7524049873816246, "grad_norm": 2.03125, "learning_rate": 8.58179771016719e-06, "loss": 1.0478, "step": 3764 }, { "epoch": 0.7526048824367207, "grad_norm": 1.9765625, "learning_rate": 8.581062360389222e-06, "loss": 0.9944, "step": 3765 }, { "epoch": 0.7528047774918168, "grad_norm": 2.0, "learning_rate": 8.580326851540844e-06, "loss": 1.1228, "step": 3766 }, { "epoch": 0.7530046725469128, "grad_norm": 2.203125, "learning_rate": 8.579591183654731e-06, "loss": 1.0448, "step": 3767 }, { "epoch": 0.7532045676020089, "grad_norm": 2.28125, "learning_rate": 8.578855356763559e-06, "loss": 1.1004, "step": 3768 }, { "epoch": 0.753404462657105, "grad_norm": 2.15625, "learning_rate": 8.578119370900016e-06, "loss": 1.0422, "step": 3769 }, { "epoch": 0.7536043577122011, "grad_norm": 2.03125, "learning_rate": 8.577383226096792e-06, "loss": 0.9927, "step": 3770 }, { "epoch": 0.7538042527672971, "grad_norm": 2.109375, "learning_rate": 8.576646922386587e-06, "loss": 1.0948, "step": 3771 }, { "epoch": 0.7540041478223932, "grad_norm": 1.9765625, "learning_rate": 8.575910459802107e-06, "loss": 1.0204, "step": 3772 }, { "epoch": 0.7542040428774893, "grad_norm": 2.109375, "learning_rate": 8.575173838376067e-06, "loss": 1.0898, "step": 3773 }, { "epoch": 0.7544039379325854, "grad_norm": 1.9921875, "learning_rate": 8.574437058141187e-06, "loss": 0.9686, "step": 3774 }, { "epoch": 0.7546038329876815, "grad_norm": 2.140625, "learning_rate": 8.573700119130194e-06, "loss": 1.0663, "step": 3775 }, { "epoch": 0.7548037280427775, "grad_norm": 2.125, "learning_rate": 8.572963021375825e-06, "loss": 0.9369, "step": 3776 }, { "epoch": 0.7550036230978736, "grad_norm": 2.046875, "learning_rate": 8.572225764910818e-06, "loss": 1.0403, "step": 3777 }, { "epoch": 0.7552035181529697, "grad_norm": 2.15625, "learning_rate": 8.571488349767925e-06, "loss": 1.069, "step": 3778 }, { "epoch": 0.7554034132080658, "grad_norm": 1.9765625, "learning_rate": 8.570750775979901e-06, "loss": 0.9747, "step": 3779 }, { "epoch": 0.7556033082631618, "grad_norm": 2.09375, "learning_rate": 8.57001304357951e-06, "loss": 1.1118, "step": 3780 }, { "epoch": 0.7558032033182579, "grad_norm": 2.109375, "learning_rate": 8.56927515259952e-06, "loss": 1.0418, "step": 3781 }, { "epoch": 0.756003098373354, "grad_norm": 2.25, "learning_rate": 8.568537103072707e-06, "loss": 1.1046, "step": 3782 }, { "epoch": 0.7562029934284501, "grad_norm": 2.078125, "learning_rate": 8.56779889503186e-06, "loss": 1.0775, "step": 3783 }, { "epoch": 0.7564028884835461, "grad_norm": 2.140625, "learning_rate": 8.567060528509767e-06, "loss": 1.1559, "step": 3784 }, { "epoch": 0.7566027835386422, "grad_norm": 2.125, "learning_rate": 8.566322003539225e-06, "loss": 1.0536, "step": 3785 }, { "epoch": 0.7568026785937383, "grad_norm": 2.015625, "learning_rate": 8.56558332015304e-06, "loss": 1.0553, "step": 3786 }, { "epoch": 0.7570025736488344, "grad_norm": 2.0, "learning_rate": 8.564844478384027e-06, "loss": 1.0333, "step": 3787 }, { "epoch": 0.7572024687039305, "grad_norm": 2.015625, "learning_rate": 8.564105478265e-06, "loss": 0.9435, "step": 3788 }, { "epoch": 0.7574023637590265, "grad_norm": 2.171875, "learning_rate": 8.563366319828789e-06, "loss": 1.1762, "step": 3789 }, { "epoch": 0.7576022588141226, "grad_norm": 2.0625, "learning_rate": 8.562627003108229e-06, "loss": 1.0673, "step": 3790 }, { "epoch": 0.7578021538692187, "grad_norm": 2.015625, "learning_rate": 8.561887528136157e-06, "loss": 0.942, "step": 3791 }, { "epoch": 0.7580020489243148, "grad_norm": 2.125, "learning_rate": 8.56114789494542e-06, "loss": 1.0512, "step": 3792 }, { "epoch": 0.7582019439794108, "grad_norm": 2.078125, "learning_rate": 8.560408103568875e-06, "loss": 1.0053, "step": 3793 }, { "epoch": 0.7584018390345069, "grad_norm": 2.140625, "learning_rate": 8.559668154039382e-06, "loss": 1.0991, "step": 3794 }, { "epoch": 0.758601734089603, "grad_norm": 2.0625, "learning_rate": 8.55892804638981e-06, "loss": 0.9974, "step": 3795 }, { "epoch": 0.7588016291446991, "grad_norm": 1.9921875, "learning_rate": 8.558187780653033e-06, "loss": 0.9253, "step": 3796 }, { "epoch": 0.759001524199795, "grad_norm": 2.078125, "learning_rate": 8.557447356861937e-06, "loss": 1.0197, "step": 3797 }, { "epoch": 0.7592014192548912, "grad_norm": 2.03125, "learning_rate": 8.556706775049408e-06, "loss": 1.0748, "step": 3798 }, { "epoch": 0.7594013143099873, "grad_norm": 2.09375, "learning_rate": 8.555966035248344e-06, "loss": 1.0473, "step": 3799 }, { "epoch": 0.7596012093650834, "grad_norm": 2.203125, "learning_rate": 8.555225137491649e-06, "loss": 1.1592, "step": 3800 }, { "epoch": 0.7598011044201795, "grad_norm": 2.28125, "learning_rate": 8.554484081812233e-06, "loss": 1.1061, "step": 3801 }, { "epoch": 0.7600009994752754, "grad_norm": 2.171875, "learning_rate": 8.553742868243014e-06, "loss": 1.1416, "step": 3802 }, { "epoch": 0.7602008945303715, "grad_norm": 2.0625, "learning_rate": 8.553001496816915e-06, "loss": 1.0807, "step": 3803 }, { "epoch": 0.7604007895854676, "grad_norm": 2.03125, "learning_rate": 8.552259967566871e-06, "loss": 1.1007, "step": 3804 }, { "epoch": 0.7606006846405637, "grad_norm": 2.078125, "learning_rate": 8.551518280525816e-06, "loss": 0.9757, "step": 3805 }, { "epoch": 0.7608005796956597, "grad_norm": 2.046875, "learning_rate": 8.550776435726701e-06, "loss": 1.0683, "step": 3806 }, { "epoch": 0.7610004747507558, "grad_norm": 2.125, "learning_rate": 8.550034433202474e-06, "loss": 1.1383, "step": 3807 }, { "epoch": 0.7612003698058519, "grad_norm": 2.234375, "learning_rate": 8.549292272986098e-06, "loss": 1.1601, "step": 3808 }, { "epoch": 0.761400264860948, "grad_norm": 1.9921875, "learning_rate": 8.548549955110538e-06, "loss": 0.9979, "step": 3809 }, { "epoch": 0.7616001599160441, "grad_norm": 2.125, "learning_rate": 8.547807479608768e-06, "loss": 1.0703, "step": 3810 }, { "epoch": 0.7618000549711401, "grad_norm": 1.96875, "learning_rate": 8.547064846513768e-06, "loss": 1.0299, "step": 3811 }, { "epoch": 0.7619999500262362, "grad_norm": 2.0625, "learning_rate": 8.546322055858526e-06, "loss": 0.9472, "step": 3812 }, { "epoch": 0.7621998450813323, "grad_norm": 2.125, "learning_rate": 8.545579107676039e-06, "loss": 1.095, "step": 3813 }, { "epoch": 0.7623997401364284, "grad_norm": 2.078125, "learning_rate": 8.544836001999306e-06, "loss": 1.1145, "step": 3814 }, { "epoch": 0.7625996351915244, "grad_norm": 2.125, "learning_rate": 8.544092738861336e-06, "loss": 1.0481, "step": 3815 }, { "epoch": 0.7627995302466205, "grad_norm": 2.25, "learning_rate": 8.543349318295145e-06, "loss": 1.0895, "step": 3816 }, { "epoch": 0.7629994253017166, "grad_norm": 2.078125, "learning_rate": 8.542605740333754e-06, "loss": 1.1261, "step": 3817 }, { "epoch": 0.7631993203568127, "grad_norm": 2.140625, "learning_rate": 8.541862005010198e-06, "loss": 0.9861, "step": 3818 }, { "epoch": 0.7633992154119087, "grad_norm": 2.03125, "learning_rate": 8.541118112357507e-06, "loss": 0.97, "step": 3819 }, { "epoch": 0.7635991104670048, "grad_norm": 2.015625, "learning_rate": 8.54037406240873e-06, "loss": 1.0524, "step": 3820 }, { "epoch": 0.7637990055221009, "grad_norm": 2.078125, "learning_rate": 8.539629855196913e-06, "loss": 1.0944, "step": 3821 }, { "epoch": 0.763998900577197, "grad_norm": 2.03125, "learning_rate": 8.538885490755117e-06, "loss": 0.9729, "step": 3822 }, { "epoch": 0.7641987956322931, "grad_norm": 2.0625, "learning_rate": 8.538140969116406e-06, "loss": 1.0453, "step": 3823 }, { "epoch": 0.7643986906873891, "grad_norm": 1.9765625, "learning_rate": 8.537396290313849e-06, "loss": 1.0251, "step": 3824 }, { "epoch": 0.7645985857424852, "grad_norm": 2.09375, "learning_rate": 8.53665145438053e-06, "loss": 1.1079, "step": 3825 }, { "epoch": 0.7647984807975813, "grad_norm": 2.09375, "learning_rate": 8.535906461349528e-06, "loss": 1.0166, "step": 3826 }, { "epoch": 0.7649983758526774, "grad_norm": 2.03125, "learning_rate": 8.53516131125394e-06, "loss": 1.0555, "step": 3827 }, { "epoch": 0.7651982709077734, "grad_norm": 2.09375, "learning_rate": 8.534416004126863e-06, "loss": 1.1303, "step": 3828 }, { "epoch": 0.7653981659628695, "grad_norm": 2.078125, "learning_rate": 8.533670540001406e-06, "loss": 1.0398, "step": 3829 }, { "epoch": 0.7655980610179656, "grad_norm": 1.9765625, "learning_rate": 8.53292491891068e-06, "loss": 1.0753, "step": 3830 }, { "epoch": 0.7657979560730617, "grad_norm": 2.03125, "learning_rate": 8.532179140887807e-06, "loss": 0.973, "step": 3831 }, { "epoch": 0.7659978511281578, "grad_norm": 2.046875, "learning_rate": 8.531433205965913e-06, "loss": 0.9727, "step": 3832 }, { "epoch": 0.7661977461832538, "grad_norm": 2.203125, "learning_rate": 8.530687114178134e-06, "loss": 1.0441, "step": 3833 }, { "epoch": 0.7663976412383499, "grad_norm": 2.109375, "learning_rate": 8.529940865557611e-06, "loss": 0.9827, "step": 3834 }, { "epoch": 0.766597536293446, "grad_norm": 1.984375, "learning_rate": 8.52919446013749e-06, "loss": 1.0333, "step": 3835 }, { "epoch": 0.7667974313485421, "grad_norm": 2.28125, "learning_rate": 8.528447897950932e-06, "loss": 1.0204, "step": 3836 }, { "epoch": 0.766997326403638, "grad_norm": 2.1875, "learning_rate": 8.527701179031092e-06, "loss": 1.1111, "step": 3837 }, { "epoch": 0.7671972214587341, "grad_norm": 2.0, "learning_rate": 8.526954303411141e-06, "loss": 1.0995, "step": 3838 }, { "epoch": 0.7673971165138302, "grad_norm": 1.9140625, "learning_rate": 8.526207271124258e-06, "loss": 0.9218, "step": 3839 }, { "epoch": 0.7675970115689263, "grad_norm": 2.140625, "learning_rate": 8.525460082203626e-06, "loss": 1.113, "step": 3840 }, { "epoch": 0.7677969066240223, "grad_norm": 1.9140625, "learning_rate": 8.524712736682433e-06, "loss": 0.9998, "step": 3841 }, { "epoch": 0.7679968016791184, "grad_norm": 2.171875, "learning_rate": 8.523965234593877e-06, "loss": 1.0899, "step": 3842 }, { "epoch": 0.7681966967342145, "grad_norm": 2.015625, "learning_rate": 8.523217575971159e-06, "loss": 1.0126, "step": 3843 }, { "epoch": 0.7683965917893106, "grad_norm": 1.984375, "learning_rate": 8.522469760847496e-06, "loss": 0.9371, "step": 3844 }, { "epoch": 0.7685964868444067, "grad_norm": 2.140625, "learning_rate": 8.5217217892561e-06, "loss": 1.0394, "step": 3845 }, { "epoch": 0.7687963818995027, "grad_norm": 2.109375, "learning_rate": 8.520973661230198e-06, "loss": 1.0531, "step": 3846 }, { "epoch": 0.7689962769545988, "grad_norm": 2.0625, "learning_rate": 8.520225376803024e-06, "loss": 1.0576, "step": 3847 }, { "epoch": 0.7691961720096949, "grad_norm": 2.171875, "learning_rate": 8.519476936007814e-06, "loss": 1.0065, "step": 3848 }, { "epoch": 0.769396067064791, "grad_norm": 2.296875, "learning_rate": 8.518728338877814e-06, "loss": 1.0778, "step": 3849 }, { "epoch": 0.769595962119887, "grad_norm": 2.0, "learning_rate": 8.51797958544628e-06, "loss": 0.9625, "step": 3850 }, { "epoch": 0.7697958571749831, "grad_norm": 1.984375, "learning_rate": 8.517230675746464e-06, "loss": 1.035, "step": 3851 }, { "epoch": 0.7699957522300792, "grad_norm": 2.125, "learning_rate": 8.51648160981164e-06, "loss": 1.0866, "step": 3852 }, { "epoch": 0.7701956472851753, "grad_norm": 2.0625, "learning_rate": 8.515732387675077e-06, "loss": 1.1333, "step": 3853 }, { "epoch": 0.7703955423402714, "grad_norm": 2.125, "learning_rate": 8.514983009370057e-06, "loss": 1.0626, "step": 3854 }, { "epoch": 0.7705954373953674, "grad_norm": 2.09375, "learning_rate": 8.514233474929867e-06, "loss": 1.0494, "step": 3855 }, { "epoch": 0.7707953324504635, "grad_norm": 2.15625, "learning_rate": 8.5134837843878e-06, "loss": 1.1193, "step": 3856 }, { "epoch": 0.7709952275055596, "grad_norm": 2.03125, "learning_rate": 8.512733937777162e-06, "loss": 0.9475, "step": 3857 }, { "epoch": 0.7711951225606557, "grad_norm": 2.0, "learning_rate": 8.511983935131256e-06, "loss": 1.052, "step": 3858 }, { "epoch": 0.7713950176157517, "grad_norm": 2.1875, "learning_rate": 8.511233776483398e-06, "loss": 1.0843, "step": 3859 }, { "epoch": 0.7715949126708478, "grad_norm": 2.109375, "learning_rate": 8.510483461866911e-06, "loss": 1.0252, "step": 3860 }, { "epoch": 0.7717948077259439, "grad_norm": 2.15625, "learning_rate": 8.509732991315125e-06, "loss": 1.0261, "step": 3861 }, { "epoch": 0.77199470278104, "grad_norm": 2.1875, "learning_rate": 8.508982364861373e-06, "loss": 1.0702, "step": 3862 }, { "epoch": 0.772194597836136, "grad_norm": 2.046875, "learning_rate": 8.508231582538999e-06, "loss": 1.0123, "step": 3863 }, { "epoch": 0.7723944928912321, "grad_norm": 2.125, "learning_rate": 8.507480644381355e-06, "loss": 1.0134, "step": 3864 }, { "epoch": 0.7725943879463282, "grad_norm": 2.125, "learning_rate": 8.506729550421791e-06, "loss": 1.1005, "step": 3865 }, { "epoch": 0.7727942830014243, "grad_norm": 2.09375, "learning_rate": 8.505978300693678e-06, "loss": 1.0609, "step": 3866 }, { "epoch": 0.7729941780565204, "grad_norm": 2.046875, "learning_rate": 8.505226895230383e-06, "loss": 1.0536, "step": 3867 }, { "epoch": 0.7731940731116164, "grad_norm": 2.015625, "learning_rate": 8.504475334065283e-06, "loss": 1.0487, "step": 3868 }, { "epoch": 0.7733939681667125, "grad_norm": 2.296875, "learning_rate": 8.503723617231764e-06, "loss": 1.0884, "step": 3869 }, { "epoch": 0.7735938632218086, "grad_norm": 2.09375, "learning_rate": 8.502971744763216e-06, "loss": 1.0808, "step": 3870 }, { "epoch": 0.7737937582769047, "grad_norm": 2.15625, "learning_rate": 8.502219716693037e-06, "loss": 1.0959, "step": 3871 }, { "epoch": 0.7739936533320007, "grad_norm": 2.125, "learning_rate": 8.501467533054632e-06, "loss": 1.1245, "step": 3872 }, { "epoch": 0.7741935483870968, "grad_norm": 1.8984375, "learning_rate": 8.500715193881412e-06, "loss": 0.8819, "step": 3873 }, { "epoch": 0.7743934434421929, "grad_norm": 2.09375, "learning_rate": 8.499962699206798e-06, "loss": 0.9738, "step": 3874 }, { "epoch": 0.774593338497289, "grad_norm": 2.109375, "learning_rate": 8.499210049064216e-06, "loss": 1.1011, "step": 3875 }, { "epoch": 0.774793233552385, "grad_norm": 2.03125, "learning_rate": 8.498457243487096e-06, "loss": 1.0569, "step": 3876 }, { "epoch": 0.774993128607481, "grad_norm": 2.15625, "learning_rate": 8.497704282508879e-06, "loss": 1.0651, "step": 3877 }, { "epoch": 0.7751930236625771, "grad_norm": 2.015625, "learning_rate": 8.496951166163012e-06, "loss": 1.0435, "step": 3878 }, { "epoch": 0.7753929187176732, "grad_norm": 2.109375, "learning_rate": 8.496197894482947e-06, "loss": 0.9878, "step": 3879 }, { "epoch": 0.7755928137727693, "grad_norm": 2.109375, "learning_rate": 8.495444467502144e-06, "loss": 1.1107, "step": 3880 }, { "epoch": 0.7757927088278653, "grad_norm": 2.0625, "learning_rate": 8.494690885254073e-06, "loss": 1.0232, "step": 3881 }, { "epoch": 0.7759926038829614, "grad_norm": 2.109375, "learning_rate": 8.493937147772204e-06, "loss": 1.058, "step": 3882 }, { "epoch": 0.7761924989380575, "grad_norm": 2.203125, "learning_rate": 8.493183255090022e-06, "loss": 1.043, "step": 3883 }, { "epoch": 0.7763923939931536, "grad_norm": 1.9765625, "learning_rate": 8.492429207241013e-06, "loss": 0.9438, "step": 3884 }, { "epoch": 0.7765922890482496, "grad_norm": 2.03125, "learning_rate": 8.49167500425867e-06, "loss": 1.0213, "step": 3885 }, { "epoch": 0.7767921841033457, "grad_norm": 2.046875, "learning_rate": 8.490920646176497e-06, "loss": 1.0034, "step": 3886 }, { "epoch": 0.7769920791584418, "grad_norm": 2.125, "learning_rate": 8.490166133028002e-06, "loss": 1.0945, "step": 3887 }, { "epoch": 0.7771919742135379, "grad_norm": 2.203125, "learning_rate": 8.4894114648467e-06, "loss": 0.9811, "step": 3888 }, { "epoch": 0.777391869268634, "grad_norm": 2.109375, "learning_rate": 8.488656641666113e-06, "loss": 1.1282, "step": 3889 }, { "epoch": 0.77759176432373, "grad_norm": 2.0625, "learning_rate": 8.487901663519771e-06, "loss": 0.9224, "step": 3890 }, { "epoch": 0.7777916593788261, "grad_norm": 2.078125, "learning_rate": 8.48714653044121e-06, "loss": 1.0431, "step": 3891 }, { "epoch": 0.7779915544339222, "grad_norm": 2.015625, "learning_rate": 8.486391242463972e-06, "loss": 1.0122, "step": 3892 }, { "epoch": 0.7781914494890183, "grad_norm": 2.21875, "learning_rate": 8.485635799621607e-06, "loss": 1.112, "step": 3893 }, { "epoch": 0.7783913445441143, "grad_norm": 2.171875, "learning_rate": 8.484880201947674e-06, "loss": 0.95, "step": 3894 }, { "epoch": 0.7785912395992104, "grad_norm": 2.21875, "learning_rate": 8.484124449475733e-06, "loss": 1.0991, "step": 3895 }, { "epoch": 0.7787911346543065, "grad_norm": 2.078125, "learning_rate": 8.483368542239356e-06, "loss": 1.0603, "step": 3896 }, { "epoch": 0.7789910297094026, "grad_norm": 2.09375, "learning_rate": 8.482612480272121e-06, "loss": 1.0628, "step": 3897 }, { "epoch": 0.7791909247644987, "grad_norm": 2.03125, "learning_rate": 8.481856263607611e-06, "loss": 1.004, "step": 3898 }, { "epoch": 0.7793908198195947, "grad_norm": 2.25, "learning_rate": 8.481099892279418e-06, "loss": 1.0244, "step": 3899 }, { "epoch": 0.7795907148746908, "grad_norm": 2.109375, "learning_rate": 8.480343366321138e-06, "loss": 1.0226, "step": 3900 }, { "epoch": 0.7797906099297869, "grad_norm": 2.140625, "learning_rate": 8.47958668576638e-06, "loss": 0.9943, "step": 3901 }, { "epoch": 0.779990504984883, "grad_norm": 2.203125, "learning_rate": 8.478829850648752e-06, "loss": 1.0683, "step": 3902 }, { "epoch": 0.780190400039979, "grad_norm": 2.125, "learning_rate": 8.478072861001872e-06, "loss": 1.0354, "step": 3903 }, { "epoch": 0.7803902950950751, "grad_norm": 2.15625, "learning_rate": 8.477315716859369e-06, "loss": 1.1009, "step": 3904 }, { "epoch": 0.7805901901501712, "grad_norm": 2.140625, "learning_rate": 8.476558418254872e-06, "loss": 1.1659, "step": 3905 }, { "epoch": 0.7807900852052673, "grad_norm": 2.109375, "learning_rate": 8.475800965222024e-06, "loss": 1.0932, "step": 3906 }, { "epoch": 0.7809899802603633, "grad_norm": 2.0, "learning_rate": 8.475043357794466e-06, "loss": 0.9968, "step": 3907 }, { "epoch": 0.7811898753154594, "grad_norm": 2.015625, "learning_rate": 8.474285596005853e-06, "loss": 1.0099, "step": 3908 }, { "epoch": 0.7813897703705555, "grad_norm": 2.25, "learning_rate": 8.473527679889847e-06, "loss": 1.0626, "step": 3909 }, { "epoch": 0.7815896654256516, "grad_norm": 2.109375, "learning_rate": 8.472769609480109e-06, "loss": 1.1036, "step": 3910 }, { "epoch": 0.7817895604807477, "grad_norm": 2.046875, "learning_rate": 8.472011384810317e-06, "loss": 1.0819, "step": 3911 }, { "epoch": 0.7819894555358436, "grad_norm": 2.09375, "learning_rate": 8.471253005914153e-06, "loss": 1.0333, "step": 3912 }, { "epoch": 0.7821893505909397, "grad_norm": 2.046875, "learning_rate": 8.470494472825299e-06, "loss": 1.0854, "step": 3913 }, { "epoch": 0.7823892456460358, "grad_norm": 2.015625, "learning_rate": 8.469735785577451e-06, "loss": 0.9313, "step": 3914 }, { "epoch": 0.782589140701132, "grad_norm": 2.09375, "learning_rate": 8.46897694420431e-06, "loss": 1.0547, "step": 3915 }, { "epoch": 0.7827890357562279, "grad_norm": 2.046875, "learning_rate": 8.468217948739583e-06, "loss": 1.0774, "step": 3916 }, { "epoch": 0.782988930811324, "grad_norm": 1.921875, "learning_rate": 8.467458799216987e-06, "loss": 1.0402, "step": 3917 }, { "epoch": 0.7831888258664201, "grad_norm": 2.109375, "learning_rate": 8.46669949567024e-06, "loss": 0.9801, "step": 3918 }, { "epoch": 0.7833887209215162, "grad_norm": 2.171875, "learning_rate": 8.465940038133072e-06, "loss": 1.0642, "step": 3919 }, { "epoch": 0.7835886159766122, "grad_norm": 2.0625, "learning_rate": 8.465180426639218e-06, "loss": 1.0513, "step": 3920 }, { "epoch": 0.7837885110317083, "grad_norm": 2.1875, "learning_rate": 8.464420661222419e-06, "loss": 1.0154, "step": 3921 }, { "epoch": 0.7839884060868044, "grad_norm": 2.171875, "learning_rate": 8.463660741916425e-06, "loss": 1.0165, "step": 3922 }, { "epoch": 0.7841883011419005, "grad_norm": 2.109375, "learning_rate": 8.46290066875499e-06, "loss": 1.0002, "step": 3923 }, { "epoch": 0.7843881961969966, "grad_norm": 2.078125, "learning_rate": 8.462140441771878e-06, "loss": 1.0972, "step": 3924 }, { "epoch": 0.7845880912520926, "grad_norm": 2.078125, "learning_rate": 8.461380061000857e-06, "loss": 1.0545, "step": 3925 }, { "epoch": 0.7847879863071887, "grad_norm": 2.0625, "learning_rate": 8.460619526475704e-06, "loss": 1.0847, "step": 3926 }, { "epoch": 0.7849878813622848, "grad_norm": 2.078125, "learning_rate": 8.4598588382302e-06, "loss": 0.9939, "step": 3927 }, { "epoch": 0.7851877764173809, "grad_norm": 2.28125, "learning_rate": 8.459097996298137e-06, "loss": 1.1484, "step": 3928 }, { "epoch": 0.7853876714724769, "grad_norm": 2.0625, "learning_rate": 8.45833700071331e-06, "loss": 1.029, "step": 3929 }, { "epoch": 0.785587566527573, "grad_norm": 2.0625, "learning_rate": 8.457575851509523e-06, "loss": 0.9141, "step": 3930 }, { "epoch": 0.7857874615826691, "grad_norm": 2.125, "learning_rate": 8.456814548720584e-06, "loss": 1.0615, "step": 3931 }, { "epoch": 0.7859873566377652, "grad_norm": 1.9765625, "learning_rate": 8.456053092380313e-06, "loss": 0.8921, "step": 3932 }, { "epoch": 0.7861872516928613, "grad_norm": 2.171875, "learning_rate": 8.455291482522536e-06, "loss": 1.1657, "step": 3933 }, { "epoch": 0.7863871467479573, "grad_norm": 2.015625, "learning_rate": 8.454529719181077e-06, "loss": 1.0102, "step": 3934 }, { "epoch": 0.7865870418030534, "grad_norm": 2.09375, "learning_rate": 8.453767802389776e-06, "loss": 1.0866, "step": 3935 }, { "epoch": 0.7867869368581495, "grad_norm": 2.140625, "learning_rate": 8.453005732182482e-06, "loss": 1.0282, "step": 3936 }, { "epoch": 0.7869868319132456, "grad_norm": 2.03125, "learning_rate": 8.452243508593038e-06, "loss": 1.1174, "step": 3937 }, { "epoch": 0.7871867269683416, "grad_norm": 2.109375, "learning_rate": 8.451481131655308e-06, "loss": 0.9955, "step": 3938 }, { "epoch": 0.7873866220234377, "grad_norm": 2.046875, "learning_rate": 8.450718601403155e-06, "loss": 0.9518, "step": 3939 }, { "epoch": 0.7875865170785338, "grad_norm": 2.078125, "learning_rate": 8.449955917870449e-06, "loss": 1.1177, "step": 3940 }, { "epoch": 0.7877864121336299, "grad_norm": 2.125, "learning_rate": 8.449193081091071e-06, "loss": 1.1051, "step": 3941 }, { "epoch": 0.7879863071887259, "grad_norm": 2.015625, "learning_rate": 8.448430091098904e-06, "loss": 1.0499, "step": 3942 }, { "epoch": 0.788186202243822, "grad_norm": 2.03125, "learning_rate": 8.447666947927842e-06, "loss": 1.0631, "step": 3943 }, { "epoch": 0.7883860972989181, "grad_norm": 2.140625, "learning_rate": 8.446903651611782e-06, "loss": 1.0635, "step": 3944 }, { "epoch": 0.7885859923540142, "grad_norm": 2.09375, "learning_rate": 8.446140202184628e-06, "loss": 0.8986, "step": 3945 }, { "epoch": 0.7887858874091103, "grad_norm": 2.0625, "learning_rate": 8.445376599680295e-06, "loss": 1.0746, "step": 3946 }, { "epoch": 0.7889857824642063, "grad_norm": 1.96875, "learning_rate": 8.444612844132703e-06, "loss": 0.9498, "step": 3947 }, { "epoch": 0.7891856775193024, "grad_norm": 2.125, "learning_rate": 8.443848935575776e-06, "loss": 1.0482, "step": 3948 }, { "epoch": 0.7893855725743985, "grad_norm": 2.109375, "learning_rate": 8.443084874043446e-06, "loss": 1.0781, "step": 3949 }, { "epoch": 0.7895854676294946, "grad_norm": 2.234375, "learning_rate": 8.442320659569654e-06, "loss": 1.1076, "step": 3950 }, { "epoch": 0.7897853626845905, "grad_norm": 2.0, "learning_rate": 8.441556292188347e-06, "loss": 0.984, "step": 3951 }, { "epoch": 0.7899852577396866, "grad_norm": 2.125, "learning_rate": 8.440791771933474e-06, "loss": 1.0832, "step": 3952 }, { "epoch": 0.7901851527947827, "grad_norm": 2.09375, "learning_rate": 8.440027098839002e-06, "loss": 0.9624, "step": 3953 }, { "epoch": 0.7903850478498788, "grad_norm": 2.078125, "learning_rate": 8.439262272938891e-06, "loss": 1.1171, "step": 3954 }, { "epoch": 0.7905849429049749, "grad_norm": 2.296875, "learning_rate": 8.438497294267117e-06, "loss": 1.1415, "step": 3955 }, { "epoch": 0.7907848379600709, "grad_norm": 2.09375, "learning_rate": 8.43773216285766e-06, "loss": 1.0359, "step": 3956 }, { "epoch": 0.790984733015167, "grad_norm": 2.453125, "learning_rate": 8.43696687874451e-06, "loss": 1.0812, "step": 3957 }, { "epoch": 0.7911846280702631, "grad_norm": 2.015625, "learning_rate": 8.436201441961658e-06, "loss": 1.0503, "step": 3958 }, { "epoch": 0.7913845231253592, "grad_norm": 2.21875, "learning_rate": 8.435435852543103e-06, "loss": 1.1193, "step": 3959 }, { "epoch": 0.7915844181804552, "grad_norm": 1.9296875, "learning_rate": 8.434670110522855e-06, "loss": 0.9925, "step": 3960 }, { "epoch": 0.7917843132355513, "grad_norm": 2.015625, "learning_rate": 8.433904215934929e-06, "loss": 1.1037, "step": 3961 }, { "epoch": 0.7919842082906474, "grad_norm": 2.0625, "learning_rate": 8.433138168813344e-06, "loss": 0.9896, "step": 3962 }, { "epoch": 0.7921841033457435, "grad_norm": 2.0625, "learning_rate": 8.432371969192127e-06, "loss": 1.1006, "step": 3963 }, { "epoch": 0.7923839984008395, "grad_norm": 2.0, "learning_rate": 8.431605617105314e-06, "loss": 1.0182, "step": 3964 }, { "epoch": 0.7925838934559356, "grad_norm": 2.125, "learning_rate": 8.430839112586947e-06, "loss": 1.0864, "step": 3965 }, { "epoch": 0.7927837885110317, "grad_norm": 2.015625, "learning_rate": 8.430072455671072e-06, "loss": 1.0649, "step": 3966 }, { "epoch": 0.7929836835661278, "grad_norm": 2.140625, "learning_rate": 8.429305646391746e-06, "loss": 1.0821, "step": 3967 }, { "epoch": 0.7931835786212239, "grad_norm": 2.0625, "learning_rate": 8.428538684783027e-06, "loss": 1.0131, "step": 3968 }, { "epoch": 0.7933834736763199, "grad_norm": 2.078125, "learning_rate": 8.427771570878988e-06, "loss": 1.1314, "step": 3969 }, { "epoch": 0.793583368731416, "grad_norm": 2.25, "learning_rate": 8.427004304713701e-06, "loss": 1.1188, "step": 3970 }, { "epoch": 0.7937832637865121, "grad_norm": 2.015625, "learning_rate": 8.42623688632125e-06, "loss": 0.9961, "step": 3971 }, { "epoch": 0.7939831588416082, "grad_norm": 2.140625, "learning_rate": 8.425469315735722e-06, "loss": 1.0934, "step": 3972 }, { "epoch": 0.7941830538967042, "grad_norm": 2.015625, "learning_rate": 8.424701592991212e-06, "loss": 0.9757, "step": 3973 }, { "epoch": 0.7943829489518003, "grad_norm": 2.125, "learning_rate": 8.423933718121823e-06, "loss": 1.0581, "step": 3974 }, { "epoch": 0.7945828440068964, "grad_norm": 2.015625, "learning_rate": 8.423165691161665e-06, "loss": 1.033, "step": 3975 }, { "epoch": 0.7947827390619925, "grad_norm": 2.125, "learning_rate": 8.422397512144854e-06, "loss": 1.0961, "step": 3976 }, { "epoch": 0.7949826341170886, "grad_norm": 2.25, "learning_rate": 8.421629181105509e-06, "loss": 1.08, "step": 3977 }, { "epoch": 0.7951825291721846, "grad_norm": 2.109375, "learning_rate": 8.42086069807776e-06, "loss": 1.0658, "step": 3978 }, { "epoch": 0.7953824242272807, "grad_norm": 1.9921875, "learning_rate": 8.420092063095746e-06, "loss": 0.9352, "step": 3979 }, { "epoch": 0.7955823192823768, "grad_norm": 2.171875, "learning_rate": 8.419323276193607e-06, "loss": 1.0642, "step": 3980 }, { "epoch": 0.7957822143374729, "grad_norm": 2.109375, "learning_rate": 8.418554337405493e-06, "loss": 1.0289, "step": 3981 }, { "epoch": 0.7959821093925689, "grad_norm": 2.28125, "learning_rate": 8.417785246765561e-06, "loss": 1.1825, "step": 3982 }, { "epoch": 0.796182004447665, "grad_norm": 1.9765625, "learning_rate": 8.417016004307974e-06, "loss": 0.9411, "step": 3983 }, { "epoch": 0.7963818995027611, "grad_norm": 2.078125, "learning_rate": 8.4162466100669e-06, "loss": 1.015, "step": 3984 }, { "epoch": 0.7965817945578572, "grad_norm": 1.9765625, "learning_rate": 8.415477064076518e-06, "loss": 1.0249, "step": 3985 }, { "epoch": 0.7967816896129531, "grad_norm": 2.1875, "learning_rate": 8.414707366371006e-06, "loss": 1.1011, "step": 3986 }, { "epoch": 0.7969815846680492, "grad_norm": 2.171875, "learning_rate": 8.41393751698456e-06, "loss": 1.15, "step": 3987 }, { "epoch": 0.7971814797231453, "grad_norm": 2.390625, "learning_rate": 8.413167515951374e-06, "loss": 1.0764, "step": 3988 }, { "epoch": 0.7973813747782414, "grad_norm": 2.21875, "learning_rate": 8.412397363305653e-06, "loss": 1.0558, "step": 3989 }, { "epoch": 0.7975812698333375, "grad_norm": 2.140625, "learning_rate": 8.411627059081603e-06, "loss": 1.0267, "step": 3990 }, { "epoch": 0.7977811648884335, "grad_norm": 2.09375, "learning_rate": 8.410856603313446e-06, "loss": 1.0978, "step": 3991 }, { "epoch": 0.7979810599435296, "grad_norm": 2.03125, "learning_rate": 8.410085996035402e-06, "loss": 1.0463, "step": 3992 }, { "epoch": 0.7981809549986257, "grad_norm": 2.015625, "learning_rate": 8.409315237281701e-06, "loss": 1.0431, "step": 3993 }, { "epoch": 0.7983808500537218, "grad_norm": 2.265625, "learning_rate": 8.408544327086583e-06, "loss": 1.2017, "step": 3994 }, { "epoch": 0.7985807451088178, "grad_norm": 2.125, "learning_rate": 8.407773265484289e-06, "loss": 1.1155, "step": 3995 }, { "epoch": 0.7987806401639139, "grad_norm": 1.953125, "learning_rate": 8.40700205250907e-06, "loss": 1.0254, "step": 3996 }, { "epoch": 0.79898053521901, "grad_norm": 2.046875, "learning_rate": 8.406230688195184e-06, "loss": 0.9988, "step": 3997 }, { "epoch": 0.7991804302741061, "grad_norm": 1.9453125, "learning_rate": 8.405459172576895e-06, "loss": 0.9203, "step": 3998 }, { "epoch": 0.7993803253292022, "grad_norm": 2.03125, "learning_rate": 8.404687505688474e-06, "loss": 0.9984, "step": 3999 }, { "epoch": 0.7995802203842982, "grad_norm": 2.234375, "learning_rate": 8.403915687564198e-06, "loss": 0.9678, "step": 4000 }, { "epoch": 0.7997801154393943, "grad_norm": 2.0, "learning_rate": 8.403143718238347e-06, "loss": 0.986, "step": 4001 }, { "epoch": 0.7999800104944904, "grad_norm": 1.953125, "learning_rate": 8.402371597745218e-06, "loss": 1.0025, "step": 4002 }, { "epoch": 0.8001799055495865, "grad_norm": 2.078125, "learning_rate": 8.401599326119108e-06, "loss": 1.0755, "step": 4003 }, { "epoch": 0.8003798006046825, "grad_norm": 1.9140625, "learning_rate": 8.400826903394317e-06, "loss": 0.8787, "step": 4004 }, { "epoch": 0.8005796956597786, "grad_norm": 2.1875, "learning_rate": 8.400054329605159e-06, "loss": 0.9739, "step": 4005 }, { "epoch": 0.8007795907148747, "grad_norm": 2.078125, "learning_rate": 8.39928160478595e-06, "loss": 0.9824, "step": 4006 }, { "epoch": 0.8009794857699708, "grad_norm": 2.1875, "learning_rate": 8.398508728971016e-06, "loss": 1.004, "step": 4007 }, { "epoch": 0.8011793808250668, "grad_norm": 2.03125, "learning_rate": 8.397735702194686e-06, "loss": 0.9618, "step": 4008 }, { "epoch": 0.8013792758801629, "grad_norm": 2.09375, "learning_rate": 8.396962524491299e-06, "loss": 1.1228, "step": 4009 }, { "epoch": 0.801579170935259, "grad_norm": 2.171875, "learning_rate": 8.3961891958952e-06, "loss": 0.9377, "step": 4010 }, { "epoch": 0.8017790659903551, "grad_norm": 2.1875, "learning_rate": 8.39541571644074e-06, "loss": 1.0748, "step": 4011 }, { "epoch": 0.8019789610454512, "grad_norm": 2.09375, "learning_rate": 8.394642086162278e-06, "loss": 1.0429, "step": 4012 }, { "epoch": 0.8021788561005472, "grad_norm": 2.046875, "learning_rate": 8.393868305094173e-06, "loss": 1.099, "step": 4013 }, { "epoch": 0.8023787511556433, "grad_norm": 1.9765625, "learning_rate": 8.393094373270804e-06, "loss": 1.0019, "step": 4014 }, { "epoch": 0.8025786462107394, "grad_norm": 2.0625, "learning_rate": 8.392320290726543e-06, "loss": 1.059, "step": 4015 }, { "epoch": 0.8027785412658355, "grad_norm": 2.078125, "learning_rate": 8.391546057495778e-06, "loss": 1.0901, "step": 4016 }, { "epoch": 0.8029784363209315, "grad_norm": 2.09375, "learning_rate": 8.3907716736129e-06, "loss": 1.0134, "step": 4017 }, { "epoch": 0.8031783313760276, "grad_norm": 2.0625, "learning_rate": 8.389997139112306e-06, "loss": 0.9359, "step": 4018 }, { "epoch": 0.8033782264311237, "grad_norm": 2.046875, "learning_rate": 8.3892224540284e-06, "loss": 1.0603, "step": 4019 }, { "epoch": 0.8035781214862198, "grad_norm": 1.9453125, "learning_rate": 8.388447618395598e-06, "loss": 0.9355, "step": 4020 }, { "epoch": 0.8037780165413158, "grad_norm": 2.140625, "learning_rate": 8.387672632248312e-06, "loss": 1.1244, "step": 4021 }, { "epoch": 0.8039779115964119, "grad_norm": 1.9609375, "learning_rate": 8.38689749562097e-06, "loss": 1.0161, "step": 4022 }, { "epoch": 0.804177806651508, "grad_norm": 2.078125, "learning_rate": 8.386122208548002e-06, "loss": 0.9934, "step": 4023 }, { "epoch": 0.804377701706604, "grad_norm": 2.015625, "learning_rate": 8.385346771063848e-06, "loss": 1.0326, "step": 4024 }, { "epoch": 0.8045775967617002, "grad_norm": 1.984375, "learning_rate": 8.384571183202952e-06, "loss": 1.059, "step": 4025 }, { "epoch": 0.8047774918167961, "grad_norm": 2.078125, "learning_rate": 8.383795444999766e-06, "loss": 1.0174, "step": 4026 }, { "epoch": 0.8049773868718922, "grad_norm": 2.0, "learning_rate": 8.383019556488747e-06, "loss": 1.0145, "step": 4027 }, { "epoch": 0.8051772819269883, "grad_norm": 2.015625, "learning_rate": 8.38224351770436e-06, "loss": 1.0756, "step": 4028 }, { "epoch": 0.8053771769820844, "grad_norm": 2.109375, "learning_rate": 8.381467328681078e-06, "loss": 1.0536, "step": 4029 }, { "epoch": 0.8055770720371804, "grad_norm": 2.046875, "learning_rate": 8.380690989453379e-06, "loss": 1.0156, "step": 4030 }, { "epoch": 0.8057769670922765, "grad_norm": 2.015625, "learning_rate": 8.379914500055745e-06, "loss": 1.0135, "step": 4031 }, { "epoch": 0.8059768621473726, "grad_norm": 2.0625, "learning_rate": 8.379137860522672e-06, "loss": 1.0025, "step": 4032 }, { "epoch": 0.8061767572024687, "grad_norm": 2.015625, "learning_rate": 8.378361070888656e-06, "loss": 1.1067, "step": 4033 }, { "epoch": 0.8063766522575648, "grad_norm": 1.9375, "learning_rate": 8.3775841311882e-06, "loss": 1.0107, "step": 4034 }, { "epoch": 0.8065765473126608, "grad_norm": 1.9921875, "learning_rate": 8.376807041455822e-06, "loss": 0.9656, "step": 4035 }, { "epoch": 0.8067764423677569, "grad_norm": 2.046875, "learning_rate": 8.376029801726033e-06, "loss": 1.0475, "step": 4036 }, { "epoch": 0.806976337422853, "grad_norm": 2.0625, "learning_rate": 8.375252412033361e-06, "loss": 1.0904, "step": 4037 }, { "epoch": 0.8071762324779491, "grad_norm": 2.046875, "learning_rate": 8.374474872412338e-06, "loss": 1.0797, "step": 4038 }, { "epoch": 0.8073761275330451, "grad_norm": 2.0, "learning_rate": 8.3736971828975e-06, "loss": 1.0428, "step": 4039 }, { "epoch": 0.8075760225881412, "grad_norm": 2.171875, "learning_rate": 8.372919343523395e-06, "loss": 0.9768, "step": 4040 }, { "epoch": 0.8077759176432373, "grad_norm": 2.015625, "learning_rate": 8.372141354324573e-06, "loss": 1.057, "step": 4041 }, { "epoch": 0.8079758126983334, "grad_norm": 2.125, "learning_rate": 8.37136321533559e-06, "loss": 0.9486, "step": 4042 }, { "epoch": 0.8081757077534294, "grad_norm": 2.015625, "learning_rate": 8.370584926591015e-06, "loss": 1.0627, "step": 4043 }, { "epoch": 0.8083756028085255, "grad_norm": 2.265625, "learning_rate": 8.369806488125418e-06, "loss": 1.0046, "step": 4044 }, { "epoch": 0.8085754978636216, "grad_norm": 2.046875, "learning_rate": 8.369027899973377e-06, "loss": 0.9561, "step": 4045 }, { "epoch": 0.8087753929187177, "grad_norm": 2.125, "learning_rate": 8.368249162169474e-06, "loss": 1.1483, "step": 4046 }, { "epoch": 0.8089752879738138, "grad_norm": 2.296875, "learning_rate": 8.367470274748303e-06, "loss": 1.0364, "step": 4047 }, { "epoch": 0.8091751830289098, "grad_norm": 2.015625, "learning_rate": 8.366691237744465e-06, "loss": 1.0638, "step": 4048 }, { "epoch": 0.8093750780840059, "grad_norm": 1.9921875, "learning_rate": 8.365912051192559e-06, "loss": 1.0725, "step": 4049 }, { "epoch": 0.809574973139102, "grad_norm": 2.03125, "learning_rate": 8.365132715127201e-06, "loss": 1.0622, "step": 4050 }, { "epoch": 0.8097748681941981, "grad_norm": 2.1875, "learning_rate": 8.364353229583007e-06, "loss": 1.115, "step": 4051 }, { "epoch": 0.8099747632492941, "grad_norm": 1.953125, "learning_rate": 8.363573594594603e-06, "loss": 0.9759, "step": 4052 }, { "epoch": 0.8101746583043902, "grad_norm": 2.015625, "learning_rate": 8.362793810196616e-06, "loss": 0.9862, "step": 4053 }, { "epoch": 0.8103745533594863, "grad_norm": 2.03125, "learning_rate": 8.362013876423689e-06, "loss": 1.0371, "step": 4054 }, { "epoch": 0.8105744484145824, "grad_norm": 2.140625, "learning_rate": 8.361233793310466e-06, "loss": 1.0631, "step": 4055 }, { "epoch": 0.8107743434696785, "grad_norm": 2.125, "learning_rate": 8.360453560891594e-06, "loss": 1.0111, "step": 4056 }, { "epoch": 0.8109742385247745, "grad_norm": 2.0625, "learning_rate": 8.359673179201734e-06, "loss": 1.0972, "step": 4057 }, { "epoch": 0.8111741335798706, "grad_norm": 2.0625, "learning_rate": 8.358892648275554e-06, "loss": 0.9885, "step": 4058 }, { "epoch": 0.8113740286349667, "grad_norm": 2.0625, "learning_rate": 8.358111968147717e-06, "loss": 0.994, "step": 4059 }, { "epoch": 0.8115739236900628, "grad_norm": 1.984375, "learning_rate": 8.357331138852907e-06, "loss": 0.9565, "step": 4060 }, { "epoch": 0.8117738187451587, "grad_norm": 2.078125, "learning_rate": 8.356550160425806e-06, "loss": 1.1143, "step": 4061 }, { "epoch": 0.8119737138002548, "grad_norm": 2.1875, "learning_rate": 8.355769032901105e-06, "loss": 1.0377, "step": 4062 }, { "epoch": 0.812173608855351, "grad_norm": 1.9453125, "learning_rate": 8.354987756313501e-06, "loss": 0.9248, "step": 4063 }, { "epoch": 0.812373503910447, "grad_norm": 2.171875, "learning_rate": 8.354206330697702e-06, "loss": 0.9865, "step": 4064 }, { "epoch": 0.812573398965543, "grad_norm": 1.9921875, "learning_rate": 8.353424756088415e-06, "loss": 0.9604, "step": 4065 }, { "epoch": 0.8127732940206391, "grad_norm": 2.171875, "learning_rate": 8.352643032520357e-06, "loss": 1.1125, "step": 4066 }, { "epoch": 0.8129731890757352, "grad_norm": 2.03125, "learning_rate": 8.351861160028256e-06, "loss": 1.1097, "step": 4067 }, { "epoch": 0.8131730841308313, "grad_norm": 2.0625, "learning_rate": 8.351079138646838e-06, "loss": 1.0526, "step": 4068 }, { "epoch": 0.8133729791859274, "grad_norm": 2.125, "learning_rate": 8.350296968410845e-06, "loss": 1.0931, "step": 4069 }, { "epoch": 0.8135728742410234, "grad_norm": 2.09375, "learning_rate": 8.349514649355016e-06, "loss": 1.0257, "step": 4070 }, { "epoch": 0.8137727692961195, "grad_norm": 2.09375, "learning_rate": 8.348732181514105e-06, "loss": 1.0811, "step": 4071 }, { "epoch": 0.8139726643512156, "grad_norm": 2.09375, "learning_rate": 8.347949564922869e-06, "loss": 1.1355, "step": 4072 }, { "epoch": 0.8141725594063117, "grad_norm": 2.015625, "learning_rate": 8.347166799616069e-06, "loss": 1.0506, "step": 4073 }, { "epoch": 0.8143724544614077, "grad_norm": 2.015625, "learning_rate": 8.346383885628478e-06, "loss": 1.0503, "step": 4074 }, { "epoch": 0.8145723495165038, "grad_norm": 2.078125, "learning_rate": 8.345600822994872e-06, "loss": 1.0772, "step": 4075 }, { "epoch": 0.8147722445715999, "grad_norm": 2.03125, "learning_rate": 8.344817611750036e-06, "loss": 1.0449, "step": 4076 }, { "epoch": 0.814972139626696, "grad_norm": 1.9921875, "learning_rate": 8.344034251928759e-06, "loss": 0.9987, "step": 4077 }, { "epoch": 0.8151720346817921, "grad_norm": 2.203125, "learning_rate": 8.343250743565837e-06, "loss": 1.0547, "step": 4078 }, { "epoch": 0.8153719297368881, "grad_norm": 2.125, "learning_rate": 8.342467086696073e-06, "loss": 1.1069, "step": 4079 }, { "epoch": 0.8155718247919842, "grad_norm": 2.109375, "learning_rate": 8.341683281354277e-06, "loss": 1.0543, "step": 4080 }, { "epoch": 0.8157717198470803, "grad_norm": 2.1875, "learning_rate": 8.34089932757527e-06, "loss": 1.0877, "step": 4081 }, { "epoch": 0.8159716149021764, "grad_norm": 2.109375, "learning_rate": 8.34011522539387e-06, "loss": 1.1096, "step": 4082 }, { "epoch": 0.8161715099572724, "grad_norm": 2.09375, "learning_rate": 8.339330974844908e-06, "loss": 1.0155, "step": 4083 }, { "epoch": 0.8163714050123685, "grad_norm": 2.171875, "learning_rate": 8.338546575963223e-06, "loss": 1.0979, "step": 4084 }, { "epoch": 0.8165713000674646, "grad_norm": 2.046875, "learning_rate": 8.337762028783653e-06, "loss": 1.0712, "step": 4085 }, { "epoch": 0.8167711951225607, "grad_norm": 2.015625, "learning_rate": 8.336977333341052e-06, "loss": 1.034, "step": 4086 }, { "epoch": 0.8169710901776567, "grad_norm": 1.984375, "learning_rate": 8.336192489670273e-06, "loss": 1.0542, "step": 4087 }, { "epoch": 0.8171709852327528, "grad_norm": 2.046875, "learning_rate": 8.33540749780618e-06, "loss": 0.9744, "step": 4088 }, { "epoch": 0.8173708802878489, "grad_norm": 2.0625, "learning_rate": 8.334622357783642e-06, "loss": 1.0116, "step": 4089 }, { "epoch": 0.817570775342945, "grad_norm": 2.1875, "learning_rate": 8.333837069637536e-06, "loss": 1.1014, "step": 4090 }, { "epoch": 0.8177706703980411, "grad_norm": 1.9453125, "learning_rate": 8.333051633402743e-06, "loss": 0.9645, "step": 4091 }, { "epoch": 0.8179705654531371, "grad_norm": 1.984375, "learning_rate": 8.332266049114152e-06, "loss": 0.9681, "step": 4092 }, { "epoch": 0.8181704605082332, "grad_norm": 2.078125, "learning_rate": 8.33148031680666e-06, "loss": 1.0724, "step": 4093 }, { "epoch": 0.8183703555633293, "grad_norm": 2.078125, "learning_rate": 8.330694436515168e-06, "loss": 1.0307, "step": 4094 }, { "epoch": 0.8185702506184254, "grad_norm": 2.265625, "learning_rate": 8.329908408274583e-06, "loss": 1.1436, "step": 4095 }, { "epoch": 0.8187701456735214, "grad_norm": 2.046875, "learning_rate": 8.329122232119824e-06, "loss": 1.0197, "step": 4096 }, { "epoch": 0.8189700407286175, "grad_norm": 1.921875, "learning_rate": 8.32833590808581e-06, "loss": 0.9844, "step": 4097 }, { "epoch": 0.8191699357837136, "grad_norm": 2.0, "learning_rate": 8.327549436207472e-06, "loss": 0.9957, "step": 4098 }, { "epoch": 0.8193698308388097, "grad_norm": 1.9921875, "learning_rate": 8.326762816519743e-06, "loss": 0.9574, "step": 4099 }, { "epoch": 0.8195697258939058, "grad_norm": 2.0, "learning_rate": 8.325976049057565e-06, "loss": 0.9851, "step": 4100 }, { "epoch": 0.8197696209490017, "grad_norm": 2.359375, "learning_rate": 8.325189133855884e-06, "loss": 1.1136, "step": 4101 }, { "epoch": 0.8199695160040978, "grad_norm": 1.9921875, "learning_rate": 8.324402070949658e-06, "loss": 1.0151, "step": 4102 }, { "epoch": 0.8201694110591939, "grad_norm": 2.03125, "learning_rate": 8.323614860373848e-06, "loss": 1.0295, "step": 4103 }, { "epoch": 0.82036930611429, "grad_norm": 1.984375, "learning_rate": 8.322827502163422e-06, "loss": 1.0286, "step": 4104 }, { "epoch": 0.820569201169386, "grad_norm": 2.1875, "learning_rate": 8.32203999635335e-06, "loss": 1.1238, "step": 4105 }, { "epoch": 0.8207690962244821, "grad_norm": 2.078125, "learning_rate": 8.321252342978617e-06, "loss": 1.1182, "step": 4106 }, { "epoch": 0.8209689912795782, "grad_norm": 2.109375, "learning_rate": 8.32046454207421e-06, "loss": 1.0793, "step": 4107 }, { "epoch": 0.8211688863346743, "grad_norm": 2.0, "learning_rate": 8.319676593675124e-06, "loss": 0.9489, "step": 4108 }, { "epoch": 0.8213687813897703, "grad_norm": 1.9609375, "learning_rate": 8.318888497816357e-06, "loss": 0.9854, "step": 4109 }, { "epoch": 0.8215686764448664, "grad_norm": 2.0, "learning_rate": 8.318100254532917e-06, "loss": 0.8775, "step": 4110 }, { "epoch": 0.8217685714999625, "grad_norm": 2.0625, "learning_rate": 8.31731186385982e-06, "loss": 1.0759, "step": 4111 }, { "epoch": 0.8219684665550586, "grad_norm": 2.140625, "learning_rate": 8.316523325832083e-06, "loss": 0.9916, "step": 4112 }, { "epoch": 0.8221683616101547, "grad_norm": 2.0625, "learning_rate": 8.315734640484734e-06, "loss": 1.062, "step": 4113 }, { "epoch": 0.8223682566652507, "grad_norm": 2.109375, "learning_rate": 8.314945807852808e-06, "loss": 1.061, "step": 4114 }, { "epoch": 0.8225681517203468, "grad_norm": 2.078125, "learning_rate": 8.31415682797134e-06, "loss": 1.0866, "step": 4115 }, { "epoch": 0.8227680467754429, "grad_norm": 2.21875, "learning_rate": 8.313367700875381e-06, "loss": 1.17, "step": 4116 }, { "epoch": 0.822967941830539, "grad_norm": 1.984375, "learning_rate": 8.312578426599984e-06, "loss": 0.9527, "step": 4117 }, { "epoch": 0.823167836885635, "grad_norm": 2.109375, "learning_rate": 8.311789005180207e-06, "loss": 1.085, "step": 4118 }, { "epoch": 0.8233677319407311, "grad_norm": 2.0625, "learning_rate": 8.310999436651115e-06, "loss": 1.0659, "step": 4119 }, { "epoch": 0.8235676269958272, "grad_norm": 2.15625, "learning_rate": 8.310209721047782e-06, "loss": 1.0947, "step": 4120 }, { "epoch": 0.8237675220509233, "grad_norm": 1.96875, "learning_rate": 8.309419858405287e-06, "loss": 0.9207, "step": 4121 }, { "epoch": 0.8239674171060194, "grad_norm": 2.0, "learning_rate": 8.308629848758714e-06, "loss": 1.0384, "step": 4122 }, { "epoch": 0.8241673121611154, "grad_norm": 2.25, "learning_rate": 8.307839692143158e-06, "loss": 1.0301, "step": 4123 }, { "epoch": 0.8243672072162115, "grad_norm": 2.15625, "learning_rate": 8.307049388593716e-06, "loss": 1.0894, "step": 4124 }, { "epoch": 0.8245671022713076, "grad_norm": 2.03125, "learning_rate": 8.306258938145493e-06, "loss": 0.9666, "step": 4125 }, { "epoch": 0.8247669973264037, "grad_norm": 2.21875, "learning_rate": 8.3054683408336e-06, "loss": 1.0584, "step": 4126 }, { "epoch": 0.8249668923814997, "grad_norm": 2.15625, "learning_rate": 8.304677596693158e-06, "loss": 1.0143, "step": 4127 }, { "epoch": 0.8251667874365958, "grad_norm": 2.03125, "learning_rate": 8.30388670575929e-06, "loss": 0.9847, "step": 4128 }, { "epoch": 0.8253666824916919, "grad_norm": 2.015625, "learning_rate": 8.303095668067127e-06, "loss": 0.9561, "step": 4129 }, { "epoch": 0.825566577546788, "grad_norm": 2.046875, "learning_rate": 8.302304483651806e-06, "loss": 1.0133, "step": 4130 }, { "epoch": 0.825766472601884, "grad_norm": 2.234375, "learning_rate": 8.301513152548474e-06, "loss": 1.0884, "step": 4131 }, { "epoch": 0.8259663676569801, "grad_norm": 2.078125, "learning_rate": 8.30072167479228e-06, "loss": 1.0631, "step": 4132 }, { "epoch": 0.8261662627120762, "grad_norm": 2.15625, "learning_rate": 8.299930050418383e-06, "loss": 1.0408, "step": 4133 }, { "epoch": 0.8263661577671723, "grad_norm": 2.109375, "learning_rate": 8.299138279461945e-06, "loss": 1.0644, "step": 4134 }, { "epoch": 0.8265660528222684, "grad_norm": 2.0625, "learning_rate": 8.298346361958136e-06, "loss": 0.9942, "step": 4135 }, { "epoch": 0.8267659478773643, "grad_norm": 1.953125, "learning_rate": 8.297554297942134e-06, "loss": 1.0154, "step": 4136 }, { "epoch": 0.8269658429324604, "grad_norm": 2.28125, "learning_rate": 8.296762087449122e-06, "loss": 1.07, "step": 4137 }, { "epoch": 0.8271657379875565, "grad_norm": 1.96875, "learning_rate": 8.29596973051429e-06, "loss": 1.014, "step": 4138 }, { "epoch": 0.8273656330426526, "grad_norm": 2.078125, "learning_rate": 8.295177227172837e-06, "loss": 0.9717, "step": 4139 }, { "epoch": 0.8275655280977486, "grad_norm": 2.03125, "learning_rate": 8.294384577459961e-06, "loss": 1.0929, "step": 4140 }, { "epoch": 0.8277654231528447, "grad_norm": 2.0625, "learning_rate": 8.293591781410874e-06, "loss": 0.992, "step": 4141 }, { "epoch": 0.8279653182079408, "grad_norm": 2.3125, "learning_rate": 8.292798839060794e-06, "loss": 1.1135, "step": 4142 }, { "epoch": 0.8281652132630369, "grad_norm": 1.9765625, "learning_rate": 8.29200575044494e-06, "loss": 1.0303, "step": 4143 }, { "epoch": 0.8283651083181329, "grad_norm": 2.171875, "learning_rate": 8.29121251559854e-06, "loss": 0.9511, "step": 4144 }, { "epoch": 0.828565003373229, "grad_norm": 2.046875, "learning_rate": 8.290419134556835e-06, "loss": 1.0883, "step": 4145 }, { "epoch": 0.8287648984283251, "grad_norm": 2.21875, "learning_rate": 8.289625607355062e-06, "loss": 1.0834, "step": 4146 }, { "epoch": 0.8289647934834212, "grad_norm": 2.078125, "learning_rate": 8.288831934028471e-06, "loss": 1.0117, "step": 4147 }, { "epoch": 0.8291646885385173, "grad_norm": 2.046875, "learning_rate": 8.288038114612316e-06, "loss": 0.9673, "step": 4148 }, { "epoch": 0.8293645835936133, "grad_norm": 2.21875, "learning_rate": 8.287244149141861e-06, "loss": 1.0559, "step": 4149 }, { "epoch": 0.8295644786487094, "grad_norm": 2.0625, "learning_rate": 8.286450037652369e-06, "loss": 1.0355, "step": 4150 }, { "epoch": 0.8297643737038055, "grad_norm": 2.09375, "learning_rate": 8.28565578017912e-06, "loss": 1.0386, "step": 4151 }, { "epoch": 0.8299642687589016, "grad_norm": 2.078125, "learning_rate": 8.284861376757391e-06, "loss": 1.0309, "step": 4152 }, { "epoch": 0.8301641638139976, "grad_norm": 1.9765625, "learning_rate": 8.28406682742247e-06, "loss": 0.9418, "step": 4153 }, { "epoch": 0.8303640588690937, "grad_norm": 2.09375, "learning_rate": 8.283272132209653e-06, "loss": 1.0459, "step": 4154 }, { "epoch": 0.8305639539241898, "grad_norm": 1.96875, "learning_rate": 8.282477291154238e-06, "loss": 1.0016, "step": 4155 }, { "epoch": 0.8307638489792859, "grad_norm": 2.109375, "learning_rate": 8.281682304291531e-06, "loss": 1.0282, "step": 4156 }, { "epoch": 0.830963744034382, "grad_norm": 2.109375, "learning_rate": 8.280887171656848e-06, "loss": 1.0862, "step": 4157 }, { "epoch": 0.831163639089478, "grad_norm": 2.078125, "learning_rate": 8.280091893285508e-06, "loss": 1.0085, "step": 4158 }, { "epoch": 0.8313635341445741, "grad_norm": 2.140625, "learning_rate": 8.279296469212833e-06, "loss": 1.0787, "step": 4159 }, { "epoch": 0.8315634291996702, "grad_norm": 2.109375, "learning_rate": 8.278500899474162e-06, "loss": 1.0031, "step": 4160 }, { "epoch": 0.8317633242547663, "grad_norm": 2.015625, "learning_rate": 8.277705184104831e-06, "loss": 0.9732, "step": 4161 }, { "epoch": 0.8319632193098623, "grad_norm": 1.9765625, "learning_rate": 8.276909323140186e-06, "loss": 0.9604, "step": 4162 }, { "epoch": 0.8321631143649584, "grad_norm": 2.203125, "learning_rate": 8.276113316615577e-06, "loss": 1.0415, "step": 4163 }, { "epoch": 0.8323630094200545, "grad_norm": 2.234375, "learning_rate": 8.275317164566365e-06, "loss": 1.0282, "step": 4164 }, { "epoch": 0.8325629044751506, "grad_norm": 2.140625, "learning_rate": 8.274520867027915e-06, "loss": 1.0074, "step": 4165 }, { "epoch": 0.8327627995302466, "grad_norm": 2.046875, "learning_rate": 8.273724424035599e-06, "loss": 1.022, "step": 4166 }, { "epoch": 0.8329626945853427, "grad_norm": 2.0625, "learning_rate": 8.272927835624791e-06, "loss": 1.0232, "step": 4167 }, { "epoch": 0.8331625896404388, "grad_norm": 2.0625, "learning_rate": 8.272131101830878e-06, "loss": 1.0433, "step": 4168 }, { "epoch": 0.8333624846955349, "grad_norm": 2.171875, "learning_rate": 8.271334222689254e-06, "loss": 1.0848, "step": 4169 }, { "epoch": 0.833562379750631, "grad_norm": 2.0, "learning_rate": 8.270537198235311e-06, "loss": 1.024, "step": 4170 }, { "epoch": 0.833762274805727, "grad_norm": 2.109375, "learning_rate": 8.269740028504455e-06, "loss": 1.0799, "step": 4171 }, { "epoch": 0.833962169860823, "grad_norm": 2.140625, "learning_rate": 8.268942713532098e-06, "loss": 1.02, "step": 4172 }, { "epoch": 0.8341620649159192, "grad_norm": 1.953125, "learning_rate": 8.268145253353653e-06, "loss": 0.9191, "step": 4173 }, { "epoch": 0.8343619599710153, "grad_norm": 2.015625, "learning_rate": 8.267347648004545e-06, "loss": 1.1071, "step": 4174 }, { "epoch": 0.8345618550261112, "grad_norm": 2.46875, "learning_rate": 8.266549897520204e-06, "loss": 1.0778, "step": 4175 }, { "epoch": 0.8347617500812073, "grad_norm": 2.0, "learning_rate": 8.265752001936067e-06, "loss": 1.0233, "step": 4176 }, { "epoch": 0.8349616451363034, "grad_norm": 2.0, "learning_rate": 8.264953961287573e-06, "loss": 1.0217, "step": 4177 }, { "epoch": 0.8351615401913995, "grad_norm": 2.078125, "learning_rate": 8.264155775610172e-06, "loss": 1.0185, "step": 4178 }, { "epoch": 0.8353614352464956, "grad_norm": 2.109375, "learning_rate": 8.263357444939321e-06, "loss": 1.0463, "step": 4179 }, { "epoch": 0.8355613303015916, "grad_norm": 2.390625, "learning_rate": 8.26255896931048e-06, "loss": 1.0506, "step": 4180 }, { "epoch": 0.8357612253566877, "grad_norm": 2.015625, "learning_rate": 8.26176034875912e-06, "loss": 1.0384, "step": 4181 }, { "epoch": 0.8359611204117838, "grad_norm": 2.203125, "learning_rate": 8.26096158332071e-06, "loss": 1.03, "step": 4182 }, { "epoch": 0.8361610154668799, "grad_norm": 1.984375, "learning_rate": 8.260162673030739e-06, "loss": 1.1151, "step": 4183 }, { "epoch": 0.8363609105219759, "grad_norm": 2.03125, "learning_rate": 8.259363617924689e-06, "loss": 0.9808, "step": 4184 }, { "epoch": 0.836560805577072, "grad_norm": 2.109375, "learning_rate": 8.258564418038053e-06, "loss": 1.0657, "step": 4185 }, { "epoch": 0.8367607006321681, "grad_norm": 2.09375, "learning_rate": 8.257765073406337e-06, "loss": 1.0732, "step": 4186 }, { "epoch": 0.8369605956872642, "grad_norm": 2.0625, "learning_rate": 8.256965584065042e-06, "loss": 0.9965, "step": 4187 }, { "epoch": 0.8371604907423602, "grad_norm": 2.09375, "learning_rate": 8.256165950049684e-06, "loss": 1.0115, "step": 4188 }, { "epoch": 0.8373603857974563, "grad_norm": 2.203125, "learning_rate": 8.255366171395783e-06, "loss": 1.1086, "step": 4189 }, { "epoch": 0.8375602808525524, "grad_norm": 2.015625, "learning_rate": 8.254566248138865e-06, "loss": 0.9886, "step": 4190 }, { "epoch": 0.8377601759076485, "grad_norm": 2.03125, "learning_rate": 8.25376618031446e-06, "loss": 1.0307, "step": 4191 }, { "epoch": 0.8379600709627446, "grad_norm": 2.09375, "learning_rate": 8.252965967958108e-06, "loss": 1.1474, "step": 4192 }, { "epoch": 0.8381599660178406, "grad_norm": 2.0625, "learning_rate": 8.252165611105358e-06, "loss": 1.0694, "step": 4193 }, { "epoch": 0.8383598610729367, "grad_norm": 2.09375, "learning_rate": 8.251365109791758e-06, "loss": 0.9362, "step": 4194 }, { "epoch": 0.8385597561280328, "grad_norm": 1.984375, "learning_rate": 8.250564464052865e-06, "loss": 0.9577, "step": 4195 }, { "epoch": 0.8387596511831289, "grad_norm": 2.46875, "learning_rate": 8.249763673924248e-06, "loss": 0.9991, "step": 4196 }, { "epoch": 0.8389595462382249, "grad_norm": 2.015625, "learning_rate": 8.248962739441475e-06, "loss": 0.9452, "step": 4197 }, { "epoch": 0.839159441293321, "grad_norm": 2.078125, "learning_rate": 8.248161660640123e-06, "loss": 1.0779, "step": 4198 }, { "epoch": 0.8393593363484171, "grad_norm": 2.109375, "learning_rate": 8.24736043755578e-06, "loss": 1.0068, "step": 4199 }, { "epoch": 0.8395592314035132, "grad_norm": 2.0, "learning_rate": 8.24655907022403e-06, "loss": 0.9692, "step": 4200 }, { "epoch": 0.8397591264586093, "grad_norm": 2.078125, "learning_rate": 8.245757558680474e-06, "loss": 0.9732, "step": 4201 }, { "epoch": 0.8399590215137053, "grad_norm": 2.0, "learning_rate": 8.244955902960713e-06, "loss": 0.9686, "step": 4202 }, { "epoch": 0.8401589165688014, "grad_norm": 2.34375, "learning_rate": 8.24415410310036e-06, "loss": 0.9976, "step": 4203 }, { "epoch": 0.8403588116238975, "grad_norm": 2.1875, "learning_rate": 8.243352159135026e-06, "loss": 1.1032, "step": 4204 }, { "epoch": 0.8405587066789936, "grad_norm": 1.984375, "learning_rate": 8.242550071100336e-06, "loss": 1.012, "step": 4205 }, { "epoch": 0.8407586017340896, "grad_norm": 2.15625, "learning_rate": 8.24174783903192e-06, "loss": 1.1062, "step": 4206 }, { "epoch": 0.8409584967891857, "grad_norm": 1.9921875, "learning_rate": 8.240945462965408e-06, "loss": 1.0424, "step": 4207 }, { "epoch": 0.8411583918442818, "grad_norm": 2.078125, "learning_rate": 8.240142942936446e-06, "loss": 1.0927, "step": 4208 }, { "epoch": 0.8413582868993779, "grad_norm": 2.125, "learning_rate": 8.239340278980681e-06, "loss": 1.0483, "step": 4209 }, { "epoch": 0.8415581819544738, "grad_norm": 2.03125, "learning_rate": 8.238537471133768e-06, "loss": 1.0106, "step": 4210 }, { "epoch": 0.84175807700957, "grad_norm": 2.1875, "learning_rate": 8.237734519431365e-06, "loss": 0.9823, "step": 4211 }, { "epoch": 0.841957972064666, "grad_norm": 2.21875, "learning_rate": 8.23693142390914e-06, "loss": 1.0696, "step": 4212 }, { "epoch": 0.8421578671197621, "grad_norm": 2.1875, "learning_rate": 8.236128184602766e-06, "loss": 0.9739, "step": 4213 }, { "epoch": 0.8423577621748582, "grad_norm": 2.15625, "learning_rate": 8.235324801547926e-06, "loss": 1.0381, "step": 4214 }, { "epoch": 0.8425576572299542, "grad_norm": 2.015625, "learning_rate": 8.234521274780302e-06, "loss": 0.9209, "step": 4215 }, { "epoch": 0.8427575522850503, "grad_norm": 1.9375, "learning_rate": 8.233717604335589e-06, "loss": 1.057, "step": 4216 }, { "epoch": 0.8429574473401464, "grad_norm": 2.03125, "learning_rate": 8.232913790249486e-06, "loss": 0.9963, "step": 4217 }, { "epoch": 0.8431573423952425, "grad_norm": 2.171875, "learning_rate": 8.232109832557696e-06, "loss": 1.087, "step": 4218 }, { "epoch": 0.8433572374503385, "grad_norm": 2.171875, "learning_rate": 8.231305731295935e-06, "loss": 1.128, "step": 4219 }, { "epoch": 0.8435571325054346, "grad_norm": 2.203125, "learning_rate": 8.230501486499915e-06, "loss": 1.0614, "step": 4220 }, { "epoch": 0.8437570275605307, "grad_norm": 2.109375, "learning_rate": 8.22969709820537e-06, "loss": 1.0407, "step": 4221 }, { "epoch": 0.8439569226156268, "grad_norm": 2.15625, "learning_rate": 8.228892566448018e-06, "loss": 1.0015, "step": 4222 }, { "epoch": 0.8441568176707229, "grad_norm": 2.171875, "learning_rate": 8.228087891263608e-06, "loss": 1.0536, "step": 4223 }, { "epoch": 0.8443567127258189, "grad_norm": 2.03125, "learning_rate": 8.227283072687877e-06, "loss": 1.0252, "step": 4224 }, { "epoch": 0.844556607780915, "grad_norm": 2.015625, "learning_rate": 8.226478110756574e-06, "loss": 1.0179, "step": 4225 }, { "epoch": 0.8447565028360111, "grad_norm": 2.109375, "learning_rate": 8.225673005505461e-06, "loss": 1.0577, "step": 4226 }, { "epoch": 0.8449563978911072, "grad_norm": 1.9765625, "learning_rate": 8.224867756970298e-06, "loss": 0.9411, "step": 4227 }, { "epoch": 0.8451562929462032, "grad_norm": 2.125, "learning_rate": 8.224062365186852e-06, "loss": 1.1332, "step": 4228 }, { "epoch": 0.8453561880012993, "grad_norm": 2.125, "learning_rate": 8.223256830190901e-06, "loss": 1.0812, "step": 4229 }, { "epoch": 0.8455560830563954, "grad_norm": 2.046875, "learning_rate": 8.222451152018225e-06, "loss": 0.9662, "step": 4230 }, { "epoch": 0.8457559781114915, "grad_norm": 2.0625, "learning_rate": 8.221645330704615e-06, "loss": 1.0622, "step": 4231 }, { "epoch": 0.8459558731665875, "grad_norm": 1.90625, "learning_rate": 8.220839366285862e-06, "loss": 0.948, "step": 4232 }, { "epoch": 0.8461557682216836, "grad_norm": 2.046875, "learning_rate": 8.220033258797767e-06, "loss": 1.0634, "step": 4233 }, { "epoch": 0.8463556632767797, "grad_norm": 1.9375, "learning_rate": 8.21922700827614e-06, "loss": 1.0048, "step": 4234 }, { "epoch": 0.8465555583318758, "grad_norm": 2.03125, "learning_rate": 8.218420614756793e-06, "loss": 1.0041, "step": 4235 }, { "epoch": 0.8467554533869719, "grad_norm": 2.09375, "learning_rate": 8.217614078275547e-06, "loss": 1.008, "step": 4236 }, { "epoch": 0.8469553484420679, "grad_norm": 1.9140625, "learning_rate": 8.216807398868225e-06, "loss": 0.9848, "step": 4237 }, { "epoch": 0.847155243497164, "grad_norm": 2.046875, "learning_rate": 8.216000576570664e-06, "loss": 1.0451, "step": 4238 }, { "epoch": 0.8473551385522601, "grad_norm": 2.0625, "learning_rate": 8.2151936114187e-06, "loss": 0.9849, "step": 4239 }, { "epoch": 0.8475550336073562, "grad_norm": 2.15625, "learning_rate": 8.21438650344818e-06, "loss": 1.1296, "step": 4240 }, { "epoch": 0.8477549286624522, "grad_norm": 2.078125, "learning_rate": 8.213579252694954e-06, "loss": 1.0473, "step": 4241 }, { "epoch": 0.8479548237175483, "grad_norm": 2.046875, "learning_rate": 8.212771859194881e-06, "loss": 1.056, "step": 4242 }, { "epoch": 0.8481547187726444, "grad_norm": 2.03125, "learning_rate": 8.211964322983824e-06, "loss": 1.0194, "step": 4243 }, { "epoch": 0.8483546138277405, "grad_norm": 2.109375, "learning_rate": 8.211156644097656e-06, "loss": 1.0683, "step": 4244 }, { "epoch": 0.8485545088828365, "grad_norm": 1.9375, "learning_rate": 8.210348822572253e-06, "loss": 1.0429, "step": 4245 }, { "epoch": 0.8487544039379326, "grad_norm": 2.0625, "learning_rate": 8.209540858443499e-06, "loss": 1.0126, "step": 4246 }, { "epoch": 0.8489542989930287, "grad_norm": 2.140625, "learning_rate": 8.208732751747281e-06, "loss": 1.0767, "step": 4247 }, { "epoch": 0.8491541940481248, "grad_norm": 2.234375, "learning_rate": 8.207924502519498e-06, "loss": 1.0443, "step": 4248 }, { "epoch": 0.8493540891032209, "grad_norm": 2.203125, "learning_rate": 8.20711611079605e-06, "loss": 1.0341, "step": 4249 }, { "epoch": 0.8495539841583168, "grad_norm": 1.984375, "learning_rate": 8.206307576612848e-06, "loss": 0.9718, "step": 4250 }, { "epoch": 0.8497538792134129, "grad_norm": 2.0625, "learning_rate": 8.205498900005806e-06, "loss": 1.1404, "step": 4251 }, { "epoch": 0.849953774268509, "grad_norm": 2.046875, "learning_rate": 8.204690081010845e-06, "loss": 0.9937, "step": 4252 }, { "epoch": 0.8501536693236051, "grad_norm": 2.078125, "learning_rate": 8.203881119663893e-06, "loss": 1.0616, "step": 4253 }, { "epoch": 0.8503535643787011, "grad_norm": 2.0625, "learning_rate": 8.203072016000884e-06, "loss": 1.064, "step": 4254 }, { "epoch": 0.8505534594337972, "grad_norm": 2.15625, "learning_rate": 8.202262770057756e-06, "loss": 1.0711, "step": 4255 }, { "epoch": 0.8507533544888933, "grad_norm": 2.046875, "learning_rate": 8.201453381870461e-06, "loss": 0.9932, "step": 4256 }, { "epoch": 0.8509532495439894, "grad_norm": 2.203125, "learning_rate": 8.200643851474947e-06, "loss": 1.0243, "step": 4257 }, { "epoch": 0.8511531445990855, "grad_norm": 2.125, "learning_rate": 8.199834178907174e-06, "loss": 1.0337, "step": 4258 }, { "epoch": 0.8513530396541815, "grad_norm": 1.9453125, "learning_rate": 8.19902436420311e-06, "loss": 1.0912, "step": 4259 }, { "epoch": 0.8515529347092776, "grad_norm": 2.046875, "learning_rate": 8.198214407398726e-06, "loss": 0.9725, "step": 4260 }, { "epoch": 0.8517528297643737, "grad_norm": 2.234375, "learning_rate": 8.197404308529997e-06, "loss": 1.0013, "step": 4261 }, { "epoch": 0.8519527248194698, "grad_norm": 1.96875, "learning_rate": 8.196594067632913e-06, "loss": 1.0067, "step": 4262 }, { "epoch": 0.8521526198745658, "grad_norm": 2.078125, "learning_rate": 8.195783684743461e-06, "loss": 0.9922, "step": 4263 }, { "epoch": 0.8523525149296619, "grad_norm": 1.96875, "learning_rate": 8.19497315989764e-06, "loss": 0.9992, "step": 4264 }, { "epoch": 0.852552409984758, "grad_norm": 2.109375, "learning_rate": 8.19416249313145e-06, "loss": 1.0969, "step": 4265 }, { "epoch": 0.8527523050398541, "grad_norm": 1.953125, "learning_rate": 8.193351684480904e-06, "loss": 1.0002, "step": 4266 }, { "epoch": 0.8529522000949501, "grad_norm": 1.953125, "learning_rate": 8.192540733982017e-06, "loss": 0.8837, "step": 4267 }, { "epoch": 0.8531520951500462, "grad_norm": 2.171875, "learning_rate": 8.191729641670813e-06, "loss": 1.0419, "step": 4268 }, { "epoch": 0.8533519902051423, "grad_norm": 2.0625, "learning_rate": 8.190918407583319e-06, "loss": 0.9997, "step": 4269 }, { "epoch": 0.8535518852602384, "grad_norm": 2.109375, "learning_rate": 8.190107031755569e-06, "loss": 1.0607, "step": 4270 }, { "epoch": 0.8537517803153345, "grad_norm": 1.9765625, "learning_rate": 8.189295514223607e-06, "loss": 1.0516, "step": 4271 }, { "epoch": 0.8539516753704305, "grad_norm": 2.046875, "learning_rate": 8.188483855023476e-06, "loss": 0.9929, "step": 4272 }, { "epoch": 0.8541515704255266, "grad_norm": 2.046875, "learning_rate": 8.187672054191236e-06, "loss": 1.0549, "step": 4273 }, { "epoch": 0.8543514654806227, "grad_norm": 2.109375, "learning_rate": 8.186860111762941e-06, "loss": 1.1324, "step": 4274 }, { "epoch": 0.8545513605357188, "grad_norm": 2.03125, "learning_rate": 8.186048027774661e-06, "loss": 1.1121, "step": 4275 }, { "epoch": 0.8547512555908148, "grad_norm": 2.046875, "learning_rate": 8.18523580226247e-06, "loss": 0.9482, "step": 4276 }, { "epoch": 0.8549511506459109, "grad_norm": 1.9609375, "learning_rate": 8.184423435262442e-06, "loss": 0.979, "step": 4277 }, { "epoch": 0.855151045701007, "grad_norm": 2.078125, "learning_rate": 8.183610926810667e-06, "loss": 1.0637, "step": 4278 }, { "epoch": 0.8553509407561031, "grad_norm": 2.140625, "learning_rate": 8.182798276943236e-06, "loss": 1.1295, "step": 4279 }, { "epoch": 0.8555508358111992, "grad_norm": 2.03125, "learning_rate": 8.181985485696242e-06, "loss": 1.0336, "step": 4280 }, { "epoch": 0.8557507308662952, "grad_norm": 2.140625, "learning_rate": 8.181172553105793e-06, "loss": 1.0707, "step": 4281 }, { "epoch": 0.8559506259213913, "grad_norm": 2.078125, "learning_rate": 8.180359479208002e-06, "loss": 1.1241, "step": 4282 }, { "epoch": 0.8561505209764874, "grad_norm": 2.046875, "learning_rate": 8.179546264038982e-06, "loss": 1.0029, "step": 4283 }, { "epoch": 0.8563504160315835, "grad_norm": 2.078125, "learning_rate": 8.178732907634854e-06, "loss": 1.0113, "step": 4284 }, { "epoch": 0.8565503110866795, "grad_norm": 2.296875, "learning_rate": 8.177919410031752e-06, "loss": 1.0303, "step": 4285 }, { "epoch": 0.8567502061417755, "grad_norm": 2.0625, "learning_rate": 8.177105771265808e-06, "loss": 0.9785, "step": 4286 }, { "epoch": 0.8569501011968716, "grad_norm": 2.078125, "learning_rate": 8.176291991373164e-06, "loss": 0.9925, "step": 4287 }, { "epoch": 0.8571499962519677, "grad_norm": 2.125, "learning_rate": 8.17547807038997e-06, "loss": 1.0772, "step": 4288 }, { "epoch": 0.8573498913070637, "grad_norm": 2.109375, "learning_rate": 8.17466400835238e-06, "loss": 1.0025, "step": 4289 }, { "epoch": 0.8575497863621598, "grad_norm": 1.9921875, "learning_rate": 8.173849805296553e-06, "loss": 1.0628, "step": 4290 }, { "epoch": 0.8577496814172559, "grad_norm": 2.1875, "learning_rate": 8.173035461258658e-06, "loss": 1.0494, "step": 4291 }, { "epoch": 0.857949576472352, "grad_norm": 2.046875, "learning_rate": 8.172220976274865e-06, "loss": 1.0009, "step": 4292 }, { "epoch": 0.8581494715274481, "grad_norm": 2.09375, "learning_rate": 8.171406350381354e-06, "loss": 1.0389, "step": 4293 }, { "epoch": 0.8583493665825441, "grad_norm": 2.046875, "learning_rate": 8.170591583614313e-06, "loss": 0.9508, "step": 4294 }, { "epoch": 0.8585492616376402, "grad_norm": 2.09375, "learning_rate": 8.169776676009935e-06, "loss": 1.0055, "step": 4295 }, { "epoch": 0.8587491566927363, "grad_norm": 2.015625, "learning_rate": 8.168961627604413e-06, "loss": 0.9748, "step": 4296 }, { "epoch": 0.8589490517478324, "grad_norm": 2.15625, "learning_rate": 8.168146438433952e-06, "loss": 1.0029, "step": 4297 }, { "epoch": 0.8591489468029284, "grad_norm": 2.140625, "learning_rate": 8.167331108534769e-06, "loss": 1.1574, "step": 4298 }, { "epoch": 0.8593488418580245, "grad_norm": 2.046875, "learning_rate": 8.166515637943072e-06, "loss": 0.9615, "step": 4299 }, { "epoch": 0.8595487369131206, "grad_norm": 2.015625, "learning_rate": 8.165700026695094e-06, "loss": 1.0508, "step": 4300 }, { "epoch": 0.8597486319682167, "grad_norm": 2.109375, "learning_rate": 8.164884274827055e-06, "loss": 1.0879, "step": 4301 }, { "epoch": 0.8599485270233128, "grad_norm": 2.0625, "learning_rate": 8.164068382375195e-06, "loss": 0.9528, "step": 4302 }, { "epoch": 0.8601484220784088, "grad_norm": 2.125, "learning_rate": 8.163252349375755e-06, "loss": 1.0222, "step": 4303 }, { "epoch": 0.8603483171335049, "grad_norm": 2.03125, "learning_rate": 8.162436175864985e-06, "loss": 0.9972, "step": 4304 }, { "epoch": 0.860548212188601, "grad_norm": 2.140625, "learning_rate": 8.161619861879136e-06, "loss": 1.0338, "step": 4305 }, { "epoch": 0.8607481072436971, "grad_norm": 2.03125, "learning_rate": 8.160803407454472e-06, "loss": 1.0123, "step": 4306 }, { "epoch": 0.8609480022987931, "grad_norm": 2.203125, "learning_rate": 8.159986812627258e-06, "loss": 1.0679, "step": 4307 }, { "epoch": 0.8611478973538892, "grad_norm": 2.1875, "learning_rate": 8.159170077433766e-06, "loss": 1.1495, "step": 4308 }, { "epoch": 0.8613477924089853, "grad_norm": 1.9453125, "learning_rate": 8.158353201910279e-06, "loss": 1.0023, "step": 4309 }, { "epoch": 0.8615476874640814, "grad_norm": 2.15625, "learning_rate": 8.157536186093079e-06, "loss": 1.037, "step": 4310 }, { "epoch": 0.8617475825191774, "grad_norm": 2.015625, "learning_rate": 8.156719030018456e-06, "loss": 1.0871, "step": 4311 }, { "epoch": 0.8619474775742735, "grad_norm": 2.0, "learning_rate": 8.155901733722714e-06, "loss": 1.0027, "step": 4312 }, { "epoch": 0.8621473726293696, "grad_norm": 2.078125, "learning_rate": 8.155084297242152e-06, "loss": 1.0906, "step": 4313 }, { "epoch": 0.8623472676844657, "grad_norm": 2.125, "learning_rate": 8.154266720613086e-06, "loss": 1.0564, "step": 4314 }, { "epoch": 0.8625471627395618, "grad_norm": 2.1875, "learning_rate": 8.153449003871828e-06, "loss": 1.1208, "step": 4315 }, { "epoch": 0.8627470577946578, "grad_norm": 2.015625, "learning_rate": 8.1526311470547e-06, "loss": 0.9666, "step": 4316 }, { "epoch": 0.8629469528497539, "grad_norm": 2.171875, "learning_rate": 8.151813150198033e-06, "loss": 1.0289, "step": 4317 }, { "epoch": 0.86314684790485, "grad_norm": 2.15625, "learning_rate": 8.150995013338165e-06, "loss": 1.0856, "step": 4318 }, { "epoch": 0.8633467429599461, "grad_norm": 2.1875, "learning_rate": 8.150176736511432e-06, "loss": 1.0077, "step": 4319 }, { "epoch": 0.8635466380150421, "grad_norm": 2.015625, "learning_rate": 8.149358319754188e-06, "loss": 1.0339, "step": 4320 }, { "epoch": 0.8637465330701382, "grad_norm": 2.09375, "learning_rate": 8.148539763102782e-06, "loss": 1.0569, "step": 4321 }, { "epoch": 0.8639464281252343, "grad_norm": 2.125, "learning_rate": 8.147721066593577e-06, "loss": 1.0126, "step": 4322 }, { "epoch": 0.8641463231803304, "grad_norm": 2.171875, "learning_rate": 8.146902230262936e-06, "loss": 1.0008, "step": 4323 }, { "epoch": 0.8643462182354265, "grad_norm": 2.125, "learning_rate": 8.146083254147237e-06, "loss": 1.0532, "step": 4324 }, { "epoch": 0.8645461132905224, "grad_norm": 2.15625, "learning_rate": 8.145264138282853e-06, "loss": 1.0592, "step": 4325 }, { "epoch": 0.8647460083456185, "grad_norm": 1.9765625, "learning_rate": 8.144444882706175e-06, "loss": 0.9666, "step": 4326 }, { "epoch": 0.8649459034007146, "grad_norm": 1.9921875, "learning_rate": 8.14362548745359e-06, "loss": 1.0502, "step": 4327 }, { "epoch": 0.8651457984558107, "grad_norm": 2.203125, "learning_rate": 8.142805952561495e-06, "loss": 1.0719, "step": 4328 }, { "epoch": 0.8653456935109067, "grad_norm": 2.140625, "learning_rate": 8.141986278066296e-06, "loss": 1.0825, "step": 4329 }, { "epoch": 0.8655455885660028, "grad_norm": 2.171875, "learning_rate": 8.141166464004404e-06, "loss": 1.0434, "step": 4330 }, { "epoch": 0.8657454836210989, "grad_norm": 2.140625, "learning_rate": 8.140346510412232e-06, "loss": 1.0814, "step": 4331 }, { "epoch": 0.865945378676195, "grad_norm": 1.9921875, "learning_rate": 8.139526417326203e-06, "loss": 0.9666, "step": 4332 }, { "epoch": 0.866145273731291, "grad_norm": 2.15625, "learning_rate": 8.138706184782745e-06, "loss": 1.0682, "step": 4333 }, { "epoch": 0.8663451687863871, "grad_norm": 2.15625, "learning_rate": 8.137885812818296e-06, "loss": 1.0596, "step": 4334 }, { "epoch": 0.8665450638414832, "grad_norm": 2.015625, "learning_rate": 8.137065301469292e-06, "loss": 0.9758, "step": 4335 }, { "epoch": 0.8667449588965793, "grad_norm": 2.03125, "learning_rate": 8.136244650772183e-06, "loss": 1.0406, "step": 4336 }, { "epoch": 0.8669448539516754, "grad_norm": 2.09375, "learning_rate": 8.135423860763422e-06, "loss": 1.0339, "step": 4337 }, { "epoch": 0.8671447490067714, "grad_norm": 1.9609375, "learning_rate": 8.134602931479468e-06, "loss": 0.9688, "step": 4338 }, { "epoch": 0.8673446440618675, "grad_norm": 1.90625, "learning_rate": 8.133781862956787e-06, "loss": 0.9505, "step": 4339 }, { "epoch": 0.8675445391169636, "grad_norm": 2.09375, "learning_rate": 8.132960655231849e-06, "loss": 1.0011, "step": 4340 }, { "epoch": 0.8677444341720597, "grad_norm": 2.140625, "learning_rate": 8.132139308341134e-06, "loss": 0.988, "step": 4341 }, { "epoch": 0.8679443292271557, "grad_norm": 2.015625, "learning_rate": 8.131317822321125e-06, "loss": 1.0682, "step": 4342 }, { "epoch": 0.8681442242822518, "grad_norm": 2.15625, "learning_rate": 8.130496197208313e-06, "loss": 1.0255, "step": 4343 }, { "epoch": 0.8683441193373479, "grad_norm": 1.984375, "learning_rate": 8.129674433039196e-06, "loss": 1.0647, "step": 4344 }, { "epoch": 0.868544014392444, "grad_norm": 1.9453125, "learning_rate": 8.128852529850272e-06, "loss": 0.9956, "step": 4345 }, { "epoch": 0.8687439094475401, "grad_norm": 2.078125, "learning_rate": 8.128030487678055e-06, "loss": 1.0578, "step": 4346 }, { "epoch": 0.8689438045026361, "grad_norm": 2.09375, "learning_rate": 8.127208306559058e-06, "loss": 1.0504, "step": 4347 }, { "epoch": 0.8691436995577322, "grad_norm": 2.140625, "learning_rate": 8.126385986529802e-06, "loss": 1.038, "step": 4348 }, { "epoch": 0.8693435946128283, "grad_norm": 2.125, "learning_rate": 8.125563527626812e-06, "loss": 0.996, "step": 4349 }, { "epoch": 0.8695434896679244, "grad_norm": 2.1875, "learning_rate": 8.124740929886625e-06, "loss": 1.0464, "step": 4350 }, { "epoch": 0.8697433847230204, "grad_norm": 2.078125, "learning_rate": 8.12391819334578e-06, "loss": 0.9245, "step": 4351 }, { "epoch": 0.8699432797781165, "grad_norm": 2.125, "learning_rate": 8.123095318040824e-06, "loss": 1.0916, "step": 4352 }, { "epoch": 0.8701431748332126, "grad_norm": 2.109375, "learning_rate": 8.122272304008306e-06, "loss": 1.0831, "step": 4353 }, { "epoch": 0.8703430698883087, "grad_norm": 2.09375, "learning_rate": 8.121449151284784e-06, "loss": 0.9706, "step": 4354 }, { "epoch": 0.8705429649434047, "grad_norm": 2.046875, "learning_rate": 8.120625859906825e-06, "loss": 1.0293, "step": 4355 }, { "epoch": 0.8707428599985008, "grad_norm": 2.140625, "learning_rate": 8.119802429911002e-06, "loss": 0.9694, "step": 4356 }, { "epoch": 0.8709427550535969, "grad_norm": 2.046875, "learning_rate": 8.118978861333883e-06, "loss": 0.9881, "step": 4357 }, { "epoch": 0.871142650108693, "grad_norm": 2.03125, "learning_rate": 8.11815515421206e-06, "loss": 1.0694, "step": 4358 }, { "epoch": 0.8713425451637891, "grad_norm": 2.046875, "learning_rate": 8.117331308582116e-06, "loss": 0.9553, "step": 4359 }, { "epoch": 0.871542440218885, "grad_norm": 2.078125, "learning_rate": 8.116507324480651e-06, "loss": 1.0958, "step": 4360 }, { "epoch": 0.8717423352739812, "grad_norm": 2.0625, "learning_rate": 8.115683201944262e-06, "loss": 1.1264, "step": 4361 }, { "epoch": 0.8719422303290773, "grad_norm": 2.03125, "learning_rate": 8.114858941009556e-06, "loss": 0.9975, "step": 4362 }, { "epoch": 0.8721421253841734, "grad_norm": 1.9453125, "learning_rate": 8.114034541713152e-06, "loss": 1.0681, "step": 4363 }, { "epoch": 0.8723420204392693, "grad_norm": 2.25, "learning_rate": 8.113210004091663e-06, "loss": 1.083, "step": 4364 }, { "epoch": 0.8725419154943654, "grad_norm": 2.046875, "learning_rate": 8.11238532818172e-06, "loss": 1.044, "step": 4365 }, { "epoch": 0.8727418105494615, "grad_norm": 2.09375, "learning_rate": 8.111560514019951e-06, "loss": 0.9365, "step": 4366 }, { "epoch": 0.8729417056045576, "grad_norm": 2.234375, "learning_rate": 8.110735561643e-06, "loss": 1.128, "step": 4367 }, { "epoch": 0.8731416006596536, "grad_norm": 2.03125, "learning_rate": 8.109910471087505e-06, "loss": 1.0742, "step": 4368 }, { "epoch": 0.8733414957147497, "grad_norm": 1.984375, "learning_rate": 8.109085242390118e-06, "loss": 1.0137, "step": 4369 }, { "epoch": 0.8735413907698458, "grad_norm": 2.0625, "learning_rate": 8.108259875587498e-06, "loss": 1.023, "step": 4370 }, { "epoch": 0.8737412858249419, "grad_norm": 2.03125, "learning_rate": 8.107434370716307e-06, "loss": 1.027, "step": 4371 }, { "epoch": 0.873941180880038, "grad_norm": 2.0, "learning_rate": 8.106608727813212e-06, "loss": 0.9632, "step": 4372 }, { "epoch": 0.874141075935134, "grad_norm": 2.09375, "learning_rate": 8.105782946914891e-06, "loss": 1.0354, "step": 4373 }, { "epoch": 0.8743409709902301, "grad_norm": 2.109375, "learning_rate": 8.10495702805802e-06, "loss": 0.9849, "step": 4374 }, { "epoch": 0.8745408660453262, "grad_norm": 2.4375, "learning_rate": 8.104130971279292e-06, "loss": 1.0414, "step": 4375 }, { "epoch": 0.8747407611004223, "grad_norm": 2.15625, "learning_rate": 8.103304776615399e-06, "loss": 1.1185, "step": 4376 }, { "epoch": 0.8749406561555183, "grad_norm": 2.171875, "learning_rate": 8.102478444103037e-06, "loss": 1.1865, "step": 4377 }, { "epoch": 0.8751405512106144, "grad_norm": 2.1875, "learning_rate": 8.101651973778914e-06, "loss": 1.025, "step": 4378 }, { "epoch": 0.8753404462657105, "grad_norm": 2.09375, "learning_rate": 8.100825365679741e-06, "loss": 0.9985, "step": 4379 }, { "epoch": 0.8755403413208066, "grad_norm": 2.15625, "learning_rate": 8.099998619842238e-06, "loss": 1.0752, "step": 4380 }, { "epoch": 0.8757402363759027, "grad_norm": 2.125, "learning_rate": 8.099171736303126e-06, "loss": 0.985, "step": 4381 }, { "epoch": 0.8759401314309987, "grad_norm": 1.9453125, "learning_rate": 8.098344715099136e-06, "loss": 1.0516, "step": 4382 }, { "epoch": 0.8761400264860948, "grad_norm": 2.109375, "learning_rate": 8.097517556267007e-06, "loss": 1.0916, "step": 4383 }, { "epoch": 0.8763399215411909, "grad_norm": 2.15625, "learning_rate": 8.096690259843478e-06, "loss": 0.8999, "step": 4384 }, { "epoch": 0.876539816596287, "grad_norm": 2.21875, "learning_rate": 8.095862825865297e-06, "loss": 1.0837, "step": 4385 }, { "epoch": 0.876739711651383, "grad_norm": 2.03125, "learning_rate": 8.09503525436922e-06, "loss": 0.9613, "step": 4386 }, { "epoch": 0.8769396067064791, "grad_norm": 2.078125, "learning_rate": 8.09420754539201e-06, "loss": 1.0288, "step": 4387 }, { "epoch": 0.8771395017615752, "grad_norm": 2.0625, "learning_rate": 8.09337969897043e-06, "loss": 0.987, "step": 4388 }, { "epoch": 0.8773393968166713, "grad_norm": 2.078125, "learning_rate": 8.092551715141254e-06, "loss": 1.0139, "step": 4389 }, { "epoch": 0.8775392918717673, "grad_norm": 1.9609375, "learning_rate": 8.091723593941261e-06, "loss": 1.0515, "step": 4390 }, { "epoch": 0.8777391869268634, "grad_norm": 2.015625, "learning_rate": 8.090895335407238e-06, "loss": 1.0595, "step": 4391 }, { "epoch": 0.8779390819819595, "grad_norm": 2.03125, "learning_rate": 8.090066939575972e-06, "loss": 1.1062, "step": 4392 }, { "epoch": 0.8781389770370556, "grad_norm": 2.15625, "learning_rate": 8.089238406484263e-06, "loss": 0.975, "step": 4393 }, { "epoch": 0.8783388720921517, "grad_norm": 1.9296875, "learning_rate": 8.088409736168915e-06, "loss": 0.9842, "step": 4394 }, { "epoch": 0.8785387671472477, "grad_norm": 2.078125, "learning_rate": 8.087580928666736e-06, "loss": 1.084, "step": 4395 }, { "epoch": 0.8787386622023438, "grad_norm": 2.015625, "learning_rate": 8.08675198401454e-06, "loss": 0.9676, "step": 4396 }, { "epoch": 0.8789385572574399, "grad_norm": 1.9296875, "learning_rate": 8.085922902249153e-06, "loss": 0.9899, "step": 4397 }, { "epoch": 0.879138452312536, "grad_norm": 2.03125, "learning_rate": 8.085093683407399e-06, "loss": 1.0496, "step": 4398 }, { "epoch": 0.879338347367632, "grad_norm": 2.03125, "learning_rate": 8.084264327526112e-06, "loss": 1.014, "step": 4399 }, { "epoch": 0.879538242422728, "grad_norm": 2.234375, "learning_rate": 8.083434834642133e-06, "loss": 1.085, "step": 4400 }, { "epoch": 0.8797381374778241, "grad_norm": 2.109375, "learning_rate": 8.08260520479231e-06, "loss": 1.0846, "step": 4401 }, { "epoch": 0.8799380325329202, "grad_norm": 2.0625, "learning_rate": 8.081775438013493e-06, "loss": 1.0657, "step": 4402 }, { "epoch": 0.8801379275880163, "grad_norm": 1.953125, "learning_rate": 8.08094553434254e-06, "loss": 0.9896, "step": 4403 }, { "epoch": 0.8803378226431123, "grad_norm": 2.03125, "learning_rate": 8.080115493816314e-06, "loss": 1.0088, "step": 4404 }, { "epoch": 0.8805377176982084, "grad_norm": 2.203125, "learning_rate": 8.079285316471688e-06, "loss": 1.0624, "step": 4405 }, { "epoch": 0.8807376127533045, "grad_norm": 2.0625, "learning_rate": 8.078455002345538e-06, "loss": 0.9994, "step": 4406 }, { "epoch": 0.8809375078084006, "grad_norm": 2.03125, "learning_rate": 8.077624551474744e-06, "loss": 1.0085, "step": 4407 }, { "epoch": 0.8811374028634966, "grad_norm": 1.9921875, "learning_rate": 8.076793963896197e-06, "loss": 0.9928, "step": 4408 }, { "epoch": 0.8813372979185927, "grad_norm": 2.046875, "learning_rate": 8.07596323964679e-06, "loss": 1.0614, "step": 4409 }, { "epoch": 0.8815371929736888, "grad_norm": 2.109375, "learning_rate": 8.075132378763424e-06, "loss": 1.0672, "step": 4410 }, { "epoch": 0.8817370880287849, "grad_norm": 2.0625, "learning_rate": 8.074301381283007e-06, "loss": 0.9155, "step": 4411 }, { "epoch": 0.8819369830838809, "grad_norm": 2.046875, "learning_rate": 8.073470247242452e-06, "loss": 0.9595, "step": 4412 }, { "epoch": 0.882136878138977, "grad_norm": 2.046875, "learning_rate": 8.072638976678675e-06, "loss": 0.9704, "step": 4413 }, { "epoch": 0.8823367731940731, "grad_norm": 2.015625, "learning_rate": 8.071807569628602e-06, "loss": 1.0088, "step": 4414 }, { "epoch": 0.8825366682491692, "grad_norm": 1.921875, "learning_rate": 8.070976026129166e-06, "loss": 0.9518, "step": 4415 }, { "epoch": 0.8827365633042653, "grad_norm": 2.109375, "learning_rate": 8.070144346217305e-06, "loss": 1.0652, "step": 4416 }, { "epoch": 0.8829364583593613, "grad_norm": 2.0625, "learning_rate": 8.069312529929958e-06, "loss": 0.9716, "step": 4417 }, { "epoch": 0.8831363534144574, "grad_norm": 2.125, "learning_rate": 8.068480577304076e-06, "loss": 1.0798, "step": 4418 }, { "epoch": 0.8833362484695535, "grad_norm": 2.265625, "learning_rate": 8.067648488376616e-06, "loss": 0.9723, "step": 4419 }, { "epoch": 0.8835361435246496, "grad_norm": 1.9375, "learning_rate": 8.066816263184535e-06, "loss": 0.9742, "step": 4420 }, { "epoch": 0.8837360385797456, "grad_norm": 2.09375, "learning_rate": 8.065983901764807e-06, "loss": 0.9657, "step": 4421 }, { "epoch": 0.8839359336348417, "grad_norm": 2.03125, "learning_rate": 8.0651514041544e-06, "loss": 1.02, "step": 4422 }, { "epoch": 0.8841358286899378, "grad_norm": 1.984375, "learning_rate": 8.064318770390293e-06, "loss": 1.0419, "step": 4423 }, { "epoch": 0.8843357237450339, "grad_norm": 2.078125, "learning_rate": 8.063486000509475e-06, "loss": 1.085, "step": 4424 }, { "epoch": 0.88453561880013, "grad_norm": 2.046875, "learning_rate": 8.062653094548936e-06, "loss": 1.083, "step": 4425 }, { "epoch": 0.884735513855226, "grad_norm": 2.03125, "learning_rate": 8.061820052545675e-06, "loss": 1.1178, "step": 4426 }, { "epoch": 0.8849354089103221, "grad_norm": 2.078125, "learning_rate": 8.060986874536691e-06, "loss": 1.0527, "step": 4427 }, { "epoch": 0.8851353039654182, "grad_norm": 2.359375, "learning_rate": 8.060153560559e-06, "loss": 1.0529, "step": 4428 }, { "epoch": 0.8853351990205143, "grad_norm": 2.09375, "learning_rate": 8.059320110649614e-06, "loss": 0.948, "step": 4429 }, { "epoch": 0.8855350940756103, "grad_norm": 2.078125, "learning_rate": 8.058486524845554e-06, "loss": 1.0805, "step": 4430 }, { "epoch": 0.8857349891307064, "grad_norm": 2.03125, "learning_rate": 8.05765280318385e-06, "loss": 1.0965, "step": 4431 }, { "epoch": 0.8859348841858025, "grad_norm": 2.109375, "learning_rate": 8.056818945701537e-06, "loss": 1.0667, "step": 4432 }, { "epoch": 0.8861347792408986, "grad_norm": 2.03125, "learning_rate": 8.05598495243565e-06, "loss": 1.0619, "step": 4433 }, { "epoch": 0.8863346742959946, "grad_norm": 2.0, "learning_rate": 8.055150823423239e-06, "loss": 0.8879, "step": 4434 }, { "epoch": 0.8865345693510907, "grad_norm": 2.140625, "learning_rate": 8.054316558701355e-06, "loss": 1.0322, "step": 4435 }, { "epoch": 0.8867344644061868, "grad_norm": 2.1875, "learning_rate": 8.053482158307055e-06, "loss": 1.1139, "step": 4436 }, { "epoch": 0.8869343594612829, "grad_norm": 1.9921875, "learning_rate": 8.052647622277405e-06, "loss": 1.0347, "step": 4437 }, { "epoch": 0.887134254516379, "grad_norm": 2.125, "learning_rate": 8.051812950649474e-06, "loss": 1.0791, "step": 4438 }, { "epoch": 0.8873341495714749, "grad_norm": 2.078125, "learning_rate": 8.050978143460335e-06, "loss": 0.9837, "step": 4439 }, { "epoch": 0.887534044626571, "grad_norm": 2.015625, "learning_rate": 8.050143200747073e-06, "loss": 0.9943, "step": 4440 }, { "epoch": 0.8877339396816671, "grad_norm": 2.28125, "learning_rate": 8.049308122546776e-06, "loss": 1.16, "step": 4441 }, { "epoch": 0.8879338347367632, "grad_norm": 2.015625, "learning_rate": 8.04847290889654e-06, "loss": 1.0046, "step": 4442 }, { "epoch": 0.8881337297918592, "grad_norm": 2.046875, "learning_rate": 8.047637559833464e-06, "loss": 1.0639, "step": 4443 }, { "epoch": 0.8883336248469553, "grad_norm": 2.03125, "learning_rate": 8.04680207539465e-06, "loss": 1.0163, "step": 4444 }, { "epoch": 0.8885335199020514, "grad_norm": 1.984375, "learning_rate": 8.045966455617214e-06, "loss": 1.0285, "step": 4445 }, { "epoch": 0.8887334149571475, "grad_norm": 2.0625, "learning_rate": 8.045130700538273e-06, "loss": 1.0341, "step": 4446 }, { "epoch": 0.8889333100122436, "grad_norm": 2.1875, "learning_rate": 8.044294810194953e-06, "loss": 0.9628, "step": 4447 }, { "epoch": 0.8891332050673396, "grad_norm": 2.21875, "learning_rate": 8.04345878462438e-06, "loss": 1.0712, "step": 4448 }, { "epoch": 0.8893331001224357, "grad_norm": 2.046875, "learning_rate": 8.042622623863694e-06, "loss": 1.027, "step": 4449 }, { "epoch": 0.8895329951775318, "grad_norm": 2.0625, "learning_rate": 8.041786327950037e-06, "loss": 1.0139, "step": 4450 }, { "epoch": 0.8897328902326279, "grad_norm": 2.125, "learning_rate": 8.040949896920556e-06, "loss": 1.0845, "step": 4451 }, { "epoch": 0.8899327852877239, "grad_norm": 2.171875, "learning_rate": 8.040113330812404e-06, "loss": 1.0785, "step": 4452 }, { "epoch": 0.89013268034282, "grad_norm": 2.09375, "learning_rate": 8.039276629662745e-06, "loss": 1.0703, "step": 4453 }, { "epoch": 0.8903325753979161, "grad_norm": 2.078125, "learning_rate": 8.038439793508741e-06, "loss": 1.0732, "step": 4454 }, { "epoch": 0.8905324704530122, "grad_norm": 2.1875, "learning_rate": 8.037602822387566e-06, "loss": 1.0146, "step": 4455 }, { "epoch": 0.8907323655081082, "grad_norm": 2.0625, "learning_rate": 8.036765716336399e-06, "loss": 0.9116, "step": 4456 }, { "epoch": 0.8909322605632043, "grad_norm": 2.03125, "learning_rate": 8.035928475392422e-06, "loss": 0.9984, "step": 4457 }, { "epoch": 0.8911321556183004, "grad_norm": 2.03125, "learning_rate": 8.035091099592827e-06, "loss": 1.0102, "step": 4458 }, { "epoch": 0.8913320506733965, "grad_norm": 2.03125, "learning_rate": 8.034253588974809e-06, "loss": 0.8845, "step": 4459 }, { "epoch": 0.8915319457284926, "grad_norm": 2.390625, "learning_rate": 8.033415943575572e-06, "loss": 0.9525, "step": 4460 }, { "epoch": 0.8917318407835886, "grad_norm": 2.078125, "learning_rate": 8.03257816343232e-06, "loss": 1.005, "step": 4461 }, { "epoch": 0.8919317358386847, "grad_norm": 2.109375, "learning_rate": 8.031740248582272e-06, "loss": 1.0035, "step": 4462 }, { "epoch": 0.8921316308937808, "grad_norm": 2.125, "learning_rate": 8.030902199062646e-06, "loss": 1.0726, "step": 4463 }, { "epoch": 0.8923315259488769, "grad_norm": 2.0, "learning_rate": 8.030064014910668e-06, "loss": 1.0157, "step": 4464 }, { "epoch": 0.8925314210039729, "grad_norm": 2.0625, "learning_rate": 8.02922569616357e-06, "loss": 1.0501, "step": 4465 }, { "epoch": 0.892731316059069, "grad_norm": 2.1875, "learning_rate": 8.028387242858588e-06, "loss": 0.9781, "step": 4466 }, { "epoch": 0.8929312111141651, "grad_norm": 2.09375, "learning_rate": 8.02754865503297e-06, "loss": 1.0365, "step": 4467 }, { "epoch": 0.8931311061692612, "grad_norm": 2.0625, "learning_rate": 8.026709932723964e-06, "loss": 0.9437, "step": 4468 }, { "epoch": 0.8933310012243572, "grad_norm": 1.9921875, "learning_rate": 8.025871075968828e-06, "loss": 0.9995, "step": 4469 }, { "epoch": 0.8935308962794533, "grad_norm": 2.0625, "learning_rate": 8.02503208480482e-06, "loss": 1.0265, "step": 4470 }, { "epoch": 0.8937307913345494, "grad_norm": 2.203125, "learning_rate": 8.024192959269209e-06, "loss": 1.1485, "step": 4471 }, { "epoch": 0.8939306863896455, "grad_norm": 1.96875, "learning_rate": 8.02335369939927e-06, "loss": 1.0292, "step": 4472 }, { "epoch": 0.8941305814447416, "grad_norm": 2.03125, "learning_rate": 8.022514305232283e-06, "loss": 0.9181, "step": 4473 }, { "epoch": 0.8943304764998375, "grad_norm": 2.140625, "learning_rate": 8.021674776805534e-06, "loss": 1.0503, "step": 4474 }, { "epoch": 0.8945303715549336, "grad_norm": 2.1875, "learning_rate": 8.020835114156313e-06, "loss": 1.0704, "step": 4475 }, { "epoch": 0.8947302666100297, "grad_norm": 2.078125, "learning_rate": 8.019995317321921e-06, "loss": 1.0324, "step": 4476 }, { "epoch": 0.8949301616651258, "grad_norm": 2.078125, "learning_rate": 8.019155386339657e-06, "loss": 0.9435, "step": 4477 }, { "epoch": 0.8951300567202218, "grad_norm": 2.203125, "learning_rate": 8.018315321246834e-06, "loss": 1.0742, "step": 4478 }, { "epoch": 0.8953299517753179, "grad_norm": 2.140625, "learning_rate": 8.017475122080767e-06, "loss": 1.0427, "step": 4479 }, { "epoch": 0.895529846830414, "grad_norm": 2.03125, "learning_rate": 8.016634788878779e-06, "loss": 1.0984, "step": 4480 }, { "epoch": 0.8957297418855101, "grad_norm": 2.046875, "learning_rate": 8.015794321678194e-06, "loss": 0.9826, "step": 4481 }, { "epoch": 0.8959296369406062, "grad_norm": 2.046875, "learning_rate": 8.014953720516347e-06, "loss": 1.0419, "step": 4482 }, { "epoch": 0.8961295319957022, "grad_norm": 2.0, "learning_rate": 8.014112985430578e-06, "loss": 0.9784, "step": 4483 }, { "epoch": 0.8963294270507983, "grad_norm": 2.03125, "learning_rate": 8.013272116458233e-06, "loss": 1.0174, "step": 4484 }, { "epoch": 0.8965293221058944, "grad_norm": 2.09375, "learning_rate": 8.012431113636662e-06, "loss": 1.0536, "step": 4485 }, { "epoch": 0.8967292171609905, "grad_norm": 2.171875, "learning_rate": 8.011589977003222e-06, "loss": 1.0687, "step": 4486 }, { "epoch": 0.8969291122160865, "grad_norm": 2.1875, "learning_rate": 8.01074870659528e-06, "loss": 1.0259, "step": 4487 }, { "epoch": 0.8971290072711826, "grad_norm": 2.140625, "learning_rate": 8.009907302450199e-06, "loss": 1.0712, "step": 4488 }, { "epoch": 0.8973289023262787, "grad_norm": 2.0, "learning_rate": 8.009065764605358e-06, "loss": 1.0412, "step": 4489 }, { "epoch": 0.8975287973813748, "grad_norm": 2.0625, "learning_rate": 8.008224093098136e-06, "loss": 1.1454, "step": 4490 }, { "epoch": 0.8977286924364708, "grad_norm": 2.3125, "learning_rate": 8.007382287965921e-06, "loss": 1.054, "step": 4491 }, { "epoch": 0.8979285874915669, "grad_norm": 2.296875, "learning_rate": 8.006540349246107e-06, "loss": 0.8789, "step": 4492 }, { "epoch": 0.898128482546663, "grad_norm": 2.109375, "learning_rate": 8.005698276976092e-06, "loss": 1.0072, "step": 4493 }, { "epoch": 0.8983283776017591, "grad_norm": 2.125, "learning_rate": 8.004856071193278e-06, "loss": 1.0211, "step": 4494 }, { "epoch": 0.8985282726568552, "grad_norm": 2.21875, "learning_rate": 8.004013731935082e-06, "loss": 1.0182, "step": 4495 }, { "epoch": 0.8987281677119512, "grad_norm": 2.140625, "learning_rate": 8.003171259238915e-06, "loss": 1.0249, "step": 4496 }, { "epoch": 0.8989280627670473, "grad_norm": 2.046875, "learning_rate": 8.002328653142203e-06, "loss": 1.0605, "step": 4497 }, { "epoch": 0.8991279578221434, "grad_norm": 2.0625, "learning_rate": 8.00148591368237e-06, "loss": 0.9666, "step": 4498 }, { "epoch": 0.8993278528772395, "grad_norm": 2.0625, "learning_rate": 8.000643040896855e-06, "loss": 1.0767, "step": 4499 }, { "epoch": 0.8995277479323355, "grad_norm": 2.078125, "learning_rate": 7.999800034823097e-06, "loss": 1.0541, "step": 4500 }, { "epoch": 0.8995277479323355, "eval_loss": 0.9116131067276001, "eval_runtime": 594.896, "eval_samples_per_second": 3.594, "eval_steps_per_second": 3.594, "step": 4500 }, { "epoch": 0.8997276429874316, "grad_norm": 2.125, "learning_rate": 7.998956895498542e-06, "loss": 1.1048, "step": 4501 }, { "epoch": 0.8999275380425277, "grad_norm": 1.9765625, "learning_rate": 7.99811362296064e-06, "loss": 0.9938, "step": 4502 }, { "epoch": 0.9001274330976238, "grad_norm": 2.0625, "learning_rate": 7.997270217246853e-06, "loss": 1.1029, "step": 4503 }, { "epoch": 0.9003273281527199, "grad_norm": 2.171875, "learning_rate": 7.996426678394642e-06, "loss": 1.1396, "step": 4504 }, { "epoch": 0.9005272232078159, "grad_norm": 1.9453125, "learning_rate": 7.99558300644148e-06, "loss": 0.9531, "step": 4505 }, { "epoch": 0.900727118262912, "grad_norm": 2.03125, "learning_rate": 7.994739201424836e-06, "loss": 1.0054, "step": 4506 }, { "epoch": 0.9009270133180081, "grad_norm": 2.203125, "learning_rate": 7.993895263382201e-06, "loss": 1.0793, "step": 4507 }, { "epoch": 0.9011269083731042, "grad_norm": 2.09375, "learning_rate": 7.993051192351056e-06, "loss": 1.0196, "step": 4508 }, { "epoch": 0.9013268034282002, "grad_norm": 1.984375, "learning_rate": 7.992206988368898e-06, "loss": 0.954, "step": 4509 }, { "epoch": 0.9015266984832963, "grad_norm": 2.0, "learning_rate": 7.991362651473225e-06, "loss": 1.0146, "step": 4510 }, { "epoch": 0.9017265935383924, "grad_norm": 2.109375, "learning_rate": 7.990518181701542e-06, "loss": 1.0859, "step": 4511 }, { "epoch": 0.9019264885934885, "grad_norm": 2.109375, "learning_rate": 7.989673579091361e-06, "loss": 1.0375, "step": 4512 }, { "epoch": 0.9021263836485844, "grad_norm": 2.171875, "learning_rate": 7.988828843680198e-06, "loss": 1.0987, "step": 4513 }, { "epoch": 0.9023262787036805, "grad_norm": 2.03125, "learning_rate": 7.987983975505579e-06, "loss": 0.984, "step": 4514 }, { "epoch": 0.9025261737587766, "grad_norm": 2.15625, "learning_rate": 7.98713897460503e-06, "loss": 1.049, "step": 4515 }, { "epoch": 0.9027260688138727, "grad_norm": 2.0625, "learning_rate": 7.986293841016087e-06, "loss": 1.0108, "step": 4516 }, { "epoch": 0.9029259638689688, "grad_norm": 2.046875, "learning_rate": 7.98544857477629e-06, "loss": 1.0748, "step": 4517 }, { "epoch": 0.9031258589240648, "grad_norm": 2.078125, "learning_rate": 7.984603175923186e-06, "loss": 1.043, "step": 4518 }, { "epoch": 0.9033257539791609, "grad_norm": 2.203125, "learning_rate": 7.983757644494327e-06, "loss": 1.0418, "step": 4519 }, { "epoch": 0.903525649034257, "grad_norm": 2.09375, "learning_rate": 7.982911980527276e-06, "loss": 1.1628, "step": 4520 }, { "epoch": 0.9037255440893531, "grad_norm": 1.9140625, "learning_rate": 7.98206618405959e-06, "loss": 0.9724, "step": 4521 }, { "epoch": 0.9039254391444491, "grad_norm": 2.078125, "learning_rate": 7.981220255128842e-06, "loss": 1.0297, "step": 4522 }, { "epoch": 0.9041253341995452, "grad_norm": 2.078125, "learning_rate": 7.98037419377261e-06, "loss": 0.9934, "step": 4523 }, { "epoch": 0.9043252292546413, "grad_norm": 2.171875, "learning_rate": 7.979528000028474e-06, "loss": 1.0693, "step": 4524 }, { "epoch": 0.9045251243097374, "grad_norm": 2.109375, "learning_rate": 7.978681673934023e-06, "loss": 1.015, "step": 4525 }, { "epoch": 0.9047250193648335, "grad_norm": 1.96875, "learning_rate": 7.97783521552685e-06, "loss": 0.9598, "step": 4526 }, { "epoch": 0.9049249144199295, "grad_norm": 2.015625, "learning_rate": 7.976988624844556e-06, "loss": 1.0851, "step": 4527 }, { "epoch": 0.9051248094750256, "grad_norm": 1.9921875, "learning_rate": 7.976141901924743e-06, "loss": 0.9594, "step": 4528 }, { "epoch": 0.9053247045301217, "grad_norm": 2.046875, "learning_rate": 7.975295046805026e-06, "loss": 0.9813, "step": 4529 }, { "epoch": 0.9055245995852178, "grad_norm": 2.0625, "learning_rate": 7.974448059523018e-06, "loss": 0.9971, "step": 4530 }, { "epoch": 0.9057244946403138, "grad_norm": 2.1875, "learning_rate": 7.97360094011635e-06, "loss": 1.1101, "step": 4531 }, { "epoch": 0.9059243896954099, "grad_norm": 2.015625, "learning_rate": 7.972753688622644e-06, "loss": 1.0363, "step": 4532 }, { "epoch": 0.906124284750506, "grad_norm": 2.109375, "learning_rate": 7.971906305079535e-06, "loss": 1.0802, "step": 4533 }, { "epoch": 0.9063241798056021, "grad_norm": 2.015625, "learning_rate": 7.971058789524666e-06, "loss": 1.0146, "step": 4534 }, { "epoch": 0.9065240748606981, "grad_norm": 2.0, "learning_rate": 7.970211141995682e-06, "loss": 0.9591, "step": 4535 }, { "epoch": 0.9067239699157942, "grad_norm": 2.109375, "learning_rate": 7.969363362530238e-06, "loss": 1.0927, "step": 4536 }, { "epoch": 0.9069238649708903, "grad_norm": 2.0, "learning_rate": 7.96851545116599e-06, "loss": 0.9781, "step": 4537 }, { "epoch": 0.9071237600259864, "grad_norm": 2.0, "learning_rate": 7.967667407940603e-06, "loss": 0.9595, "step": 4538 }, { "epoch": 0.9073236550810825, "grad_norm": 2.125, "learning_rate": 7.966819232891744e-06, "loss": 1.0559, "step": 4539 }, { "epoch": 0.9075235501361785, "grad_norm": 2.125, "learning_rate": 7.965970926057095e-06, "loss": 1.0765, "step": 4540 }, { "epoch": 0.9077234451912746, "grad_norm": 2.1875, "learning_rate": 7.965122487474333e-06, "loss": 1.1197, "step": 4541 }, { "epoch": 0.9079233402463707, "grad_norm": 2.171875, "learning_rate": 7.964273917181147e-06, "loss": 1.0798, "step": 4542 }, { "epoch": 0.9081232353014668, "grad_norm": 2.28125, "learning_rate": 7.96342521521523e-06, "loss": 0.9719, "step": 4543 }, { "epoch": 0.9083231303565628, "grad_norm": 2.140625, "learning_rate": 7.962576381614282e-06, "loss": 1.0806, "step": 4544 }, { "epoch": 0.9085230254116589, "grad_norm": 2.078125, "learning_rate": 7.961727416416007e-06, "loss": 1.099, "step": 4545 }, { "epoch": 0.908722920466755, "grad_norm": 2.078125, "learning_rate": 7.960878319658117e-06, "loss": 1.1292, "step": 4546 }, { "epoch": 0.9089228155218511, "grad_norm": 2.046875, "learning_rate": 7.960029091378327e-06, "loss": 0.9665, "step": 4547 }, { "epoch": 0.9091227105769472, "grad_norm": 2.1875, "learning_rate": 7.959179731614363e-06, "loss": 1.1101, "step": 4548 }, { "epoch": 0.9093226056320431, "grad_norm": 2.0625, "learning_rate": 7.95833024040395e-06, "loss": 1.0884, "step": 4549 }, { "epoch": 0.9095225006871392, "grad_norm": 2.015625, "learning_rate": 7.957480617784823e-06, "loss": 0.9976, "step": 4550 }, { "epoch": 0.9097223957422353, "grad_norm": 2.140625, "learning_rate": 7.956630863794723e-06, "loss": 1.0861, "step": 4551 }, { "epoch": 0.9099222907973314, "grad_norm": 2.03125, "learning_rate": 7.955780978471396e-06, "loss": 1.0039, "step": 4552 }, { "epoch": 0.9101221858524274, "grad_norm": 2.15625, "learning_rate": 7.954930961852594e-06, "loss": 1.0688, "step": 4553 }, { "epoch": 0.9103220809075235, "grad_norm": 2.15625, "learning_rate": 7.954080813976075e-06, "loss": 1.0032, "step": 4554 }, { "epoch": 0.9105219759626196, "grad_norm": 2.140625, "learning_rate": 7.953230534879601e-06, "loss": 1.0197, "step": 4555 }, { "epoch": 0.9107218710177157, "grad_norm": 2.140625, "learning_rate": 7.952380124600943e-06, "loss": 0.9938, "step": 4556 }, { "epoch": 0.9109217660728117, "grad_norm": 2.09375, "learning_rate": 7.951529583177874e-06, "loss": 1.0392, "step": 4557 }, { "epoch": 0.9111216611279078, "grad_norm": 2.0, "learning_rate": 7.950678910648176e-06, "loss": 1.0021, "step": 4558 }, { "epoch": 0.9113215561830039, "grad_norm": 2.046875, "learning_rate": 7.949828107049638e-06, "loss": 1.0592, "step": 4559 }, { "epoch": 0.9115214512381, "grad_norm": 2.046875, "learning_rate": 7.948977172420046e-06, "loss": 0.975, "step": 4560 }, { "epoch": 0.9117213462931961, "grad_norm": 2.0, "learning_rate": 7.948126106797208e-06, "loss": 1.0879, "step": 4561 }, { "epoch": 0.9119212413482921, "grad_norm": 2.015625, "learning_rate": 7.94727491021892e-06, "loss": 0.9099, "step": 4562 }, { "epoch": 0.9121211364033882, "grad_norm": 2.34375, "learning_rate": 7.946423582722998e-06, "loss": 1.1033, "step": 4563 }, { "epoch": 0.9123210314584843, "grad_norm": 2.03125, "learning_rate": 7.945572124347253e-06, "loss": 0.9591, "step": 4564 }, { "epoch": 0.9125209265135804, "grad_norm": 2.0, "learning_rate": 7.944720535129509e-06, "loss": 1.0181, "step": 4565 }, { "epoch": 0.9127208215686764, "grad_norm": 2.140625, "learning_rate": 7.943868815107594e-06, "loss": 1.0219, "step": 4566 }, { "epoch": 0.9129207166237725, "grad_norm": 2.015625, "learning_rate": 7.94301696431934e-06, "loss": 1.0529, "step": 4567 }, { "epoch": 0.9131206116788686, "grad_norm": 2.28125, "learning_rate": 7.942164982802588e-06, "loss": 1.0201, "step": 4568 }, { "epoch": 0.9133205067339647, "grad_norm": 2.140625, "learning_rate": 7.941312870595179e-06, "loss": 0.9623, "step": 4569 }, { "epoch": 0.9135204017890608, "grad_norm": 2.078125, "learning_rate": 7.940460627734969e-06, "loss": 1.0634, "step": 4570 }, { "epoch": 0.9137202968441568, "grad_norm": 2.140625, "learning_rate": 7.939608254259812e-06, "loss": 1.0753, "step": 4571 }, { "epoch": 0.9139201918992529, "grad_norm": 2.09375, "learning_rate": 7.938755750207569e-06, "loss": 1.0382, "step": 4572 }, { "epoch": 0.914120086954349, "grad_norm": 2.171875, "learning_rate": 7.93790311561611e-06, "loss": 1.0883, "step": 4573 }, { "epoch": 0.9143199820094451, "grad_norm": 2.09375, "learning_rate": 7.937050350523308e-06, "loss": 1.0178, "step": 4574 }, { "epoch": 0.9145198770645411, "grad_norm": 2.296875, "learning_rate": 7.936197454967043e-06, "loss": 0.9342, "step": 4575 }, { "epoch": 0.9147197721196372, "grad_norm": 2.125, "learning_rate": 7.935344428985202e-06, "loss": 1.1515, "step": 4576 }, { "epoch": 0.9149196671747333, "grad_norm": 2.046875, "learning_rate": 7.934491272615674e-06, "loss": 1.0595, "step": 4577 }, { "epoch": 0.9151195622298294, "grad_norm": 2.046875, "learning_rate": 7.933637985896356e-06, "loss": 1.0235, "step": 4578 }, { "epoch": 0.9153194572849254, "grad_norm": 2.0625, "learning_rate": 7.932784568865155e-06, "loss": 0.9725, "step": 4579 }, { "epoch": 0.9155193523400215, "grad_norm": 2.09375, "learning_rate": 7.931931021559973e-06, "loss": 1.1227, "step": 4580 }, { "epoch": 0.9157192473951176, "grad_norm": 2.0, "learning_rate": 7.931077344018731e-06, "loss": 1.0332, "step": 4581 }, { "epoch": 0.9159191424502137, "grad_norm": 2.046875, "learning_rate": 7.930223536279344e-06, "loss": 1.025, "step": 4582 }, { "epoch": 0.9161190375053098, "grad_norm": 2.09375, "learning_rate": 7.929369598379743e-06, "loss": 1.0924, "step": 4583 }, { "epoch": 0.9163189325604058, "grad_norm": 2.234375, "learning_rate": 7.928515530357857e-06, "loss": 1.0261, "step": 4584 }, { "epoch": 0.9165188276155019, "grad_norm": 2.09375, "learning_rate": 7.927661332251622e-06, "loss": 1.0325, "step": 4585 }, { "epoch": 0.916718722670598, "grad_norm": 2.09375, "learning_rate": 7.926807004098985e-06, "loss": 0.9855, "step": 4586 }, { "epoch": 0.916918617725694, "grad_norm": 1.9921875, "learning_rate": 7.925952545937892e-06, "loss": 1.0322, "step": 4587 }, { "epoch": 0.91711851278079, "grad_norm": 2.015625, "learning_rate": 7.9250979578063e-06, "loss": 0.9784, "step": 4588 }, { "epoch": 0.9173184078358861, "grad_norm": 2.15625, "learning_rate": 7.924243239742171e-06, "loss": 1.0395, "step": 4589 }, { "epoch": 0.9175183028909822, "grad_norm": 2.125, "learning_rate": 7.923388391783467e-06, "loss": 1.1187, "step": 4590 }, { "epoch": 0.9177181979460783, "grad_norm": 1.9609375, "learning_rate": 7.922533413968164e-06, "loss": 0.9561, "step": 4591 }, { "epoch": 0.9179180930011743, "grad_norm": 2.09375, "learning_rate": 7.92167830633424e-06, "loss": 1.0588, "step": 4592 }, { "epoch": 0.9181179880562704, "grad_norm": 1.9609375, "learning_rate": 7.920823068919676e-06, "loss": 0.9722, "step": 4593 }, { "epoch": 0.9183178831113665, "grad_norm": 2.109375, "learning_rate": 7.919967701762464e-06, "loss": 1.0818, "step": 4594 }, { "epoch": 0.9185177781664626, "grad_norm": 2.140625, "learning_rate": 7.919112204900597e-06, "loss": 0.9887, "step": 4595 }, { "epoch": 0.9187176732215587, "grad_norm": 1.9296875, "learning_rate": 7.918256578372079e-06, "loss": 1.0106, "step": 4596 }, { "epoch": 0.9189175682766547, "grad_norm": 2.046875, "learning_rate": 7.917400822214916e-06, "loss": 0.9737, "step": 4597 }, { "epoch": 0.9191174633317508, "grad_norm": 2.03125, "learning_rate": 7.916544936467119e-06, "loss": 1.1294, "step": 4598 }, { "epoch": 0.9193173583868469, "grad_norm": 2.15625, "learning_rate": 7.915688921166709e-06, "loss": 1.0798, "step": 4599 }, { "epoch": 0.919517253441943, "grad_norm": 2.203125, "learning_rate": 7.914832776351707e-06, "loss": 1.0665, "step": 4600 }, { "epoch": 0.919717148497039, "grad_norm": 2.03125, "learning_rate": 7.913976502060143e-06, "loss": 0.9593, "step": 4601 }, { "epoch": 0.9199170435521351, "grad_norm": 2.09375, "learning_rate": 7.913120098330056e-06, "loss": 1.0382, "step": 4602 }, { "epoch": 0.9201169386072312, "grad_norm": 1.953125, "learning_rate": 7.912263565199486e-06, "loss": 0.9877, "step": 4603 }, { "epoch": 0.9203168336623273, "grad_norm": 2.0625, "learning_rate": 7.911406902706478e-06, "loss": 0.9745, "step": 4604 }, { "epoch": 0.9205167287174234, "grad_norm": 2.0625, "learning_rate": 7.910550110889086e-06, "loss": 0.9713, "step": 4605 }, { "epoch": 0.9207166237725194, "grad_norm": 2.046875, "learning_rate": 7.909693189785371e-06, "loss": 1.0932, "step": 4606 }, { "epoch": 0.9209165188276155, "grad_norm": 1.9921875, "learning_rate": 7.908836139433393e-06, "loss": 1.0144, "step": 4607 }, { "epoch": 0.9211164138827116, "grad_norm": 1.984375, "learning_rate": 7.907978959871228e-06, "loss": 0.9774, "step": 4608 }, { "epoch": 0.9213163089378077, "grad_norm": 2.1875, "learning_rate": 7.907121651136944e-06, "loss": 1.0085, "step": 4609 }, { "epoch": 0.9215162039929037, "grad_norm": 2.125, "learning_rate": 7.90626421326863e-06, "loss": 1.0132, "step": 4610 }, { "epoch": 0.9217160990479998, "grad_norm": 2.09375, "learning_rate": 7.905406646304367e-06, "loss": 1.053, "step": 4611 }, { "epoch": 0.9219159941030959, "grad_norm": 2.0625, "learning_rate": 7.904548950282254e-06, "loss": 1.0387, "step": 4612 }, { "epoch": 0.922115889158192, "grad_norm": 2.09375, "learning_rate": 7.903691125240385e-06, "loss": 1.0253, "step": 4613 }, { "epoch": 0.922315784213288, "grad_norm": 2.328125, "learning_rate": 7.902833171216867e-06, "loss": 1.0571, "step": 4614 }, { "epoch": 0.9225156792683841, "grad_norm": 2.015625, "learning_rate": 7.901975088249808e-06, "loss": 1.0589, "step": 4615 }, { "epoch": 0.9227155743234802, "grad_norm": 2.125, "learning_rate": 7.901116876377326e-06, "loss": 1.0856, "step": 4616 }, { "epoch": 0.9229154693785763, "grad_norm": 2.0625, "learning_rate": 7.900258535637544e-06, "loss": 1.1088, "step": 4617 }, { "epoch": 0.9231153644336724, "grad_norm": 2.046875, "learning_rate": 7.899400066068588e-06, "loss": 1.0745, "step": 4618 }, { "epoch": 0.9233152594887684, "grad_norm": 2.09375, "learning_rate": 7.898541467708588e-06, "loss": 0.9503, "step": 4619 }, { "epoch": 0.9235151545438645, "grad_norm": 2.046875, "learning_rate": 7.897682740595686e-06, "loss": 0.9778, "step": 4620 }, { "epoch": 0.9237150495989606, "grad_norm": 2.25, "learning_rate": 7.896823884768028e-06, "loss": 0.9761, "step": 4621 }, { "epoch": 0.9239149446540567, "grad_norm": 2.078125, "learning_rate": 7.895964900263762e-06, "loss": 1.0342, "step": 4622 }, { "epoch": 0.9241148397091526, "grad_norm": 2.09375, "learning_rate": 7.895105787121045e-06, "loss": 1.0463, "step": 4623 }, { "epoch": 0.9243147347642487, "grad_norm": 1.9609375, "learning_rate": 7.894246545378037e-06, "loss": 1.0387, "step": 4624 }, { "epoch": 0.9245146298193448, "grad_norm": 2.015625, "learning_rate": 7.893387175072907e-06, "loss": 1.055, "step": 4625 }, { "epoch": 0.924714524874441, "grad_norm": 2.015625, "learning_rate": 7.892527676243825e-06, "loss": 0.9812, "step": 4626 }, { "epoch": 0.924914419929537, "grad_norm": 2.078125, "learning_rate": 7.891668048928975e-06, "loss": 1.0316, "step": 4627 }, { "epoch": 0.925114314984633, "grad_norm": 2.0, "learning_rate": 7.89080829316654e-06, "loss": 0.9835, "step": 4628 }, { "epoch": 0.9253142100397291, "grad_norm": 2.0625, "learning_rate": 7.889948408994707e-06, "loss": 1.0214, "step": 4629 }, { "epoch": 0.9255141050948252, "grad_norm": 2.0, "learning_rate": 7.889088396451676e-06, "loss": 1.0421, "step": 4630 }, { "epoch": 0.9257140001499213, "grad_norm": 2.078125, "learning_rate": 7.888228255575648e-06, "loss": 0.9934, "step": 4631 }, { "epoch": 0.9259138952050173, "grad_norm": 2.015625, "learning_rate": 7.887367986404827e-06, "loss": 0.9687, "step": 4632 }, { "epoch": 0.9261137902601134, "grad_norm": 2.0625, "learning_rate": 7.88650758897743e-06, "loss": 1.1221, "step": 4633 }, { "epoch": 0.9263136853152095, "grad_norm": 2.09375, "learning_rate": 7.885647063331674e-06, "loss": 1.0123, "step": 4634 }, { "epoch": 0.9265135803703056, "grad_norm": 2.03125, "learning_rate": 7.884786409505782e-06, "loss": 0.9704, "step": 4635 }, { "epoch": 0.9267134754254016, "grad_norm": 2.078125, "learning_rate": 7.883925627537987e-06, "loss": 1.0636, "step": 4636 }, { "epoch": 0.9269133704804977, "grad_norm": 2.015625, "learning_rate": 7.883064717466524e-06, "loss": 1.0541, "step": 4637 }, { "epoch": 0.9271132655355938, "grad_norm": 2.015625, "learning_rate": 7.882203679329635e-06, "loss": 1.0097, "step": 4638 }, { "epoch": 0.9273131605906899, "grad_norm": 2.046875, "learning_rate": 7.881342513165567e-06, "loss": 1.0571, "step": 4639 }, { "epoch": 0.927513055645786, "grad_norm": 1.9765625, "learning_rate": 7.88048121901257e-06, "loss": 0.9967, "step": 4640 }, { "epoch": 0.927712950700882, "grad_norm": 2.078125, "learning_rate": 7.879619796908905e-06, "loss": 0.9924, "step": 4641 }, { "epoch": 0.9279128457559781, "grad_norm": 2.03125, "learning_rate": 7.878758246892836e-06, "loss": 0.9907, "step": 4642 }, { "epoch": 0.9281127408110742, "grad_norm": 2.234375, "learning_rate": 7.877896569002634e-06, "loss": 1.0302, "step": 4643 }, { "epoch": 0.9283126358661703, "grad_norm": 1.9765625, "learning_rate": 7.877034763276575e-06, "loss": 1.0495, "step": 4644 }, { "epoch": 0.9285125309212663, "grad_norm": 2.359375, "learning_rate": 7.876172829752937e-06, "loss": 1.1101, "step": 4645 }, { "epoch": 0.9287124259763624, "grad_norm": 2.140625, "learning_rate": 7.87531076847001e-06, "loss": 1.0882, "step": 4646 }, { "epoch": 0.9289123210314585, "grad_norm": 1.9765625, "learning_rate": 7.874448579466085e-06, "loss": 1.0235, "step": 4647 }, { "epoch": 0.9291122160865546, "grad_norm": 2.015625, "learning_rate": 7.873586262779462e-06, "loss": 1.0154, "step": 4648 }, { "epoch": 0.9293121111416507, "grad_norm": 2.078125, "learning_rate": 7.872723818448443e-06, "loss": 1.083, "step": 4649 }, { "epoch": 0.9295120061967467, "grad_norm": 2.109375, "learning_rate": 7.87186124651134e-06, "loss": 1.0428, "step": 4650 }, { "epoch": 0.9297119012518428, "grad_norm": 2.203125, "learning_rate": 7.870998547006467e-06, "loss": 1.0099, "step": 4651 }, { "epoch": 0.9299117963069389, "grad_norm": 2.109375, "learning_rate": 7.870135719972146e-06, "loss": 0.9716, "step": 4652 }, { "epoch": 0.930111691362035, "grad_norm": 2.03125, "learning_rate": 7.869272765446701e-06, "loss": 1.0331, "step": 4653 }, { "epoch": 0.930311586417131, "grad_norm": 1.9375, "learning_rate": 7.868409683468466e-06, "loss": 0.9574, "step": 4654 }, { "epoch": 0.9305114814722271, "grad_norm": 2.15625, "learning_rate": 7.867546474075782e-06, "loss": 1.1178, "step": 4655 }, { "epoch": 0.9307113765273232, "grad_norm": 2.046875, "learning_rate": 7.866683137306987e-06, "loss": 1.0142, "step": 4656 }, { "epoch": 0.9309112715824193, "grad_norm": 2.09375, "learning_rate": 7.865819673200435e-06, "loss": 0.9947, "step": 4657 }, { "epoch": 0.9311111666375153, "grad_norm": 2.046875, "learning_rate": 7.864956081794477e-06, "loss": 0.9642, "step": 4658 }, { "epoch": 0.9313110616926114, "grad_norm": 2.140625, "learning_rate": 7.864092363127478e-06, "loss": 1.0742, "step": 4659 }, { "epoch": 0.9315109567477075, "grad_norm": 1.921875, "learning_rate": 7.8632285172378e-06, "loss": 0.8985, "step": 4660 }, { "epoch": 0.9317108518028036, "grad_norm": 2.0, "learning_rate": 7.86236454416382e-06, "loss": 0.9271, "step": 4661 }, { "epoch": 0.9319107468578997, "grad_norm": 2.171875, "learning_rate": 7.86150044394391e-06, "loss": 1.0636, "step": 4662 }, { "epoch": 0.9321106419129956, "grad_norm": 2.109375, "learning_rate": 7.860636216616458e-06, "loss": 1.073, "step": 4663 }, { "epoch": 0.9323105369680917, "grad_norm": 2.109375, "learning_rate": 7.85977186221985e-06, "loss": 1.0081, "step": 4664 }, { "epoch": 0.9325104320231878, "grad_norm": 2.03125, "learning_rate": 7.85890738079248e-06, "loss": 1.0321, "step": 4665 }, { "epoch": 0.9327103270782839, "grad_norm": 2.015625, "learning_rate": 7.858042772372751e-06, "loss": 1.0386, "step": 4666 }, { "epoch": 0.9329102221333799, "grad_norm": 2.171875, "learning_rate": 7.857178036999066e-06, "loss": 0.9717, "step": 4667 }, { "epoch": 0.933110117188476, "grad_norm": 2.34375, "learning_rate": 7.85631317470984e-06, "loss": 1.0604, "step": 4668 }, { "epoch": 0.9333100122435721, "grad_norm": 2.078125, "learning_rate": 7.855448185543486e-06, "loss": 0.9417, "step": 4669 }, { "epoch": 0.9335099072986682, "grad_norm": 2.03125, "learning_rate": 7.854583069538431e-06, "loss": 1.0199, "step": 4670 }, { "epoch": 0.9337098023537643, "grad_norm": 2.1875, "learning_rate": 7.853717826733098e-06, "loss": 1.1401, "step": 4671 }, { "epoch": 0.9339096974088603, "grad_norm": 2.140625, "learning_rate": 7.852852457165924e-06, "loss": 1.0263, "step": 4672 }, { "epoch": 0.9341095924639564, "grad_norm": 1.9765625, "learning_rate": 7.851986960875351e-06, "loss": 0.9771, "step": 4673 }, { "epoch": 0.9343094875190525, "grad_norm": 2.15625, "learning_rate": 7.851121337899819e-06, "loss": 1.0479, "step": 4674 }, { "epoch": 0.9345093825741486, "grad_norm": 2.046875, "learning_rate": 7.850255588277784e-06, "loss": 0.9217, "step": 4675 }, { "epoch": 0.9347092776292446, "grad_norm": 2.03125, "learning_rate": 7.8493897120477e-06, "loss": 0.9912, "step": 4676 }, { "epoch": 0.9349091726843407, "grad_norm": 2.1875, "learning_rate": 7.848523709248026e-06, "loss": 1.0977, "step": 4677 }, { "epoch": 0.9351090677394368, "grad_norm": 1.9453125, "learning_rate": 7.847657579917237e-06, "loss": 0.9783, "step": 4678 }, { "epoch": 0.9353089627945329, "grad_norm": 2.03125, "learning_rate": 7.8467913240938e-06, "loss": 1.0672, "step": 4679 }, { "epoch": 0.9355088578496289, "grad_norm": 2.03125, "learning_rate": 7.845924941816198e-06, "loss": 0.968, "step": 4680 }, { "epoch": 0.935708752904725, "grad_norm": 2.0625, "learning_rate": 7.845058433122914e-06, "loss": 1.0014, "step": 4681 }, { "epoch": 0.9359086479598211, "grad_norm": 2.03125, "learning_rate": 7.844191798052438e-06, "loss": 1.1018, "step": 4682 }, { "epoch": 0.9361085430149172, "grad_norm": 2.03125, "learning_rate": 7.843325036643265e-06, "loss": 1.003, "step": 4683 }, { "epoch": 0.9363084380700133, "grad_norm": 2.140625, "learning_rate": 7.842458148933898e-06, "loss": 1.0554, "step": 4684 }, { "epoch": 0.9365083331251093, "grad_norm": 1.9140625, "learning_rate": 7.841591134962845e-06, "loss": 0.9854, "step": 4685 }, { "epoch": 0.9367082281802054, "grad_norm": 2.0625, "learning_rate": 7.840723994768616e-06, "loss": 0.9764, "step": 4686 }, { "epoch": 0.9369081232353015, "grad_norm": 2.15625, "learning_rate": 7.83985672838973e-06, "loss": 1.0718, "step": 4687 }, { "epoch": 0.9371080182903976, "grad_norm": 2.015625, "learning_rate": 7.838989335864714e-06, "loss": 0.9564, "step": 4688 }, { "epoch": 0.9373079133454936, "grad_norm": 2.140625, "learning_rate": 7.838121817232093e-06, "loss": 1.0227, "step": 4689 }, { "epoch": 0.9375078084005897, "grad_norm": 2.109375, "learning_rate": 7.837254172530404e-06, "loss": 1.0139, "step": 4690 }, { "epoch": 0.9377077034556858, "grad_norm": 2.046875, "learning_rate": 7.836386401798188e-06, "loss": 1.0004, "step": 4691 }, { "epoch": 0.9379075985107819, "grad_norm": 2.25, "learning_rate": 7.83551850507399e-06, "loss": 1.1292, "step": 4692 }, { "epoch": 0.9381074935658779, "grad_norm": 2.15625, "learning_rate": 7.834650482396364e-06, "loss": 1.0614, "step": 4693 }, { "epoch": 0.938307388620974, "grad_norm": 2.09375, "learning_rate": 7.833782333803865e-06, "loss": 1.076, "step": 4694 }, { "epoch": 0.9385072836760701, "grad_norm": 2.078125, "learning_rate": 7.83291405933506e-06, "loss": 0.9859, "step": 4695 }, { "epoch": 0.9387071787311662, "grad_norm": 2.046875, "learning_rate": 7.832045659028513e-06, "loss": 1.0348, "step": 4696 }, { "epoch": 0.9389070737862623, "grad_norm": 2.0625, "learning_rate": 7.831177132922801e-06, "loss": 1.0461, "step": 4697 }, { "epoch": 0.9391069688413582, "grad_norm": 2.109375, "learning_rate": 7.830308481056503e-06, "loss": 1.0873, "step": 4698 }, { "epoch": 0.9393068638964543, "grad_norm": 2.3125, "learning_rate": 7.829439703468203e-06, "loss": 0.9891, "step": 4699 }, { "epoch": 0.9395067589515504, "grad_norm": 2.015625, "learning_rate": 7.828570800196495e-06, "loss": 0.9237, "step": 4700 }, { "epoch": 0.9397066540066465, "grad_norm": 2.1875, "learning_rate": 7.827701771279976e-06, "loss": 1.0243, "step": 4701 }, { "epoch": 0.9399065490617425, "grad_norm": 2.359375, "learning_rate": 7.826832616757244e-06, "loss": 1.0948, "step": 4702 }, { "epoch": 0.9401064441168386, "grad_norm": 2.125, "learning_rate": 7.825963336666909e-06, "loss": 1.0013, "step": 4703 }, { "epoch": 0.9403063391719347, "grad_norm": 2.09375, "learning_rate": 7.825093931047585e-06, "loss": 1.0162, "step": 4704 }, { "epoch": 0.9405062342270308, "grad_norm": 2.09375, "learning_rate": 7.824224399937891e-06, "loss": 0.9432, "step": 4705 }, { "epoch": 0.9407061292821269, "grad_norm": 2.046875, "learning_rate": 7.82335474337645e-06, "loss": 1.0542, "step": 4706 }, { "epoch": 0.9409060243372229, "grad_norm": 2.09375, "learning_rate": 7.822484961401893e-06, "loss": 1.0713, "step": 4707 }, { "epoch": 0.941105919392319, "grad_norm": 2.125, "learning_rate": 7.821615054052856e-06, "loss": 0.9936, "step": 4708 }, { "epoch": 0.9413058144474151, "grad_norm": 2.015625, "learning_rate": 7.820745021367977e-06, "loss": 0.9805, "step": 4709 }, { "epoch": 0.9415057095025112, "grad_norm": 2.109375, "learning_rate": 7.819874863385908e-06, "loss": 1.0279, "step": 4710 }, { "epoch": 0.9417056045576072, "grad_norm": 2.203125, "learning_rate": 7.819004580145298e-06, "loss": 1.1394, "step": 4711 }, { "epoch": 0.9419054996127033, "grad_norm": 2.03125, "learning_rate": 7.818134171684805e-06, "loss": 0.972, "step": 4712 }, { "epoch": 0.9421053946677994, "grad_norm": 2.25, "learning_rate": 7.817263638043096e-06, "loss": 1.0544, "step": 4713 }, { "epoch": 0.9423052897228955, "grad_norm": 2.25, "learning_rate": 7.816392979258834e-06, "loss": 1.0741, "step": 4714 }, { "epoch": 0.9425051847779915, "grad_norm": 1.96875, "learning_rate": 7.815522195370697e-06, "loss": 0.9426, "step": 4715 }, { "epoch": 0.9427050798330876, "grad_norm": 1.953125, "learning_rate": 7.814651286417367e-06, "loss": 1.0312, "step": 4716 }, { "epoch": 0.9429049748881837, "grad_norm": 2.046875, "learning_rate": 7.813780252437526e-06, "loss": 1.0476, "step": 4717 }, { "epoch": 0.9431048699432798, "grad_norm": 2.125, "learning_rate": 7.812909093469868e-06, "loss": 1.0199, "step": 4718 }, { "epoch": 0.9433047649983759, "grad_norm": 2.203125, "learning_rate": 7.812037809553086e-06, "loss": 1.0714, "step": 4719 }, { "epoch": 0.9435046600534719, "grad_norm": 2.28125, "learning_rate": 7.811166400725884e-06, "loss": 1.1525, "step": 4720 }, { "epoch": 0.943704555108568, "grad_norm": 2.125, "learning_rate": 7.810294867026974e-06, "loss": 1.0328, "step": 4721 }, { "epoch": 0.9439044501636641, "grad_norm": 1.953125, "learning_rate": 7.809423208495064e-06, "loss": 0.9166, "step": 4722 }, { "epoch": 0.9441043452187602, "grad_norm": 1.9453125, "learning_rate": 7.808551425168878e-06, "loss": 1.0082, "step": 4723 }, { "epoch": 0.9443042402738562, "grad_norm": 2.03125, "learning_rate": 7.807679517087135e-06, "loss": 1.0474, "step": 4724 }, { "epoch": 0.9445041353289523, "grad_norm": 1.9921875, "learning_rate": 7.806807484288567e-06, "loss": 0.9546, "step": 4725 }, { "epoch": 0.9447040303840484, "grad_norm": 2.03125, "learning_rate": 7.805935326811913e-06, "loss": 1.0596, "step": 4726 }, { "epoch": 0.9449039254391445, "grad_norm": 2.03125, "learning_rate": 7.805063044695909e-06, "loss": 1.0443, "step": 4727 }, { "epoch": 0.9451038204942406, "grad_norm": 2.296875, "learning_rate": 7.804190637979305e-06, "loss": 0.9845, "step": 4728 }, { "epoch": 0.9453037155493366, "grad_norm": 2.140625, "learning_rate": 7.803318106700853e-06, "loss": 1.0737, "step": 4729 }, { "epoch": 0.9455036106044327, "grad_norm": 2.03125, "learning_rate": 7.80244545089931e-06, "loss": 1.0044, "step": 4730 }, { "epoch": 0.9457035056595288, "grad_norm": 2.03125, "learning_rate": 7.80157267061344e-06, "loss": 1.0396, "step": 4731 }, { "epoch": 0.9459034007146249, "grad_norm": 2.109375, "learning_rate": 7.800699765882009e-06, "loss": 1.052, "step": 4732 }, { "epoch": 0.9461032957697209, "grad_norm": 2.15625, "learning_rate": 7.799826736743796e-06, "loss": 1.0078, "step": 4733 }, { "epoch": 0.946303190824817, "grad_norm": 2.15625, "learning_rate": 7.798953583237578e-06, "loss": 1.017, "step": 4734 }, { "epoch": 0.946503085879913, "grad_norm": 2.09375, "learning_rate": 7.79808030540214e-06, "loss": 1.0441, "step": 4735 }, { "epoch": 0.9467029809350092, "grad_norm": 1.9921875, "learning_rate": 7.797206903276274e-06, "loss": 0.9931, "step": 4736 }, { "epoch": 0.9469028759901051, "grad_norm": 2.109375, "learning_rate": 7.796333376898774e-06, "loss": 0.9818, "step": 4737 }, { "epoch": 0.9471027710452012, "grad_norm": 2.03125, "learning_rate": 7.795459726308446e-06, "loss": 1.0046, "step": 4738 }, { "epoch": 0.9473026661002973, "grad_norm": 2.0, "learning_rate": 7.794585951544096e-06, "loss": 1.0416, "step": 4739 }, { "epoch": 0.9475025611553934, "grad_norm": 2.140625, "learning_rate": 7.793712052644535e-06, "loss": 1.0064, "step": 4740 }, { "epoch": 0.9477024562104895, "grad_norm": 2.0625, "learning_rate": 7.792838029648584e-06, "loss": 0.9996, "step": 4741 }, { "epoch": 0.9479023512655855, "grad_norm": 2.203125, "learning_rate": 7.791963882595066e-06, "loss": 1.101, "step": 4742 }, { "epoch": 0.9481022463206816, "grad_norm": 2.1875, "learning_rate": 7.791089611522811e-06, "loss": 1.0511, "step": 4743 }, { "epoch": 0.9483021413757777, "grad_norm": 2.15625, "learning_rate": 7.790215216470654e-06, "loss": 1.1489, "step": 4744 }, { "epoch": 0.9485020364308738, "grad_norm": 2.0, "learning_rate": 7.789340697477432e-06, "loss": 1.0727, "step": 4745 }, { "epoch": 0.9487019314859698, "grad_norm": 2.140625, "learning_rate": 7.788466054581997e-06, "loss": 1.0585, "step": 4746 }, { "epoch": 0.9489018265410659, "grad_norm": 2.078125, "learning_rate": 7.787591287823197e-06, "loss": 0.9812, "step": 4747 }, { "epoch": 0.949101721596162, "grad_norm": 2.078125, "learning_rate": 7.78671639723989e-06, "loss": 0.9217, "step": 4748 }, { "epoch": 0.9493016166512581, "grad_norm": 2.109375, "learning_rate": 7.785841382870938e-06, "loss": 1.0322, "step": 4749 }, { "epoch": 0.9495015117063542, "grad_norm": 2.046875, "learning_rate": 7.78496624475521e-06, "loss": 1.0459, "step": 4750 }, { "epoch": 0.9497014067614502, "grad_norm": 2.0625, "learning_rate": 7.784090982931577e-06, "loss": 1.0993, "step": 4751 }, { "epoch": 0.9499013018165463, "grad_norm": 2.0625, "learning_rate": 7.78321559743892e-06, "loss": 0.9609, "step": 4752 }, { "epoch": 0.9501011968716424, "grad_norm": 2.140625, "learning_rate": 7.782340088316125e-06, "loss": 1.0295, "step": 4753 }, { "epoch": 0.9503010919267385, "grad_norm": 2.046875, "learning_rate": 7.78146445560208e-06, "loss": 1.0306, "step": 4754 }, { "epoch": 0.9505009869818345, "grad_norm": 2.078125, "learning_rate": 7.78058869933568e-06, "loss": 0.9982, "step": 4755 }, { "epoch": 0.9507008820369306, "grad_norm": 2.171875, "learning_rate": 7.77971281955583e-06, "loss": 1.0818, "step": 4756 }, { "epoch": 0.9509007770920267, "grad_norm": 1.9765625, "learning_rate": 7.778836816301429e-06, "loss": 0.9594, "step": 4757 }, { "epoch": 0.9511006721471228, "grad_norm": 1.984375, "learning_rate": 7.777960689611396e-06, "loss": 1.085, "step": 4758 }, { "epoch": 0.9513005672022188, "grad_norm": 2.0, "learning_rate": 7.777084439524644e-06, "loss": 1.0684, "step": 4759 }, { "epoch": 0.9515004622573149, "grad_norm": 1.9609375, "learning_rate": 7.7762080660801e-06, "loss": 1.0481, "step": 4760 }, { "epoch": 0.951700357312411, "grad_norm": 2.03125, "learning_rate": 7.775331569316688e-06, "loss": 0.9095, "step": 4761 }, { "epoch": 0.9519002523675071, "grad_norm": 1.9609375, "learning_rate": 7.774454949273348e-06, "loss": 0.9383, "step": 4762 }, { "epoch": 0.9521001474226032, "grad_norm": 2.3125, "learning_rate": 7.773578205989013e-06, "loss": 0.9806, "step": 4763 }, { "epoch": 0.9523000424776992, "grad_norm": 2.15625, "learning_rate": 7.77270133950263e-06, "loss": 1.0464, "step": 4764 }, { "epoch": 0.9524999375327953, "grad_norm": 2.28125, "learning_rate": 7.77182434985315e-06, "loss": 1.2045, "step": 4765 }, { "epoch": 0.9526998325878914, "grad_norm": 2.15625, "learning_rate": 7.770947237079528e-06, "loss": 1.0449, "step": 4766 }, { "epoch": 0.9528997276429875, "grad_norm": 2.140625, "learning_rate": 7.770070001220727e-06, "loss": 1.0402, "step": 4767 }, { "epoch": 0.9530996226980835, "grad_norm": 2.234375, "learning_rate": 7.76919264231571e-06, "loss": 1.1098, "step": 4768 }, { "epoch": 0.9532995177531796, "grad_norm": 2.046875, "learning_rate": 7.768315160403453e-06, "loss": 1.0016, "step": 4769 }, { "epoch": 0.9534994128082757, "grad_norm": 2.109375, "learning_rate": 7.767437555522934e-06, "loss": 0.996, "step": 4770 }, { "epoch": 0.9536993078633718, "grad_norm": 2.046875, "learning_rate": 7.766559827713131e-06, "loss": 1.0092, "step": 4771 }, { "epoch": 0.9538992029184679, "grad_norm": 2.078125, "learning_rate": 7.765681977013037e-06, "loss": 1.0211, "step": 4772 }, { "epoch": 0.9540990979735638, "grad_norm": 2.15625, "learning_rate": 7.764804003461646e-06, "loss": 0.9899, "step": 4773 }, { "epoch": 0.95429899302866, "grad_norm": 2.140625, "learning_rate": 7.763925907097956e-06, "loss": 1.0812, "step": 4774 }, { "epoch": 0.954498888083756, "grad_norm": 2.0625, "learning_rate": 7.763047687960971e-06, "loss": 0.984, "step": 4775 }, { "epoch": 0.9546987831388521, "grad_norm": 2.125, "learning_rate": 7.762169346089705e-06, "loss": 1.0208, "step": 4776 }, { "epoch": 0.9548986781939481, "grad_norm": 1.96875, "learning_rate": 7.76129088152317e-06, "loss": 0.9695, "step": 4777 }, { "epoch": 0.9550985732490442, "grad_norm": 2.09375, "learning_rate": 7.760412294300392e-06, "loss": 1.0673, "step": 4778 }, { "epoch": 0.9552984683041403, "grad_norm": 2.0625, "learning_rate": 7.759533584460392e-06, "loss": 0.9945, "step": 4779 }, { "epoch": 0.9554983633592364, "grad_norm": 2.015625, "learning_rate": 7.758654752042205e-06, "loss": 1.0414, "step": 4780 }, { "epoch": 0.9556982584143324, "grad_norm": 2.0625, "learning_rate": 7.75777579708487e-06, "loss": 1.0086, "step": 4781 }, { "epoch": 0.9558981534694285, "grad_norm": 2.25, "learning_rate": 7.756896719627428e-06, "loss": 1.0577, "step": 4782 }, { "epoch": 0.9560980485245246, "grad_norm": 2.0625, "learning_rate": 7.756017519708926e-06, "loss": 1.0469, "step": 4783 }, { "epoch": 0.9562979435796207, "grad_norm": 2.03125, "learning_rate": 7.755138197368423e-06, "loss": 1.0342, "step": 4784 }, { "epoch": 0.9564978386347168, "grad_norm": 2.03125, "learning_rate": 7.754258752644974e-06, "loss": 0.9765, "step": 4785 }, { "epoch": 0.9566977336898128, "grad_norm": 2.0625, "learning_rate": 7.753379185577645e-06, "loss": 1.0252, "step": 4786 }, { "epoch": 0.9568976287449089, "grad_norm": 2.09375, "learning_rate": 7.752499496205509e-06, "loss": 0.975, "step": 4787 }, { "epoch": 0.957097523800005, "grad_norm": 2.03125, "learning_rate": 7.751619684567638e-06, "loss": 1.1005, "step": 4788 }, { "epoch": 0.9572974188551011, "grad_norm": 2.046875, "learning_rate": 7.750739750703114e-06, "loss": 1.0138, "step": 4789 }, { "epoch": 0.9574973139101971, "grad_norm": 2.171875, "learning_rate": 7.749859694651023e-06, "loss": 1.1132, "step": 4790 }, { "epoch": 0.9576972089652932, "grad_norm": 2.09375, "learning_rate": 7.74897951645046e-06, "loss": 1.0403, "step": 4791 }, { "epoch": 0.9578971040203893, "grad_norm": 1.9453125, "learning_rate": 7.74809921614052e-06, "loss": 0.9768, "step": 4792 }, { "epoch": 0.9580969990754854, "grad_norm": 2.140625, "learning_rate": 7.747218793760308e-06, "loss": 1.0403, "step": 4793 }, { "epoch": 0.9582968941305814, "grad_norm": 1.9296875, "learning_rate": 7.746338249348928e-06, "loss": 1.0055, "step": 4794 }, { "epoch": 0.9584967891856775, "grad_norm": 2.09375, "learning_rate": 7.745457582945497e-06, "loss": 1.0358, "step": 4795 }, { "epoch": 0.9586966842407736, "grad_norm": 2.0625, "learning_rate": 7.744576794589132e-06, "loss": 1.0853, "step": 4796 }, { "epoch": 0.9588965792958697, "grad_norm": 2.140625, "learning_rate": 7.743695884318961e-06, "loss": 1.0774, "step": 4797 }, { "epoch": 0.9590964743509658, "grad_norm": 2.140625, "learning_rate": 7.742814852174112e-06, "loss": 1.0557, "step": 4798 }, { "epoch": 0.9592963694060618, "grad_norm": 1.9765625, "learning_rate": 7.741933698193719e-06, "loss": 0.9809, "step": 4799 }, { "epoch": 0.9594962644611579, "grad_norm": 2.21875, "learning_rate": 7.741052422416923e-06, "loss": 0.9666, "step": 4800 }, { "epoch": 0.959696159516254, "grad_norm": 1.9921875, "learning_rate": 7.740171024882875e-06, "loss": 1.0036, "step": 4801 }, { "epoch": 0.9598960545713501, "grad_norm": 2.109375, "learning_rate": 7.73928950563072e-06, "loss": 1.0543, "step": 4802 }, { "epoch": 0.9600959496264461, "grad_norm": 2.046875, "learning_rate": 7.738407864699618e-06, "loss": 1.0295, "step": 4803 }, { "epoch": 0.9602958446815422, "grad_norm": 2.125, "learning_rate": 7.737526102128729e-06, "loss": 1.0324, "step": 4804 }, { "epoch": 0.9604957397366383, "grad_norm": 2.03125, "learning_rate": 7.736644217957226e-06, "loss": 0.9947, "step": 4805 }, { "epoch": 0.9606956347917344, "grad_norm": 2.359375, "learning_rate": 7.735762212224278e-06, "loss": 1.0961, "step": 4806 }, { "epoch": 0.9608955298468305, "grad_norm": 2.125, "learning_rate": 7.734880084969065e-06, "loss": 0.999, "step": 4807 }, { "epoch": 0.9610954249019265, "grad_norm": 2.234375, "learning_rate": 7.733997836230771e-06, "loss": 1.0628, "step": 4808 }, { "epoch": 0.9612953199570226, "grad_norm": 2.0, "learning_rate": 7.733115466048585e-06, "loss": 1.0227, "step": 4809 }, { "epoch": 0.9614952150121187, "grad_norm": 2.171875, "learning_rate": 7.732232974461701e-06, "loss": 1.0994, "step": 4810 }, { "epoch": 0.9616951100672148, "grad_norm": 2.140625, "learning_rate": 7.731350361509322e-06, "loss": 1.0277, "step": 4811 }, { "epoch": 0.9618950051223107, "grad_norm": 2.015625, "learning_rate": 7.730467627230648e-06, "loss": 1.0349, "step": 4812 }, { "epoch": 0.9620949001774068, "grad_norm": 1.9609375, "learning_rate": 7.729584771664897e-06, "loss": 0.9012, "step": 4813 }, { "epoch": 0.9622947952325029, "grad_norm": 2.21875, "learning_rate": 7.728701794851281e-06, "loss": 0.9579, "step": 4814 }, { "epoch": 0.962494690287599, "grad_norm": 2.03125, "learning_rate": 7.727818696829023e-06, "loss": 1.1154, "step": 4815 }, { "epoch": 0.962694585342695, "grad_norm": 2.046875, "learning_rate": 7.72693547763735e-06, "loss": 0.9688, "step": 4816 }, { "epoch": 0.9628944803977911, "grad_norm": 2.15625, "learning_rate": 7.726052137315493e-06, "loss": 1.043, "step": 4817 }, { "epoch": 0.9630943754528872, "grad_norm": 2.390625, "learning_rate": 7.725168675902692e-06, "loss": 1.0939, "step": 4818 }, { "epoch": 0.9632942705079833, "grad_norm": 2.1875, "learning_rate": 7.72428509343819e-06, "loss": 1.0627, "step": 4819 }, { "epoch": 0.9634941655630794, "grad_norm": 2.125, "learning_rate": 7.723401389961235e-06, "loss": 1.0583, "step": 4820 }, { "epoch": 0.9636940606181754, "grad_norm": 2.140625, "learning_rate": 7.72251756551108e-06, "loss": 1.067, "step": 4821 }, { "epoch": 0.9638939556732715, "grad_norm": 2.046875, "learning_rate": 7.721633620126987e-06, "loss": 1.0691, "step": 4822 }, { "epoch": 0.9640938507283676, "grad_norm": 2.171875, "learning_rate": 7.72074955384822e-06, "loss": 1.027, "step": 4823 }, { "epoch": 0.9642937457834637, "grad_norm": 2.171875, "learning_rate": 7.719865366714046e-06, "loss": 1.1455, "step": 4824 }, { "epoch": 0.9644936408385597, "grad_norm": 1.9921875, "learning_rate": 7.718981058763744e-06, "loss": 0.9386, "step": 4825 }, { "epoch": 0.9646935358936558, "grad_norm": 2.25, "learning_rate": 7.718096630036593e-06, "loss": 1.0428, "step": 4826 }, { "epoch": 0.9648934309487519, "grad_norm": 1.9140625, "learning_rate": 7.71721208057188e-06, "loss": 0.941, "step": 4827 }, { "epoch": 0.965093326003848, "grad_norm": 2.078125, "learning_rate": 7.7163274104089e-06, "loss": 0.9998, "step": 4828 }, { "epoch": 0.9652932210589441, "grad_norm": 2.078125, "learning_rate": 7.715442619586943e-06, "loss": 1.0023, "step": 4829 }, { "epoch": 0.9654931161140401, "grad_norm": 1.875, "learning_rate": 7.714557708145315e-06, "loss": 0.9066, "step": 4830 }, { "epoch": 0.9656930111691362, "grad_norm": 2.125, "learning_rate": 7.713672676123324e-06, "loss": 1.0072, "step": 4831 }, { "epoch": 0.9658929062242323, "grad_norm": 2.046875, "learning_rate": 7.712787523560283e-06, "loss": 1.0759, "step": 4832 }, { "epoch": 0.9660928012793284, "grad_norm": 2.0, "learning_rate": 7.711902250495508e-06, "loss": 0.9749, "step": 4833 }, { "epoch": 0.9662926963344244, "grad_norm": 2.109375, "learning_rate": 7.711016856968327e-06, "loss": 1.0362, "step": 4834 }, { "epoch": 0.9664925913895205, "grad_norm": 2.234375, "learning_rate": 7.710131343018066e-06, "loss": 1.0392, "step": 4835 }, { "epoch": 0.9666924864446166, "grad_norm": 2.0, "learning_rate": 7.709245708684061e-06, "loss": 0.9769, "step": 4836 }, { "epoch": 0.9668923814997127, "grad_norm": 2.15625, "learning_rate": 7.708359954005651e-06, "loss": 1.0357, "step": 4837 }, { "epoch": 0.9670922765548087, "grad_norm": 2.28125, "learning_rate": 7.70747407902218e-06, "loss": 1.1927, "step": 4838 }, { "epoch": 0.9672921716099048, "grad_norm": 1.9765625, "learning_rate": 7.706588083772999e-06, "loss": 0.9573, "step": 4839 }, { "epoch": 0.9674920666650009, "grad_norm": 1.984375, "learning_rate": 7.705701968297466e-06, "loss": 0.9817, "step": 4840 }, { "epoch": 0.967691961720097, "grad_norm": 2.234375, "learning_rate": 7.704815732634941e-06, "loss": 1.0487, "step": 4841 }, { "epoch": 0.9678918567751931, "grad_norm": 1.9921875, "learning_rate": 7.703929376824787e-06, "loss": 0.9766, "step": 4842 }, { "epoch": 0.9680917518302891, "grad_norm": 2.0625, "learning_rate": 7.703042900906383e-06, "loss": 1.0385, "step": 4843 }, { "epoch": 0.9682916468853852, "grad_norm": 2.015625, "learning_rate": 7.702156304919098e-06, "loss": 0.9849, "step": 4844 }, { "epoch": 0.9684915419404813, "grad_norm": 2.125, "learning_rate": 7.70126958890232e-06, "loss": 1.0976, "step": 4845 }, { "epoch": 0.9686914369955774, "grad_norm": 2.03125, "learning_rate": 7.700382752895436e-06, "loss": 0.9467, "step": 4846 }, { "epoch": 0.9688913320506733, "grad_norm": 2.125, "learning_rate": 7.69949579693784e-06, "loss": 0.989, "step": 4847 }, { "epoch": 0.9690912271057694, "grad_norm": 2.015625, "learning_rate": 7.698608721068926e-06, "loss": 1.0281, "step": 4848 }, { "epoch": 0.9692911221608655, "grad_norm": 2.171875, "learning_rate": 7.697721525328104e-06, "loss": 1.0828, "step": 4849 }, { "epoch": 0.9694910172159616, "grad_norm": 2.078125, "learning_rate": 7.696834209754777e-06, "loss": 0.9711, "step": 4850 }, { "epoch": 0.9696909122710577, "grad_norm": 2.015625, "learning_rate": 7.695946774388364e-06, "loss": 1.026, "step": 4851 }, { "epoch": 0.9698908073261537, "grad_norm": 2.015625, "learning_rate": 7.695059219268281e-06, "loss": 1.0395, "step": 4852 }, { "epoch": 0.9700907023812498, "grad_norm": 2.09375, "learning_rate": 7.694171544433958e-06, "loss": 0.9939, "step": 4853 }, { "epoch": 0.9702905974363459, "grad_norm": 2.046875, "learning_rate": 7.693283749924821e-06, "loss": 0.9857, "step": 4854 }, { "epoch": 0.970490492491442, "grad_norm": 2.109375, "learning_rate": 7.69239583578031e-06, "loss": 1.0404, "step": 4855 }, { "epoch": 0.970690387546538, "grad_norm": 2.078125, "learning_rate": 7.691507802039861e-06, "loss": 1.0722, "step": 4856 }, { "epoch": 0.9708902826016341, "grad_norm": 2.046875, "learning_rate": 7.690619648742923e-06, "loss": 0.989, "step": 4857 }, { "epoch": 0.9710901776567302, "grad_norm": 2.203125, "learning_rate": 7.68973137592895e-06, "loss": 1.0558, "step": 4858 }, { "epoch": 0.9712900727118263, "grad_norm": 2.046875, "learning_rate": 7.688842983637395e-06, "loss": 1.0041, "step": 4859 }, { "epoch": 0.9714899677669223, "grad_norm": 2.078125, "learning_rate": 7.687954471907719e-06, "loss": 1.0483, "step": 4860 }, { "epoch": 0.9716898628220184, "grad_norm": 2.078125, "learning_rate": 7.687065840779397e-06, "loss": 1.0264, "step": 4861 }, { "epoch": 0.9718897578771145, "grad_norm": 2.171875, "learning_rate": 7.686177090291896e-06, "loss": 1.079, "step": 4862 }, { "epoch": 0.9720896529322106, "grad_norm": 2.09375, "learning_rate": 7.685288220484693e-06, "loss": 1.1362, "step": 4863 }, { "epoch": 0.9722895479873067, "grad_norm": 2.109375, "learning_rate": 7.684399231397278e-06, "loss": 1.0843, "step": 4864 }, { "epoch": 0.9724894430424027, "grad_norm": 2.15625, "learning_rate": 7.683510123069133e-06, "loss": 1.0176, "step": 4865 }, { "epoch": 0.9726893380974988, "grad_norm": 1.9453125, "learning_rate": 7.682620895539756e-06, "loss": 1.0219, "step": 4866 }, { "epoch": 0.9728892331525949, "grad_norm": 2.15625, "learning_rate": 7.681731548848645e-06, "loss": 1.1454, "step": 4867 }, { "epoch": 0.973089128207691, "grad_norm": 2.109375, "learning_rate": 7.680842083035305e-06, "loss": 1.0198, "step": 4868 }, { "epoch": 0.973289023262787, "grad_norm": 2.171875, "learning_rate": 7.679952498139248e-06, "loss": 0.9693, "step": 4869 }, { "epoch": 0.9734889183178831, "grad_norm": 2.078125, "learning_rate": 7.679062794199982e-06, "loss": 1.0717, "step": 4870 }, { "epoch": 0.9736888133729792, "grad_norm": 2.0625, "learning_rate": 7.678172971257038e-06, "loss": 1.0067, "step": 4871 }, { "epoch": 0.9738887084280753, "grad_norm": 2.0, "learning_rate": 7.677283029349936e-06, "loss": 1.0556, "step": 4872 }, { "epoch": 0.9740886034831714, "grad_norm": 1.9375, "learning_rate": 7.676392968518205e-06, "loss": 0.964, "step": 4873 }, { "epoch": 0.9742884985382674, "grad_norm": 2.296875, "learning_rate": 7.675502788801387e-06, "loss": 1.1312, "step": 4874 }, { "epoch": 0.9744883935933635, "grad_norm": 2.359375, "learning_rate": 7.67461249023902e-06, "loss": 0.971, "step": 4875 }, { "epoch": 0.9746882886484596, "grad_norm": 2.0625, "learning_rate": 7.67372207287065e-06, "loss": 0.9664, "step": 4876 }, { "epoch": 0.9748881837035557, "grad_norm": 1.984375, "learning_rate": 7.672831536735832e-06, "loss": 1.0971, "step": 4877 }, { "epoch": 0.9750880787586517, "grad_norm": 2.125, "learning_rate": 7.671940881874124e-06, "loss": 1.087, "step": 4878 }, { "epoch": 0.9752879738137478, "grad_norm": 2.125, "learning_rate": 7.671050108325087e-06, "loss": 0.9465, "step": 4879 }, { "epoch": 0.9754878688688439, "grad_norm": 2.4375, "learning_rate": 7.670159216128291e-06, "loss": 0.9395, "step": 4880 }, { "epoch": 0.97568776392394, "grad_norm": 2.078125, "learning_rate": 7.669268205323304e-06, "loss": 0.9663, "step": 4881 }, { "epoch": 0.975887658979036, "grad_norm": 2.015625, "learning_rate": 7.66837707594971e-06, "loss": 1.026, "step": 4882 }, { "epoch": 0.976087554034132, "grad_norm": 2.140625, "learning_rate": 7.667485828047091e-06, "loss": 1.1083, "step": 4883 }, { "epoch": 0.9762874490892282, "grad_norm": 2.0, "learning_rate": 7.666594461655039e-06, "loss": 1.0118, "step": 4884 }, { "epoch": 0.9764873441443243, "grad_norm": 2.078125, "learning_rate": 7.665702976813142e-06, "loss": 1.0811, "step": 4885 }, { "epoch": 0.9766872391994204, "grad_norm": 2.21875, "learning_rate": 7.664811373561008e-06, "loss": 1.1536, "step": 4886 }, { "epoch": 0.9768871342545163, "grad_norm": 2.15625, "learning_rate": 7.663919651938234e-06, "loss": 1.0617, "step": 4887 }, { "epoch": 0.9770870293096124, "grad_norm": 2.015625, "learning_rate": 7.663027811984433e-06, "loss": 1.0567, "step": 4888 }, { "epoch": 0.9772869243647085, "grad_norm": 2.171875, "learning_rate": 7.662135853739224e-06, "loss": 0.9078, "step": 4889 }, { "epoch": 0.9774868194198046, "grad_norm": 2.09375, "learning_rate": 7.661243777242223e-06, "loss": 1.0036, "step": 4890 }, { "epoch": 0.9776867144749006, "grad_norm": 1.9296875, "learning_rate": 7.660351582533057e-06, "loss": 0.9038, "step": 4891 }, { "epoch": 0.9778866095299967, "grad_norm": 2.09375, "learning_rate": 7.65945926965136e-06, "loss": 1.1392, "step": 4892 }, { "epoch": 0.9780865045850928, "grad_norm": 1.953125, "learning_rate": 7.658566838636762e-06, "loss": 0.93, "step": 4893 }, { "epoch": 0.9782863996401889, "grad_norm": 2.046875, "learning_rate": 7.657674289528914e-06, "loss": 0.9989, "step": 4894 }, { "epoch": 0.978486294695285, "grad_norm": 2.1875, "learning_rate": 7.656781622367455e-06, "loss": 1.0522, "step": 4895 }, { "epoch": 0.978686189750381, "grad_norm": 2.1875, "learning_rate": 7.65588883719204e-06, "loss": 1.0799, "step": 4896 }, { "epoch": 0.9788860848054771, "grad_norm": 1.953125, "learning_rate": 7.654995934042328e-06, "loss": 0.9497, "step": 4897 }, { "epoch": 0.9790859798605732, "grad_norm": 2.234375, "learning_rate": 7.65410291295798e-06, "loss": 1.1484, "step": 4898 }, { "epoch": 0.9792858749156693, "grad_norm": 2.21875, "learning_rate": 7.653209773978662e-06, "loss": 1.06, "step": 4899 }, { "epoch": 0.9794857699707653, "grad_norm": 2.078125, "learning_rate": 7.652316517144052e-06, "loss": 1.0536, "step": 4900 }, { "epoch": 0.9796856650258614, "grad_norm": 1.9921875, "learning_rate": 7.651423142493824e-06, "loss": 0.9357, "step": 4901 }, { "epoch": 0.9798855600809575, "grad_norm": 2.140625, "learning_rate": 7.650529650067665e-06, "loss": 1.0424, "step": 4902 }, { "epoch": 0.9800854551360536, "grad_norm": 2.09375, "learning_rate": 7.64963603990526e-06, "loss": 1.1032, "step": 4903 }, { "epoch": 0.9802853501911496, "grad_norm": 2.09375, "learning_rate": 7.648742312046306e-06, "loss": 1.0987, "step": 4904 }, { "epoch": 0.9804852452462457, "grad_norm": 2.109375, "learning_rate": 7.6478484665305e-06, "loss": 1.0874, "step": 4905 }, { "epoch": 0.9806851403013418, "grad_norm": 2.03125, "learning_rate": 7.64695450339755e-06, "loss": 0.9831, "step": 4906 }, { "epoch": 0.9808850353564379, "grad_norm": 2.015625, "learning_rate": 7.64606042268716e-06, "loss": 0.996, "step": 4907 }, { "epoch": 0.981084930411534, "grad_norm": 2.0625, "learning_rate": 7.645166224439053e-06, "loss": 1.0016, "step": 4908 }, { "epoch": 0.98128482546663, "grad_norm": 2.0, "learning_rate": 7.644271908692944e-06, "loss": 0.9861, "step": 4909 }, { "epoch": 0.9814847205217261, "grad_norm": 2.046875, "learning_rate": 7.643377475488558e-06, "loss": 1.081, "step": 4910 }, { "epoch": 0.9816846155768222, "grad_norm": 2.109375, "learning_rate": 7.642482924865627e-06, "loss": 1.0244, "step": 4911 }, { "epoch": 0.9818845106319183, "grad_norm": 2.203125, "learning_rate": 7.641588256863887e-06, "loss": 1.0412, "step": 4912 }, { "epoch": 0.9820844056870143, "grad_norm": 2.234375, "learning_rate": 7.640693471523078e-06, "loss": 1.1168, "step": 4913 }, { "epoch": 0.9822843007421104, "grad_norm": 2.109375, "learning_rate": 7.639798568882947e-06, "loss": 1.0678, "step": 4914 }, { "epoch": 0.9824841957972065, "grad_norm": 2.109375, "learning_rate": 7.638903548983248e-06, "loss": 1.0225, "step": 4915 }, { "epoch": 0.9826840908523026, "grad_norm": 2.0, "learning_rate": 7.63800841186373e-06, "loss": 0.9436, "step": 4916 }, { "epoch": 0.9828839859073986, "grad_norm": 2.09375, "learning_rate": 7.637113157564167e-06, "loss": 1.0828, "step": 4917 }, { "epoch": 0.9830838809624947, "grad_norm": 1.9765625, "learning_rate": 7.636217786124315e-06, "loss": 0.9891, "step": 4918 }, { "epoch": 0.9832837760175908, "grad_norm": 2.15625, "learning_rate": 7.635322297583952e-06, "loss": 1.1208, "step": 4919 }, { "epoch": 0.9834836710726869, "grad_norm": 1.9140625, "learning_rate": 7.634426691982852e-06, "loss": 0.9974, "step": 4920 }, { "epoch": 0.983683566127783, "grad_norm": 2.078125, "learning_rate": 7.633530969360801e-06, "loss": 1.0724, "step": 4921 }, { "epoch": 0.983883461182879, "grad_norm": 2.015625, "learning_rate": 7.632635129757585e-06, "loss": 1.0418, "step": 4922 }, { "epoch": 0.984083356237975, "grad_norm": 2.03125, "learning_rate": 7.631739173212998e-06, "loss": 0.9366, "step": 4923 }, { "epoch": 0.9842832512930711, "grad_norm": 2.015625, "learning_rate": 7.630843099766838e-06, "loss": 1.0145, "step": 4924 }, { "epoch": 0.9844831463481672, "grad_norm": 2.125, "learning_rate": 7.62994690945891e-06, "loss": 1.0334, "step": 4925 }, { "epoch": 0.9846830414032632, "grad_norm": 2.140625, "learning_rate": 7.62905060232902e-06, "loss": 1.0941, "step": 4926 }, { "epoch": 0.9848829364583593, "grad_norm": 2.09375, "learning_rate": 7.628154178416982e-06, "loss": 1.0462, "step": 4927 }, { "epoch": 0.9850828315134554, "grad_norm": 1.9296875, "learning_rate": 7.627257637762617e-06, "loss": 0.9218, "step": 4928 }, { "epoch": 0.9852827265685515, "grad_norm": 2.046875, "learning_rate": 7.626360980405748e-06, "loss": 0.9681, "step": 4929 }, { "epoch": 0.9854826216236476, "grad_norm": 2.265625, "learning_rate": 7.625464206386205e-06, "loss": 1.0727, "step": 4930 }, { "epoch": 0.9856825166787436, "grad_norm": 1.921875, "learning_rate": 7.624567315743823e-06, "loss": 0.9793, "step": 4931 }, { "epoch": 0.9858824117338397, "grad_norm": 1.9921875, "learning_rate": 7.623670308518441e-06, "loss": 0.9675, "step": 4932 }, { "epoch": 0.9860823067889358, "grad_norm": 2.0625, "learning_rate": 7.622773184749903e-06, "loss": 1.0283, "step": 4933 }, { "epoch": 0.9862822018440319, "grad_norm": 2.0625, "learning_rate": 7.621875944478062e-06, "loss": 1.1335, "step": 4934 }, { "epoch": 0.9864820968991279, "grad_norm": 2.1875, "learning_rate": 7.62097858774277e-06, "loss": 0.9563, "step": 4935 }, { "epoch": 0.986681991954224, "grad_norm": 1.984375, "learning_rate": 7.6200811145838895e-06, "loss": 0.9792, "step": 4936 }, { "epoch": 0.9868818870093201, "grad_norm": 2.171875, "learning_rate": 7.619183525041286e-06, "loss": 0.9916, "step": 4937 }, { "epoch": 0.9870817820644162, "grad_norm": 2.125, "learning_rate": 7.618285819154829e-06, "loss": 1.1046, "step": 4938 }, { "epoch": 0.9872816771195122, "grad_norm": 2.015625, "learning_rate": 7.617387996964396e-06, "loss": 1.0579, "step": 4939 }, { "epoch": 0.9874815721746083, "grad_norm": 2.109375, "learning_rate": 7.616490058509869e-06, "loss": 1.0783, "step": 4940 }, { "epoch": 0.9876814672297044, "grad_norm": 1.9609375, "learning_rate": 7.615592003831132e-06, "loss": 1.0153, "step": 4941 }, { "epoch": 0.9878813622848005, "grad_norm": 2.015625, "learning_rate": 7.614693832968079e-06, "loss": 1.0465, "step": 4942 }, { "epoch": 0.9880812573398966, "grad_norm": 2.0, "learning_rate": 7.613795545960602e-06, "loss": 1.0529, "step": 4943 }, { "epoch": 0.9882811523949926, "grad_norm": 2.078125, "learning_rate": 7.612897142848609e-06, "loss": 1.1096, "step": 4944 }, { "epoch": 0.9884810474500887, "grad_norm": 2.125, "learning_rate": 7.611998623672004e-06, "loss": 0.9959, "step": 4945 }, { "epoch": 0.9886809425051848, "grad_norm": 2.09375, "learning_rate": 7.611099988470697e-06, "loss": 0.986, "step": 4946 }, { "epoch": 0.9888808375602809, "grad_norm": 2.09375, "learning_rate": 7.610201237284608e-06, "loss": 1.0304, "step": 4947 }, { "epoch": 0.9890807326153769, "grad_norm": 1.921875, "learning_rate": 7.60930237015366e-06, "loss": 0.9567, "step": 4948 }, { "epoch": 0.989280627670473, "grad_norm": 2.078125, "learning_rate": 7.608403387117779e-06, "loss": 0.9946, "step": 4949 }, { "epoch": 0.9894805227255691, "grad_norm": 2.0625, "learning_rate": 7.607504288216898e-06, "loss": 1.0602, "step": 4950 }, { "epoch": 0.9896804177806652, "grad_norm": 2.109375, "learning_rate": 7.606605073490955e-06, "loss": 1.1024, "step": 4951 }, { "epoch": 0.9898803128357613, "grad_norm": 2.09375, "learning_rate": 7.605705742979892e-06, "loss": 1.0524, "step": 4952 }, { "epoch": 0.9900802078908573, "grad_norm": 2.015625, "learning_rate": 7.604806296723659e-06, "loss": 1.0077, "step": 4953 }, { "epoch": 0.9902801029459534, "grad_norm": 2.1875, "learning_rate": 7.603906734762209e-06, "loss": 1.0811, "step": 4954 }, { "epoch": 0.9904799980010495, "grad_norm": 2.203125, "learning_rate": 7.6030070571354986e-06, "loss": 1.0962, "step": 4955 }, { "epoch": 0.9906798930561456, "grad_norm": 1.96875, "learning_rate": 7.602107263883494e-06, "loss": 0.9136, "step": 4956 }, { "epoch": 0.9908797881112416, "grad_norm": 2.0625, "learning_rate": 7.601207355046163e-06, "loss": 1.1236, "step": 4957 }, { "epoch": 0.9910796831663377, "grad_norm": 2.0625, "learning_rate": 7.600307330663477e-06, "loss": 0.9953, "step": 4958 }, { "epoch": 0.9912795782214338, "grad_norm": 2.109375, "learning_rate": 7.5994071907754185e-06, "loss": 1.0416, "step": 4959 }, { "epoch": 0.9914794732765299, "grad_norm": 2.203125, "learning_rate": 7.598506935421969e-06, "loss": 1.0197, "step": 4960 }, { "epoch": 0.9916793683316258, "grad_norm": 2.0, "learning_rate": 7.59760656464312e-06, "loss": 0.9814, "step": 4961 }, { "epoch": 0.9918792633867219, "grad_norm": 2.015625, "learning_rate": 7.596706078478866e-06, "loss": 0.9847, "step": 4962 }, { "epoch": 0.992079158441818, "grad_norm": 2.109375, "learning_rate": 7.595805476969205e-06, "loss": 1.0569, "step": 4963 }, { "epoch": 0.9922790534969141, "grad_norm": 2.09375, "learning_rate": 7.594904760154142e-06, "loss": 1.0601, "step": 4964 }, { "epoch": 0.9924789485520102, "grad_norm": 2.1875, "learning_rate": 7.594003928073685e-06, "loss": 1.1193, "step": 4965 }, { "epoch": 0.9926788436071062, "grad_norm": 2.09375, "learning_rate": 7.593102980767853e-06, "loss": 1.0392, "step": 4966 }, { "epoch": 0.9928787386622023, "grad_norm": 2.109375, "learning_rate": 7.592201918276661e-06, "loss": 1.0762, "step": 4967 }, { "epoch": 0.9930786337172984, "grad_norm": 2.375, "learning_rate": 7.591300740640138e-06, "loss": 1.0531, "step": 4968 }, { "epoch": 0.9932785287723945, "grad_norm": 2.171875, "learning_rate": 7.590399447898312e-06, "loss": 0.9448, "step": 4969 }, { "epoch": 0.9934784238274905, "grad_norm": 2.03125, "learning_rate": 7.589498040091221e-06, "loss": 0.9441, "step": 4970 }, { "epoch": 0.9936783188825866, "grad_norm": 2.125, "learning_rate": 7.588596517258901e-06, "loss": 1.1056, "step": 4971 }, { "epoch": 0.9938782139376827, "grad_norm": 2.109375, "learning_rate": 7.5876948794414015e-06, "loss": 1.0203, "step": 4972 }, { "epoch": 0.9940781089927788, "grad_norm": 2.0625, "learning_rate": 7.586793126678773e-06, "loss": 0.9404, "step": 4973 }, { "epoch": 0.9942780040478749, "grad_norm": 1.9765625, "learning_rate": 7.585891259011069e-06, "loss": 1.062, "step": 4974 }, { "epoch": 0.9944778991029709, "grad_norm": 2.234375, "learning_rate": 7.58498927647835e-06, "loss": 1.132, "step": 4975 }, { "epoch": 0.994677794158067, "grad_norm": 2.09375, "learning_rate": 7.5840871791206845e-06, "loss": 1.0546, "step": 4976 }, { "epoch": 0.9948776892131631, "grad_norm": 2.046875, "learning_rate": 7.583184966978143e-06, "loss": 1.0652, "step": 4977 }, { "epoch": 0.9950775842682592, "grad_norm": 2.015625, "learning_rate": 7.582282640090801e-06, "loss": 1.0238, "step": 4978 }, { "epoch": 0.9952774793233552, "grad_norm": 2.09375, "learning_rate": 7.581380198498743e-06, "loss": 1.0439, "step": 4979 }, { "epoch": 0.9954773743784513, "grad_norm": 2.09375, "learning_rate": 7.580477642242048e-06, "loss": 0.9548, "step": 4980 }, { "epoch": 0.9956772694335474, "grad_norm": 1.953125, "learning_rate": 7.5795749713608125e-06, "loss": 0.9202, "step": 4981 }, { "epoch": 0.9958771644886435, "grad_norm": 4.34375, "learning_rate": 7.578672185895133e-06, "loss": 1.0457, "step": 4982 }, { "epoch": 0.9960770595437395, "grad_norm": 2.015625, "learning_rate": 7.57776928588511e-06, "loss": 0.9742, "step": 4983 }, { "epoch": 0.9962769545988356, "grad_norm": 2.15625, "learning_rate": 7.576866271370851e-06, "loss": 1.0402, "step": 4984 }, { "epoch": 0.9964768496539317, "grad_norm": 2.015625, "learning_rate": 7.575963142392466e-06, "loss": 0.9298, "step": 4985 }, { "epoch": 0.9966767447090278, "grad_norm": 2.109375, "learning_rate": 7.5750598989900745e-06, "loss": 1.0118, "step": 4986 }, { "epoch": 0.9968766397641239, "grad_norm": 2.03125, "learning_rate": 7.574156541203799e-06, "loss": 0.9927, "step": 4987 }, { "epoch": 0.9970765348192199, "grad_norm": 2.109375, "learning_rate": 7.573253069073763e-06, "loss": 1.0184, "step": 4988 }, { "epoch": 0.997276429874316, "grad_norm": 2.09375, "learning_rate": 7.5723494826401e-06, "loss": 0.9645, "step": 4989 }, { "epoch": 0.9974763249294121, "grad_norm": 2.0625, "learning_rate": 7.571445781942948e-06, "loss": 0.9988, "step": 4990 }, { "epoch": 0.9976762199845082, "grad_norm": 2.0625, "learning_rate": 7.570541967022449e-06, "loss": 1.0216, "step": 4991 }, { "epoch": 0.9978761150396042, "grad_norm": 2.015625, "learning_rate": 7.569638037918751e-06, "loss": 1.0075, "step": 4992 }, { "epoch": 0.9980760100947003, "grad_norm": 2.09375, "learning_rate": 7.568733994672006e-06, "loss": 1.1056, "step": 4993 }, { "epoch": 0.9982759051497964, "grad_norm": 2.046875, "learning_rate": 7.567829837322371e-06, "loss": 0.9353, "step": 4994 }, { "epoch": 0.9984758002048925, "grad_norm": 2.15625, "learning_rate": 7.56692556591001e-06, "loss": 1.153, "step": 4995 }, { "epoch": 0.9986756952599886, "grad_norm": 2.078125, "learning_rate": 7.566021180475088e-06, "loss": 1.0002, "step": 4996 }, { "epoch": 0.9988755903150845, "grad_norm": 2.015625, "learning_rate": 7.565116681057779e-06, "loss": 0.9701, "step": 4997 }, { "epoch": 0.9990754853701806, "grad_norm": 2.203125, "learning_rate": 7.564212067698262e-06, "loss": 1.0824, "step": 4998 }, { "epoch": 0.9992753804252767, "grad_norm": 2.0625, "learning_rate": 7.563307340436718e-06, "loss": 0.9802, "step": 4999 }, { "epoch": 0.9994752754803728, "grad_norm": 2.03125, "learning_rate": 7.562402499313336e-06, "loss": 1.1098, "step": 5000 }, { "epoch": 0.9996751705354688, "grad_norm": 2.0625, "learning_rate": 7.561497544368309e-06, "loss": 1.0144, "step": 5001 }, { "epoch": 0.9998750655905649, "grad_norm": 2.1875, "learning_rate": 7.560592475641835e-06, "loss": 1.0494, "step": 5002 }, { "epoch": 1.000074960645661, "grad_norm": 1.9765625, "learning_rate": 7.559687293174115e-06, "loss": 0.9346, "step": 5003 }, { "epoch": 1.000274855700757, "grad_norm": 2.15625, "learning_rate": 7.55878199700536e-06, "loss": 1.0637, "step": 5004 }, { "epoch": 1.0004747507558531, "grad_norm": 2.015625, "learning_rate": 7.557876587175782e-06, "loss": 0.9782, "step": 5005 }, { "epoch": 1.0006746458109492, "grad_norm": 2.140625, "learning_rate": 7.5569710637255985e-06, "loss": 0.9836, "step": 5006 }, { "epoch": 1.0008745408660453, "grad_norm": 2.015625, "learning_rate": 7.556065426695035e-06, "loss": 1.0265, "step": 5007 }, { "epoch": 1.0010744359211414, "grad_norm": 2.09375, "learning_rate": 7.555159676124317e-06, "loss": 1.1274, "step": 5008 }, { "epoch": 1.0012743309762375, "grad_norm": 2.171875, "learning_rate": 7.55425381205368e-06, "loss": 1.0692, "step": 5009 }, { "epoch": 1.0014742260313336, "grad_norm": 2.078125, "learning_rate": 7.553347834523361e-06, "loss": 0.9171, "step": 5010 }, { "epoch": 1.0016741210864297, "grad_norm": 2.109375, "learning_rate": 7.552441743573604e-06, "loss": 1.0443, "step": 5011 }, { "epoch": 1.0018740161415256, "grad_norm": 2.1875, "learning_rate": 7.551535539244657e-06, "loss": 1.0757, "step": 5012 }, { "epoch": 1.0020739111966217, "grad_norm": 1.8984375, "learning_rate": 7.550629221576774e-06, "loss": 0.9581, "step": 5013 }, { "epoch": 1.0022738062517178, "grad_norm": 2.078125, "learning_rate": 7.5497227906102125e-06, "loss": 1.0106, "step": 5014 }, { "epoch": 1.002473701306814, "grad_norm": 2.046875, "learning_rate": 7.5488162463852385e-06, "loss": 1.0183, "step": 5015 }, { "epoch": 1.00267359636191, "grad_norm": 2.109375, "learning_rate": 7.547909588942118e-06, "loss": 0.9909, "step": 5016 }, { "epoch": 1.002873491417006, "grad_norm": 2.03125, "learning_rate": 7.547002818321125e-06, "loss": 0.9133, "step": 5017 }, { "epoch": 1.0030733864721022, "grad_norm": 2.078125, "learning_rate": 7.54609593456254e-06, "loss": 1.0139, "step": 5018 }, { "epoch": 1.0032732815271983, "grad_norm": 2.109375, "learning_rate": 7.545188937706647e-06, "loss": 1.036, "step": 5019 }, { "epoch": 1.0034731765822944, "grad_norm": 2.046875, "learning_rate": 7.544281827793731e-06, "loss": 1.0002, "step": 5020 }, { "epoch": 1.0036730716373903, "grad_norm": 2.046875, "learning_rate": 7.543374604864089e-06, "loss": 1.0068, "step": 5021 }, { "epoch": 1.0038729666924864, "grad_norm": 2.0625, "learning_rate": 7.5424672689580185e-06, "loss": 1.0954, "step": 5022 }, { "epoch": 1.0040728617475825, "grad_norm": 2.21875, "learning_rate": 7.541559820115823e-06, "loss": 1.0482, "step": 5023 }, { "epoch": 1.0042727568026786, "grad_norm": 1.96875, "learning_rate": 7.540652258377812e-06, "loss": 0.973, "step": 5024 }, { "epoch": 1.0044726518577747, "grad_norm": 2.0, "learning_rate": 7.539744583784299e-06, "loss": 1.0635, "step": 5025 }, { "epoch": 1.0046725469128708, "grad_norm": 2.0625, "learning_rate": 7.538836796375603e-06, "loss": 0.9313, "step": 5026 }, { "epoch": 1.0048724419679669, "grad_norm": 3.09375, "learning_rate": 7.537928896192048e-06, "loss": 0.9956, "step": 5027 }, { "epoch": 1.005072337023063, "grad_norm": 2.25, "learning_rate": 7.5370208832739614e-06, "loss": 1.0241, "step": 5028 }, { "epoch": 1.0052722320781589, "grad_norm": 2.21875, "learning_rate": 7.536112757661678e-06, "loss": 1.0751, "step": 5029 }, { "epoch": 1.005472127133255, "grad_norm": 2.03125, "learning_rate": 7.535204519395538e-06, "loss": 1.0667, "step": 5030 }, { "epoch": 1.005672022188351, "grad_norm": 2.15625, "learning_rate": 7.534296168515883e-06, "loss": 1.0316, "step": 5031 }, { "epoch": 1.0058719172434472, "grad_norm": 2.15625, "learning_rate": 7.5333877050630645e-06, "loss": 1.1334, "step": 5032 }, { "epoch": 1.0060718122985433, "grad_norm": 2.046875, "learning_rate": 7.532479129077433e-06, "loss": 1.0552, "step": 5033 }, { "epoch": 1.0062717073536394, "grad_norm": 2.125, "learning_rate": 7.53157044059935e-06, "loss": 1.0294, "step": 5034 }, { "epoch": 1.0064716024087355, "grad_norm": 2.015625, "learning_rate": 7.530661639669178e-06, "loss": 0.9817, "step": 5035 }, { "epoch": 1.0066714974638316, "grad_norm": 2.0, "learning_rate": 7.529752726327286e-06, "loss": 0.9589, "step": 5036 }, { "epoch": 1.0068713925189277, "grad_norm": 2.234375, "learning_rate": 7.5288437006140484e-06, "loss": 1.0801, "step": 5037 }, { "epoch": 1.0070712875740235, "grad_norm": 2.03125, "learning_rate": 7.527934562569843e-06, "loss": 0.9743, "step": 5038 }, { "epoch": 1.0072711826291196, "grad_norm": 2.015625, "learning_rate": 7.5270253122350555e-06, "loss": 0.9501, "step": 5039 }, { "epoch": 1.0074710776842157, "grad_norm": 2.21875, "learning_rate": 7.526115949650073e-06, "loss": 1.0456, "step": 5040 }, { "epoch": 1.0076709727393118, "grad_norm": 2.15625, "learning_rate": 7.5252064748552915e-06, "loss": 0.9935, "step": 5041 }, { "epoch": 1.007870867794408, "grad_norm": 2.046875, "learning_rate": 7.524296887891105e-06, "loss": 1.0365, "step": 5042 }, { "epoch": 1.008070762849504, "grad_norm": 2.078125, "learning_rate": 7.5233871887979215e-06, "loss": 1.073, "step": 5043 }, { "epoch": 1.0082706579046001, "grad_norm": 2.09375, "learning_rate": 7.5224773776161495e-06, "loss": 1.1247, "step": 5044 }, { "epoch": 1.0084705529596962, "grad_norm": 2.140625, "learning_rate": 7.5215674543862004e-06, "loss": 1.1517, "step": 5045 }, { "epoch": 1.0086704480147923, "grad_norm": 2.09375, "learning_rate": 7.520657419148496e-06, "loss": 1.0468, "step": 5046 }, { "epoch": 1.0088703430698882, "grad_norm": 1.984375, "learning_rate": 7.519747271943457e-06, "loss": 0.9336, "step": 5047 }, { "epoch": 1.0090702381249843, "grad_norm": 2.125, "learning_rate": 7.5188370128115125e-06, "loss": 1.0277, "step": 5048 }, { "epoch": 1.0092701331800804, "grad_norm": 2.078125, "learning_rate": 7.5179266417931e-06, "loss": 0.9911, "step": 5049 }, { "epoch": 1.0094700282351765, "grad_norm": 2.078125, "learning_rate": 7.517016158928652e-06, "loss": 0.9861, "step": 5050 }, { "epoch": 1.0096699232902726, "grad_norm": 2.078125, "learning_rate": 7.5161055642586154e-06, "loss": 1.0165, "step": 5051 }, { "epoch": 1.0098698183453687, "grad_norm": 2.140625, "learning_rate": 7.5151948578234405e-06, "loss": 1.0301, "step": 5052 }, { "epoch": 1.0100697134004648, "grad_norm": 2.078125, "learning_rate": 7.514284039663576e-06, "loss": 0.9706, "step": 5053 }, { "epoch": 1.010269608455561, "grad_norm": 2.1875, "learning_rate": 7.5133731098194854e-06, "loss": 1.0205, "step": 5054 }, { "epoch": 1.010469503510657, "grad_norm": 2.109375, "learning_rate": 7.5124620683316275e-06, "loss": 1.0774, "step": 5055 }, { "epoch": 1.0106693985657529, "grad_norm": 2.0625, "learning_rate": 7.511550915240475e-06, "loss": 1.0181, "step": 5056 }, { "epoch": 1.010869293620849, "grad_norm": 2.140625, "learning_rate": 7.510639650586498e-06, "loss": 0.9683, "step": 5057 }, { "epoch": 1.011069188675945, "grad_norm": 2.046875, "learning_rate": 7.509728274410175e-06, "loss": 1.035, "step": 5058 }, { "epoch": 1.0112690837310412, "grad_norm": 2.078125, "learning_rate": 7.508816786751991e-06, "loss": 1.0364, "step": 5059 }, { "epoch": 1.0114689787861373, "grad_norm": 2.125, "learning_rate": 7.507905187652433e-06, "loss": 1.0686, "step": 5060 }, { "epoch": 1.0116688738412334, "grad_norm": 2.046875, "learning_rate": 7.506993477151996e-06, "loss": 1.0212, "step": 5061 }, { "epoch": 1.0118687688963295, "grad_norm": 2.03125, "learning_rate": 7.506081655291174e-06, "loss": 1.054, "step": 5062 }, { "epoch": 1.0120686639514256, "grad_norm": 1.9453125, "learning_rate": 7.505169722110475e-06, "loss": 1.0896, "step": 5063 }, { "epoch": 1.0122685590065217, "grad_norm": 2.09375, "learning_rate": 7.504257677650402e-06, "loss": 1.1003, "step": 5064 }, { "epoch": 1.0124684540616176, "grad_norm": 2.140625, "learning_rate": 7.5033455219514705e-06, "loss": 1.0835, "step": 5065 }, { "epoch": 1.0126683491167137, "grad_norm": 2.046875, "learning_rate": 7.5024332550542e-06, "loss": 0.909, "step": 5066 }, { "epoch": 1.0128682441718098, "grad_norm": 2.234375, "learning_rate": 7.501520876999111e-06, "loss": 1.0562, "step": 5067 }, { "epoch": 1.0130681392269059, "grad_norm": 2.171875, "learning_rate": 7.500608387826731e-06, "loss": 1.0792, "step": 5068 }, { "epoch": 1.013268034282002, "grad_norm": 1.9453125, "learning_rate": 7.499695787577596e-06, "loss": 1.0013, "step": 5069 }, { "epoch": 1.013467929337098, "grad_norm": 1.984375, "learning_rate": 7.498783076292238e-06, "loss": 1.0855, "step": 5070 }, { "epoch": 1.0136678243921942, "grad_norm": 2.0, "learning_rate": 7.497870254011205e-06, "loss": 1.0653, "step": 5071 }, { "epoch": 1.0138677194472903, "grad_norm": 2.03125, "learning_rate": 7.496957320775042e-06, "loss": 0.9759, "step": 5072 }, { "epoch": 1.0140676145023861, "grad_norm": 2.03125, "learning_rate": 7.496044276624299e-06, "loss": 1.0511, "step": 5073 }, { "epoch": 1.0142675095574822, "grad_norm": 2.109375, "learning_rate": 7.495131121599537e-06, "loss": 0.9811, "step": 5074 }, { "epoch": 1.0144674046125783, "grad_norm": 1.984375, "learning_rate": 7.494217855741319e-06, "loss": 0.9164, "step": 5075 }, { "epoch": 1.0146672996676744, "grad_norm": 2.015625, "learning_rate": 7.493304479090208e-06, "loss": 0.971, "step": 5076 }, { "epoch": 1.0148671947227705, "grad_norm": 2.125, "learning_rate": 7.49239099168678e-06, "loss": 1.0386, "step": 5077 }, { "epoch": 1.0150670897778666, "grad_norm": 2.078125, "learning_rate": 7.491477393571609e-06, "loss": 0.9575, "step": 5078 }, { "epoch": 1.0152669848329627, "grad_norm": 2.09375, "learning_rate": 7.490563684785277e-06, "loss": 1.02, "step": 5079 }, { "epoch": 1.0154668798880588, "grad_norm": 2.03125, "learning_rate": 7.489649865368375e-06, "loss": 0.986, "step": 5080 }, { "epoch": 1.015666774943155, "grad_norm": 2.203125, "learning_rate": 7.488735935361491e-06, "loss": 1.0406, "step": 5081 }, { "epoch": 1.000124934409435, "grad_norm": 2.09375, "learning_rate": 7.4878218948052206e-06, "loss": 1.0103, "step": 5082 }, { "epoch": 1.000324829464531, "grad_norm": 2.03125, "learning_rate": 7.4869077437401705e-06, "loss": 0.9574, "step": 5083 }, { "epoch": 1.0005247245196272, "grad_norm": 1.9296875, "learning_rate": 7.485993482206941e-06, "loss": 0.916, "step": 5084 }, { "epoch": 1.0007246195747233, "grad_norm": 2.015625, "learning_rate": 7.48507911024615e-06, "loss": 1.0239, "step": 5085 }, { "epoch": 1.0009245146298194, "grad_norm": 2.015625, "learning_rate": 7.484164627898407e-06, "loss": 1.0095, "step": 5086 }, { "epoch": 1.0011244096849155, "grad_norm": 2.015625, "learning_rate": 7.483250035204338e-06, "loss": 0.9091, "step": 5087 }, { "epoch": 1.0013243047400116, "grad_norm": 2.171875, "learning_rate": 7.482335332204568e-06, "loss": 0.9781, "step": 5088 }, { "epoch": 1.0015241997951077, "grad_norm": 2.0, "learning_rate": 7.481420518939727e-06, "loss": 0.897, "step": 5089 }, { "epoch": 1.0017240948502038, "grad_norm": 2.0625, "learning_rate": 7.480505595450451e-06, "loss": 0.9291, "step": 5090 }, { "epoch": 1.0019239899052996, "grad_norm": 2.078125, "learning_rate": 7.4795905617773834e-06, "loss": 0.9006, "step": 5091 }, { "epoch": 1.0021238849603957, "grad_norm": 2.078125, "learning_rate": 7.4786754179611666e-06, "loss": 0.9878, "step": 5092 }, { "epoch": 1.0023237800154918, "grad_norm": 2.03125, "learning_rate": 7.477760164042451e-06, "loss": 0.9614, "step": 5093 }, { "epoch": 1.002523675070588, "grad_norm": 2.015625, "learning_rate": 7.476844800061896e-06, "loss": 0.9483, "step": 5094 }, { "epoch": 1.002723570125684, "grad_norm": 2.078125, "learning_rate": 7.4759293260601585e-06, "loss": 0.9782, "step": 5095 }, { "epoch": 1.0029234651807801, "grad_norm": 2.140625, "learning_rate": 7.475013742077905e-06, "loss": 0.9642, "step": 5096 }, { "epoch": 1.0031233602358762, "grad_norm": 2.0625, "learning_rate": 7.474098048155806e-06, "loss": 0.8765, "step": 5097 }, { "epoch": 1.0033232552909723, "grad_norm": 2.125, "learning_rate": 7.473182244334533e-06, "loss": 1.0415, "step": 5098 }, { "epoch": 1.0035231503460682, "grad_norm": 2.078125, "learning_rate": 7.472266330654773e-06, "loss": 0.9978, "step": 5099 }, { "epoch": 1.0037230454011643, "grad_norm": 2.390625, "learning_rate": 7.471350307157204e-06, "loss": 1.0603, "step": 5100 }, { "epoch": 1.0039229404562604, "grad_norm": 2.0625, "learning_rate": 7.470434173882519e-06, "loss": 0.9902, "step": 5101 }, { "epoch": 1.0041228355113565, "grad_norm": 1.9765625, "learning_rate": 7.469517930871411e-06, "loss": 0.8795, "step": 5102 }, { "epoch": 1.0043227305664526, "grad_norm": 2.046875, "learning_rate": 7.468601578164582e-06, "loss": 1.0094, "step": 5103 }, { "epoch": 1.0045226256215487, "grad_norm": 2.0625, "learning_rate": 7.467685115802734e-06, "loss": 0.9736, "step": 5104 }, { "epoch": 1.0047225206766448, "grad_norm": 2.03125, "learning_rate": 7.466768543826577e-06, "loss": 1.0023, "step": 5105 }, { "epoch": 1.004922415731741, "grad_norm": 2.0625, "learning_rate": 7.465851862276824e-06, "loss": 1.0279, "step": 5106 }, { "epoch": 1.005122310786837, "grad_norm": 2.09375, "learning_rate": 7.4649350711941935e-06, "loss": 1.079, "step": 5107 }, { "epoch": 1.0053222058419329, "grad_norm": 2.046875, "learning_rate": 7.464018170619413e-06, "loss": 0.9784, "step": 5108 }, { "epoch": 1.005522100897029, "grad_norm": 2.1875, "learning_rate": 7.4631011605932065e-06, "loss": 1.0549, "step": 5109 }, { "epoch": 1.005721995952125, "grad_norm": 2.0625, "learning_rate": 7.462184041156309e-06, "loss": 1.0397, "step": 5110 }, { "epoch": 1.0059218910072212, "grad_norm": 2.078125, "learning_rate": 7.461266812349462e-06, "loss": 1.0394, "step": 5111 }, { "epoch": 1.0061217860623173, "grad_norm": 2.171875, "learning_rate": 7.460349474213404e-06, "loss": 0.9565, "step": 5112 }, { "epoch": 1.0063216811174134, "grad_norm": 2.125, "learning_rate": 7.459432026788885e-06, "loss": 0.9286, "step": 5113 }, { "epoch": 1.0065215761725095, "grad_norm": 2.078125, "learning_rate": 7.45851447011666e-06, "loss": 0.9695, "step": 5114 }, { "epoch": 1.0067214712276056, "grad_norm": 2.171875, "learning_rate": 7.457596804237484e-06, "loss": 0.9977, "step": 5115 }, { "epoch": 1.0069213662827017, "grad_norm": 2.078125, "learning_rate": 7.45667902919212e-06, "loss": 0.9384, "step": 5116 }, { "epoch": 1.0071212613377976, "grad_norm": 2.109375, "learning_rate": 7.455761145021335e-06, "loss": 0.9987, "step": 5117 }, { "epoch": 1.0073211563928937, "grad_norm": 2.125, "learning_rate": 7.454843151765904e-06, "loss": 0.9157, "step": 5118 }, { "epoch": 1.0075210514479898, "grad_norm": 2.125, "learning_rate": 7.453925049466601e-06, "loss": 0.9917, "step": 5119 }, { "epoch": 1.0077209465030859, "grad_norm": 2.25, "learning_rate": 7.453006838164211e-06, "loss": 1.0897, "step": 5120 }, { "epoch": 1.007920841558182, "grad_norm": 1.9765625, "learning_rate": 7.452088517899518e-06, "loss": 0.9977, "step": 5121 }, { "epoch": 1.008120736613278, "grad_norm": 2.015625, "learning_rate": 7.451170088713315e-06, "loss": 0.938, "step": 5122 }, { "epoch": 1.0083206316683742, "grad_norm": 2.171875, "learning_rate": 7.450251550646398e-06, "loss": 1.0497, "step": 5123 }, { "epoch": 1.0085205267234703, "grad_norm": 2.15625, "learning_rate": 7.449332903739569e-06, "loss": 1.0163, "step": 5124 }, { "epoch": 1.0087204217785664, "grad_norm": 2.109375, "learning_rate": 7.4484141480336355e-06, "loss": 1.0633, "step": 5125 }, { "epoch": 1.0089203168336622, "grad_norm": 2.09375, "learning_rate": 7.447495283569406e-06, "loss": 0.9094, "step": 5126 }, { "epoch": 1.0091202118887583, "grad_norm": 2.125, "learning_rate": 7.446576310387696e-06, "loss": 0.9195, "step": 5127 }, { "epoch": 1.0093201069438544, "grad_norm": 2.21875, "learning_rate": 7.44565722852933e-06, "loss": 1.0892, "step": 5128 }, { "epoch": 1.0095200019989505, "grad_norm": 2.21875, "learning_rate": 7.44473803803513e-06, "loss": 0.9135, "step": 5129 }, { "epoch": 1.0097198970540466, "grad_norm": 2.078125, "learning_rate": 7.443818738945927e-06, "loss": 1.0406, "step": 5130 }, { "epoch": 1.0099197921091427, "grad_norm": 2.078125, "learning_rate": 7.442899331302557e-06, "loss": 0.9805, "step": 5131 }, { "epoch": 1.0101196871642388, "grad_norm": 2.1875, "learning_rate": 7.44197981514586e-06, "loss": 1.0349, "step": 5132 }, { "epoch": 1.010319582219335, "grad_norm": 1.96875, "learning_rate": 7.44106019051668e-06, "loss": 0.9732, "step": 5133 }, { "epoch": 1.010519477274431, "grad_norm": 2.0625, "learning_rate": 7.440140457455869e-06, "loss": 1.0351, "step": 5134 }, { "epoch": 1.010719372329527, "grad_norm": 2.046875, "learning_rate": 7.439220616004277e-06, "loss": 1.0287, "step": 5135 }, { "epoch": 1.010919267384623, "grad_norm": 2.171875, "learning_rate": 7.438300666202767e-06, "loss": 1.0394, "step": 5136 }, { "epoch": 1.011119162439719, "grad_norm": 2.203125, "learning_rate": 7.4373806080922005e-06, "loss": 0.9785, "step": 5137 }, { "epoch": 1.0113190574948152, "grad_norm": 2.109375, "learning_rate": 7.436460441713448e-06, "loss": 1.0159, "step": 5138 }, { "epoch": 1.0115189525499113, "grad_norm": 2.078125, "learning_rate": 7.435540167107384e-06, "loss": 0.9642, "step": 5139 }, { "epoch": 1.0117188476050074, "grad_norm": 2.21875, "learning_rate": 7.434619784314885e-06, "loss": 0.9716, "step": 5140 }, { "epoch": 1.0119187426601035, "grad_norm": 2.4375, "learning_rate": 7.433699293376835e-06, "loss": 0.9343, "step": 5141 }, { "epoch": 1.0121186377151996, "grad_norm": 2.171875, "learning_rate": 7.432778694334124e-06, "loss": 1.0314, "step": 5142 }, { "epoch": 1.0123185327702955, "grad_norm": 2.234375, "learning_rate": 7.431857987227642e-06, "loss": 1.0005, "step": 5143 }, { "epoch": 1.0125184278253916, "grad_norm": 2.046875, "learning_rate": 7.430937172098288e-06, "loss": 0.9713, "step": 5144 }, { "epoch": 1.0127183228804877, "grad_norm": 2.09375, "learning_rate": 7.430016248986964e-06, "loss": 1.0475, "step": 5145 }, { "epoch": 1.0129182179355838, "grad_norm": 2.203125, "learning_rate": 7.429095217934578e-06, "loss": 1.0999, "step": 5146 }, { "epoch": 1.0131181129906799, "grad_norm": 2.234375, "learning_rate": 7.428174078982042e-06, "loss": 1.0007, "step": 5147 }, { "epoch": 1.013318008045776, "grad_norm": 2.203125, "learning_rate": 7.427252832170273e-06, "loss": 0.9999, "step": 5148 }, { "epoch": 1.013517903100872, "grad_norm": 2.109375, "learning_rate": 7.426331477540193e-06, "loss": 1.0016, "step": 5149 }, { "epoch": 1.0137177981559682, "grad_norm": 1.9765625, "learning_rate": 7.425410015132728e-06, "loss": 0.9672, "step": 5150 }, { "epoch": 1.0139176932110643, "grad_norm": 2.09375, "learning_rate": 7.424488444988807e-06, "loss": 0.9684, "step": 5151 }, { "epoch": 1.0141175882661602, "grad_norm": 2.171875, "learning_rate": 7.42356676714937e-06, "loss": 0.8617, "step": 5152 }, { "epoch": 1.0143174833212563, "grad_norm": 2.046875, "learning_rate": 7.422644981655356e-06, "loss": 0.9923, "step": 5153 }, { "epoch": 1.0145173783763524, "grad_norm": 2.125, "learning_rate": 7.42172308854771e-06, "loss": 0.8722, "step": 5154 }, { "epoch": 1.0147172734314485, "grad_norm": 2.03125, "learning_rate": 7.420801087867382e-06, "loss": 0.9362, "step": 5155 }, { "epoch": 1.0149171684865446, "grad_norm": 2.109375, "learning_rate": 7.419878979655331e-06, "loss": 0.9697, "step": 5156 }, { "epoch": 1.0151170635416407, "grad_norm": 2.109375, "learning_rate": 7.418956763952512e-06, "loss": 1.011, "step": 5157 }, { "epoch": 1.0153169585967368, "grad_norm": 2.140625, "learning_rate": 7.418034440799892e-06, "loss": 1.0548, "step": 5158 }, { "epoch": 1.0155168536518329, "grad_norm": 2.046875, "learning_rate": 7.417112010238442e-06, "loss": 0.9735, "step": 5159 }, { "epoch": 1.015716748706929, "grad_norm": 2.109375, "learning_rate": 7.416189472309133e-06, "loss": 0.9858, "step": 5160 }, { "epoch": 1.0159166437620248, "grad_norm": 2.03125, "learning_rate": 7.415266827052947e-06, "loss": 0.9463, "step": 5161 }, { "epoch": 1.016116538817121, "grad_norm": 2.09375, "learning_rate": 7.414344074510865e-06, "loss": 0.9858, "step": 5162 }, { "epoch": 1.016316433872217, "grad_norm": 2.1875, "learning_rate": 7.413421214723878e-06, "loss": 1.0522, "step": 5163 }, { "epoch": 1.0165163289273131, "grad_norm": 2.09375, "learning_rate": 7.412498247732979e-06, "loss": 1.021, "step": 5164 }, { "epoch": 1.0167162239824092, "grad_norm": 1.921875, "learning_rate": 7.4115751735791655e-06, "loss": 0.9006, "step": 5165 }, { "epoch": 1.0169161190375053, "grad_norm": 2.1875, "learning_rate": 7.410651992303439e-06, "loss": 1.0421, "step": 5166 }, { "epoch": 1.0171160140926014, "grad_norm": 2.078125, "learning_rate": 7.40972870394681e-06, "loss": 1.002, "step": 5167 }, { "epoch": 1.0173159091476975, "grad_norm": 2.015625, "learning_rate": 7.408805308550288e-06, "loss": 0.9943, "step": 5168 }, { "epoch": 1.0175158042027936, "grad_norm": 2.21875, "learning_rate": 7.407881806154892e-06, "loss": 1.0892, "step": 5169 }, { "epoch": 1.0177156992578895, "grad_norm": 2.15625, "learning_rate": 7.406958196801644e-06, "loss": 0.9304, "step": 5170 }, { "epoch": 1.0179155943129856, "grad_norm": 2.171875, "learning_rate": 7.406034480531568e-06, "loss": 0.9051, "step": 5171 }, { "epoch": 1.0181154893680817, "grad_norm": 2.21875, "learning_rate": 7.405110657385699e-06, "loss": 0.9884, "step": 5172 }, { "epoch": 1.0183153844231778, "grad_norm": 2.1875, "learning_rate": 7.4041867274050715e-06, "loss": 1.0153, "step": 5173 }, { "epoch": 1.018515279478274, "grad_norm": 2.03125, "learning_rate": 7.403262690630725e-06, "loss": 0.8693, "step": 5174 }, { "epoch": 1.01871517453337, "grad_norm": 2.140625, "learning_rate": 7.402338547103708e-06, "loss": 1.012, "step": 5175 }, { "epoch": 1.0189150695884661, "grad_norm": 2.203125, "learning_rate": 7.401414296865068e-06, "loss": 1.0537, "step": 5176 }, { "epoch": 1.0191149646435622, "grad_norm": 2.109375, "learning_rate": 7.400489939955862e-06, "loss": 0.9194, "step": 5177 }, { "epoch": 1.019314859698658, "grad_norm": 2.171875, "learning_rate": 7.39956547641715e-06, "loss": 0.9411, "step": 5178 }, { "epoch": 1.0195147547537542, "grad_norm": 2.125, "learning_rate": 7.398640906289996e-06, "loss": 0.9623, "step": 5179 }, { "epoch": 1.0197146498088503, "grad_norm": 2.0, "learning_rate": 7.397716229615468e-06, "loss": 0.9034, "step": 5180 }, { "epoch": 1.0199145448639464, "grad_norm": 2.125, "learning_rate": 7.396791446434641e-06, "loss": 0.9748, "step": 5181 }, { "epoch": 1.0201144399190425, "grad_norm": 2.109375, "learning_rate": 7.3958665567885945e-06, "loss": 0.9026, "step": 5182 }, { "epoch": 1.0203143349741386, "grad_norm": 2.140625, "learning_rate": 7.394941560718412e-06, "loss": 0.9915, "step": 5183 }, { "epoch": 1.0205142300292347, "grad_norm": 2.140625, "learning_rate": 7.394016458265181e-06, "loss": 0.9947, "step": 5184 }, { "epoch": 1.0207141250843308, "grad_norm": 2.203125, "learning_rate": 7.3930912494699935e-06, "loss": 1.004, "step": 5185 }, { "epoch": 1.020914020139427, "grad_norm": 1.9765625, "learning_rate": 7.3921659343739485e-06, "loss": 0.9647, "step": 5186 }, { "epoch": 1.0211139151945228, "grad_norm": 2.0, "learning_rate": 7.391240513018149e-06, "loss": 0.9818, "step": 5187 }, { "epoch": 1.0213138102496189, "grad_norm": 2.109375, "learning_rate": 7.3903149854437e-06, "loss": 0.8793, "step": 5188 }, { "epoch": 1.021513705304715, "grad_norm": 2.046875, "learning_rate": 7.389389351691717e-06, "loss": 1.012, "step": 5189 }, { "epoch": 1.021713600359811, "grad_norm": 2.109375, "learning_rate": 7.3884636118033115e-06, "loss": 0.9668, "step": 5190 }, { "epoch": 1.0219134954149072, "grad_norm": 2.34375, "learning_rate": 7.387537765819609e-06, "loss": 0.9573, "step": 5191 }, { "epoch": 1.0221133904700033, "grad_norm": 2.046875, "learning_rate": 7.3866118137817344e-06, "loss": 0.9249, "step": 5192 }, { "epoch": 1.0223132855250994, "grad_norm": 2.09375, "learning_rate": 7.385685755730816e-06, "loss": 1.0011, "step": 5193 }, { "epoch": 1.0225131805801955, "grad_norm": 2.078125, "learning_rate": 7.384759591707993e-06, "loss": 1.0457, "step": 5194 }, { "epoch": 1.0227130756352916, "grad_norm": 2.171875, "learning_rate": 7.3838333217544035e-06, "loss": 0.986, "step": 5195 }, { "epoch": 1.0229129706903874, "grad_norm": 2.234375, "learning_rate": 7.3829069459111925e-06, "loss": 1.0031, "step": 5196 }, { "epoch": 1.0231128657454835, "grad_norm": 2.046875, "learning_rate": 7.381980464219508e-06, "loss": 0.9511, "step": 5197 }, { "epoch": 1.0233127608005796, "grad_norm": 2.15625, "learning_rate": 7.381053876720508e-06, "loss": 1.0288, "step": 5198 }, { "epoch": 1.0235126558556757, "grad_norm": 2.0, "learning_rate": 7.380127183455345e-06, "loss": 0.9307, "step": 5199 }, { "epoch": 1.0237125509107718, "grad_norm": 2.078125, "learning_rate": 7.379200384465191e-06, "loss": 0.9667, "step": 5200 }, { "epoch": 1.023912445965868, "grad_norm": 2.109375, "learning_rate": 7.378273479791208e-06, "loss": 0.968, "step": 5201 }, { "epoch": 1.024112341020964, "grad_norm": 2.109375, "learning_rate": 7.377346469474571e-06, "loss": 0.9898, "step": 5202 }, { "epoch": 1.0243122360760601, "grad_norm": 2.203125, "learning_rate": 7.376419353556458e-06, "loss": 0.9714, "step": 5203 }, { "epoch": 1.0245121311311562, "grad_norm": 2.078125, "learning_rate": 7.375492132078051e-06, "loss": 0.9354, "step": 5204 }, { "epoch": 1.0247120261862521, "grad_norm": 2.15625, "learning_rate": 7.374564805080537e-06, "loss": 1.0316, "step": 5205 }, { "epoch": 1.0249119212413482, "grad_norm": 2.09375, "learning_rate": 7.373637372605111e-06, "loss": 0.9557, "step": 5206 }, { "epoch": 1.0251118162964443, "grad_norm": 2.03125, "learning_rate": 7.372709834692962e-06, "loss": 0.9577, "step": 5207 }, { "epoch": 1.0253117113515404, "grad_norm": 2.0, "learning_rate": 7.371782191385297e-06, "loss": 0.8865, "step": 5208 }, { "epoch": 1.0255116064066365, "grad_norm": 2.171875, "learning_rate": 7.370854442723322e-06, "loss": 0.9397, "step": 5209 }, { "epoch": 1.0257115014617326, "grad_norm": 2.015625, "learning_rate": 7.369926588748244e-06, "loss": 0.9066, "step": 5210 }, { "epoch": 1.0259113965168287, "grad_norm": 2.171875, "learning_rate": 7.368998629501282e-06, "loss": 1.0038, "step": 5211 }, { "epoch": 1.0261112915719248, "grad_norm": 2.0625, "learning_rate": 7.368070565023653e-06, "loss": 0.9404, "step": 5212 }, { "epoch": 1.026311186627021, "grad_norm": 2.0625, "learning_rate": 7.367142395356581e-06, "loss": 0.8999, "step": 5213 }, { "epoch": 1.0265110816821168, "grad_norm": 2.125, "learning_rate": 7.3662141205412975e-06, "loss": 0.9461, "step": 5214 }, { "epoch": 1.026710976737213, "grad_norm": 2.296875, "learning_rate": 7.365285740619036e-06, "loss": 0.9219, "step": 5215 }, { "epoch": 1.026910871792309, "grad_norm": 2.15625, "learning_rate": 7.364357255631034e-06, "loss": 0.9997, "step": 5216 }, { "epoch": 1.027110766847405, "grad_norm": 2.109375, "learning_rate": 7.363428665618535e-06, "loss": 0.9891, "step": 5217 }, { "epoch": 1.0273106619025012, "grad_norm": 2.078125, "learning_rate": 7.3624999706227885e-06, "loss": 0.9606, "step": 5218 }, { "epoch": 1.0275105569575973, "grad_norm": 2.296875, "learning_rate": 7.361571170685043e-06, "loss": 1.0675, "step": 5219 }, { "epoch": 1.0277104520126934, "grad_norm": 2.046875, "learning_rate": 7.360642265846562e-06, "loss": 0.9213, "step": 5220 }, { "epoch": 1.0279103470677895, "grad_norm": 1.921875, "learning_rate": 7.359713256148601e-06, "loss": 0.8831, "step": 5221 }, { "epoch": 1.0281102421228854, "grad_norm": 2.0625, "learning_rate": 7.358784141632429e-06, "loss": 1.1255, "step": 5222 }, { "epoch": 1.0283101371779815, "grad_norm": 2.03125, "learning_rate": 7.357854922339318e-06, "loss": 0.9533, "step": 5223 }, { "epoch": 1.0285100322330776, "grad_norm": 2.125, "learning_rate": 7.356925598310544e-06, "loss": 0.9819, "step": 5224 }, { "epoch": 1.0287099272881737, "grad_norm": 2.046875, "learning_rate": 7.355996169587385e-06, "loss": 0.8877, "step": 5225 }, { "epoch": 1.0289098223432698, "grad_norm": 2.046875, "learning_rate": 7.35506663621113e-06, "loss": 0.9553, "step": 5226 }, { "epoch": 1.0291097173983659, "grad_norm": 2.15625, "learning_rate": 7.354136998223066e-06, "loss": 1.0505, "step": 5227 }, { "epoch": 1.029309612453462, "grad_norm": 2.15625, "learning_rate": 7.353207255664486e-06, "loss": 0.9877, "step": 5228 }, { "epoch": 1.029509507508558, "grad_norm": 2.15625, "learning_rate": 7.352277408576693e-06, "loss": 1.1288, "step": 5229 }, { "epoch": 1.0297094025636542, "grad_norm": 2.109375, "learning_rate": 7.3513474570009876e-06, "loss": 0.9763, "step": 5230 }, { "epoch": 1.02990929761875, "grad_norm": 2.078125, "learning_rate": 7.35041740097868e-06, "loss": 0.975, "step": 5231 }, { "epoch": 1.0301091926738462, "grad_norm": 2.234375, "learning_rate": 7.349487240551083e-06, "loss": 1.0409, "step": 5232 }, { "epoch": 1.0303090877289423, "grad_norm": 2.234375, "learning_rate": 7.348556975759512e-06, "loss": 1.0961, "step": 5233 }, { "epoch": 1.0305089827840384, "grad_norm": 2.015625, "learning_rate": 7.3476266066452925e-06, "loss": 1.0103, "step": 5234 }, { "epoch": 1.0307088778391345, "grad_norm": 2.15625, "learning_rate": 7.346696133249749e-06, "loss": 0.9519, "step": 5235 }, { "epoch": 1.0309087728942306, "grad_norm": 2.046875, "learning_rate": 7.345765555614214e-06, "loss": 0.9152, "step": 5236 }, { "epoch": 1.0311086679493267, "grad_norm": 2.078125, "learning_rate": 7.344834873780024e-06, "loss": 1.0414, "step": 5237 }, { "epoch": 1.0313085630044228, "grad_norm": 2.046875, "learning_rate": 7.3439040877885184e-06, "loss": 0.9877, "step": 5238 }, { "epoch": 1.0315084580595189, "grad_norm": 2.0625, "learning_rate": 7.342973197681045e-06, "loss": 0.9529, "step": 5239 }, { "epoch": 1.0317083531146147, "grad_norm": 2.046875, "learning_rate": 7.342042203498952e-06, "loss": 0.9597, "step": 5240 }, { "epoch": 1.0319082481697108, "grad_norm": 2.140625, "learning_rate": 7.341111105283594e-06, "loss": 0.9566, "step": 5241 }, { "epoch": 1.032108143224807, "grad_norm": 2.171875, "learning_rate": 7.34017990307633e-06, "loss": 1.0736, "step": 5242 }, { "epoch": 1.032308038279903, "grad_norm": 2.140625, "learning_rate": 7.339248596918526e-06, "loss": 1.0523, "step": 5243 }, { "epoch": 1.0325079333349991, "grad_norm": 2.0, "learning_rate": 7.338317186851549e-06, "loss": 1.0294, "step": 5244 }, { "epoch": 1.0327078283900952, "grad_norm": 2.140625, "learning_rate": 7.337385672916772e-06, "loss": 0.9691, "step": 5245 }, { "epoch": 1.0329077234451913, "grad_norm": 2.1875, "learning_rate": 7.336454055155573e-06, "loss": 1.0367, "step": 5246 }, { "epoch": 1.0331076185002874, "grad_norm": 2.046875, "learning_rate": 7.335522333609334e-06, "loss": 0.9095, "step": 5247 }, { "epoch": 1.0333075135553835, "grad_norm": 2.0625, "learning_rate": 7.334590508319442e-06, "loss": 0.9809, "step": 5248 }, { "epoch": 1.0335074086104794, "grad_norm": 2.046875, "learning_rate": 7.3336585793272905e-06, "loss": 0.9904, "step": 5249 }, { "epoch": 1.0337073036655755, "grad_norm": 2.140625, "learning_rate": 7.3327265466742734e-06, "loss": 0.941, "step": 5250 }, { "epoch": 1.0339071987206716, "grad_norm": 2.296875, "learning_rate": 7.331794410401792e-06, "loss": 1.0044, "step": 5251 }, { "epoch": 1.0341070937757677, "grad_norm": 2.109375, "learning_rate": 7.330862170551253e-06, "loss": 0.8835, "step": 5252 }, { "epoch": 1.0343069888308638, "grad_norm": 2.125, "learning_rate": 7.329929827164064e-06, "loss": 0.9934, "step": 5253 }, { "epoch": 1.03450688388596, "grad_norm": 2.15625, "learning_rate": 7.328997380281642e-06, "loss": 0.9716, "step": 5254 }, { "epoch": 1.034706778941056, "grad_norm": 2.25, "learning_rate": 7.3280648299454035e-06, "loss": 1.0728, "step": 5255 }, { "epoch": 1.034906673996152, "grad_norm": 2.171875, "learning_rate": 7.3271321761967754e-06, "loss": 0.9832, "step": 5256 }, { "epoch": 1.0351065690512482, "grad_norm": 2.09375, "learning_rate": 7.326199419077185e-06, "loss": 1.0008, "step": 5257 }, { "epoch": 1.035306464106344, "grad_norm": 2.015625, "learning_rate": 7.325266558628064e-06, "loss": 0.9786, "step": 5258 }, { "epoch": 1.0355063591614402, "grad_norm": 2.0625, "learning_rate": 7.32433359489085e-06, "loss": 0.9522, "step": 5259 }, { "epoch": 1.0357062542165363, "grad_norm": 2.0, "learning_rate": 7.323400527906988e-06, "loss": 0.9269, "step": 5260 }, { "epoch": 1.0359061492716324, "grad_norm": 2.171875, "learning_rate": 7.32246735771792e-06, "loss": 0.8953, "step": 5261 }, { "epoch": 1.0361060443267285, "grad_norm": 2.109375, "learning_rate": 7.321534084365101e-06, "loss": 1.0412, "step": 5262 }, { "epoch": 1.0363059393818246, "grad_norm": 2.234375, "learning_rate": 7.320600707889988e-06, "loss": 1.0666, "step": 5263 }, { "epoch": 1.0365058344369207, "grad_norm": 2.03125, "learning_rate": 7.3196672283340364e-06, "loss": 0.9433, "step": 5264 }, { "epoch": 1.0367057294920168, "grad_norm": 2.078125, "learning_rate": 7.318733645738716e-06, "loss": 0.9937, "step": 5265 }, { "epoch": 1.0369056245471127, "grad_norm": 2.109375, "learning_rate": 7.317799960145495e-06, "loss": 1.0192, "step": 5266 }, { "epoch": 1.0371055196022088, "grad_norm": 2.03125, "learning_rate": 7.316866171595846e-06, "loss": 0.9529, "step": 5267 }, { "epoch": 1.0373054146573049, "grad_norm": 2.140625, "learning_rate": 7.31593228013125e-06, "loss": 1.0775, "step": 5268 }, { "epoch": 1.037505309712401, "grad_norm": 2.09375, "learning_rate": 7.314998285793189e-06, "loss": 1.0069, "step": 5269 }, { "epoch": 1.037705204767497, "grad_norm": 2.265625, "learning_rate": 7.314064188623151e-06, "loss": 0.9793, "step": 5270 }, { "epoch": 1.0379050998225932, "grad_norm": 2.140625, "learning_rate": 7.313129988662631e-06, "loss": 1.0244, "step": 5271 }, { "epoch": 1.0381049948776893, "grad_norm": 2.140625, "learning_rate": 7.312195685953122e-06, "loss": 1.026, "step": 5272 }, { "epoch": 1.0383048899327854, "grad_norm": 2.0625, "learning_rate": 7.311261280536129e-06, "loss": 0.9317, "step": 5273 }, { "epoch": 1.0385047849878815, "grad_norm": 2.09375, "learning_rate": 7.310326772453156e-06, "loss": 1.0624, "step": 5274 }, { "epoch": 1.0387046800429773, "grad_norm": 2.03125, "learning_rate": 7.309392161745714e-06, "loss": 0.9344, "step": 5275 }, { "epoch": 1.0389045750980734, "grad_norm": 2.046875, "learning_rate": 7.3084574484553185e-06, "loss": 0.9381, "step": 5276 }, { "epoch": 1.0391044701531695, "grad_norm": 2.046875, "learning_rate": 7.307522632623491e-06, "loss": 0.9804, "step": 5277 }, { "epoch": 1.0393043652082656, "grad_norm": 2.234375, "learning_rate": 7.306587714291753e-06, "loss": 1.0398, "step": 5278 }, { "epoch": 1.0395042602633617, "grad_norm": 2.125, "learning_rate": 7.305652693501637e-06, "loss": 1.012, "step": 5279 }, { "epoch": 1.0397041553184578, "grad_norm": 2.09375, "learning_rate": 7.304717570294674e-06, "loss": 0.9732, "step": 5280 }, { "epoch": 1.039904050373554, "grad_norm": 2.078125, "learning_rate": 7.303782344712401e-06, "loss": 1.025, "step": 5281 }, { "epoch": 1.04010394542865, "grad_norm": 2.09375, "learning_rate": 7.302847016796365e-06, "loss": 0.9559, "step": 5282 }, { "epoch": 1.0403038404837461, "grad_norm": 2.078125, "learning_rate": 7.301911586588108e-06, "loss": 0.9812, "step": 5283 }, { "epoch": 1.040503735538842, "grad_norm": 2.15625, "learning_rate": 7.300976054129185e-06, "loss": 0.9592, "step": 5284 }, { "epoch": 1.0407036305939381, "grad_norm": 2.125, "learning_rate": 7.300040419461153e-06, "loss": 0.9289, "step": 5285 }, { "epoch": 1.0409035256490342, "grad_norm": 2.125, "learning_rate": 7.2991046826255685e-06, "loss": 0.9968, "step": 5286 }, { "epoch": 1.0411034207041303, "grad_norm": 2.15625, "learning_rate": 7.298168843664001e-06, "loss": 0.9789, "step": 5287 }, { "epoch": 1.0413033157592264, "grad_norm": 2.0625, "learning_rate": 7.297232902618021e-06, "loss": 0.8654, "step": 5288 }, { "epoch": 1.0415032108143225, "grad_norm": 2.21875, "learning_rate": 7.296296859529199e-06, "loss": 0.9042, "step": 5289 }, { "epoch": 1.0417031058694186, "grad_norm": 2.0625, "learning_rate": 7.295360714439115e-06, "loss": 0.9787, "step": 5290 }, { "epoch": 1.0419030009245147, "grad_norm": 2.046875, "learning_rate": 7.294424467389354e-06, "loss": 0.9877, "step": 5291 }, { "epoch": 1.0421028959796108, "grad_norm": 2.296875, "learning_rate": 7.293488118421502e-06, "loss": 1.0066, "step": 5292 }, { "epoch": 1.0423027910347067, "grad_norm": 2.328125, "learning_rate": 7.292551667577153e-06, "loss": 0.9006, "step": 5293 }, { "epoch": 1.0425026860898028, "grad_norm": 2.125, "learning_rate": 7.291615114897905e-06, "loss": 0.9904, "step": 5294 }, { "epoch": 1.0427025811448989, "grad_norm": 2.140625, "learning_rate": 7.290678460425358e-06, "loss": 0.9638, "step": 5295 }, { "epoch": 1.042902476199995, "grad_norm": 2.0625, "learning_rate": 7.289741704201119e-06, "loss": 0.9075, "step": 5296 }, { "epoch": 1.043102371255091, "grad_norm": 2.1875, "learning_rate": 7.288804846266796e-06, "loss": 0.9738, "step": 5297 }, { "epoch": 1.0433022663101872, "grad_norm": 2.078125, "learning_rate": 7.287867886664008e-06, "loss": 0.9857, "step": 5298 }, { "epoch": 1.0435021613652833, "grad_norm": 2.140625, "learning_rate": 7.286930825434372e-06, "loss": 0.9809, "step": 5299 }, { "epoch": 1.0437020564203794, "grad_norm": 3.0, "learning_rate": 7.2859936626195126e-06, "loss": 1.0613, "step": 5300 }, { "epoch": 1.0439019514754753, "grad_norm": 2.15625, "learning_rate": 7.285056398261059e-06, "loss": 0.9796, "step": 5301 }, { "epoch": 1.0441018465305714, "grad_norm": 2.03125, "learning_rate": 7.2841190324006464e-06, "loss": 0.8832, "step": 5302 }, { "epoch": 1.0443017415856675, "grad_norm": 2.203125, "learning_rate": 7.283181565079907e-06, "loss": 1.0052, "step": 5303 }, { "epoch": 1.0445016366407636, "grad_norm": 2.078125, "learning_rate": 7.28224399634049e-06, "loss": 0.9675, "step": 5304 }, { "epoch": 1.0447015316958597, "grad_norm": 2.15625, "learning_rate": 7.2813063262240355e-06, "loss": 1.0163, "step": 5305 }, { "epoch": 1.0449014267509558, "grad_norm": 2.265625, "learning_rate": 7.280368554772199e-06, "loss": 0.9621, "step": 5306 }, { "epoch": 1.0451013218060519, "grad_norm": 2.140625, "learning_rate": 7.2794306820266335e-06, "loss": 1.0418, "step": 5307 }, { "epoch": 1.045301216861148, "grad_norm": 2.21875, "learning_rate": 7.2784927080290025e-06, "loss": 1.0345, "step": 5308 }, { "epoch": 1.045501111916244, "grad_norm": 2.171875, "learning_rate": 7.277554632820968e-06, "loss": 1.0125, "step": 5309 }, { "epoch": 1.04570100697134, "grad_norm": 2.078125, "learning_rate": 7.2766164564442e-06, "loss": 0.9752, "step": 5310 }, { "epoch": 1.045900902026436, "grad_norm": 2.15625, "learning_rate": 7.275678178940372e-06, "loss": 1.078, "step": 5311 }, { "epoch": 1.0461007970815321, "grad_norm": 2.234375, "learning_rate": 7.274739800351162e-06, "loss": 0.9943, "step": 5312 }, { "epoch": 1.0463006921366282, "grad_norm": 2.28125, "learning_rate": 7.273801320718254e-06, "loss": 1.0436, "step": 5313 }, { "epoch": 1.0465005871917243, "grad_norm": 2.09375, "learning_rate": 7.272862740083332e-06, "loss": 1.027, "step": 5314 }, { "epoch": 1.0467004822468204, "grad_norm": 2.125, "learning_rate": 7.271924058488091e-06, "loss": 1.0068, "step": 5315 }, { "epoch": 1.0469003773019165, "grad_norm": 2.015625, "learning_rate": 7.270985275974227e-06, "loss": 0.947, "step": 5316 }, { "epoch": 1.0471002723570126, "grad_norm": 2.265625, "learning_rate": 7.270046392583438e-06, "loss": 1.0863, "step": 5317 }, { "epoch": 1.0473001674121087, "grad_norm": 2.328125, "learning_rate": 7.269107408357432e-06, "loss": 0.9548, "step": 5318 }, { "epoch": 1.0475000624672046, "grad_norm": 2.421875, "learning_rate": 7.2681683233379176e-06, "loss": 0.9997, "step": 5319 }, { "epoch": 1.0476999575223007, "grad_norm": 2.078125, "learning_rate": 7.267229137566607e-06, "loss": 0.9421, "step": 5320 }, { "epoch": 1.0478998525773968, "grad_norm": 2.03125, "learning_rate": 7.266289851085221e-06, "loss": 0.9005, "step": 5321 }, { "epoch": 1.048099747632493, "grad_norm": 2.140625, "learning_rate": 7.265350463935482e-06, "loss": 1.0251, "step": 5322 }, { "epoch": 1.048299642687589, "grad_norm": 2.234375, "learning_rate": 7.264410976159117e-06, "loss": 1.008, "step": 5323 }, { "epoch": 1.0484995377426851, "grad_norm": 2.078125, "learning_rate": 7.263471387797859e-06, "loss": 1.0063, "step": 5324 }, { "epoch": 1.0486994327977812, "grad_norm": 2.15625, "learning_rate": 7.262531698893443e-06, "loss": 1.0123, "step": 5325 }, { "epoch": 1.0488993278528773, "grad_norm": 2.140625, "learning_rate": 7.261591909487611e-06, "loss": 0.9359, "step": 5326 }, { "epoch": 1.0490992229079734, "grad_norm": 2.109375, "learning_rate": 7.260652019622108e-06, "loss": 0.9853, "step": 5327 }, { "epoch": 1.0492991179630693, "grad_norm": 2.296875, "learning_rate": 7.259712029338682e-06, "loss": 0.9753, "step": 5328 }, { "epoch": 1.0494990130181654, "grad_norm": 2.15625, "learning_rate": 7.25877193867909e-06, "loss": 1.0207, "step": 5329 }, { "epoch": 1.0496989080732615, "grad_norm": 2.109375, "learning_rate": 7.2578317476850915e-06, "loss": 1.0005, "step": 5330 }, { "epoch": 1.0498988031283576, "grad_norm": 2.125, "learning_rate": 7.256891456398446e-06, "loss": 0.9452, "step": 5331 }, { "epoch": 1.0500986981834537, "grad_norm": 2.1875, "learning_rate": 7.2559510648609234e-06, "loss": 1.0279, "step": 5332 }, { "epoch": 1.0502985932385498, "grad_norm": 2.1875, "learning_rate": 7.255010573114296e-06, "loss": 1.002, "step": 5333 }, { "epoch": 1.050498488293646, "grad_norm": 2.0, "learning_rate": 7.254069981200339e-06, "loss": 0.8251, "step": 5334 }, { "epoch": 1.050698383348742, "grad_norm": 2.125, "learning_rate": 7.253129289160835e-06, "loss": 1.0114, "step": 5335 }, { "epoch": 1.0508982784038379, "grad_norm": 2.171875, "learning_rate": 7.252188497037569e-06, "loss": 1.0005, "step": 5336 }, { "epoch": 1.051098173458934, "grad_norm": 2.171875, "learning_rate": 7.251247604872329e-06, "loss": 0.9921, "step": 5337 }, { "epoch": 1.05129806851403, "grad_norm": 2.234375, "learning_rate": 7.250306612706912e-06, "loss": 0.9755, "step": 5338 }, { "epoch": 1.0514979635691262, "grad_norm": 2.0625, "learning_rate": 7.249365520583116e-06, "loss": 0.942, "step": 5339 }, { "epoch": 1.0516978586242223, "grad_norm": 2.09375, "learning_rate": 7.248424328542742e-06, "loss": 1.055, "step": 5340 }, { "epoch": 1.0518977536793184, "grad_norm": 2.140625, "learning_rate": 7.247483036627601e-06, "loss": 1.0509, "step": 5341 }, { "epoch": 1.0520976487344145, "grad_norm": 2.046875, "learning_rate": 7.246541644879502e-06, "loss": 0.9881, "step": 5342 }, { "epoch": 1.0522975437895106, "grad_norm": 2.125, "learning_rate": 7.245600153340264e-06, "loss": 0.9471, "step": 5343 }, { "epoch": 1.0524974388446067, "grad_norm": 2.109375, "learning_rate": 7.244658562051708e-06, "loss": 0.9631, "step": 5344 }, { "epoch": 1.0526973338997028, "grad_norm": 2.1875, "learning_rate": 7.243716871055657e-06, "loss": 0.9688, "step": 5345 }, { "epoch": 1.0528972289547986, "grad_norm": 2.203125, "learning_rate": 7.242775080393942e-06, "loss": 0.9997, "step": 5346 }, { "epoch": 1.0530971240098947, "grad_norm": 2.09375, "learning_rate": 7.241833190108399e-06, "loss": 0.947, "step": 5347 }, { "epoch": 1.0532970190649908, "grad_norm": 2.125, "learning_rate": 7.240891200240864e-06, "loss": 1.0385, "step": 5348 }, { "epoch": 1.053496914120087, "grad_norm": 2.234375, "learning_rate": 7.239949110833182e-06, "loss": 1.0076, "step": 5349 }, { "epoch": 1.053696809175183, "grad_norm": 2.0625, "learning_rate": 7.2390069219272e-06, "loss": 1.033, "step": 5350 }, { "epoch": 1.0538967042302791, "grad_norm": 2.046875, "learning_rate": 7.238064633564769e-06, "loss": 1.0587, "step": 5351 }, { "epoch": 1.0540965992853752, "grad_norm": 2.171875, "learning_rate": 7.2371222457877456e-06, "loss": 1.0285, "step": 5352 }, { "epoch": 1.0542964943404713, "grad_norm": 2.046875, "learning_rate": 7.236179758637991e-06, "loss": 0.9545, "step": 5353 }, { "epoch": 1.0544963893955672, "grad_norm": 2.109375, "learning_rate": 7.2352371721573715e-06, "loss": 1.0356, "step": 5354 }, { "epoch": 1.0546962844506633, "grad_norm": 2.109375, "learning_rate": 7.234294486387754e-06, "loss": 0.9266, "step": 5355 }, { "epoch": 1.0548961795057594, "grad_norm": 2.15625, "learning_rate": 7.233351701371015e-06, "loss": 0.9389, "step": 5356 }, { "epoch": 1.0550960745608555, "grad_norm": 2.09375, "learning_rate": 7.232408817149032e-06, "loss": 0.9307, "step": 5357 }, { "epoch": 1.0552959696159516, "grad_norm": 2.171875, "learning_rate": 7.231465833763687e-06, "loss": 1.0046, "step": 5358 }, { "epoch": 1.0554958646710477, "grad_norm": 2.1875, "learning_rate": 7.230522751256868e-06, "loss": 0.9269, "step": 5359 }, { "epoch": 1.0556957597261438, "grad_norm": 2.0625, "learning_rate": 7.229579569670467e-06, "loss": 0.9892, "step": 5360 }, { "epoch": 1.05589565478124, "grad_norm": 2.046875, "learning_rate": 7.2286362890463805e-06, "loss": 0.9841, "step": 5361 }, { "epoch": 1.056095549836336, "grad_norm": 2.3125, "learning_rate": 7.227692909426507e-06, "loss": 1.094, "step": 5362 }, { "epoch": 1.056295444891432, "grad_norm": 2.203125, "learning_rate": 7.226749430852753e-06, "loss": 0.9837, "step": 5363 }, { "epoch": 1.056495339946528, "grad_norm": 2.015625, "learning_rate": 7.225805853367027e-06, "loss": 0.9187, "step": 5364 }, { "epoch": 1.056695235001624, "grad_norm": 1.9765625, "learning_rate": 7.224862177011241e-06, "loss": 0.9311, "step": 5365 }, { "epoch": 1.0568951300567202, "grad_norm": 2.125, "learning_rate": 7.223918401827318e-06, "loss": 0.9494, "step": 5366 }, { "epoch": 1.0570950251118163, "grad_norm": 2.078125, "learning_rate": 7.222974527857176e-06, "loss": 0.9531, "step": 5367 }, { "epoch": 1.0572949201669124, "grad_norm": 2.125, "learning_rate": 7.222030555142742e-06, "loss": 1.0476, "step": 5368 }, { "epoch": 1.0574948152220085, "grad_norm": 2.15625, "learning_rate": 7.2210864837259474e-06, "loss": 0.9496, "step": 5369 }, { "epoch": 1.0576947102771046, "grad_norm": 2.140625, "learning_rate": 7.22014231364873e-06, "loss": 1.0252, "step": 5370 }, { "epoch": 1.0578946053322007, "grad_norm": 2.1875, "learning_rate": 7.219198044953026e-06, "loss": 0.9492, "step": 5371 }, { "epoch": 1.0580945003872966, "grad_norm": 2.265625, "learning_rate": 7.2182536776807845e-06, "loss": 1.0375, "step": 5372 }, { "epoch": 1.0582943954423927, "grad_norm": 2.125, "learning_rate": 7.217309211873951e-06, "loss": 0.9649, "step": 5373 }, { "epoch": 1.0584942904974888, "grad_norm": 2.171875, "learning_rate": 7.2163646475744775e-06, "loss": 0.9941, "step": 5374 }, { "epoch": 1.0586941855525849, "grad_norm": 2.171875, "learning_rate": 7.215419984824325e-06, "loss": 1.0115, "step": 5375 }, { "epoch": 1.058894080607681, "grad_norm": 2.046875, "learning_rate": 7.214475223665452e-06, "loss": 1.0192, "step": 5376 }, { "epoch": 1.059093975662777, "grad_norm": 2.171875, "learning_rate": 7.213530364139826e-06, "loss": 1.0203, "step": 5377 }, { "epoch": 1.0592938707178732, "grad_norm": 2.140625, "learning_rate": 7.2125854062894184e-06, "loss": 0.9952, "step": 5378 }, { "epoch": 1.0594937657729693, "grad_norm": 2.1875, "learning_rate": 7.211640350156203e-06, "loss": 1.0367, "step": 5379 }, { "epoch": 1.0596936608280654, "grad_norm": 2.15625, "learning_rate": 7.21069519578216e-06, "loss": 0.9191, "step": 5380 }, { "epoch": 1.0598935558831613, "grad_norm": 2.15625, "learning_rate": 7.209749943209273e-06, "loss": 0.983, "step": 5381 }, { "epoch": 1.0600934509382574, "grad_norm": 2.03125, "learning_rate": 7.208804592479528e-06, "loss": 0.9328, "step": 5382 }, { "epoch": 1.0602933459933535, "grad_norm": 2.109375, "learning_rate": 7.207859143634919e-06, "loss": 0.9217, "step": 5383 }, { "epoch": 1.0604932410484496, "grad_norm": 2.15625, "learning_rate": 7.206913596717444e-06, "loss": 1.083, "step": 5384 }, { "epoch": 1.0606931361035457, "grad_norm": 2.1875, "learning_rate": 7.205967951769101e-06, "loss": 1.018, "step": 5385 }, { "epoch": 1.0608930311586418, "grad_norm": 2.046875, "learning_rate": 7.2050222088318996e-06, "loss": 0.9879, "step": 5386 }, { "epoch": 1.0610929262137379, "grad_norm": 2.09375, "learning_rate": 7.204076367947846e-06, "loss": 1.0441, "step": 5387 }, { "epoch": 1.061292821268834, "grad_norm": 2.203125, "learning_rate": 7.203130429158954e-06, "loss": 0.9985, "step": 5388 }, { "epoch": 1.0614927163239298, "grad_norm": 2.03125, "learning_rate": 7.202184392507245e-06, "loss": 0.9014, "step": 5389 }, { "epoch": 1.061692611379026, "grad_norm": 2.046875, "learning_rate": 7.2012382580347405e-06, "loss": 0.9595, "step": 5390 }, { "epoch": 1.061892506434122, "grad_norm": 2.21875, "learning_rate": 7.200292025783467e-06, "loss": 1.0465, "step": 5391 }, { "epoch": 1.0620924014892181, "grad_norm": 2.140625, "learning_rate": 7.199345695795458e-06, "loss": 0.9302, "step": 5392 }, { "epoch": 1.0622922965443142, "grad_norm": 2.21875, "learning_rate": 7.198399268112747e-06, "loss": 0.9104, "step": 5393 }, { "epoch": 1.0624921915994103, "grad_norm": 2.15625, "learning_rate": 7.197452742777376e-06, "loss": 0.9866, "step": 5394 }, { "epoch": 1.0626920866545064, "grad_norm": 2.1875, "learning_rate": 7.196506119831388e-06, "loss": 0.8295, "step": 5395 }, { "epoch": 1.0628919817096025, "grad_norm": 2.1875, "learning_rate": 7.1955593993168335e-06, "loss": 0.979, "step": 5396 }, { "epoch": 1.0630918767646986, "grad_norm": 2.078125, "learning_rate": 7.194612581275765e-06, "loss": 0.9389, "step": 5397 }, { "epoch": 1.0632917718197945, "grad_norm": 2.140625, "learning_rate": 7.1936656657502405e-06, "loss": 0.972, "step": 5398 }, { "epoch": 1.0634916668748906, "grad_norm": 2.234375, "learning_rate": 7.19271865278232e-06, "loss": 0.9406, "step": 5399 }, { "epoch": 1.0636915619299867, "grad_norm": 2.171875, "learning_rate": 7.1917715424140736e-06, "loss": 1.0155, "step": 5400 }, { "epoch": 1.0638914569850828, "grad_norm": 2.15625, "learning_rate": 7.190824334687567e-06, "loss": 0.9612, "step": 5401 }, { "epoch": 1.064091352040179, "grad_norm": 2.234375, "learning_rate": 7.1898770296448775e-06, "loss": 0.9834, "step": 5402 }, { "epoch": 1.064291247095275, "grad_norm": 2.140625, "learning_rate": 7.188929627328085e-06, "loss": 0.9918, "step": 5403 }, { "epoch": 1.064491142150371, "grad_norm": 2.171875, "learning_rate": 7.187982127779272e-06, "loss": 0.8852, "step": 5404 }, { "epoch": 1.0646910372054672, "grad_norm": 2.140625, "learning_rate": 7.187034531040526e-06, "loss": 0.9535, "step": 5405 }, { "epoch": 1.0648909322605633, "grad_norm": 2.140625, "learning_rate": 7.186086837153941e-06, "loss": 0.9757, "step": 5406 }, { "epoch": 1.0650908273156592, "grad_norm": 2.15625, "learning_rate": 7.185139046161611e-06, "loss": 0.9962, "step": 5407 }, { "epoch": 1.0652907223707553, "grad_norm": 2.1875, "learning_rate": 7.184191158105639e-06, "loss": 1.019, "step": 5408 }, { "epoch": 1.0654906174258514, "grad_norm": 2.09375, "learning_rate": 7.183243173028128e-06, "loss": 0.9948, "step": 5409 }, { "epoch": 1.0656905124809475, "grad_norm": 2.140625, "learning_rate": 7.182295090971189e-06, "loss": 0.947, "step": 5410 }, { "epoch": 1.0658904075360436, "grad_norm": 2.234375, "learning_rate": 7.181346911976935e-06, "loss": 0.9602, "step": 5411 }, { "epoch": 1.0660903025911397, "grad_norm": 2.078125, "learning_rate": 7.180398636087485e-06, "loss": 0.9464, "step": 5412 }, { "epoch": 1.0662901976462358, "grad_norm": 2.21875, "learning_rate": 7.179450263344959e-06, "loss": 1.0616, "step": 5413 }, { "epoch": 1.0664900927013319, "grad_norm": 2.140625, "learning_rate": 7.178501793791487e-06, "loss": 0.9518, "step": 5414 }, { "epoch": 1.066689987756428, "grad_norm": 1.953125, "learning_rate": 7.1775532274691965e-06, "loss": 0.895, "step": 5415 }, { "epoch": 1.0668898828115239, "grad_norm": 2.078125, "learning_rate": 7.176604564420224e-06, "loss": 0.9706, "step": 5416 }, { "epoch": 1.06708977786662, "grad_norm": 2.140625, "learning_rate": 7.17565580468671e-06, "loss": 0.976, "step": 5417 }, { "epoch": 1.067289672921716, "grad_norm": 2.140625, "learning_rate": 7.174706948310797e-06, "loss": 0.9806, "step": 5418 }, { "epoch": 1.0674895679768122, "grad_norm": 2.15625, "learning_rate": 7.173757995334634e-06, "loss": 1.0118, "step": 5419 }, { "epoch": 1.0676894630319083, "grad_norm": 2.03125, "learning_rate": 7.172808945800372e-06, "loss": 0.9701, "step": 5420 }, { "epoch": 1.0678893580870044, "grad_norm": 2.015625, "learning_rate": 7.171859799750169e-06, "loss": 0.9854, "step": 5421 }, { "epoch": 1.0680892531421005, "grad_norm": 2.109375, "learning_rate": 7.170910557226186e-06, "loss": 0.9865, "step": 5422 }, { "epoch": 1.0682891481971966, "grad_norm": 2.15625, "learning_rate": 7.1699612182705894e-06, "loss": 1.0558, "step": 5423 }, { "epoch": 1.0684890432522924, "grad_norm": 2.046875, "learning_rate": 7.169011782925545e-06, "loss": 0.9415, "step": 5424 }, { "epoch": 1.0686889383073885, "grad_norm": 2.109375, "learning_rate": 7.16806225123323e-06, "loss": 1.0717, "step": 5425 }, { "epoch": 1.0688888333624846, "grad_norm": 2.0625, "learning_rate": 7.167112623235821e-06, "loss": 0.8841, "step": 5426 }, { "epoch": 1.0690887284175807, "grad_norm": 2.109375, "learning_rate": 7.1661628989755016e-06, "loss": 0.9353, "step": 5427 }, { "epoch": 1.0692886234726768, "grad_norm": 2.25, "learning_rate": 7.165213078494456e-06, "loss": 1.0364, "step": 5428 }, { "epoch": 1.069488518527773, "grad_norm": 2.28125, "learning_rate": 7.164263161834879e-06, "loss": 0.9237, "step": 5429 }, { "epoch": 1.069688413582869, "grad_norm": 2.109375, "learning_rate": 7.163313149038962e-06, "loss": 0.9851, "step": 5430 }, { "epoch": 1.0698883086379651, "grad_norm": 2.3125, "learning_rate": 7.162363040148905e-06, "loss": 1.0218, "step": 5431 }, { "epoch": 1.0700882036930612, "grad_norm": 2.078125, "learning_rate": 7.161412835206915e-06, "loss": 0.9518, "step": 5432 }, { "epoch": 1.0702880987481573, "grad_norm": 2.28125, "learning_rate": 7.160462534255195e-06, "loss": 1.0077, "step": 5433 }, { "epoch": 1.0704879938032532, "grad_norm": 2.109375, "learning_rate": 7.159512137335962e-06, "loss": 0.9244, "step": 5434 }, { "epoch": 1.0706878888583493, "grad_norm": 2.09375, "learning_rate": 7.15856164449143e-06, "loss": 1.0028, "step": 5435 }, { "epoch": 1.0708877839134454, "grad_norm": 2.125, "learning_rate": 7.15761105576382e-06, "loss": 1.0343, "step": 5436 }, { "epoch": 1.0710876789685415, "grad_norm": 2.0625, "learning_rate": 7.156660371195357e-06, "loss": 0.9361, "step": 5437 }, { "epoch": 1.0712875740236376, "grad_norm": 2.21875, "learning_rate": 7.155709590828271e-06, "loss": 0.986, "step": 5438 }, { "epoch": 1.0714874690787337, "grad_norm": 2.15625, "learning_rate": 7.154758714704797e-06, "loss": 1.0873, "step": 5439 }, { "epoch": 1.0716873641338298, "grad_norm": 2.234375, "learning_rate": 7.153807742867169e-06, "loss": 0.9848, "step": 5440 }, { "epoch": 1.071887259188926, "grad_norm": 2.21875, "learning_rate": 7.152856675357631e-06, "loss": 0.9704, "step": 5441 }, { "epoch": 1.0720871542440218, "grad_norm": 2.15625, "learning_rate": 7.15190551221843e-06, "loss": 0.9277, "step": 5442 }, { "epoch": 1.072287049299118, "grad_norm": 2.25, "learning_rate": 7.150954253491818e-06, "loss": 0.9952, "step": 5443 }, { "epoch": 1.072486944354214, "grad_norm": 2.234375, "learning_rate": 7.1500028992200445e-06, "loss": 0.9912, "step": 5444 }, { "epoch": 1.07268683940931, "grad_norm": 2.1875, "learning_rate": 7.1490514494453736e-06, "loss": 0.964, "step": 5445 }, { "epoch": 1.0728867344644062, "grad_norm": 2.203125, "learning_rate": 7.148099904210067e-06, "loss": 1.1416, "step": 5446 }, { "epoch": 1.0730866295195023, "grad_norm": 2.046875, "learning_rate": 7.14714826355639e-06, "loss": 0.927, "step": 5447 }, { "epoch": 1.0732865245745984, "grad_norm": 2.140625, "learning_rate": 7.146196527526617e-06, "loss": 0.9822, "step": 5448 }, { "epoch": 1.0734864196296945, "grad_norm": 2.21875, "learning_rate": 7.145244696163025e-06, "loss": 1.0945, "step": 5449 }, { "epoch": 1.0736863146847906, "grad_norm": 2.078125, "learning_rate": 7.144292769507891e-06, "loss": 0.9773, "step": 5450 }, { "epoch": 1.0738862097398865, "grad_norm": 2.046875, "learning_rate": 7.143340747603503e-06, "loss": 0.9519, "step": 5451 }, { "epoch": 1.0740861047949826, "grad_norm": 2.171875, "learning_rate": 7.142388630492147e-06, "loss": 0.9781, "step": 5452 }, { "epoch": 1.0742859998500787, "grad_norm": 2.09375, "learning_rate": 7.141436418216118e-06, "loss": 0.954, "step": 5453 }, { "epoch": 1.0744858949051748, "grad_norm": 1.984375, "learning_rate": 7.14048411081771e-06, "loss": 0.8944, "step": 5454 }, { "epoch": 1.0746857899602709, "grad_norm": 2.203125, "learning_rate": 7.139531708339227e-06, "loss": 0.9898, "step": 5455 }, { "epoch": 1.074885685015367, "grad_norm": 2.046875, "learning_rate": 7.138579210822976e-06, "loss": 0.9674, "step": 5456 }, { "epoch": 1.075085580070463, "grad_norm": 2.15625, "learning_rate": 7.137626618311262e-06, "loss": 0.9289, "step": 5457 }, { "epoch": 1.0752854751255592, "grad_norm": 2.09375, "learning_rate": 7.136673930846404e-06, "loss": 0.9659, "step": 5458 }, { "epoch": 1.075485370180655, "grad_norm": 2.3125, "learning_rate": 7.135721148470718e-06, "loss": 1.0642, "step": 5459 }, { "epoch": 1.0756852652357511, "grad_norm": 2.1875, "learning_rate": 7.134768271226525e-06, "loss": 1.0505, "step": 5460 }, { "epoch": 1.0758851602908472, "grad_norm": 2.140625, "learning_rate": 7.133815299156155e-06, "loss": 0.9973, "step": 5461 }, { "epoch": 1.0760850553459433, "grad_norm": 2.0625, "learning_rate": 7.132862232301937e-06, "loss": 0.9298, "step": 5462 }, { "epoch": 1.0762849504010394, "grad_norm": 2.078125, "learning_rate": 7.1319090707062065e-06, "loss": 0.9508, "step": 5463 }, { "epoch": 1.0764848454561355, "grad_norm": 2.078125, "learning_rate": 7.130955814411302e-06, "loss": 1.013, "step": 5464 }, { "epoch": 1.0766847405112316, "grad_norm": 2.1875, "learning_rate": 7.130002463459569e-06, "loss": 1.0727, "step": 5465 }, { "epoch": 1.0768846355663277, "grad_norm": 2.125, "learning_rate": 7.129049017893352e-06, "loss": 0.9824, "step": 5466 }, { "epoch": 1.0770845306214238, "grad_norm": 2.078125, "learning_rate": 7.128095477755006e-06, "loss": 0.8751, "step": 5467 }, { "epoch": 1.07728442567652, "grad_norm": 2.0625, "learning_rate": 7.127141843086888e-06, "loss": 0.9525, "step": 5468 }, { "epoch": 1.0774843207316158, "grad_norm": 2.0625, "learning_rate": 7.126188113931353e-06, "loss": 0.9577, "step": 5469 }, { "epoch": 1.077684215786712, "grad_norm": 2.171875, "learning_rate": 7.125234290330774e-06, "loss": 1.0297, "step": 5470 }, { "epoch": 1.077884110841808, "grad_norm": 2.09375, "learning_rate": 7.124280372327511e-06, "loss": 0.9516, "step": 5471 }, { "epoch": 1.0780840058969041, "grad_norm": 2.078125, "learning_rate": 7.123326359963941e-06, "loss": 0.978, "step": 5472 }, { "epoch": 1.0782839009520002, "grad_norm": 2.078125, "learning_rate": 7.122372253282442e-06, "loss": 0.924, "step": 5473 }, { "epoch": 1.0784837960070963, "grad_norm": 2.234375, "learning_rate": 7.121418052325395e-06, "loss": 0.9747, "step": 5474 }, { "epoch": 1.0786836910621924, "grad_norm": 2.125, "learning_rate": 7.1204637571351835e-06, "loss": 0.9425, "step": 5475 }, { "epoch": 1.0788835861172885, "grad_norm": 2.125, "learning_rate": 7.119509367754198e-06, "loss": 0.8835, "step": 5476 }, { "epoch": 1.0790834811723844, "grad_norm": 2.125, "learning_rate": 7.118554884224833e-06, "loss": 0.9955, "step": 5477 }, { "epoch": 1.0792833762274805, "grad_norm": 2.15625, "learning_rate": 7.117600306589486e-06, "loss": 0.9089, "step": 5478 }, { "epoch": 1.0794832712825766, "grad_norm": 2.09375, "learning_rate": 7.11664563489056e-06, "loss": 0.9591, "step": 5479 }, { "epoch": 1.0796831663376727, "grad_norm": 2.25, "learning_rate": 7.1156908691704604e-06, "loss": 1.0151, "step": 5480 }, { "epoch": 1.0798830613927688, "grad_norm": 2.265625, "learning_rate": 7.114736009471599e-06, "loss": 1.1079, "step": 5481 }, { "epoch": 1.080082956447865, "grad_norm": 2.15625, "learning_rate": 7.113781055836391e-06, "loss": 0.9985, "step": 5482 }, { "epoch": 1.080282851502961, "grad_norm": 2.109375, "learning_rate": 7.112826008307252e-06, "loss": 1.023, "step": 5483 }, { "epoch": 1.080482746558057, "grad_norm": 2.171875, "learning_rate": 7.111870866926609e-06, "loss": 1.0379, "step": 5484 }, { "epoch": 1.0806826416131532, "grad_norm": 2.09375, "learning_rate": 7.110915631736887e-06, "loss": 0.9583, "step": 5485 }, { "epoch": 1.080882536668249, "grad_norm": 2.171875, "learning_rate": 7.109960302780518e-06, "loss": 1.0009, "step": 5486 }, { "epoch": 1.0810824317233452, "grad_norm": 2.078125, "learning_rate": 7.109004880099938e-06, "loss": 0.9799, "step": 5487 }, { "epoch": 1.0812823267784413, "grad_norm": 2.140625, "learning_rate": 7.108049363737586e-06, "loss": 0.9076, "step": 5488 }, { "epoch": 1.0814822218335374, "grad_norm": 2.21875, "learning_rate": 7.107093753735907e-06, "loss": 0.9846, "step": 5489 }, { "epoch": 1.0816821168886335, "grad_norm": 2.109375, "learning_rate": 7.106138050137349e-06, "loss": 1.0191, "step": 5490 }, { "epoch": 1.0818820119437296, "grad_norm": 2.140625, "learning_rate": 7.105182252984363e-06, "loss": 0.9635, "step": 5491 }, { "epoch": 1.0820819069988257, "grad_norm": 2.140625, "learning_rate": 7.104226362319405e-06, "loss": 0.8561, "step": 5492 }, { "epoch": 1.0822818020539218, "grad_norm": 2.0625, "learning_rate": 7.103270378184939e-06, "loss": 0.9057, "step": 5493 }, { "epoch": 1.0824816971090176, "grad_norm": 2.0625, "learning_rate": 7.102314300623425e-06, "loss": 0.9439, "step": 5494 }, { "epoch": 1.0826815921641137, "grad_norm": 2.046875, "learning_rate": 7.101358129677336e-06, "loss": 0.9405, "step": 5495 }, { "epoch": 1.0828814872192098, "grad_norm": 2.078125, "learning_rate": 7.100401865389144e-06, "loss": 0.9559, "step": 5496 }, { "epoch": 1.083081382274306, "grad_norm": 2.046875, "learning_rate": 7.099445507801324e-06, "loss": 0.9675, "step": 5497 }, { "epoch": 1.083281277329402, "grad_norm": 2.25, "learning_rate": 7.0984890569563595e-06, "loss": 0.9406, "step": 5498 }, { "epoch": 1.0834811723844981, "grad_norm": 2.1875, "learning_rate": 7.097532512896734e-06, "loss": 1.0121, "step": 5499 }, { "epoch": 1.0836810674395942, "grad_norm": 2.046875, "learning_rate": 7.096575875664939e-06, "loss": 0.9781, "step": 5500 }, { "epoch": 1.0838809624946903, "grad_norm": 2.359375, "learning_rate": 7.095619145303469e-06, "loss": 1.0666, "step": 5501 }, { "epoch": 1.0840808575497864, "grad_norm": 2.21875, "learning_rate": 7.094662321854818e-06, "loss": 0.9576, "step": 5502 }, { "epoch": 1.0842807526048825, "grad_norm": 2.40625, "learning_rate": 7.09370540536149e-06, "loss": 0.9628, "step": 5503 }, { "epoch": 1.0844806476599784, "grad_norm": 2.109375, "learning_rate": 7.092748395865995e-06, "loss": 0.8464, "step": 5504 }, { "epoch": 1.0846805427150745, "grad_norm": 2.453125, "learning_rate": 7.091791293410838e-06, "loss": 1.0179, "step": 5505 }, { "epoch": 1.0848804377701706, "grad_norm": 2.296875, "learning_rate": 7.090834098038535e-06, "loss": 1.0201, "step": 5506 }, { "epoch": 1.0850803328252667, "grad_norm": 2.09375, "learning_rate": 7.0898768097916045e-06, "loss": 0.9504, "step": 5507 }, { "epoch": 1.0852802278803628, "grad_norm": 2.015625, "learning_rate": 7.08891942871257e-06, "loss": 0.8916, "step": 5508 }, { "epoch": 1.085480122935459, "grad_norm": 2.0625, "learning_rate": 7.087961954843956e-06, "loss": 0.9202, "step": 5509 }, { "epoch": 1.085680017990555, "grad_norm": 2.078125, "learning_rate": 7.087004388228297e-06, "loss": 0.9722, "step": 5510 }, { "epoch": 1.0858799130456511, "grad_norm": 2.09375, "learning_rate": 7.086046728908125e-06, "loss": 1.0817, "step": 5511 }, { "epoch": 1.086079808100747, "grad_norm": 2.078125, "learning_rate": 7.085088976925979e-06, "loss": 0.8969, "step": 5512 }, { "epoch": 1.086279703155843, "grad_norm": 2.109375, "learning_rate": 7.084131132324405e-06, "loss": 0.9292, "step": 5513 }, { "epoch": 1.0864795982109392, "grad_norm": 2.09375, "learning_rate": 7.083173195145947e-06, "loss": 0.8921, "step": 5514 }, { "epoch": 1.0866794932660353, "grad_norm": 2.125, "learning_rate": 7.08221516543316e-06, "loss": 0.9571, "step": 5515 }, { "epoch": 1.0868793883211314, "grad_norm": 2.125, "learning_rate": 7.081257043228597e-06, "loss": 1.0, "step": 5516 }, { "epoch": 1.0870792833762275, "grad_norm": 2.265625, "learning_rate": 7.080298828574818e-06, "loss": 0.9582, "step": 5517 }, { "epoch": 1.0872791784313236, "grad_norm": 2.0625, "learning_rate": 7.079340521514389e-06, "loss": 0.9338, "step": 5518 }, { "epoch": 1.0874790734864197, "grad_norm": 2.125, "learning_rate": 7.078382122089873e-06, "loss": 1.0542, "step": 5519 }, { "epoch": 1.0876789685415158, "grad_norm": 2.09375, "learning_rate": 7.0774236303438485e-06, "loss": 0.9537, "step": 5520 }, { "epoch": 1.0878788635966117, "grad_norm": 2.25, "learning_rate": 7.076465046318886e-06, "loss": 0.903, "step": 5521 }, { "epoch": 1.0880787586517078, "grad_norm": 2.21875, "learning_rate": 7.075506370057569e-06, "loss": 0.9471, "step": 5522 }, { "epoch": 1.0882786537068039, "grad_norm": 2.078125, "learning_rate": 7.074547601602479e-06, "loss": 0.96, "step": 5523 }, { "epoch": 1.0884785487619, "grad_norm": 1.9765625, "learning_rate": 7.073588740996208e-06, "loss": 0.9324, "step": 5524 }, { "epoch": 1.088678443816996, "grad_norm": 2.125, "learning_rate": 7.072629788281345e-06, "loss": 1.002, "step": 5525 }, { "epoch": 1.0888783388720922, "grad_norm": 2.046875, "learning_rate": 7.07167074350049e-06, "loss": 0.9232, "step": 5526 }, { "epoch": 1.0890782339271883, "grad_norm": 2.0625, "learning_rate": 7.0707116066962415e-06, "loss": 0.9671, "step": 5527 }, { "epoch": 1.0892781289822844, "grad_norm": 2.21875, "learning_rate": 7.069752377911203e-06, "loss": 0.9804, "step": 5528 }, { "epoch": 1.0894780240373805, "grad_norm": 2.296875, "learning_rate": 7.068793057187986e-06, "loss": 0.8772, "step": 5529 }, { "epoch": 1.0896779190924764, "grad_norm": 2.203125, "learning_rate": 7.067833644569202e-06, "loss": 1.0187, "step": 5530 }, { "epoch": 1.0898778141475725, "grad_norm": 2.15625, "learning_rate": 7.066874140097468e-06, "loss": 0.9763, "step": 5531 }, { "epoch": 1.0900777092026686, "grad_norm": 2.125, "learning_rate": 7.065914543815408e-06, "loss": 0.9646, "step": 5532 }, { "epoch": 1.0902776042577647, "grad_norm": 2.15625, "learning_rate": 7.064954855765641e-06, "loss": 0.9707, "step": 5533 }, { "epoch": 1.0904774993128608, "grad_norm": 2.03125, "learning_rate": 7.063995075990801e-06, "loss": 1.0343, "step": 5534 }, { "epoch": 1.0906773943679569, "grad_norm": 2.046875, "learning_rate": 7.063035204533522e-06, "loss": 0.9079, "step": 5535 }, { "epoch": 1.090877289423053, "grad_norm": 2.0, "learning_rate": 7.062075241436439e-06, "loss": 0.9172, "step": 5536 }, { "epoch": 1.091077184478149, "grad_norm": 2.296875, "learning_rate": 7.061115186742192e-06, "loss": 1.0221, "step": 5537 }, { "epoch": 1.0912770795332452, "grad_norm": 2.03125, "learning_rate": 7.060155040493431e-06, "loss": 0.9661, "step": 5538 }, { "epoch": 1.091476974588341, "grad_norm": 2.171875, "learning_rate": 7.059194802732802e-06, "loss": 1.0385, "step": 5539 }, { "epoch": 1.0916768696434371, "grad_norm": 2.203125, "learning_rate": 7.0582344735029585e-06, "loss": 0.9009, "step": 5540 }, { "epoch": 1.0918767646985332, "grad_norm": 2.15625, "learning_rate": 7.0572740528465625e-06, "loss": 0.9973, "step": 5541 }, { "epoch": 1.0920766597536293, "grad_norm": 2.171875, "learning_rate": 7.0563135408062696e-06, "loss": 1.022, "step": 5542 }, { "epoch": 1.0922765548087254, "grad_norm": 2.140625, "learning_rate": 7.055352937424751e-06, "loss": 0.947, "step": 5543 }, { "epoch": 1.0924764498638215, "grad_norm": 2.046875, "learning_rate": 7.0543922427446734e-06, "loss": 0.9117, "step": 5544 }, { "epoch": 1.0926763449189176, "grad_norm": 2.140625, "learning_rate": 7.053431456808712e-06, "loss": 0.9254, "step": 5545 }, { "epoch": 1.0928762399740137, "grad_norm": 2.15625, "learning_rate": 7.052470579659545e-06, "loss": 1.0449, "step": 5546 }, { "epoch": 1.0930761350291096, "grad_norm": 2.109375, "learning_rate": 7.051509611339853e-06, "loss": 0.9985, "step": 5547 }, { "epoch": 1.0932760300842057, "grad_norm": 2.125, "learning_rate": 7.050548551892325e-06, "loss": 0.9993, "step": 5548 }, { "epoch": 1.0934759251393018, "grad_norm": 2.65625, "learning_rate": 7.049587401359647e-06, "loss": 0.9773, "step": 5549 }, { "epoch": 1.093675820194398, "grad_norm": 2.171875, "learning_rate": 7.048626159784517e-06, "loss": 1.0189, "step": 5550 }, { "epoch": 1.093875715249494, "grad_norm": 2.109375, "learning_rate": 7.047664827209633e-06, "loss": 0.9613, "step": 5551 }, { "epoch": 1.09407561030459, "grad_norm": 2.125, "learning_rate": 7.0467034036776945e-06, "loss": 0.938, "step": 5552 }, { "epoch": 1.0942755053596862, "grad_norm": 2.09375, "learning_rate": 7.04574188923141e-06, "loss": 0.9479, "step": 5553 }, { "epoch": 1.0944754004147823, "grad_norm": 2.203125, "learning_rate": 7.044780283913488e-06, "loss": 1.0025, "step": 5554 }, { "epoch": 1.0946752954698784, "grad_norm": 2.203125, "learning_rate": 7.043818587766645e-06, "loss": 1.0156, "step": 5555 }, { "epoch": 1.0948751905249743, "grad_norm": 2.140625, "learning_rate": 7.0428568008336e-06, "loss": 1.1055, "step": 5556 }, { "epoch": 1.0950750855800704, "grad_norm": 2.375, "learning_rate": 7.041894923157071e-06, "loss": 1.0323, "step": 5557 }, { "epoch": 1.0952749806351665, "grad_norm": 2.15625, "learning_rate": 7.04093295477979e-06, "loss": 1.035, "step": 5558 }, { "epoch": 1.0954748756902626, "grad_norm": 2.09375, "learning_rate": 7.039970895744485e-06, "loss": 1.0006, "step": 5559 }, { "epoch": 1.0956747707453587, "grad_norm": 2.140625, "learning_rate": 7.039008746093889e-06, "loss": 1.0166, "step": 5560 }, { "epoch": 1.0958746658004548, "grad_norm": 2.140625, "learning_rate": 7.038046505870744e-06, "loss": 0.9481, "step": 5561 }, { "epoch": 1.0960745608555509, "grad_norm": 2.140625, "learning_rate": 7.03708417511779e-06, "loss": 0.9159, "step": 5562 }, { "epoch": 1.096274455910647, "grad_norm": 2.109375, "learning_rate": 7.036121753877776e-06, "loss": 1.0279, "step": 5563 }, { "epoch": 1.096474350965743, "grad_norm": 2.15625, "learning_rate": 7.035159242193449e-06, "loss": 1.049, "step": 5564 }, { "epoch": 1.096674246020839, "grad_norm": 2.15625, "learning_rate": 7.0341966401075664e-06, "loss": 0.9342, "step": 5565 }, { "epoch": 1.096874141075935, "grad_norm": 2.0625, "learning_rate": 7.033233947662887e-06, "loss": 0.8556, "step": 5566 }, { "epoch": 1.0970740361310312, "grad_norm": 2.09375, "learning_rate": 7.0322711649021735e-06, "loss": 0.9793, "step": 5567 }, { "epoch": 1.0972739311861273, "grad_norm": 2.09375, "learning_rate": 7.031308291868191e-06, "loss": 1.0418, "step": 5568 }, { "epoch": 1.0974738262412234, "grad_norm": 2.09375, "learning_rate": 7.030345328603711e-06, "loss": 0.9986, "step": 5569 }, { "epoch": 1.0976737212963195, "grad_norm": 2.09375, "learning_rate": 7.029382275151508e-06, "loss": 0.9284, "step": 5570 }, { "epoch": 1.0978736163514156, "grad_norm": 2.125, "learning_rate": 7.0284191315543606e-06, "loss": 1.0405, "step": 5571 }, { "epoch": 1.0980735114065117, "grad_norm": 2.21875, "learning_rate": 7.027455897855053e-06, "loss": 1.0036, "step": 5572 }, { "epoch": 1.0982734064616078, "grad_norm": 2.09375, "learning_rate": 7.026492574096371e-06, "loss": 0.999, "step": 5573 }, { "epoch": 1.0984733015167036, "grad_norm": 2.171875, "learning_rate": 7.025529160321107e-06, "loss": 0.9254, "step": 5574 }, { "epoch": 1.0986731965717997, "grad_norm": 2.09375, "learning_rate": 7.024565656572051e-06, "loss": 1.0104, "step": 5575 }, { "epoch": 1.0988730916268958, "grad_norm": 2.203125, "learning_rate": 7.023602062892005e-06, "loss": 1.0095, "step": 5576 }, { "epoch": 1.099072986681992, "grad_norm": 2.125, "learning_rate": 7.022638379323774e-06, "loss": 0.9809, "step": 5577 }, { "epoch": 1.099272881737088, "grad_norm": 2.125, "learning_rate": 7.021674605910161e-06, "loss": 0.9851, "step": 5578 }, { "epoch": 1.0994727767921841, "grad_norm": 2.234375, "learning_rate": 7.0207107426939755e-06, "loss": 0.9673, "step": 5579 }, { "epoch": 1.0996726718472802, "grad_norm": 2.203125, "learning_rate": 7.019746789718038e-06, "loss": 0.9563, "step": 5580 }, { "epoch": 1.0998725669023763, "grad_norm": 2.109375, "learning_rate": 7.018782747025161e-06, "loss": 0.9989, "step": 5581 }, { "epoch": 1.1000724619574722, "grad_norm": 2.171875, "learning_rate": 7.0178186146581725e-06, "loss": 0.9575, "step": 5582 }, { "epoch": 1.1002723570125683, "grad_norm": 1.9609375, "learning_rate": 7.0168543926598965e-06, "loss": 0.9197, "step": 5583 }, { "epoch": 1.1004722520676644, "grad_norm": 2.21875, "learning_rate": 7.015890081073162e-06, "loss": 0.9936, "step": 5584 }, { "epoch": 1.1006721471227605, "grad_norm": 2.0, "learning_rate": 7.014925679940807e-06, "loss": 0.8853, "step": 5585 }, { "epoch": 1.1008720421778566, "grad_norm": 2.15625, "learning_rate": 7.013961189305668e-06, "loss": 1.0785, "step": 5586 }, { "epoch": 1.1010719372329527, "grad_norm": 2.078125, "learning_rate": 7.012996609210587e-06, "loss": 0.9489, "step": 5587 }, { "epoch": 1.1012718322880488, "grad_norm": 2.09375, "learning_rate": 7.012031939698414e-06, "loss": 0.9351, "step": 5588 }, { "epoch": 1.101471727343145, "grad_norm": 2.234375, "learning_rate": 7.011067180811994e-06, "loss": 0.9849, "step": 5589 }, { "epoch": 1.101671622398241, "grad_norm": 2.1875, "learning_rate": 7.010102332594186e-06, "loss": 1.059, "step": 5590 }, { "epoch": 1.1018715174533371, "grad_norm": 2.234375, "learning_rate": 7.009137395087848e-06, "loss": 1.0314, "step": 5591 }, { "epoch": 1.102071412508433, "grad_norm": 2.203125, "learning_rate": 7.008172368335842e-06, "loss": 0.8892, "step": 5592 }, { "epoch": 1.102271307563529, "grad_norm": 2.203125, "learning_rate": 7.007207252381032e-06, "loss": 0.9998, "step": 5593 }, { "epoch": 1.1024712026186252, "grad_norm": 2.25, "learning_rate": 7.006242047266292e-06, "loss": 0.9962, "step": 5594 }, { "epoch": 1.1026710976737213, "grad_norm": 2.3125, "learning_rate": 7.005276753034492e-06, "loss": 0.9656, "step": 5595 }, { "epoch": 1.1028709927288174, "grad_norm": 2.109375, "learning_rate": 7.004311369728514e-06, "loss": 1.0528, "step": 5596 }, { "epoch": 1.1030708877839135, "grad_norm": 2.1875, "learning_rate": 7.003345897391241e-06, "loss": 0.94, "step": 5597 }, { "epoch": 1.1032707828390096, "grad_norm": 2.25, "learning_rate": 7.002380336065555e-06, "loss": 1.0038, "step": 5598 }, { "epoch": 1.1034706778941057, "grad_norm": 2.09375, "learning_rate": 7.0014146857943486e-06, "loss": 0.9046, "step": 5599 }, { "epoch": 1.1036705729492016, "grad_norm": 2.15625, "learning_rate": 7.000448946620517e-06, "loss": 1.0184, "step": 5600 }, { "epoch": 1.1038704680042977, "grad_norm": 2.0625, "learning_rate": 6.999483118586955e-06, "loss": 0.9515, "step": 5601 }, { "epoch": 1.1040703630593938, "grad_norm": 2.046875, "learning_rate": 6.998517201736566e-06, "loss": 0.9786, "step": 5602 }, { "epoch": 1.1042702581144899, "grad_norm": 2.28125, "learning_rate": 6.997551196112259e-06, "loss": 1.0022, "step": 5603 }, { "epoch": 1.104470153169586, "grad_norm": 2.15625, "learning_rate": 6.996585101756938e-06, "loss": 0.9757, "step": 5604 }, { "epoch": 1.104670048224682, "grad_norm": 2.015625, "learning_rate": 6.9956189187135226e-06, "loss": 0.8901, "step": 5605 }, { "epoch": 1.1048699432797782, "grad_norm": 2.078125, "learning_rate": 6.994652647024927e-06, "loss": 0.9721, "step": 5606 }, { "epoch": 1.1050698383348743, "grad_norm": 2.046875, "learning_rate": 6.9936862867340736e-06, "loss": 1.022, "step": 5607 }, { "epoch": 1.1052697333899704, "grad_norm": 2.140625, "learning_rate": 6.99271983788389e-06, "loss": 1.0285, "step": 5608 }, { "epoch": 1.1054696284450662, "grad_norm": 2.1875, "learning_rate": 6.991753300517302e-06, "loss": 1.0873, "step": 5609 }, { "epoch": 1.1056695235001623, "grad_norm": 2.09375, "learning_rate": 6.990786674677246e-06, "loss": 0.9022, "step": 5610 }, { "epoch": 1.1058694185552584, "grad_norm": 1.96875, "learning_rate": 6.989819960406659e-06, "loss": 0.8544, "step": 5611 }, { "epoch": 1.1060693136103545, "grad_norm": 2.046875, "learning_rate": 6.988853157748479e-06, "loss": 0.9286, "step": 5612 }, { "epoch": 1.1062692086654506, "grad_norm": 2.1875, "learning_rate": 6.987886266745658e-06, "loss": 0.9779, "step": 5613 }, { "epoch": 1.1064691037205467, "grad_norm": 2.125, "learning_rate": 6.986919287441141e-06, "loss": 0.8984, "step": 5614 }, { "epoch": 1.1066689987756428, "grad_norm": 2.09375, "learning_rate": 6.985952219877879e-06, "loss": 0.9477, "step": 5615 }, { "epoch": 1.106868893830739, "grad_norm": 2.109375, "learning_rate": 6.984985064098833e-06, "loss": 1.0035, "step": 5616 }, { "epoch": 1.1070687888858348, "grad_norm": 2.078125, "learning_rate": 6.984017820146962e-06, "loss": 0.8997, "step": 5617 }, { "epoch": 1.107268683940931, "grad_norm": 2.140625, "learning_rate": 6.983050488065231e-06, "loss": 1.0193, "step": 5618 }, { "epoch": 1.107468578996027, "grad_norm": 1.9921875, "learning_rate": 6.98208306789661e-06, "loss": 0.9445, "step": 5619 }, { "epoch": 1.1076684740511231, "grad_norm": 2.1875, "learning_rate": 6.98111555968407e-06, "loss": 0.9414, "step": 5620 }, { "epoch": 1.1078683691062192, "grad_norm": 2.296875, "learning_rate": 6.9801479634705885e-06, "loss": 1.0249, "step": 5621 }, { "epoch": 1.1080682641613153, "grad_norm": 2.046875, "learning_rate": 6.979180279299147e-06, "loss": 0.9569, "step": 5622 }, { "epoch": 1.1082681592164114, "grad_norm": 2.078125, "learning_rate": 6.978212507212727e-06, "loss": 1.0551, "step": 5623 }, { "epoch": 1.1084680542715075, "grad_norm": 2.171875, "learning_rate": 6.97724464725432e-06, "loss": 0.875, "step": 5624 }, { "epoch": 1.1086679493266036, "grad_norm": 2.078125, "learning_rate": 6.9762766994669175e-06, "loss": 0.9271, "step": 5625 }, { "epoch": 1.1088678443816997, "grad_norm": 2.078125, "learning_rate": 6.9753086638935144e-06, "loss": 0.9936, "step": 5626 }, { "epoch": 1.1090677394367956, "grad_norm": 2.0625, "learning_rate": 6.974340540577111e-06, "loss": 0.9048, "step": 5627 }, { "epoch": 1.1092676344918917, "grad_norm": 2.234375, "learning_rate": 6.973372329560713e-06, "loss": 0.9632, "step": 5628 }, { "epoch": 1.1094675295469878, "grad_norm": 2.015625, "learning_rate": 6.972404030887325e-06, "loss": 0.8766, "step": 5629 }, { "epoch": 1.109667424602084, "grad_norm": 2.125, "learning_rate": 6.971435644599961e-06, "loss": 0.8904, "step": 5630 }, { "epoch": 1.10986731965718, "grad_norm": 2.09375, "learning_rate": 6.970467170741637e-06, "loss": 0.9855, "step": 5631 }, { "epoch": 1.110067214712276, "grad_norm": 2.203125, "learning_rate": 6.969498609355372e-06, "loss": 0.9869, "step": 5632 }, { "epoch": 1.1102671097673722, "grad_norm": 2.1875, "learning_rate": 6.968529960484189e-06, "loss": 0.9859, "step": 5633 }, { "epoch": 1.1104670048224683, "grad_norm": 2.15625, "learning_rate": 6.967561224171114e-06, "loss": 0.9741, "step": 5634 }, { "epoch": 1.1106668998775642, "grad_norm": 2.203125, "learning_rate": 6.96659240045918e-06, "loss": 0.8432, "step": 5635 }, { "epoch": 1.1108667949326603, "grad_norm": 2.171875, "learning_rate": 6.965623489391423e-06, "loss": 1.0741, "step": 5636 }, { "epoch": 1.1110666899877564, "grad_norm": 2.09375, "learning_rate": 6.964654491010879e-06, "loss": 0.9718, "step": 5637 }, { "epoch": 1.1112665850428525, "grad_norm": 2.109375, "learning_rate": 6.963685405360594e-06, "loss": 1.0086, "step": 5638 }, { "epoch": 1.1114664800979486, "grad_norm": 2.15625, "learning_rate": 6.962716232483612e-06, "loss": 0.9935, "step": 5639 }, { "epoch": 1.1116663751530447, "grad_norm": 2.046875, "learning_rate": 6.961746972422985e-06, "loss": 0.9189, "step": 5640 }, { "epoch": 1.1118662702081408, "grad_norm": 2.234375, "learning_rate": 6.960777625221765e-06, "loss": 0.8785, "step": 5641 }, { "epoch": 1.1120661652632369, "grad_norm": 2.0625, "learning_rate": 6.959808190923015e-06, "loss": 1.0427, "step": 5642 }, { "epoch": 1.112266060318333, "grad_norm": 2.046875, "learning_rate": 6.958838669569793e-06, "loss": 0.944, "step": 5643 }, { "epoch": 1.1124659553734288, "grad_norm": 2.09375, "learning_rate": 6.957869061205168e-06, "loss": 0.9901, "step": 5644 }, { "epoch": 1.112665850428525, "grad_norm": 2.234375, "learning_rate": 6.956899365872207e-06, "loss": 0.9979, "step": 5645 }, { "epoch": 1.112865745483621, "grad_norm": 2.265625, "learning_rate": 6.955929583613985e-06, "loss": 1.0601, "step": 5646 }, { "epoch": 1.1130656405387171, "grad_norm": 2.1875, "learning_rate": 6.95495971447358e-06, "loss": 1.0979, "step": 5647 }, { "epoch": 1.1132655355938132, "grad_norm": 2.15625, "learning_rate": 6.953989758494075e-06, "loss": 0.9665, "step": 5648 }, { "epoch": 1.1134654306489093, "grad_norm": 2.203125, "learning_rate": 6.953019715718552e-06, "loss": 0.9663, "step": 5649 }, { "epoch": 1.1136653257040054, "grad_norm": 2.1875, "learning_rate": 6.952049586190102e-06, "loss": 1.0084, "step": 5650 }, { "epoch": 1.1138652207591015, "grad_norm": 2.140625, "learning_rate": 6.951079369951817e-06, "loss": 0.9027, "step": 5651 }, { "epoch": 1.1140651158141976, "grad_norm": 2.15625, "learning_rate": 6.950109067046797e-06, "loss": 0.9388, "step": 5652 }, { "epoch": 1.1142650108692935, "grad_norm": 2.203125, "learning_rate": 6.949138677518139e-06, "loss": 1.0136, "step": 5653 }, { "epoch": 1.1144649059243896, "grad_norm": 2.125, "learning_rate": 6.948168201408949e-06, "loss": 0.9745, "step": 5654 }, { "epoch": 1.1146648009794857, "grad_norm": 2.0625, "learning_rate": 6.947197638762335e-06, "loss": 0.9844, "step": 5655 }, { "epoch": 1.1148646960345818, "grad_norm": 2.046875, "learning_rate": 6.946226989621412e-06, "loss": 0.9096, "step": 5656 }, { "epoch": 1.115064591089678, "grad_norm": 2.046875, "learning_rate": 6.945256254029292e-06, "loss": 1.0328, "step": 5657 }, { "epoch": 1.115264486144774, "grad_norm": 2.171875, "learning_rate": 6.944285432029098e-06, "loss": 0.9901, "step": 5658 }, { "epoch": 1.1154643811998701, "grad_norm": 2.171875, "learning_rate": 6.943314523663953e-06, "loss": 0.9223, "step": 5659 }, { "epoch": 1.1156642762549662, "grad_norm": 2.203125, "learning_rate": 6.942343528976984e-06, "loss": 0.9988, "step": 5660 }, { "epoch": 1.1158641713100623, "grad_norm": 2.109375, "learning_rate": 6.9413724480113224e-06, "loss": 0.9521, "step": 5661 }, { "epoch": 1.1160640663651582, "grad_norm": 2.15625, "learning_rate": 6.940401280810105e-06, "loss": 0.9889, "step": 5662 }, { "epoch": 1.1162639614202543, "grad_norm": 2.109375, "learning_rate": 6.939430027416468e-06, "loss": 1.0112, "step": 5663 }, { "epoch": 1.1164638564753504, "grad_norm": 2.265625, "learning_rate": 6.938458687873558e-06, "loss": 1.0181, "step": 5664 }, { "epoch": 1.1166637515304465, "grad_norm": 2.1875, "learning_rate": 6.93748726222452e-06, "loss": 0.9091, "step": 5665 }, { "epoch": 1.1168636465855426, "grad_norm": 2.046875, "learning_rate": 6.936515750512505e-06, "loss": 0.9331, "step": 5666 }, { "epoch": 1.1170635416406387, "grad_norm": 2.25, "learning_rate": 6.935544152780666e-06, "loss": 1.0873, "step": 5667 }, { "epoch": 1.1172634366957348, "grad_norm": 2.296875, "learning_rate": 6.934572469072163e-06, "loss": 0.8997, "step": 5668 }, { "epoch": 1.117463331750831, "grad_norm": 2.140625, "learning_rate": 6.933600699430157e-06, "loss": 1.0442, "step": 5669 }, { "epoch": 1.1176632268059268, "grad_norm": 2.203125, "learning_rate": 6.932628843897816e-06, "loss": 0.9619, "step": 5670 }, { "epoch": 1.1178631218610229, "grad_norm": 2.125, "learning_rate": 6.931656902518307e-06, "loss": 0.9943, "step": 5671 }, { "epoch": 1.118063016916119, "grad_norm": 2.109375, "learning_rate": 6.930684875334806e-06, "loss": 0.9998, "step": 5672 }, { "epoch": 1.118262911971215, "grad_norm": 2.109375, "learning_rate": 6.929712762390487e-06, "loss": 1.0107, "step": 5673 }, { "epoch": 1.1184628070263112, "grad_norm": 2.0625, "learning_rate": 6.928740563728533e-06, "loss": 1.0266, "step": 5674 }, { "epoch": 1.1186627020814073, "grad_norm": 2.140625, "learning_rate": 6.927768279392132e-06, "loss": 0.9991, "step": 5675 }, { "epoch": 1.1188625971365034, "grad_norm": 2.1875, "learning_rate": 6.926795909424468e-06, "loss": 1.0663, "step": 5676 }, { "epoch": 1.1190624921915995, "grad_norm": 2.109375, "learning_rate": 6.9258234538687366e-06, "loss": 0.9595, "step": 5677 }, { "epoch": 1.1192623872466956, "grad_norm": 2.078125, "learning_rate": 6.924850912768133e-06, "loss": 0.9147, "step": 5678 }, { "epoch": 1.1194622823017915, "grad_norm": 2.15625, "learning_rate": 6.923878286165856e-06, "loss": 0.9804, "step": 5679 }, { "epoch": 1.1196621773568876, "grad_norm": 2.0, "learning_rate": 6.922905574105111e-06, "loss": 0.8939, "step": 5680 }, { "epoch": 1.1198620724119837, "grad_norm": 2.140625, "learning_rate": 6.921932776629107e-06, "loss": 0.9842, "step": 5681 }, { "epoch": 1.1200619674670798, "grad_norm": 2.21875, "learning_rate": 6.920959893781054e-06, "loss": 1.0701, "step": 5682 }, { "epoch": 1.1202618625221759, "grad_norm": 2.015625, "learning_rate": 6.919986925604166e-06, "loss": 0.9869, "step": 5683 }, { "epoch": 1.120461757577272, "grad_norm": 2.390625, "learning_rate": 6.919013872141667e-06, "loss": 1.0938, "step": 5684 }, { "epoch": 1.120661652632368, "grad_norm": 2.125, "learning_rate": 6.918040733436774e-06, "loss": 0.9659, "step": 5685 }, { "epoch": 1.1208615476874642, "grad_norm": 2.09375, "learning_rate": 6.917067509532718e-06, "loss": 0.9582, "step": 5686 }, { "epoch": 1.1210614427425603, "grad_norm": 2.0625, "learning_rate": 6.916094200472727e-06, "loss": 0.9951, "step": 5687 }, { "epoch": 1.1212613377976561, "grad_norm": 2.1875, "learning_rate": 6.915120806300036e-06, "loss": 0.9728, "step": 5688 }, { "epoch": 1.1214612328527522, "grad_norm": 2.15625, "learning_rate": 6.914147327057885e-06, "loss": 0.9721, "step": 5689 }, { "epoch": 1.1216611279078483, "grad_norm": 2.15625, "learning_rate": 6.913173762789515e-06, "loss": 0.9446, "step": 5690 }, { "epoch": 1.1218610229629444, "grad_norm": 2.203125, "learning_rate": 6.912200113538168e-06, "loss": 0.9531, "step": 5691 }, { "epoch": 1.1220609180180405, "grad_norm": 2.09375, "learning_rate": 6.911226379347097e-06, "loss": 0.9618, "step": 5692 }, { "epoch": 1.1222608130731366, "grad_norm": 2.171875, "learning_rate": 6.910252560259555e-06, "loss": 0.9827, "step": 5693 }, { "epoch": 1.1224607081282327, "grad_norm": 2.15625, "learning_rate": 6.9092786563187975e-06, "loss": 1.0449, "step": 5694 }, { "epoch": 1.1226606031833288, "grad_norm": 2.25, "learning_rate": 6.908304667568087e-06, "loss": 1.0932, "step": 5695 }, { "epoch": 1.122860498238425, "grad_norm": 2.109375, "learning_rate": 6.907330594050685e-06, "loss": 0.9571, "step": 5696 }, { "epoch": 1.1230603932935208, "grad_norm": 2.265625, "learning_rate": 6.9063564358098636e-06, "loss": 1.0944, "step": 5697 }, { "epoch": 1.123260288348617, "grad_norm": 2.25, "learning_rate": 6.905382192888893e-06, "loss": 1.1063, "step": 5698 }, { "epoch": 1.123460183403713, "grad_norm": 2.25, "learning_rate": 6.904407865331048e-06, "loss": 0.8986, "step": 5699 }, { "epoch": 1.123660078458809, "grad_norm": 2.09375, "learning_rate": 6.903433453179609e-06, "loss": 1.0043, "step": 5700 }, { "epoch": 1.1238599735139052, "grad_norm": 2.0625, "learning_rate": 6.90245895647786e-06, "loss": 0.9294, "step": 5701 }, { "epoch": 1.1240598685690013, "grad_norm": 2.140625, "learning_rate": 6.901484375269086e-06, "loss": 0.9703, "step": 5702 }, { "epoch": 1.1242597636240974, "grad_norm": 2.109375, "learning_rate": 6.900509709596581e-06, "loss": 0.9744, "step": 5703 }, { "epoch": 1.1244596586791935, "grad_norm": 2.375, "learning_rate": 6.8995349595036365e-06, "loss": 0.9381, "step": 5704 }, { "epoch": 1.1246595537342894, "grad_norm": 2.109375, "learning_rate": 6.898560125033552e-06, "loss": 0.9513, "step": 5705 }, { "epoch": 1.1248594487893855, "grad_norm": 2.125, "learning_rate": 6.897585206229631e-06, "loss": 0.935, "step": 5706 }, { "epoch": 1.1250593438444816, "grad_norm": 2.046875, "learning_rate": 6.896610203135176e-06, "loss": 0.9522, "step": 5707 }, { "epoch": 1.1252592388995777, "grad_norm": 2.203125, "learning_rate": 6.8956351157935e-06, "loss": 1.0196, "step": 5708 }, { "epoch": 1.1254591339546738, "grad_norm": 2.078125, "learning_rate": 6.894659944247914e-06, "loss": 0.9577, "step": 5709 }, { "epoch": 1.1256590290097699, "grad_norm": 2.171875, "learning_rate": 6.8936846885417344e-06, "loss": 0.971, "step": 5710 }, { "epoch": 1.125858924064866, "grad_norm": 2.203125, "learning_rate": 6.892709348718283e-06, "loss": 0.9899, "step": 5711 }, { "epoch": 1.126058819119962, "grad_norm": 2.078125, "learning_rate": 6.891733924820887e-06, "loss": 0.9589, "step": 5712 }, { "epoch": 1.1262587141750582, "grad_norm": 2.234375, "learning_rate": 6.89075841689287e-06, "loss": 0.9113, "step": 5713 }, { "epoch": 1.1264586092301543, "grad_norm": 2.21875, "learning_rate": 6.889782824977566e-06, "loss": 0.9732, "step": 5714 }, { "epoch": 1.1266585042852502, "grad_norm": 2.109375, "learning_rate": 6.8888071491183114e-06, "loss": 0.941, "step": 5715 }, { "epoch": 1.1268583993403463, "grad_norm": 2.09375, "learning_rate": 6.887831389358445e-06, "loss": 0.946, "step": 5716 }, { "epoch": 1.1270582943954424, "grad_norm": 2.140625, "learning_rate": 6.8868555457413115e-06, "loss": 1.0431, "step": 5717 }, { "epoch": 1.1272581894505385, "grad_norm": 2.1875, "learning_rate": 6.885879618310253e-06, "loss": 0.9591, "step": 5718 }, { "epoch": 1.1274580845056346, "grad_norm": 2.03125, "learning_rate": 6.884903607108624e-06, "loss": 0.9041, "step": 5719 }, { "epoch": 1.1276579795607307, "grad_norm": 2.015625, "learning_rate": 6.88392751217978e-06, "loss": 1.0078, "step": 5720 }, { "epoch": 1.1278578746158268, "grad_norm": 2.265625, "learning_rate": 6.882951333567076e-06, "loss": 1.0244, "step": 5721 }, { "epoch": 1.1280577696709229, "grad_norm": 2.125, "learning_rate": 6.881975071313876e-06, "loss": 0.8964, "step": 5722 }, { "epoch": 1.1282576647260187, "grad_norm": 2.15625, "learning_rate": 6.880998725463543e-06, "loss": 0.9442, "step": 5723 }, { "epoch": 1.1284575597811148, "grad_norm": 2.21875, "learning_rate": 6.880022296059448e-06, "loss": 1.0363, "step": 5724 }, { "epoch": 1.128657454836211, "grad_norm": 2.234375, "learning_rate": 6.879045783144962e-06, "loss": 0.9913, "step": 5725 }, { "epoch": 1.128857349891307, "grad_norm": 2.15625, "learning_rate": 6.878069186763466e-06, "loss": 1.0463, "step": 5726 }, { "epoch": 1.1290572449464031, "grad_norm": 2.0625, "learning_rate": 6.877092506958334e-06, "loss": 0.9575, "step": 5727 }, { "epoch": 1.1292571400014992, "grad_norm": 2.0625, "learning_rate": 6.876115743772954e-06, "loss": 0.9106, "step": 5728 }, { "epoch": 1.1294570350565953, "grad_norm": 2.09375, "learning_rate": 6.8751388972507146e-06, "loss": 0.936, "step": 5729 }, { "epoch": 1.1296569301116914, "grad_norm": 2.140625, "learning_rate": 6.874161967435005e-06, "loss": 1.0641, "step": 5730 }, { "epoch": 1.1298568251667875, "grad_norm": 2.1875, "learning_rate": 6.87318495436922e-06, "loss": 0.9322, "step": 5731 }, { "epoch": 1.1300567202218834, "grad_norm": 2.078125, "learning_rate": 6.87220785809676e-06, "loss": 0.9547, "step": 5732 }, { "epoch": 1.1302566152769795, "grad_norm": 2.203125, "learning_rate": 6.871230678661027e-06, "loss": 1.0251, "step": 5733 }, { "epoch": 1.1304565103320756, "grad_norm": 2.125, "learning_rate": 6.870253416105428e-06, "loss": 0.9616, "step": 5734 }, { "epoch": 1.1306564053871717, "grad_norm": 2.1875, "learning_rate": 6.8692760704733705e-06, "loss": 0.9055, "step": 5735 }, { "epoch": 1.1308563004422678, "grad_norm": 2.21875, "learning_rate": 6.868298641808271e-06, "loss": 1.0096, "step": 5736 }, { "epoch": 1.131056195497364, "grad_norm": 2.1875, "learning_rate": 6.867321130153545e-06, "loss": 0.9167, "step": 5737 }, { "epoch": 1.13125609055246, "grad_norm": 2.0625, "learning_rate": 6.866343535552614e-06, "loss": 0.9472, "step": 5738 }, { "epoch": 1.1314559856075561, "grad_norm": 2.0625, "learning_rate": 6.865365858048902e-06, "loss": 0.9423, "step": 5739 }, { "epoch": 1.131655880662652, "grad_norm": 2.1875, "learning_rate": 6.864388097685838e-06, "loss": 1.0048, "step": 5740 }, { "epoch": 1.131855775717748, "grad_norm": 2.125, "learning_rate": 6.863410254506853e-06, "loss": 0.8936, "step": 5741 }, { "epoch": 1.1320556707728442, "grad_norm": 2.1875, "learning_rate": 6.862432328555384e-06, "loss": 0.9888, "step": 5742 }, { "epoch": 1.1322555658279403, "grad_norm": 2.15625, "learning_rate": 6.861454319874871e-06, "loss": 1.0313, "step": 5743 }, { "epoch": 1.1324554608830364, "grad_norm": 2.140625, "learning_rate": 6.860476228508755e-06, "loss": 1.0116, "step": 5744 }, { "epoch": 1.1326553559381325, "grad_norm": 2.078125, "learning_rate": 6.859498054500482e-06, "loss": 1.0209, "step": 5745 }, { "epoch": 1.1328552509932286, "grad_norm": 2.03125, "learning_rate": 6.858519797893507e-06, "loss": 1.0137, "step": 5746 }, { "epoch": 1.1330551460483247, "grad_norm": 2.09375, "learning_rate": 6.8575414587312785e-06, "loss": 0.9231, "step": 5747 }, { "epoch": 1.1332550411034208, "grad_norm": 2.15625, "learning_rate": 6.856563037057259e-06, "loss": 0.9772, "step": 5748 }, { "epoch": 1.133454936158517, "grad_norm": 2.0625, "learning_rate": 6.855584532914906e-06, "loss": 0.9557, "step": 5749 }, { "epoch": 1.1336548312136128, "grad_norm": 2.140625, "learning_rate": 6.8546059463476864e-06, "loss": 0.9826, "step": 5750 }, { "epoch": 1.1338547262687089, "grad_norm": 2.171875, "learning_rate": 6.853627277399071e-06, "loss": 0.9502, "step": 5751 }, { "epoch": 1.134054621323805, "grad_norm": 2.09375, "learning_rate": 6.852648526112529e-06, "loss": 1.0073, "step": 5752 }, { "epoch": 1.134254516378901, "grad_norm": 2.3125, "learning_rate": 6.851669692531535e-06, "loss": 1.0135, "step": 5753 }, { "epoch": 1.1344544114339972, "grad_norm": 2.078125, "learning_rate": 6.850690776699574e-06, "loss": 0.986, "step": 5754 }, { "epoch": 1.1346543064890933, "grad_norm": 2.0625, "learning_rate": 6.849711778660124e-06, "loss": 1.0102, "step": 5755 }, { "epoch": 1.1348542015441894, "grad_norm": 2.015625, "learning_rate": 6.848732698456675e-06, "loss": 1.0157, "step": 5756 }, { "epoch": 1.1350540965992855, "grad_norm": 2.125, "learning_rate": 6.847753536132717e-06, "loss": 0.9198, "step": 5757 }, { "epoch": 1.1352539916543813, "grad_norm": 2.390625, "learning_rate": 6.846774291731744e-06, "loss": 1.0121, "step": 5758 }, { "epoch": 1.1354538867094774, "grad_norm": 1.953125, "learning_rate": 6.845794965297254e-06, "loss": 0.8884, "step": 5759 }, { "epoch": 1.1356537817645735, "grad_norm": 2.171875, "learning_rate": 6.844815556872751e-06, "loss": 0.9572, "step": 5760 }, { "epoch": 1.1358536768196696, "grad_norm": 1.984375, "learning_rate": 6.8438360665017355e-06, "loss": 0.8053, "step": 5761 }, { "epoch": 1.1360535718747657, "grad_norm": 2.09375, "learning_rate": 6.842856494227721e-06, "loss": 1.0477, "step": 5762 }, { "epoch": 1.1362534669298618, "grad_norm": 2.15625, "learning_rate": 6.841876840094216e-06, "loss": 0.9417, "step": 5763 }, { "epoch": 1.136453361984958, "grad_norm": 2.140625, "learning_rate": 6.840897104144739e-06, "loss": 0.9415, "step": 5764 }, { "epoch": 1.136653257040054, "grad_norm": 2.078125, "learning_rate": 6.839917286422811e-06, "loss": 0.9455, "step": 5765 }, { "epoch": 1.1368531520951501, "grad_norm": 2.265625, "learning_rate": 6.838937386971951e-06, "loss": 1.1089, "step": 5766 }, { "epoch": 1.1370530471502462, "grad_norm": 2.109375, "learning_rate": 6.837957405835689e-06, "loss": 0.9556, "step": 5767 }, { "epoch": 1.1372529422053421, "grad_norm": 1.984375, "learning_rate": 6.836977343057558e-06, "loss": 0.9827, "step": 5768 }, { "epoch": 1.1374528372604382, "grad_norm": 2.0625, "learning_rate": 6.835997198681087e-06, "loss": 0.9803, "step": 5769 }, { "epoch": 1.1376527323155343, "grad_norm": 2.09375, "learning_rate": 6.835016972749817e-06, "loss": 1.0608, "step": 5770 }, { "epoch": 1.1378526273706304, "grad_norm": 2.09375, "learning_rate": 6.83403666530729e-06, "loss": 0.9443, "step": 5771 }, { "epoch": 1.1380525224257265, "grad_norm": 2.109375, "learning_rate": 6.8330562763970484e-06, "loss": 0.9665, "step": 5772 }, { "epoch": 1.1382524174808226, "grad_norm": 2.09375, "learning_rate": 6.832075806062644e-06, "loss": 0.9547, "step": 5773 }, { "epoch": 1.1384523125359187, "grad_norm": 2.203125, "learning_rate": 6.831095254347629e-06, "loss": 1.0589, "step": 5774 }, { "epoch": 1.1386522075910146, "grad_norm": 2.078125, "learning_rate": 6.830114621295556e-06, "loss": 1.0209, "step": 5775 }, { "epoch": 1.1388521026461107, "grad_norm": 2.1875, "learning_rate": 6.829133906949988e-06, "loss": 0.9995, "step": 5776 }, { "epoch": 1.1390519977012068, "grad_norm": 2.21875, "learning_rate": 6.8281531113544875e-06, "loss": 0.9766, "step": 5777 }, { "epoch": 1.139251892756303, "grad_norm": 2.140625, "learning_rate": 6.827172234552621e-06, "loss": 0.9472, "step": 5778 }, { "epoch": 1.139451787811399, "grad_norm": 2.328125, "learning_rate": 6.826191276587959e-06, "loss": 1.0093, "step": 5779 }, { "epoch": 1.139651682866495, "grad_norm": 2.15625, "learning_rate": 6.825210237504075e-06, "loss": 0.9545, "step": 5780 }, { "epoch": 1.1398515779215912, "grad_norm": 2.046875, "learning_rate": 6.824229117344547e-06, "loss": 0.9503, "step": 5781 }, { "epoch": 1.1400514729766873, "grad_norm": 2.21875, "learning_rate": 6.823247916152957e-06, "loss": 0.988, "step": 5782 }, { "epoch": 1.1402513680317834, "grad_norm": 2.203125, "learning_rate": 6.822266633972891e-06, "loss": 0.9702, "step": 5783 }, { "epoch": 1.1404512630868795, "grad_norm": 2.015625, "learning_rate": 6.821285270847934e-06, "loss": 0.8738, "step": 5784 }, { "epoch": 1.1406511581419754, "grad_norm": 2.09375, "learning_rate": 6.82030382682168e-06, "loss": 0.8834, "step": 5785 }, { "epoch": 1.1408510531970715, "grad_norm": 2.125, "learning_rate": 6.819322301937724e-06, "loss": 0.9622, "step": 5786 }, { "epoch": 1.1410509482521676, "grad_norm": 2.1875, "learning_rate": 6.818340696239666e-06, "loss": 0.9004, "step": 5787 }, { "epoch": 1.1412508433072637, "grad_norm": 2.1875, "learning_rate": 6.817359009771109e-06, "loss": 0.9325, "step": 5788 }, { "epoch": 1.1414507383623598, "grad_norm": 2.0625, "learning_rate": 6.816377242575658e-06, "loss": 0.973, "step": 5789 }, { "epoch": 1.1416506334174559, "grad_norm": 2.21875, "learning_rate": 6.815395394696924e-06, "loss": 0.934, "step": 5790 }, { "epoch": 1.141850528472552, "grad_norm": 2.203125, "learning_rate": 6.814413466178521e-06, "loss": 1.0456, "step": 5791 }, { "epoch": 1.142050423527648, "grad_norm": 2.171875, "learning_rate": 6.813431457064064e-06, "loss": 1.0741, "step": 5792 }, { "epoch": 1.142250318582744, "grad_norm": 2.171875, "learning_rate": 6.812449367397178e-06, "loss": 0.9353, "step": 5793 }, { "epoch": 1.14245021363784, "grad_norm": 2.09375, "learning_rate": 6.811467197221483e-06, "loss": 0.9993, "step": 5794 }, { "epoch": 1.1426501086929362, "grad_norm": 2.0625, "learning_rate": 6.8104849465806086e-06, "loss": 0.8419, "step": 5795 }, { "epoch": 1.1428500037480323, "grad_norm": 2.03125, "learning_rate": 6.809502615518187e-06, "loss": 0.8346, "step": 5796 }, { "epoch": 1.1430498988031284, "grad_norm": 2.1875, "learning_rate": 6.808520204077852e-06, "loss": 1.0155, "step": 5797 }, { "epoch": 1.1432497938582245, "grad_norm": 2.21875, "learning_rate": 6.807537712303243e-06, "loss": 1.03, "step": 5798 }, { "epoch": 1.1434496889133205, "grad_norm": 2.1875, "learning_rate": 6.806555140238002e-06, "loss": 1.0012, "step": 5799 }, { "epoch": 1.1436495839684166, "grad_norm": 2.125, "learning_rate": 6.805572487925774e-06, "loss": 0.9254, "step": 5800 }, { "epoch": 1.1438494790235127, "grad_norm": 2.265625, "learning_rate": 6.804589755410209e-06, "loss": 1.0384, "step": 5801 }, { "epoch": 1.1440493740786088, "grad_norm": 2.203125, "learning_rate": 6.803606942734961e-06, "loss": 0.954, "step": 5802 }, { "epoch": 1.1442492691337047, "grad_norm": 2.25, "learning_rate": 6.802624049943684e-06, "loss": 1.0369, "step": 5803 }, { "epoch": 1.1444491641888008, "grad_norm": 2.109375, "learning_rate": 6.801641077080039e-06, "loss": 0.9482, "step": 5804 }, { "epoch": 1.144649059243897, "grad_norm": 2.203125, "learning_rate": 6.80065802418769e-06, "loss": 1.0058, "step": 5805 }, { "epoch": 1.144848954298993, "grad_norm": 2.21875, "learning_rate": 6.799674891310304e-06, "loss": 0.9918, "step": 5806 }, { "epoch": 1.1450488493540891, "grad_norm": 2.28125, "learning_rate": 6.798691678491552e-06, "loss": 0.948, "step": 5807 }, { "epoch": 1.1452487444091852, "grad_norm": 2.03125, "learning_rate": 6.797708385775107e-06, "loss": 0.8365, "step": 5808 }, { "epoch": 1.1454486394642813, "grad_norm": 2.15625, "learning_rate": 6.796725013204648e-06, "loss": 0.9136, "step": 5809 }, { "epoch": 1.1456485345193772, "grad_norm": 2.171875, "learning_rate": 6.795741560823856e-06, "loss": 1.0892, "step": 5810 }, { "epoch": 1.1458484295744733, "grad_norm": 2.125, "learning_rate": 6.794758028676415e-06, "loss": 0.9594, "step": 5811 }, { "epoch": 1.1460483246295694, "grad_norm": 2.140625, "learning_rate": 6.793774416806014e-06, "loss": 1.0519, "step": 5812 }, { "epoch": 1.1462482196846655, "grad_norm": 2.09375, "learning_rate": 6.792790725256347e-06, "loss": 0.9291, "step": 5813 }, { "epoch": 1.1464481147397616, "grad_norm": 2.203125, "learning_rate": 6.791806954071105e-06, "loss": 0.9338, "step": 5814 }, { "epoch": 1.1466480097948577, "grad_norm": 2.0625, "learning_rate": 6.79082310329399e-06, "loss": 0.9201, "step": 5815 }, { "epoch": 1.1468479048499538, "grad_norm": 2.0625, "learning_rate": 6.789839172968705e-06, "loss": 0.9484, "step": 5816 }, { "epoch": 1.14704779990505, "grad_norm": 2.0625, "learning_rate": 6.7888551631389545e-06, "loss": 0.9763, "step": 5817 }, { "epoch": 1.147247694960146, "grad_norm": 2.109375, "learning_rate": 6.787871073848448e-06, "loss": 1.0782, "step": 5818 }, { "epoch": 1.147447590015242, "grad_norm": 2.109375, "learning_rate": 6.7868869051409e-06, "loss": 0.9979, "step": 5819 }, { "epoch": 1.147647485070338, "grad_norm": 2.09375, "learning_rate": 6.785902657060026e-06, "loss": 0.9832, "step": 5820 }, { "epoch": 1.147847380125434, "grad_norm": 2.109375, "learning_rate": 6.784918329649548e-06, "loss": 0.965, "step": 5821 }, { "epoch": 1.1480472751805302, "grad_norm": 2.09375, "learning_rate": 6.783933922953188e-06, "loss": 0.9862, "step": 5822 }, { "epoch": 1.1482471702356263, "grad_norm": 2.078125, "learning_rate": 6.782949437014672e-06, "loss": 0.9793, "step": 5823 }, { "epoch": 1.1484470652907224, "grad_norm": 2.0, "learning_rate": 6.781964871877735e-06, "loss": 0.9625, "step": 5824 }, { "epoch": 1.1486469603458185, "grad_norm": 2.046875, "learning_rate": 6.780980227586107e-06, "loss": 0.9612, "step": 5825 }, { "epoch": 1.1488468554009146, "grad_norm": 2.015625, "learning_rate": 6.7799955041835276e-06, "loss": 0.965, "step": 5826 }, { "epoch": 1.1490467504560107, "grad_norm": 2.21875, "learning_rate": 6.77901070171374e-06, "loss": 1.0603, "step": 5827 }, { "epoch": 1.1492466455111066, "grad_norm": 2.15625, "learning_rate": 6.778025820220484e-06, "loss": 1.0257, "step": 5828 }, { "epoch": 1.1494465405662027, "grad_norm": 2.078125, "learning_rate": 6.777040859747512e-06, "loss": 0.9683, "step": 5829 }, { "epoch": 1.1496464356212988, "grad_norm": 2.234375, "learning_rate": 6.7760558203385765e-06, "loss": 1.0206, "step": 5830 }, { "epoch": 1.1498463306763949, "grad_norm": 2.28125, "learning_rate": 6.77507070203743e-06, "loss": 0.9786, "step": 5831 }, { "epoch": 1.150046225731491, "grad_norm": 2.140625, "learning_rate": 6.774085504887832e-06, "loss": 1.0074, "step": 5832 }, { "epoch": 1.150246120786587, "grad_norm": 2.140625, "learning_rate": 6.7731002289335455e-06, "loss": 1.0272, "step": 5833 }, { "epoch": 1.1504460158416832, "grad_norm": 2.015625, "learning_rate": 6.772114874218337e-06, "loss": 0.959, "step": 5834 }, { "epoch": 1.1506459108967793, "grad_norm": 2.3125, "learning_rate": 6.771129440785973e-06, "loss": 1.0305, "step": 5835 }, { "epoch": 1.1508458059518754, "grad_norm": 2.25, "learning_rate": 6.770143928680231e-06, "loss": 1.1036, "step": 5836 }, { "epoch": 1.1510457010069715, "grad_norm": 2.15625, "learning_rate": 6.769158337944883e-06, "loss": 0.983, "step": 5837 }, { "epoch": 1.1512455960620673, "grad_norm": 2.1875, "learning_rate": 6.768172668623711e-06, "loss": 1.0334, "step": 5838 }, { "epoch": 1.1514454911171634, "grad_norm": 2.09375, "learning_rate": 6.767186920760499e-06, "loss": 0.9598, "step": 5839 }, { "epoch": 1.1516453861722595, "grad_norm": 2.28125, "learning_rate": 6.766201094399031e-06, "loss": 1.0598, "step": 5840 }, { "epoch": 1.1518452812273556, "grad_norm": 2.125, "learning_rate": 6.765215189583101e-06, "loss": 0.887, "step": 5841 }, { "epoch": 1.1520451762824517, "grad_norm": 2.109375, "learning_rate": 6.764229206356498e-06, "loss": 0.9367, "step": 5842 }, { "epoch": 1.1522450713375478, "grad_norm": 2.296875, "learning_rate": 6.763243144763024e-06, "loss": 1.0542, "step": 5843 }, { "epoch": 1.152444966392644, "grad_norm": 2.0625, "learning_rate": 6.762257004846479e-06, "loss": 1.0015, "step": 5844 }, { "epoch": 1.15264486144774, "grad_norm": 2.15625, "learning_rate": 6.761270786650664e-06, "loss": 0.9359, "step": 5845 }, { "epoch": 1.152844756502836, "grad_norm": 2.203125, "learning_rate": 6.76028449021939e-06, "loss": 1.0411, "step": 5846 }, { "epoch": 1.153044651557932, "grad_norm": 2.109375, "learning_rate": 6.759298115596467e-06, "loss": 0.9873, "step": 5847 }, { "epoch": 1.153244546613028, "grad_norm": 2.1875, "learning_rate": 6.7583116628257075e-06, "loss": 1.012, "step": 5848 }, { "epoch": 1.1534444416681242, "grad_norm": 2.09375, "learning_rate": 6.757325131950934e-06, "loss": 0.9291, "step": 5849 }, { "epoch": 1.1536443367232203, "grad_norm": 2.203125, "learning_rate": 6.756338523015965e-06, "loss": 1.0056, "step": 5850 }, { "epoch": 1.1538442317783164, "grad_norm": 2.015625, "learning_rate": 6.755351836064625e-06, "loss": 0.8476, "step": 5851 }, { "epoch": 1.1540441268334125, "grad_norm": 2.109375, "learning_rate": 6.754365071140747e-06, "loss": 0.9314, "step": 5852 }, { "epoch": 1.1542440218885086, "grad_norm": 2.125, "learning_rate": 6.753378228288158e-06, "loss": 1.0368, "step": 5853 }, { "epoch": 1.1544439169436047, "grad_norm": 2.21875, "learning_rate": 6.752391307550694e-06, "loss": 1.0438, "step": 5854 }, { "epoch": 1.1546438119987006, "grad_norm": 2.1875, "learning_rate": 6.751404308972198e-06, "loss": 1.0567, "step": 5855 }, { "epoch": 1.1548437070537967, "grad_norm": 2.109375, "learning_rate": 6.750417232596509e-06, "loss": 0.9272, "step": 5856 }, { "epoch": 1.1550436021088928, "grad_norm": 2.15625, "learning_rate": 6.749430078467472e-06, "loss": 1.0246, "step": 5857 }, { "epoch": 1.1552434971639889, "grad_norm": 2.171875, "learning_rate": 6.74844284662894e-06, "loss": 0.9252, "step": 5858 }, { "epoch": 1.155443392219085, "grad_norm": 2.125, "learning_rate": 6.7474555371247605e-06, "loss": 0.9725, "step": 5859 }, { "epoch": 1.155643287274181, "grad_norm": 2.015625, "learning_rate": 6.746468149998796e-06, "loss": 0.9515, "step": 5860 }, { "epoch": 1.1558431823292772, "grad_norm": 2.078125, "learning_rate": 6.7454806852949015e-06, "loss": 0.9205, "step": 5861 }, { "epoch": 1.1560430773843733, "grad_norm": 2.09375, "learning_rate": 6.744493143056941e-06, "loss": 0.9725, "step": 5862 }, { "epoch": 1.1562429724394692, "grad_norm": 2.0625, "learning_rate": 6.743505523328781e-06, "loss": 0.8935, "step": 5863 }, { "epoch": 1.1564428674945653, "grad_norm": 2.15625, "learning_rate": 6.742517826154293e-06, "loss": 1.0846, "step": 5864 }, { "epoch": 1.1566427625496614, "grad_norm": 2.03125, "learning_rate": 6.741530051577347e-06, "loss": 0.9303, "step": 5865 }, { "epoch": 1.1568426576047575, "grad_norm": 2.140625, "learning_rate": 6.740542199641824e-06, "loss": 0.9525, "step": 5866 }, { "epoch": 1.1570425526598536, "grad_norm": 2.046875, "learning_rate": 6.739554270391603e-06, "loss": 0.964, "step": 5867 }, { "epoch": 1.1572424477149497, "grad_norm": 1.984375, "learning_rate": 6.738566263870566e-06, "loss": 0.8848, "step": 5868 }, { "epoch": 1.1574423427700458, "grad_norm": 2.0625, "learning_rate": 6.737578180122603e-06, "loss": 0.9419, "step": 5869 }, { "epoch": 1.1576422378251419, "grad_norm": 2.125, "learning_rate": 6.7365900191916e-06, "loss": 0.968, "step": 5870 }, { "epoch": 1.157842132880238, "grad_norm": 2.234375, "learning_rate": 6.735601781121454e-06, "loss": 0.9856, "step": 5871 }, { "epoch": 1.158042027935334, "grad_norm": 2.046875, "learning_rate": 6.734613465956065e-06, "loss": 0.953, "step": 5872 }, { "epoch": 1.15824192299043, "grad_norm": 2.234375, "learning_rate": 6.733625073739329e-06, "loss": 0.9394, "step": 5873 }, { "epoch": 1.158441818045526, "grad_norm": 2.25, "learning_rate": 6.732636604515153e-06, "loss": 0.9459, "step": 5874 }, { "epoch": 1.1586417131006221, "grad_norm": 1.9921875, "learning_rate": 6.731648058327445e-06, "loss": 0.8777, "step": 5875 }, { "epoch": 1.1588416081557182, "grad_norm": 2.15625, "learning_rate": 6.7306594352201135e-06, "loss": 0.9294, "step": 5876 }, { "epoch": 1.1590415032108143, "grad_norm": 2.125, "learning_rate": 6.729670735237075e-06, "loss": 1.0541, "step": 5877 }, { "epoch": 1.1592413982659104, "grad_norm": 2.1875, "learning_rate": 6.728681958422248e-06, "loss": 0.9905, "step": 5878 }, { "epoch": 1.1594412933210065, "grad_norm": 2.28125, "learning_rate": 6.727693104819553e-06, "loss": 0.9105, "step": 5879 }, { "epoch": 1.1596411883761026, "grad_norm": 2.109375, "learning_rate": 6.726704174472913e-06, "loss": 1.0022, "step": 5880 }, { "epoch": 1.1598410834311985, "grad_norm": 2.171875, "learning_rate": 6.725715167426261e-06, "loss": 1.0047, "step": 5881 }, { "epoch": 1.1600409784862946, "grad_norm": 2.125, "learning_rate": 6.724726083723523e-06, "loss": 0.9738, "step": 5882 }, { "epoch": 1.1602408735413907, "grad_norm": 2.125, "learning_rate": 6.723736923408638e-06, "loss": 0.9422, "step": 5883 }, { "epoch": 1.1604407685964868, "grad_norm": 2.109375, "learning_rate": 6.722747686525543e-06, "loss": 0.9893, "step": 5884 }, { "epoch": 1.160640663651583, "grad_norm": 2.0625, "learning_rate": 6.721758373118178e-06, "loss": 1.0724, "step": 5885 }, { "epoch": 1.160840558706679, "grad_norm": 2.171875, "learning_rate": 6.720768983230492e-06, "loss": 1.0144, "step": 5886 }, { "epoch": 1.1610404537617751, "grad_norm": 2.09375, "learning_rate": 6.7197795169064305e-06, "loss": 0.9422, "step": 5887 }, { "epoch": 1.1612403488168712, "grad_norm": 2.140625, "learning_rate": 6.7187899741899465e-06, "loss": 0.9876, "step": 5888 }, { "epoch": 1.1614402438719673, "grad_norm": 2.09375, "learning_rate": 6.717800355124996e-06, "loss": 0.8915, "step": 5889 }, { "epoch": 1.1616401389270634, "grad_norm": 2.03125, "learning_rate": 6.716810659755537e-06, "loss": 0.8954, "step": 5890 }, { "epoch": 1.1618400339821593, "grad_norm": 2.078125, "learning_rate": 6.715820888125532e-06, "loss": 0.9359, "step": 5891 }, { "epoch": 1.1620399290372554, "grad_norm": 2.03125, "learning_rate": 6.714831040278946e-06, "loss": 0.9886, "step": 5892 }, { "epoch": 1.1622398240923515, "grad_norm": 2.109375, "learning_rate": 6.713841116259749e-06, "loss": 0.9766, "step": 5893 }, { "epoch": 1.1624397191474476, "grad_norm": 2.109375, "learning_rate": 6.7128511161119115e-06, "loss": 0.9345, "step": 5894 }, { "epoch": 1.1626396142025437, "grad_norm": 2.078125, "learning_rate": 6.7118610398794115e-06, "loss": 0.9462, "step": 5895 }, { "epoch": 1.1628395092576398, "grad_norm": 2.171875, "learning_rate": 6.710870887606227e-06, "loss": 0.9552, "step": 5896 }, { "epoch": 1.163039404312736, "grad_norm": 2.109375, "learning_rate": 6.70988065933634e-06, "loss": 0.9285, "step": 5897 }, { "epoch": 1.1632392993678318, "grad_norm": 2.203125, "learning_rate": 6.708890355113736e-06, "loss": 1.0474, "step": 5898 }, { "epoch": 1.1634391944229279, "grad_norm": 1.96875, "learning_rate": 6.707899974982405e-06, "loss": 0.8558, "step": 5899 }, { "epoch": 1.163639089478024, "grad_norm": 2.140625, "learning_rate": 6.706909518986341e-06, "loss": 0.9501, "step": 5900 }, { "epoch": 1.16383898453312, "grad_norm": 2.21875, "learning_rate": 6.705918987169537e-06, "loss": 0.9321, "step": 5901 }, { "epoch": 1.1640388795882162, "grad_norm": 2.109375, "learning_rate": 6.704928379575993e-06, "loss": 0.9636, "step": 5902 }, { "epoch": 1.1642387746433123, "grad_norm": 2.0625, "learning_rate": 6.703937696249715e-06, "loss": 1.0126, "step": 5903 }, { "epoch": 1.1644386696984084, "grad_norm": 2.1875, "learning_rate": 6.7029469372347045e-06, "loss": 1.084, "step": 5904 }, { "epoch": 1.1646385647535045, "grad_norm": 2.0625, "learning_rate": 6.701956102574973e-06, "loss": 0.9991, "step": 5905 }, { "epoch": 1.1648384598086006, "grad_norm": 2.1875, "learning_rate": 6.700965192314536e-06, "loss": 0.9644, "step": 5906 }, { "epoch": 1.1650383548636967, "grad_norm": 2.078125, "learning_rate": 6.699974206497405e-06, "loss": 1.0096, "step": 5907 }, { "epoch": 1.1652382499187925, "grad_norm": 2.125, "learning_rate": 6.6989831451676015e-06, "loss": 0.9761, "step": 5908 }, { "epoch": 1.1654381449738886, "grad_norm": 2.125, "learning_rate": 6.697992008369147e-06, "loss": 1.0231, "step": 5909 }, { "epoch": 1.1656380400289847, "grad_norm": 2.1875, "learning_rate": 6.6970007961460695e-06, "loss": 1.039, "step": 5910 }, { "epoch": 1.1658379350840808, "grad_norm": 2.078125, "learning_rate": 6.6960095085423985e-06, "loss": 0.9953, "step": 5911 }, { "epoch": 1.166037830139177, "grad_norm": 2.15625, "learning_rate": 6.695018145602165e-06, "loss": 1.0316, "step": 5912 }, { "epoch": 1.166237725194273, "grad_norm": 2.0625, "learning_rate": 6.694026707369407e-06, "loss": 1.0413, "step": 5913 }, { "epoch": 1.1664376202493691, "grad_norm": 2.1875, "learning_rate": 6.693035193888164e-06, "loss": 0.9899, "step": 5914 }, { "epoch": 1.1666375153044652, "grad_norm": 2.1875, "learning_rate": 6.692043605202478e-06, "loss": 0.9738, "step": 5915 }, { "epoch": 1.1668374103595611, "grad_norm": 2.140625, "learning_rate": 6.691051941356397e-06, "loss": 0.9412, "step": 5916 }, { "epoch": 1.1670373054146572, "grad_norm": 2.140625, "learning_rate": 6.6900602023939685e-06, "loss": 1.0286, "step": 5917 }, { "epoch": 1.1672372004697533, "grad_norm": 2.15625, "learning_rate": 6.6890683883592455e-06, "loss": 0.9842, "step": 5918 }, { "epoch": 1.1674370955248494, "grad_norm": 2.203125, "learning_rate": 6.688076499296285e-06, "loss": 0.9791, "step": 5919 }, { "epoch": 1.1676369905799455, "grad_norm": 2.1875, "learning_rate": 6.687084535249149e-06, "loss": 0.9836, "step": 5920 }, { "epoch": 1.1678368856350416, "grad_norm": 2.171875, "learning_rate": 6.686092496261896e-06, "loss": 0.9675, "step": 5921 }, { "epoch": 1.1680367806901377, "grad_norm": 2.09375, "learning_rate": 6.685100382378595e-06, "loss": 0.9873, "step": 5922 }, { "epoch": 1.1682366757452338, "grad_norm": 2.125, "learning_rate": 6.684108193643317e-06, "loss": 1.0282, "step": 5923 }, { "epoch": 1.16843657080033, "grad_norm": 2.234375, "learning_rate": 6.68311593010013e-06, "loss": 1.0129, "step": 5924 }, { "epoch": 1.168636465855426, "grad_norm": 2.015625, "learning_rate": 6.682123591793114e-06, "loss": 0.9194, "step": 5925 }, { "epoch": 1.168836360910522, "grad_norm": 2.203125, "learning_rate": 6.681131178766349e-06, "loss": 0.9182, "step": 5926 }, { "epoch": 1.169036255965618, "grad_norm": 2.109375, "learning_rate": 6.680138691063914e-06, "loss": 0.9553, "step": 5927 }, { "epoch": 1.169236151020714, "grad_norm": 2.125, "learning_rate": 6.679146128729901e-06, "loss": 0.9834, "step": 5928 }, { "epoch": 1.1694360460758102, "grad_norm": 2.140625, "learning_rate": 6.678153491808394e-06, "loss": 1.0813, "step": 5929 }, { "epoch": 1.1696359411309063, "grad_norm": 2.171875, "learning_rate": 6.677160780343488e-06, "loss": 1.0055, "step": 5930 }, { "epoch": 1.1698358361860024, "grad_norm": 2.046875, "learning_rate": 6.6761679943792805e-06, "loss": 0.9817, "step": 5931 }, { "epoch": 1.1700357312410985, "grad_norm": 2.0625, "learning_rate": 6.675175133959868e-06, "loss": 0.9821, "step": 5932 }, { "epoch": 1.1702356262961944, "grad_norm": 2.140625, "learning_rate": 6.674182199129356e-06, "loss": 0.8733, "step": 5933 }, { "epoch": 1.1704355213512905, "grad_norm": 2.25, "learning_rate": 6.67318918993185e-06, "loss": 0.9753, "step": 5934 }, { "epoch": 1.1706354164063866, "grad_norm": 2.046875, "learning_rate": 6.6721961064114584e-06, "loss": 0.9783, "step": 5935 }, { "epoch": 1.1708353114614827, "grad_norm": 2.078125, "learning_rate": 6.6712029486122946e-06, "loss": 0.9435, "step": 5936 }, { "epoch": 1.1710352065165788, "grad_norm": 2.03125, "learning_rate": 6.670209716578474e-06, "loss": 0.9447, "step": 5937 }, { "epoch": 1.1712351015716749, "grad_norm": 2.046875, "learning_rate": 6.669216410354118e-06, "loss": 0.9303, "step": 5938 }, { "epoch": 1.171434996626771, "grad_norm": 2.1875, "learning_rate": 6.668223029983345e-06, "loss": 0.9607, "step": 5939 }, { "epoch": 1.171634891681867, "grad_norm": 2.234375, "learning_rate": 6.667229575510284e-06, "loss": 0.9161, "step": 5940 }, { "epoch": 1.1718347867369632, "grad_norm": 2.109375, "learning_rate": 6.666236046979062e-06, "loss": 0.9774, "step": 5941 }, { "epoch": 1.1720346817920593, "grad_norm": 2.046875, "learning_rate": 6.665242444433815e-06, "loss": 0.9084, "step": 5942 }, { "epoch": 1.1722345768471552, "grad_norm": 2.296875, "learning_rate": 6.664248767918675e-06, "loss": 0.9373, "step": 5943 }, { "epoch": 1.1724344719022513, "grad_norm": 2.25, "learning_rate": 6.663255017477783e-06, "loss": 0.9009, "step": 5944 }, { "epoch": 1.1726343669573474, "grad_norm": 2.203125, "learning_rate": 6.662261193155281e-06, "loss": 1.1289, "step": 5945 }, { "epoch": 1.1728342620124435, "grad_norm": 2.15625, "learning_rate": 6.661267294995314e-06, "loss": 1.0036, "step": 5946 }, { "epoch": 1.1730341570675396, "grad_norm": 2.328125, "learning_rate": 6.66027332304203e-06, "loss": 1.0468, "step": 5947 }, { "epoch": 1.1732340521226357, "grad_norm": 2.09375, "learning_rate": 6.659279277339584e-06, "loss": 0.8799, "step": 5948 }, { "epoch": 1.1734339471777318, "grad_norm": 2.1875, "learning_rate": 6.65828515793213e-06, "loss": 0.9727, "step": 5949 }, { "epoch": 1.1736338422328279, "grad_norm": 2.15625, "learning_rate": 6.657290964863825e-06, "loss": 0.9576, "step": 5950 }, { "epoch": 1.1738337372879237, "grad_norm": 2.09375, "learning_rate": 6.656296698178832e-06, "loss": 0.8868, "step": 5951 }, { "epoch": 1.1740336323430198, "grad_norm": 2.0625, "learning_rate": 6.655302357921318e-06, "loss": 0.943, "step": 5952 }, { "epoch": 1.174233527398116, "grad_norm": 2.09375, "learning_rate": 6.65430794413545e-06, "loss": 1.0092, "step": 5953 }, { "epoch": 1.174433422453212, "grad_norm": 2.171875, "learning_rate": 6.6533134568654e-06, "loss": 0.9646, "step": 5954 }, { "epoch": 1.1746333175083081, "grad_norm": 2.25, "learning_rate": 6.652318896155342e-06, "loss": 1.1088, "step": 5955 }, { "epoch": 1.1748332125634042, "grad_norm": 2.125, "learning_rate": 6.651324262049454e-06, "loss": 1.0244, "step": 5956 }, { "epoch": 1.1750331076185003, "grad_norm": 2.203125, "learning_rate": 6.650329554591921e-06, "loss": 1.0009, "step": 5957 }, { "epoch": 1.1752330026735964, "grad_norm": 2.0, "learning_rate": 6.649334773826924e-06, "loss": 0.9352, "step": 5958 }, { "epoch": 1.1754328977286925, "grad_norm": 2.09375, "learning_rate": 6.648339919798654e-06, "loss": 0.9745, "step": 5959 }, { "epoch": 1.1756327927837886, "grad_norm": 2.171875, "learning_rate": 6.647344992551299e-06, "loss": 1.1091, "step": 5960 }, { "epoch": 1.1758326878388845, "grad_norm": 2.171875, "learning_rate": 6.646349992129055e-06, "loss": 0.9725, "step": 5961 }, { "epoch": 1.1760325828939806, "grad_norm": 2.375, "learning_rate": 6.645354918576122e-06, "loss": 0.9178, "step": 5962 }, { "epoch": 1.1762324779490767, "grad_norm": 2.078125, "learning_rate": 6.644359771936699e-06, "loss": 0.9725, "step": 5963 }, { "epoch": 1.1764323730041728, "grad_norm": 2.0625, "learning_rate": 6.643364552254989e-06, "loss": 0.8993, "step": 5964 }, { "epoch": 1.176632268059269, "grad_norm": 2.203125, "learning_rate": 6.642369259575203e-06, "loss": 0.9373, "step": 5965 }, { "epoch": 1.176832163114365, "grad_norm": 2.25, "learning_rate": 6.64137389394155e-06, "loss": 1.0464, "step": 5966 }, { "epoch": 1.177032058169461, "grad_norm": 2.171875, "learning_rate": 6.640378455398242e-06, "loss": 0.9632, "step": 5967 }, { "epoch": 1.1772319532245572, "grad_norm": 2.046875, "learning_rate": 6.6393829439895e-06, "loss": 0.9081, "step": 5968 }, { "epoch": 1.177431848279653, "grad_norm": 2.140625, "learning_rate": 6.6383873597595415e-06, "loss": 1.0095, "step": 5969 }, { "epoch": 1.1776317433347492, "grad_norm": 2.015625, "learning_rate": 6.637391702752591e-06, "loss": 0.9309, "step": 5970 }, { "epoch": 1.1778316383898453, "grad_norm": 2.1875, "learning_rate": 6.636395973012878e-06, "loss": 1.0349, "step": 5971 }, { "epoch": 1.1780315334449414, "grad_norm": 2.328125, "learning_rate": 6.635400170584629e-06, "loss": 1.0354, "step": 5972 }, { "epoch": 1.1782314285000375, "grad_norm": 2.109375, "learning_rate": 6.63440429551208e-06, "loss": 0.9784, "step": 5973 }, { "epoch": 1.1784313235551336, "grad_norm": 2.03125, "learning_rate": 6.633408347839466e-06, "loss": 0.9179, "step": 5974 }, { "epoch": 1.1786312186102297, "grad_norm": 2.046875, "learning_rate": 6.632412327611029e-06, "loss": 0.9423, "step": 5975 }, { "epoch": 1.1788311136653258, "grad_norm": 2.234375, "learning_rate": 6.63141623487101e-06, "loss": 0.9934, "step": 5976 }, { "epoch": 1.1790310087204219, "grad_norm": 2.21875, "learning_rate": 6.6304200696636545e-06, "loss": 1.0466, "step": 5977 }, { "epoch": 1.1792309037755178, "grad_norm": 2.203125, "learning_rate": 6.629423832033215e-06, "loss": 0.9837, "step": 5978 }, { "epoch": 1.1794307988306139, "grad_norm": 2.0625, "learning_rate": 6.6284275220239435e-06, "loss": 0.9193, "step": 5979 }, { "epoch": 1.17963069388571, "grad_norm": 2.21875, "learning_rate": 6.627431139680094e-06, "loss": 0.9951, "step": 5980 }, { "epoch": 1.179830588940806, "grad_norm": 2.0625, "learning_rate": 6.626434685045928e-06, "loss": 0.9343, "step": 5981 }, { "epoch": 1.1800304839959022, "grad_norm": 2.140625, "learning_rate": 6.625438158165707e-06, "loss": 1.1256, "step": 5982 }, { "epoch": 1.1802303790509983, "grad_norm": 2.140625, "learning_rate": 6.624441559083696e-06, "loss": 0.9476, "step": 5983 }, { "epoch": 1.1804302741060944, "grad_norm": 2.140625, "learning_rate": 6.623444887844166e-06, "loss": 1.0104, "step": 5984 }, { "epoch": 1.1806301691611905, "grad_norm": 2.3125, "learning_rate": 6.622448144491387e-06, "loss": 0.926, "step": 5985 }, { "epoch": 1.1808300642162863, "grad_norm": 2.15625, "learning_rate": 6.621451329069634e-06, "loss": 0.9933, "step": 5986 }, { "epoch": 1.1810299592713824, "grad_norm": 2.265625, "learning_rate": 6.6204544416231865e-06, "loss": 0.988, "step": 5987 }, { "epoch": 1.1812298543264785, "grad_norm": 2.046875, "learning_rate": 6.619457482196326e-06, "loss": 0.9556, "step": 5988 }, { "epoch": 1.1814297493815746, "grad_norm": 2.1875, "learning_rate": 6.618460450833335e-06, "loss": 0.9795, "step": 5989 }, { "epoch": 1.1816296444366707, "grad_norm": 2.140625, "learning_rate": 6.617463347578506e-06, "loss": 1.0121, "step": 5990 }, { "epoch": 1.1818295394917668, "grad_norm": 2.234375, "learning_rate": 6.6164661724761255e-06, "loss": 1.0122, "step": 5991 }, { "epoch": 1.182029434546863, "grad_norm": 2.1875, "learning_rate": 6.615468925570492e-06, "loss": 1.0103, "step": 5992 }, { "epoch": 1.182229329601959, "grad_norm": 2.1875, "learning_rate": 6.614471606905902e-06, "loss": 0.9475, "step": 5993 }, { "epoch": 1.1824292246570551, "grad_norm": 2.15625, "learning_rate": 6.6134742165266545e-06, "loss": 0.9314, "step": 5994 }, { "epoch": 1.1826291197121512, "grad_norm": 2.25, "learning_rate": 6.612476754477055e-06, "loss": 0.98, "step": 5995 }, { "epoch": 1.1828290147672471, "grad_norm": 2.125, "learning_rate": 6.6114792208014115e-06, "loss": 0.9514, "step": 5996 }, { "epoch": 1.1830289098223432, "grad_norm": 2.09375, "learning_rate": 6.610481615544031e-06, "loss": 0.9479, "step": 5997 }, { "epoch": 1.1832288048774393, "grad_norm": 2.0625, "learning_rate": 6.60948393874923e-06, "loss": 0.9903, "step": 5998 }, { "epoch": 1.1834286999325354, "grad_norm": 2.03125, "learning_rate": 6.608486190461324e-06, "loss": 1.0254, "step": 5999 }, { "epoch": 1.1836285949876315, "grad_norm": 2.15625, "learning_rate": 6.607488370724635e-06, "loss": 0.9799, "step": 6000 }, { "epoch": 1.1836285949876315, "eval_loss": 0.9062411189079285, "eval_runtime": 594.9136, "eval_samples_per_second": 3.594, "eval_steps_per_second": 3.594, "step": 6000 }, { "epoch": 1.1838284900427276, "grad_norm": 2.109375, "learning_rate": 6.606490479583481e-06, "loss": 0.9528, "step": 6001 }, { "epoch": 1.1840283850978237, "grad_norm": 2.390625, "learning_rate": 6.605492517082195e-06, "loss": 1.0422, "step": 6002 }, { "epoch": 1.1842282801529198, "grad_norm": 2.234375, "learning_rate": 6.604494483265101e-06, "loss": 0.9536, "step": 6003 }, { "epoch": 1.1844281752080157, "grad_norm": 2.015625, "learning_rate": 6.603496378176534e-06, "loss": 0.9032, "step": 6004 }, { "epoch": 1.1846280702631118, "grad_norm": 2.109375, "learning_rate": 6.602498201860828e-06, "loss": 0.9891, "step": 6005 }, { "epoch": 1.1848279653182079, "grad_norm": 2.1875, "learning_rate": 6.601499954362324e-06, "loss": 0.9402, "step": 6006 }, { "epoch": 1.185027860373304, "grad_norm": 2.0625, "learning_rate": 6.6005016357253624e-06, "loss": 0.9325, "step": 6007 }, { "epoch": 1.1852277554284, "grad_norm": 2.125, "learning_rate": 6.5995032459942895e-06, "loss": 0.923, "step": 6008 }, { "epoch": 1.1854276504834962, "grad_norm": 2.3125, "learning_rate": 6.5985047852134535e-06, "loss": 1.0531, "step": 6009 }, { "epoch": 1.1856275455385923, "grad_norm": 2.328125, "learning_rate": 6.597506253427206e-06, "loss": 0.9276, "step": 6010 }, { "epoch": 1.1858274405936884, "grad_norm": 2.109375, "learning_rate": 6.5965076506799e-06, "loss": 1.0075, "step": 6011 }, { "epoch": 1.1860273356487845, "grad_norm": 2.078125, "learning_rate": 6.595508977015897e-06, "loss": 0.9152, "step": 6012 }, { "epoch": 1.1862272307038806, "grad_norm": 2.0625, "learning_rate": 6.594510232479553e-06, "loss": 0.9343, "step": 6013 }, { "epoch": 1.1864271257589765, "grad_norm": 2.15625, "learning_rate": 6.5935114171152345e-06, "loss": 0.9068, "step": 6014 }, { "epoch": 1.1866270208140726, "grad_norm": 2.25, "learning_rate": 6.592512530967312e-06, "loss": 0.9557, "step": 6015 }, { "epoch": 1.1868269158691687, "grad_norm": 2.09375, "learning_rate": 6.591513574080152e-06, "loss": 0.9594, "step": 6016 }, { "epoch": 1.1870268109242648, "grad_norm": 2.140625, "learning_rate": 6.590514546498128e-06, "loss": 0.9499, "step": 6017 }, { "epoch": 1.1872267059793609, "grad_norm": 2.15625, "learning_rate": 6.589515448265619e-06, "loss": 0.9983, "step": 6018 }, { "epoch": 1.187426601034457, "grad_norm": 2.125, "learning_rate": 6.588516279427002e-06, "loss": 0.9502, "step": 6019 }, { "epoch": 1.187626496089553, "grad_norm": 2.09375, "learning_rate": 6.587517040026662e-06, "loss": 0.9559, "step": 6020 }, { "epoch": 1.187826391144649, "grad_norm": 2.234375, "learning_rate": 6.586517730108985e-06, "loss": 1.1202, "step": 6021 }, { "epoch": 1.188026286199745, "grad_norm": 2.15625, "learning_rate": 6.58551834971836e-06, "loss": 0.9888, "step": 6022 }, { "epoch": 1.1882261812548411, "grad_norm": 2.21875, "learning_rate": 6.584518898899178e-06, "loss": 1.0361, "step": 6023 }, { "epoch": 1.1884260763099372, "grad_norm": 2.21875, "learning_rate": 6.583519377695838e-06, "loss": 0.989, "step": 6024 }, { "epoch": 1.1886259713650333, "grad_norm": 2.078125, "learning_rate": 6.582519786152735e-06, "loss": 0.9821, "step": 6025 }, { "epoch": 1.1888258664201294, "grad_norm": 2.234375, "learning_rate": 6.581520124314271e-06, "loss": 0.9858, "step": 6026 }, { "epoch": 1.1890257614752255, "grad_norm": 2.25, "learning_rate": 6.580520392224854e-06, "loss": 1.0612, "step": 6027 }, { "epoch": 1.1892256565303216, "grad_norm": 2.03125, "learning_rate": 6.579520589928888e-06, "loss": 0.9674, "step": 6028 }, { "epoch": 1.1894255515854177, "grad_norm": 2.109375, "learning_rate": 6.578520717470789e-06, "loss": 0.948, "step": 6029 }, { "epoch": 1.1896254466405138, "grad_norm": 2.203125, "learning_rate": 6.577520774894967e-06, "loss": 1.0566, "step": 6030 }, { "epoch": 1.1898253416956097, "grad_norm": 2.296875, "learning_rate": 6.57652076224584e-06, "loss": 1.1256, "step": 6031 }, { "epoch": 1.1900252367507058, "grad_norm": 2.21875, "learning_rate": 6.57552067956783e-06, "loss": 1.0127, "step": 6032 }, { "epoch": 1.190225131805802, "grad_norm": 2.375, "learning_rate": 6.574520526905358e-06, "loss": 0.9052, "step": 6033 }, { "epoch": 1.190425026860898, "grad_norm": 2.125, "learning_rate": 6.573520304302853e-06, "loss": 0.9465, "step": 6034 }, { "epoch": 1.1906249219159941, "grad_norm": 2.1875, "learning_rate": 6.572520011804745e-06, "loss": 0.9954, "step": 6035 }, { "epoch": 1.1908248169710902, "grad_norm": 2.140625, "learning_rate": 6.571519649455464e-06, "loss": 0.9809, "step": 6036 }, { "epoch": 1.1910247120261863, "grad_norm": 2.15625, "learning_rate": 6.5705192172994505e-06, "loss": 0.9703, "step": 6037 }, { "epoch": 1.1912246070812824, "grad_norm": 2.125, "learning_rate": 6.56951871538114e-06, "loss": 0.9372, "step": 6038 }, { "epoch": 1.1914245021363783, "grad_norm": 2.140625, "learning_rate": 6.568518143744977e-06, "loss": 1.0186, "step": 6039 }, { "epoch": 1.1916243971914744, "grad_norm": 2.34375, "learning_rate": 6.567517502435403e-06, "loss": 0.9661, "step": 6040 }, { "epoch": 1.1918242922465705, "grad_norm": 2.015625, "learning_rate": 6.5665167914968706e-06, "loss": 0.8746, "step": 6041 }, { "epoch": 1.1920241873016666, "grad_norm": 2.140625, "learning_rate": 6.56551601097383e-06, "loss": 1.0034, "step": 6042 }, { "epoch": 1.1922240823567627, "grad_norm": 2.078125, "learning_rate": 6.564515160910736e-06, "loss": 1.093, "step": 6043 }, { "epoch": 1.1924239774118588, "grad_norm": 2.0625, "learning_rate": 6.563514241352043e-06, "loss": 1.0007, "step": 6044 }, { "epoch": 1.192623872466955, "grad_norm": 2.15625, "learning_rate": 6.562513252342216e-06, "loss": 0.9565, "step": 6045 }, { "epoch": 1.192823767522051, "grad_norm": 2.109375, "learning_rate": 6.561512193925718e-06, "loss": 0.9181, "step": 6046 }, { "epoch": 1.193023662577147, "grad_norm": 2.15625, "learning_rate": 6.560511066147015e-06, "loss": 0.9166, "step": 6047 }, { "epoch": 1.1932235576322432, "grad_norm": 2.15625, "learning_rate": 6.559509869050575e-06, "loss": 1.0238, "step": 6048 }, { "epoch": 1.193423452687339, "grad_norm": 2.1875, "learning_rate": 6.558508602680876e-06, "loss": 0.9868, "step": 6049 }, { "epoch": 1.1936233477424352, "grad_norm": 2.1875, "learning_rate": 6.557507267082391e-06, "loss": 0.9623, "step": 6050 }, { "epoch": 1.1938232427975313, "grad_norm": 2.0, "learning_rate": 6.556505862299597e-06, "loss": 0.9718, "step": 6051 }, { "epoch": 1.1940231378526274, "grad_norm": 2.140625, "learning_rate": 6.555504388376981e-06, "loss": 0.9164, "step": 6052 }, { "epoch": 1.1942230329077235, "grad_norm": 2.15625, "learning_rate": 6.554502845359026e-06, "loss": 1.0381, "step": 6053 }, { "epoch": 1.1944229279628196, "grad_norm": 2.15625, "learning_rate": 6.553501233290218e-06, "loss": 1.0206, "step": 6054 }, { "epoch": 1.1946228230179157, "grad_norm": 2.203125, "learning_rate": 6.5524995522150545e-06, "loss": 1.0788, "step": 6055 }, { "epoch": 1.1948227180730115, "grad_norm": 2.140625, "learning_rate": 6.551497802178025e-06, "loss": 1.0028, "step": 6056 }, { "epoch": 1.1950226131281076, "grad_norm": 2.140625, "learning_rate": 6.55049598322363e-06, "loss": 1.0223, "step": 6057 }, { "epoch": 1.1952225081832037, "grad_norm": 2.140625, "learning_rate": 6.549494095396368e-06, "loss": 0.975, "step": 6058 }, { "epoch": 1.1954224032382998, "grad_norm": 2.28125, "learning_rate": 6.548492138740743e-06, "loss": 1.0228, "step": 6059 }, { "epoch": 1.195622298293396, "grad_norm": 2.15625, "learning_rate": 6.547490113301265e-06, "loss": 1.0139, "step": 6060 }, { "epoch": 1.195822193348492, "grad_norm": 2.09375, "learning_rate": 6.546488019122441e-06, "loss": 0.9619, "step": 6061 }, { "epoch": 1.1960220884035881, "grad_norm": 2.3125, "learning_rate": 6.5454858562487835e-06, "loss": 0.9858, "step": 6062 }, { "epoch": 1.1962219834586842, "grad_norm": 2.03125, "learning_rate": 6.544483624724809e-06, "loss": 0.9372, "step": 6063 }, { "epoch": 1.1964218785137803, "grad_norm": 2.28125, "learning_rate": 6.543481324595037e-06, "loss": 0.9888, "step": 6064 }, { "epoch": 1.1966217735688764, "grad_norm": 2.015625, "learning_rate": 6.54247895590399e-06, "loss": 0.9256, "step": 6065 }, { "epoch": 1.1968216686239723, "grad_norm": 2.09375, "learning_rate": 6.541476518696191e-06, "loss": 0.9669, "step": 6066 }, { "epoch": 1.1970215636790684, "grad_norm": 2.140625, "learning_rate": 6.5404740130161715e-06, "loss": 0.9895, "step": 6067 }, { "epoch": 1.1972214587341645, "grad_norm": 2.109375, "learning_rate": 6.539471438908459e-06, "loss": 0.8756, "step": 6068 }, { "epoch": 1.1974213537892606, "grad_norm": 2.0625, "learning_rate": 6.5384687964175915e-06, "loss": 1.0367, "step": 6069 }, { "epoch": 1.1976212488443567, "grad_norm": 2.140625, "learning_rate": 6.537466085588104e-06, "loss": 0.9823, "step": 6070 }, { "epoch": 1.1978211438994528, "grad_norm": 2.078125, "learning_rate": 6.536463306464535e-06, "loss": 0.9332, "step": 6071 }, { "epoch": 1.198021038954549, "grad_norm": 2.1875, "learning_rate": 6.535460459091434e-06, "loss": 0.9466, "step": 6072 }, { "epoch": 1.198220934009645, "grad_norm": 2.125, "learning_rate": 6.534457543513341e-06, "loss": 0.9703, "step": 6073 }, { "epoch": 1.198420829064741, "grad_norm": 2.234375, "learning_rate": 6.5334545597748075e-06, "loss": 1.1124, "step": 6074 }, { "epoch": 1.198620724119837, "grad_norm": 2.078125, "learning_rate": 6.532451507920386e-06, "loss": 0.9838, "step": 6075 }, { "epoch": 1.198820619174933, "grad_norm": 2.3125, "learning_rate": 6.531448387994634e-06, "loss": 0.9515, "step": 6076 }, { "epoch": 1.1990205142300292, "grad_norm": 2.21875, "learning_rate": 6.530445200042107e-06, "loss": 1.0353, "step": 6077 }, { "epoch": 1.1992204092851253, "grad_norm": 2.234375, "learning_rate": 6.5294419441073684e-06, "loss": 1.0373, "step": 6078 }, { "epoch": 1.1994203043402214, "grad_norm": 2.203125, "learning_rate": 6.528438620234981e-06, "loss": 1.0811, "step": 6079 }, { "epoch": 1.1996201993953175, "grad_norm": 2.078125, "learning_rate": 6.5274352284695144e-06, "loss": 0.9719, "step": 6080 }, { "epoch": 1.1998200944504136, "grad_norm": 2.03125, "learning_rate": 6.526431768855537e-06, "loss": 0.8906, "step": 6081 }, { "epoch": 1.2000199895055097, "grad_norm": 2.078125, "learning_rate": 6.5254282414376235e-06, "loss": 0.9426, "step": 6082 }, { "epoch": 1.2002198845606058, "grad_norm": 2.09375, "learning_rate": 6.524424646260351e-06, "loss": 0.9419, "step": 6083 }, { "epoch": 1.2004197796157017, "grad_norm": 2.125, "learning_rate": 6.523420983368298e-06, "loss": 0.9869, "step": 6084 }, { "epoch": 1.2006196746707978, "grad_norm": 2.28125, "learning_rate": 6.522417252806048e-06, "loss": 0.9829, "step": 6085 }, { "epoch": 1.2008195697258939, "grad_norm": 2.15625, "learning_rate": 6.5214134546181865e-06, "loss": 1.0244, "step": 6086 }, { "epoch": 1.20101946478099, "grad_norm": 2.125, "learning_rate": 6.520409588849301e-06, "loss": 0.8331, "step": 6087 }, { "epoch": 1.201219359836086, "grad_norm": 2.265625, "learning_rate": 6.519405655543985e-06, "loss": 0.8882, "step": 6088 }, { "epoch": 1.2014192548911822, "grad_norm": 2.234375, "learning_rate": 6.518401654746831e-06, "loss": 0.9884, "step": 6089 }, { "epoch": 1.2016191499462783, "grad_norm": 2.15625, "learning_rate": 6.517397586502439e-06, "loss": 0.9536, "step": 6090 }, { "epoch": 1.2018190450013744, "grad_norm": 2.203125, "learning_rate": 6.516393450855407e-06, "loss": 0.9221, "step": 6091 }, { "epoch": 1.2020189400564703, "grad_norm": 2.140625, "learning_rate": 6.5153892478503414e-06, "loss": 0.9778, "step": 6092 }, { "epoch": 1.2022188351115664, "grad_norm": 2.21875, "learning_rate": 6.514384977531846e-06, "loss": 0.9758, "step": 6093 }, { "epoch": 1.2024187301666625, "grad_norm": 2.15625, "learning_rate": 6.513380639944532e-06, "loss": 0.9534, "step": 6094 }, { "epoch": 1.2026186252217586, "grad_norm": 2.171875, "learning_rate": 6.512376235133011e-06, "loss": 1.0497, "step": 6095 }, { "epoch": 1.2028185202768547, "grad_norm": 2.078125, "learning_rate": 6.511371763141899e-06, "loss": 1.0026, "step": 6096 }, { "epoch": 1.2030184153319508, "grad_norm": 2.21875, "learning_rate": 6.510367224015817e-06, "loss": 0.9807, "step": 6097 }, { "epoch": 1.2032183103870469, "grad_norm": 2.078125, "learning_rate": 6.5093626177993815e-06, "loss": 1.0168, "step": 6098 }, { "epoch": 1.203418205442143, "grad_norm": 2.09375, "learning_rate": 6.508357944537221e-06, "loss": 0.9322, "step": 6099 }, { "epoch": 1.203618100497239, "grad_norm": 2.171875, "learning_rate": 6.507353204273962e-06, "loss": 1.0694, "step": 6100 }, { "epoch": 1.203817995552335, "grad_norm": 2.28125, "learning_rate": 6.506348397054233e-06, "loss": 0.9876, "step": 6101 }, { "epoch": 1.204017890607431, "grad_norm": 2.109375, "learning_rate": 6.505343522922672e-06, "loss": 1.0069, "step": 6102 }, { "epoch": 1.2042177856625271, "grad_norm": 2.125, "learning_rate": 6.5043385819239095e-06, "loss": 0.9277, "step": 6103 }, { "epoch": 1.2044176807176232, "grad_norm": 2.078125, "learning_rate": 6.5033335741025885e-06, "loss": 1.0108, "step": 6104 }, { "epoch": 1.2046175757727193, "grad_norm": 2.125, "learning_rate": 6.502328499503352e-06, "loss": 1.0157, "step": 6105 }, { "epoch": 1.2048174708278154, "grad_norm": 2.296875, "learning_rate": 6.5013233581708425e-06, "loss": 1.0746, "step": 6106 }, { "epoch": 1.2050173658829115, "grad_norm": 2.109375, "learning_rate": 6.500318150149711e-06, "loss": 0.9329, "step": 6107 }, { "epoch": 1.2052172609380076, "grad_norm": 2.1875, "learning_rate": 6.499312875484608e-06, "loss": 1.0009, "step": 6108 }, { "epoch": 1.2054171559931035, "grad_norm": 2.296875, "learning_rate": 6.498307534220186e-06, "loss": 0.9681, "step": 6109 }, { "epoch": 1.2056170510481996, "grad_norm": 2.140625, "learning_rate": 6.497302126401103e-06, "loss": 1.043, "step": 6110 }, { "epoch": 1.2058169461032957, "grad_norm": 2.109375, "learning_rate": 6.496296652072021e-06, "loss": 0.9317, "step": 6111 }, { "epoch": 1.2060168411583918, "grad_norm": 2.1875, "learning_rate": 6.4952911112776e-06, "loss": 0.8968, "step": 6112 }, { "epoch": 1.206216736213488, "grad_norm": 2.125, "learning_rate": 6.494285504062507e-06, "loss": 0.9643, "step": 6113 }, { "epoch": 1.206416631268584, "grad_norm": 2.125, "learning_rate": 6.493279830471414e-06, "loss": 0.9282, "step": 6114 }, { "epoch": 1.20661652632368, "grad_norm": 2.109375, "learning_rate": 6.492274090548989e-06, "loss": 0.922, "step": 6115 }, { "epoch": 1.2068164213787762, "grad_norm": 2.21875, "learning_rate": 6.491268284339908e-06, "loss": 1.0417, "step": 6116 }, { "epoch": 1.2070163164338723, "grad_norm": 2.328125, "learning_rate": 6.490262411888851e-06, "loss": 1.0131, "step": 6117 }, { "epoch": 1.2072162114889684, "grad_norm": 2.09375, "learning_rate": 6.489256473240493e-06, "loss": 0.9763, "step": 6118 }, { "epoch": 1.2074161065440643, "grad_norm": 2.28125, "learning_rate": 6.488250468439525e-06, "loss": 0.9694, "step": 6119 }, { "epoch": 1.2076160015991604, "grad_norm": 2.28125, "learning_rate": 6.4872443975306275e-06, "loss": 1.0717, "step": 6120 }, { "epoch": 1.2078158966542565, "grad_norm": 2.25, "learning_rate": 6.486238260558495e-06, "loss": 1.0287, "step": 6121 }, { "epoch": 1.2080157917093526, "grad_norm": 2.03125, "learning_rate": 6.485232057567816e-06, "loss": 0.9361, "step": 6122 }, { "epoch": 1.2082156867644487, "grad_norm": 2.09375, "learning_rate": 6.48422578860329e-06, "loss": 0.9639, "step": 6123 }, { "epoch": 1.2084155818195448, "grad_norm": 2.0625, "learning_rate": 6.4832194537096105e-06, "loss": 0.9281, "step": 6124 }, { "epoch": 1.2086154768746409, "grad_norm": 2.4375, "learning_rate": 6.482213052931483e-06, "loss": 1.0782, "step": 6125 }, { "epoch": 1.208815371929737, "grad_norm": 2.171875, "learning_rate": 6.481206586313609e-06, "loss": 0.9929, "step": 6126 }, { "epoch": 1.2090152669848329, "grad_norm": 2.328125, "learning_rate": 6.480200053900696e-06, "loss": 0.92, "step": 6127 }, { "epoch": 1.209215162039929, "grad_norm": 2.171875, "learning_rate": 6.479193455737457e-06, "loss": 0.9682, "step": 6128 }, { "epoch": 1.209415057095025, "grad_norm": 2.203125, "learning_rate": 6.4781867918686e-06, "loss": 0.9514, "step": 6129 }, { "epoch": 1.2096149521501212, "grad_norm": 2.15625, "learning_rate": 6.477180062338845e-06, "loss": 0.9545, "step": 6130 }, { "epoch": 1.2098148472052173, "grad_norm": 2.265625, "learning_rate": 6.47617326719291e-06, "loss": 1.0176, "step": 6131 }, { "epoch": 1.2100147422603134, "grad_norm": 2.15625, "learning_rate": 6.475166406475515e-06, "loss": 0.9661, "step": 6132 }, { "epoch": 1.2102146373154095, "grad_norm": 2.078125, "learning_rate": 6.474159480231386e-06, "loss": 0.9227, "step": 6133 }, { "epoch": 1.2104145323705056, "grad_norm": 2.171875, "learning_rate": 6.47315248850525e-06, "loss": 1.0038, "step": 6134 }, { "epoch": 1.2106144274256017, "grad_norm": 2.28125, "learning_rate": 6.472145431341838e-06, "loss": 0.9451, "step": 6135 }, { "epoch": 1.2108143224806978, "grad_norm": 2.21875, "learning_rate": 6.471138308785885e-06, "loss": 1.0021, "step": 6136 }, { "epoch": 1.2110142175357936, "grad_norm": 2.109375, "learning_rate": 6.4701311208821225e-06, "loss": 0.9096, "step": 6137 }, { "epoch": 1.2112141125908897, "grad_norm": 2.171875, "learning_rate": 6.4691238676752935e-06, "loss": 0.934, "step": 6138 }, { "epoch": 1.2114140076459858, "grad_norm": 2.234375, "learning_rate": 6.468116549210142e-06, "loss": 1.0087, "step": 6139 }, { "epoch": 1.211613902701082, "grad_norm": 2.28125, "learning_rate": 6.467109165531407e-06, "loss": 0.986, "step": 6140 }, { "epoch": 1.211813797756178, "grad_norm": 2.140625, "learning_rate": 6.46610171668384e-06, "loss": 1.0162, "step": 6141 }, { "epoch": 1.2120136928112741, "grad_norm": 2.25, "learning_rate": 6.465094202712192e-06, "loss": 1.0288, "step": 6142 }, { "epoch": 1.2122135878663702, "grad_norm": 2.1875, "learning_rate": 6.464086623661215e-06, "loss": 0.8564, "step": 6143 }, { "epoch": 1.2124134829214661, "grad_norm": 2.046875, "learning_rate": 6.463078979575667e-06, "loss": 0.9441, "step": 6144 }, { "epoch": 1.2126133779765622, "grad_norm": 2.140625, "learning_rate": 6.462071270500308e-06, "loss": 0.9968, "step": 6145 }, { "epoch": 1.2128132730316583, "grad_norm": 2.1875, "learning_rate": 6.461063496479899e-06, "loss": 1.0823, "step": 6146 }, { "epoch": 1.2130131680867544, "grad_norm": 2.09375, "learning_rate": 6.460055657559206e-06, "loss": 0.9145, "step": 6147 }, { "epoch": 1.2132130631418505, "grad_norm": 2.125, "learning_rate": 6.459047753782994e-06, "loss": 0.9598, "step": 6148 }, { "epoch": 1.2134129581969466, "grad_norm": 2.25, "learning_rate": 6.458039785196039e-06, "loss": 1.0672, "step": 6149 }, { "epoch": 1.2136128532520427, "grad_norm": 2.234375, "learning_rate": 6.457031751843113e-06, "loss": 0.9857, "step": 6150 }, { "epoch": 1.2138127483071388, "grad_norm": 2.125, "learning_rate": 6.4560236537689905e-06, "loss": 0.9374, "step": 6151 }, { "epoch": 1.214012643362235, "grad_norm": 2.0625, "learning_rate": 6.455015491018452e-06, "loss": 1.0154, "step": 6152 }, { "epoch": 1.214212538417331, "grad_norm": 2.09375, "learning_rate": 6.454007263636283e-06, "loss": 0.9403, "step": 6153 }, { "epoch": 1.2144124334724269, "grad_norm": 2.046875, "learning_rate": 6.452998971667266e-06, "loss": 0.9519, "step": 6154 }, { "epoch": 1.214612328527523, "grad_norm": 2.140625, "learning_rate": 6.451990615156189e-06, "loss": 0.9895, "step": 6155 }, { "epoch": 1.214812223582619, "grad_norm": 2.140625, "learning_rate": 6.4509821941478455e-06, "loss": 0.9187, "step": 6156 }, { "epoch": 1.2150121186377152, "grad_norm": 2.171875, "learning_rate": 6.4499737086870276e-06, "loss": 0.9367, "step": 6157 }, { "epoch": 1.2152120136928113, "grad_norm": 2.09375, "learning_rate": 6.448965158818531e-06, "loss": 0.9306, "step": 6158 }, { "epoch": 1.2154119087479074, "grad_norm": 2.3125, "learning_rate": 6.447956544587158e-06, "loss": 1.0404, "step": 6159 }, { "epoch": 1.2156118038030035, "grad_norm": 2.140625, "learning_rate": 6.44694786603771e-06, "loss": 0.961, "step": 6160 }, { "epoch": 1.2158116988580996, "grad_norm": 2.328125, "learning_rate": 6.445939123214991e-06, "loss": 1.0009, "step": 6161 }, { "epoch": 1.2160115939131955, "grad_norm": 2.0, "learning_rate": 6.444930316163812e-06, "loss": 0.9166, "step": 6162 }, { "epoch": 1.2162114889682916, "grad_norm": 2.125, "learning_rate": 6.443921444928982e-06, "loss": 0.9853, "step": 6163 }, { "epoch": 1.2164113840233877, "grad_norm": 2.125, "learning_rate": 6.442912509555316e-06, "loss": 0.913, "step": 6164 }, { "epoch": 1.2166112790784838, "grad_norm": 2.15625, "learning_rate": 6.44190351008763e-06, "loss": 0.9804, "step": 6165 }, { "epoch": 1.2168111741335799, "grad_norm": 2.171875, "learning_rate": 6.4408944465707435e-06, "loss": 0.993, "step": 6166 }, { "epoch": 1.217011069188676, "grad_norm": 2.171875, "learning_rate": 6.439885319049482e-06, "loss": 0.9392, "step": 6167 }, { "epoch": 1.217210964243772, "grad_norm": 2.078125, "learning_rate": 6.438876127568665e-06, "loss": 0.9837, "step": 6168 }, { "epoch": 1.2174108592988682, "grad_norm": 2.109375, "learning_rate": 6.437866872173127e-06, "loss": 0.9335, "step": 6169 }, { "epoch": 1.2176107543539643, "grad_norm": 2.046875, "learning_rate": 6.436857552907696e-06, "loss": 0.9545, "step": 6170 }, { "epoch": 1.2178106494090604, "grad_norm": 2.078125, "learning_rate": 6.435848169817205e-06, "loss": 1.0012, "step": 6171 }, { "epoch": 1.2180105444641562, "grad_norm": 2.171875, "learning_rate": 6.43483872294649e-06, "loss": 1.0375, "step": 6172 }, { "epoch": 1.2182104395192523, "grad_norm": 2.09375, "learning_rate": 6.433829212340394e-06, "loss": 1.0023, "step": 6173 }, { "epoch": 1.2184103345743484, "grad_norm": 2.046875, "learning_rate": 6.432819638043758e-06, "loss": 0.9833, "step": 6174 }, { "epoch": 1.2186102296294445, "grad_norm": 2.046875, "learning_rate": 6.431810000101425e-06, "loss": 0.9473, "step": 6175 }, { "epoch": 1.2188101246845406, "grad_norm": 2.359375, "learning_rate": 6.430800298558246e-06, "loss": 0.9796, "step": 6176 }, { "epoch": 1.2190100197396367, "grad_norm": 2.171875, "learning_rate": 6.429790533459071e-06, "loss": 1.0393, "step": 6177 }, { "epoch": 1.2192099147947328, "grad_norm": 2.203125, "learning_rate": 6.428780704848753e-06, "loss": 1.0952, "step": 6178 }, { "epoch": 1.2194098098498287, "grad_norm": 2.234375, "learning_rate": 6.427770812772147e-06, "loss": 0.967, "step": 6179 }, { "epoch": 1.2196097049049248, "grad_norm": 2.296875, "learning_rate": 6.426760857274115e-06, "loss": 0.9787, "step": 6180 }, { "epoch": 1.219809599960021, "grad_norm": 2.15625, "learning_rate": 6.425750838399519e-06, "loss": 0.9558, "step": 6181 }, { "epoch": 1.220009495015117, "grad_norm": 2.109375, "learning_rate": 6.4247407561932215e-06, "loss": 0.9681, "step": 6182 }, { "epoch": 1.2202093900702131, "grad_norm": 2.21875, "learning_rate": 6.423730610700092e-06, "loss": 1.1079, "step": 6183 }, { "epoch": 1.2204092851253092, "grad_norm": 2.21875, "learning_rate": 6.422720401965003e-06, "loss": 0.9808, "step": 6184 }, { "epoch": 1.2206091801804053, "grad_norm": 2.3125, "learning_rate": 6.421710130032824e-06, "loss": 1.0888, "step": 6185 }, { "epoch": 1.2208090752355014, "grad_norm": 2.109375, "learning_rate": 6.420699794948433e-06, "loss": 0.9608, "step": 6186 }, { "epoch": 1.2210089702905975, "grad_norm": 2.1875, "learning_rate": 6.419689396756709e-06, "loss": 1.0215, "step": 6187 }, { "epoch": 1.2212088653456936, "grad_norm": 2.1875, "learning_rate": 6.418678935502534e-06, "loss": 0.9127, "step": 6188 }, { "epoch": 1.2214087604007895, "grad_norm": 2.1875, "learning_rate": 6.4176684112307905e-06, "loss": 0.9422, "step": 6189 }, { "epoch": 1.2216086554558856, "grad_norm": 2.046875, "learning_rate": 6.41665782398637e-06, "loss": 1.0003, "step": 6190 }, { "epoch": 1.2218085505109817, "grad_norm": 2.1875, "learning_rate": 6.415647173814158e-06, "loss": 0.9936, "step": 6191 }, { "epoch": 1.2220084455660778, "grad_norm": 2.234375, "learning_rate": 6.414636460759052e-06, "loss": 1.034, "step": 6192 }, { "epoch": 1.222208340621174, "grad_norm": 2.0, "learning_rate": 6.413625684865942e-06, "loss": 0.8903, "step": 6193 }, { "epoch": 1.22240823567627, "grad_norm": 2.140625, "learning_rate": 6.412614846179734e-06, "loss": 0.9948, "step": 6194 }, { "epoch": 1.222608130731366, "grad_norm": 2.171875, "learning_rate": 6.411603944745323e-06, "loss": 1.0735, "step": 6195 }, { "epoch": 1.2228080257864622, "grad_norm": 2.15625, "learning_rate": 6.410592980607616e-06, "loss": 0.8989, "step": 6196 }, { "epoch": 1.223007920841558, "grad_norm": 1.984375, "learning_rate": 6.409581953811519e-06, "loss": 0.8938, "step": 6197 }, { "epoch": 1.2232078158966542, "grad_norm": 2.140625, "learning_rate": 6.408570864401944e-06, "loss": 1.0571, "step": 6198 }, { "epoch": 1.2234077109517503, "grad_norm": 2.15625, "learning_rate": 6.4075597124238e-06, "loss": 1.0363, "step": 6199 }, { "epoch": 1.2236076060068464, "grad_norm": 2.03125, "learning_rate": 6.4065484979220035e-06, "loss": 0.938, "step": 6200 }, { "epoch": 1.2238075010619425, "grad_norm": 2.09375, "learning_rate": 6.405537220941475e-06, "loss": 0.9658, "step": 6201 }, { "epoch": 1.2240073961170386, "grad_norm": 2.078125, "learning_rate": 6.404525881527133e-06, "loss": 0.9345, "step": 6202 }, { "epoch": 1.2242072911721347, "grad_norm": 2.109375, "learning_rate": 6.4035144797239e-06, "loss": 1.0204, "step": 6203 }, { "epoch": 1.2244071862272308, "grad_norm": 2.125, "learning_rate": 6.402503015576705e-06, "loss": 0.9567, "step": 6204 }, { "epoch": 1.2246070812823269, "grad_norm": 2.171875, "learning_rate": 6.401491489130474e-06, "loss": 0.9587, "step": 6205 }, { "epoch": 1.224806976337423, "grad_norm": 2.3125, "learning_rate": 6.400479900430141e-06, "loss": 1.0711, "step": 6206 }, { "epoch": 1.2250068713925188, "grad_norm": 2.234375, "learning_rate": 6.399468249520641e-06, "loss": 0.9917, "step": 6207 }, { "epoch": 1.225206766447615, "grad_norm": 2.390625, "learning_rate": 6.398456536446912e-06, "loss": 1.0016, "step": 6208 }, { "epoch": 1.225406661502711, "grad_norm": 2.140625, "learning_rate": 6.397444761253892e-06, "loss": 0.9884, "step": 6209 }, { "epoch": 1.2256065565578071, "grad_norm": 2.21875, "learning_rate": 6.396432923986525e-06, "loss": 1.0465, "step": 6210 }, { "epoch": 1.2258064516129032, "grad_norm": 2.046875, "learning_rate": 6.3954210246897565e-06, "loss": 0.8695, "step": 6211 }, { "epoch": 1.2260063466679993, "grad_norm": 2.25, "learning_rate": 6.3944090634085355e-06, "loss": 1.0473, "step": 6212 }, { "epoch": 1.2262062417230954, "grad_norm": 2.171875, "learning_rate": 6.393397040187812e-06, "loss": 0.928, "step": 6213 }, { "epoch": 1.2264061367781913, "grad_norm": 2.0625, "learning_rate": 6.392384955072541e-06, "loss": 0.889, "step": 6214 }, { "epoch": 1.2266060318332874, "grad_norm": 2.09375, "learning_rate": 6.39137280810768e-06, "loss": 0.9301, "step": 6215 }, { "epoch": 1.2268059268883835, "grad_norm": 2.015625, "learning_rate": 6.390360599338188e-06, "loss": 0.947, "step": 6216 }, { "epoch": 1.2270058219434796, "grad_norm": 2.203125, "learning_rate": 6.389348328809024e-06, "loss": 1.0382, "step": 6217 }, { "epoch": 1.2272057169985757, "grad_norm": 2.203125, "learning_rate": 6.388335996565158e-06, "loss": 1.0804, "step": 6218 }, { "epoch": 1.2274056120536718, "grad_norm": 2.046875, "learning_rate": 6.387323602651554e-06, "loss": 0.894, "step": 6219 }, { "epoch": 1.227605507108768, "grad_norm": 2.171875, "learning_rate": 6.386311147113185e-06, "loss": 1.0323, "step": 6220 }, { "epoch": 1.227805402163864, "grad_norm": 2.296875, "learning_rate": 6.385298629995021e-06, "loss": 1.0044, "step": 6221 }, { "epoch": 1.2280052972189601, "grad_norm": 2.375, "learning_rate": 6.3842860513420416e-06, "loss": 0.9877, "step": 6222 }, { "epoch": 1.2282051922740562, "grad_norm": 2.328125, "learning_rate": 6.3832734111992236e-06, "loss": 0.9811, "step": 6223 }, { "epoch": 1.228405087329152, "grad_norm": 2.359375, "learning_rate": 6.382260709611547e-06, "loss": 1.006, "step": 6224 }, { "epoch": 1.2286049823842482, "grad_norm": 2.21875, "learning_rate": 6.381247946623997e-06, "loss": 1.0315, "step": 6225 }, { "epoch": 1.2288048774393443, "grad_norm": 2.15625, "learning_rate": 6.380235122281563e-06, "loss": 0.9488, "step": 6226 }, { "epoch": 1.2290047724944404, "grad_norm": 2.140625, "learning_rate": 6.379222236629231e-06, "loss": 0.9502, "step": 6227 }, { "epoch": 1.2292046675495365, "grad_norm": 2.046875, "learning_rate": 6.378209289711994e-06, "loss": 0.9568, "step": 6228 }, { "epoch": 1.2294045626046326, "grad_norm": 2.21875, "learning_rate": 6.377196281574849e-06, "loss": 0.9754, "step": 6229 }, { "epoch": 1.2296044576597287, "grad_norm": 2.109375, "learning_rate": 6.376183212262791e-06, "loss": 0.9237, "step": 6230 }, { "epoch": 1.2298043527148248, "grad_norm": 2.203125, "learning_rate": 6.375170081820823e-06, "loss": 1.066, "step": 6231 }, { "epoch": 1.2300042477699207, "grad_norm": 2.15625, "learning_rate": 6.374156890293947e-06, "loss": 1.0714, "step": 6232 }, { "epoch": 1.2302041428250168, "grad_norm": 2.15625, "learning_rate": 6.373143637727166e-06, "loss": 0.9691, "step": 6233 }, { "epoch": 1.2304040378801129, "grad_norm": 2.21875, "learning_rate": 6.372130324165493e-06, "loss": 0.8767, "step": 6234 }, { "epoch": 1.230603932935209, "grad_norm": 2.1875, "learning_rate": 6.3711169496539385e-06, "loss": 0.9945, "step": 6235 }, { "epoch": 1.230803827990305, "grad_norm": 2.125, "learning_rate": 6.370103514237513e-06, "loss": 0.9998, "step": 6236 }, { "epoch": 1.2310037230454012, "grad_norm": 2.140625, "learning_rate": 6.3690900179612395e-06, "loss": 0.9744, "step": 6237 }, { "epoch": 1.2312036181004973, "grad_norm": 2.09375, "learning_rate": 6.36807646087013e-06, "loss": 1.0184, "step": 6238 }, { "epoch": 1.2314035131555934, "grad_norm": 2.140625, "learning_rate": 6.367062843009211e-06, "loss": 0.9687, "step": 6239 }, { "epoch": 1.2316034082106895, "grad_norm": 1.9921875, "learning_rate": 6.366049164423508e-06, "loss": 0.9406, "step": 6240 }, { "epoch": 1.2318033032657856, "grad_norm": 2.0625, "learning_rate": 6.365035425158046e-06, "loss": 0.9448, "step": 6241 }, { "epoch": 1.2320031983208815, "grad_norm": 2.46875, "learning_rate": 6.364021625257856e-06, "loss": 1.001, "step": 6242 }, { "epoch": 1.2322030933759776, "grad_norm": 2.1875, "learning_rate": 6.363007764767972e-06, "loss": 0.9919, "step": 6243 }, { "epoch": 1.2324029884310737, "grad_norm": 2.078125, "learning_rate": 6.361993843733427e-06, "loss": 1.0646, "step": 6244 }, { "epoch": 1.2326028834861698, "grad_norm": 2.125, "learning_rate": 6.360979862199262e-06, "loss": 0.9798, "step": 6245 }, { "epoch": 1.2328027785412659, "grad_norm": 2.046875, "learning_rate": 6.3599658202105175e-06, "loss": 0.9565, "step": 6246 }, { "epoch": 1.233002673596362, "grad_norm": 2.109375, "learning_rate": 6.3589517178122365e-06, "loss": 0.8722, "step": 6247 }, { "epoch": 1.233202568651458, "grad_norm": 2.09375, "learning_rate": 6.357937555049465e-06, "loss": 0.9004, "step": 6248 }, { "epoch": 1.2334024637065542, "grad_norm": 2.25, "learning_rate": 6.356923331967252e-06, "loss": 1.0188, "step": 6249 }, { "epoch": 1.23360235876165, "grad_norm": 2.484375, "learning_rate": 6.355909048610649e-06, "loss": 0.9297, "step": 6250 }, { "epoch": 1.2338022538167461, "grad_norm": 2.078125, "learning_rate": 6.354894705024711e-06, "loss": 0.9353, "step": 6251 }, { "epoch": 1.2340021488718422, "grad_norm": 2.546875, "learning_rate": 6.353880301254496e-06, "loss": 1.0263, "step": 6252 }, { "epoch": 1.2342020439269383, "grad_norm": 2.09375, "learning_rate": 6.352865837345061e-06, "loss": 1.0051, "step": 6253 }, { "epoch": 1.2344019389820344, "grad_norm": 2.171875, "learning_rate": 6.351851313341473e-06, "loss": 0.9922, "step": 6254 }, { "epoch": 1.2346018340371305, "grad_norm": 2.28125, "learning_rate": 6.350836729288792e-06, "loss": 0.9041, "step": 6255 }, { "epoch": 1.2348017290922266, "grad_norm": 2.234375, "learning_rate": 6.349822085232087e-06, "loss": 0.9869, "step": 6256 }, { "epoch": 1.2350016241473227, "grad_norm": 2.234375, "learning_rate": 6.348807381216431e-06, "loss": 0.9799, "step": 6257 }, { "epoch": 1.2352015192024188, "grad_norm": 2.21875, "learning_rate": 6.347792617286893e-06, "loss": 1.0542, "step": 6258 }, { "epoch": 1.235401414257515, "grad_norm": 2.15625, "learning_rate": 6.346777793488552e-06, "loss": 1.0436, "step": 6259 }, { "epoch": 1.2356013093126108, "grad_norm": 2.140625, "learning_rate": 6.345762909866485e-06, "loss": 1.0035, "step": 6260 }, { "epoch": 1.235801204367707, "grad_norm": 2.125, "learning_rate": 6.344747966465774e-06, "loss": 0.9534, "step": 6261 }, { "epoch": 1.236001099422803, "grad_norm": 2.15625, "learning_rate": 6.343732963331502e-06, "loss": 0.9065, "step": 6262 }, { "epoch": 1.236200994477899, "grad_norm": 2.078125, "learning_rate": 6.342717900508755e-06, "loss": 0.9186, "step": 6263 }, { "epoch": 1.2364008895329952, "grad_norm": 2.265625, "learning_rate": 6.341702778042622e-06, "loss": 1.046, "step": 6264 }, { "epoch": 1.2366007845880913, "grad_norm": 2.203125, "learning_rate": 6.3406875959781944e-06, "loss": 0.9073, "step": 6265 }, { "epoch": 1.2368006796431874, "grad_norm": 2.28125, "learning_rate": 6.339672354360568e-06, "loss": 0.9849, "step": 6266 }, { "epoch": 1.2370005746982833, "grad_norm": 2.078125, "learning_rate": 6.338657053234838e-06, "loss": 0.9251, "step": 6267 }, { "epoch": 1.2372004697533794, "grad_norm": 2.359375, "learning_rate": 6.337641692646106e-06, "loss": 0.9606, "step": 6268 }, { "epoch": 1.2374003648084755, "grad_norm": 2.140625, "learning_rate": 6.336626272639471e-06, "loss": 0.9163, "step": 6269 }, { "epoch": 1.2376002598635716, "grad_norm": 2.21875, "learning_rate": 6.3356107932600396e-06, "loss": 0.9725, "step": 6270 }, { "epoch": 1.2378001549186677, "grad_norm": 2.03125, "learning_rate": 6.334595254552921e-06, "loss": 0.9324, "step": 6271 }, { "epoch": 1.2380000499737638, "grad_norm": 2.21875, "learning_rate": 6.333579656563222e-06, "loss": 0.9135, "step": 6272 }, { "epoch": 1.2381999450288599, "grad_norm": 2.40625, "learning_rate": 6.332563999336059e-06, "loss": 1.0486, "step": 6273 }, { "epoch": 1.238399840083956, "grad_norm": 2.03125, "learning_rate": 6.331548282916545e-06, "loss": 0.9279, "step": 6274 }, { "epoch": 1.238599735139052, "grad_norm": 2.09375, "learning_rate": 6.330532507349798e-06, "loss": 0.9609, "step": 6275 }, { "epoch": 1.2387996301941482, "grad_norm": 2.0625, "learning_rate": 6.3295166726809396e-06, "loss": 0.8932, "step": 6276 }, { "epoch": 1.238999525249244, "grad_norm": 2.140625, "learning_rate": 6.328500778955091e-06, "loss": 0.9976, "step": 6277 }, { "epoch": 1.2391994203043402, "grad_norm": 2.140625, "learning_rate": 6.327484826217382e-06, "loss": 0.9441, "step": 6278 }, { "epoch": 1.2393993153594363, "grad_norm": 2.1875, "learning_rate": 6.326468814512937e-06, "loss": 0.9518, "step": 6279 }, { "epoch": 1.2395992104145324, "grad_norm": 2.078125, "learning_rate": 6.325452743886891e-06, "loss": 0.9586, "step": 6280 }, { "epoch": 1.2397991054696285, "grad_norm": 2.203125, "learning_rate": 6.324436614384374e-06, "loss": 1.0174, "step": 6281 }, { "epoch": 1.2399990005247246, "grad_norm": 2.078125, "learning_rate": 6.3234204260505235e-06, "loss": 0.9922, "step": 6282 }, { "epoch": 1.2401988955798207, "grad_norm": 2.0625, "learning_rate": 6.3224041789304804e-06, "loss": 1.0269, "step": 6283 }, { "epoch": 1.2403987906349168, "grad_norm": 2.109375, "learning_rate": 6.321387873069384e-06, "loss": 0.9655, "step": 6284 }, { "epoch": 1.2405986856900126, "grad_norm": 2.15625, "learning_rate": 6.3203715085123805e-06, "loss": 0.9162, "step": 6285 }, { "epoch": 1.2407985807451087, "grad_norm": 2.0625, "learning_rate": 6.319355085304615e-06, "loss": 1.0348, "step": 6286 }, { "epoch": 1.2409984758002048, "grad_norm": 3.765625, "learning_rate": 6.3183386034912364e-06, "loss": 1.0138, "step": 6287 }, { "epoch": 1.241198370855301, "grad_norm": 2.171875, "learning_rate": 6.317322063117399e-06, "loss": 1.0809, "step": 6288 }, { "epoch": 1.241398265910397, "grad_norm": 2.15625, "learning_rate": 6.316305464228256e-06, "loss": 0.9187, "step": 6289 }, { "epoch": 1.2415981609654931, "grad_norm": 2.15625, "learning_rate": 6.315288806868964e-06, "loss": 0.99, "step": 6290 }, { "epoch": 1.2417980560205892, "grad_norm": 2.203125, "learning_rate": 6.314272091084686e-06, "loss": 1.0059, "step": 6291 }, { "epoch": 1.2419979510756853, "grad_norm": 2.046875, "learning_rate": 6.31325531692058e-06, "loss": 0.9138, "step": 6292 }, { "epoch": 1.2421978461307814, "grad_norm": 2.109375, "learning_rate": 6.312238484421815e-06, "loss": 0.948, "step": 6293 }, { "epoch": 1.2423977411858775, "grad_norm": 2.21875, "learning_rate": 6.311221593633557e-06, "loss": 1.025, "step": 6294 }, { "epoch": 1.2425976362409734, "grad_norm": 2.171875, "learning_rate": 6.310204644600974e-06, "loss": 1.0143, "step": 6295 }, { "epoch": 1.2427975312960695, "grad_norm": 2.203125, "learning_rate": 6.30918763736924e-06, "loss": 0.926, "step": 6296 }, { "epoch": 1.2429974263511656, "grad_norm": 2.25, "learning_rate": 6.3081705719835316e-06, "loss": 1.0279, "step": 6297 }, { "epoch": 1.2431973214062617, "grad_norm": 2.03125, "learning_rate": 6.307153448489028e-06, "loss": 0.9368, "step": 6298 }, { "epoch": 1.2433972164613578, "grad_norm": 2.21875, "learning_rate": 6.306136266930906e-06, "loss": 0.9261, "step": 6299 }, { "epoch": 1.243597111516454, "grad_norm": 2.171875, "learning_rate": 6.305119027354349e-06, "loss": 0.9971, "step": 6300 }, { "epoch": 1.24379700657155, "grad_norm": 2.0625, "learning_rate": 6.304101729804546e-06, "loss": 0.9341, "step": 6301 }, { "epoch": 1.243996901626646, "grad_norm": 2.078125, "learning_rate": 6.303084374326685e-06, "loss": 1.0005, "step": 6302 }, { "epoch": 1.244196796681742, "grad_norm": 2.09375, "learning_rate": 6.302066960965954e-06, "loss": 0.8845, "step": 6303 }, { "epoch": 1.244396691736838, "grad_norm": 3.03125, "learning_rate": 6.301049489767545e-06, "loss": 1.0183, "step": 6304 }, { "epoch": 1.2445965867919342, "grad_norm": 2.234375, "learning_rate": 6.30003196077666e-06, "loss": 0.9922, "step": 6305 }, { "epoch": 1.2447964818470303, "grad_norm": 2.078125, "learning_rate": 6.299014374038493e-06, "loss": 0.9394, "step": 6306 }, { "epoch": 1.2449963769021264, "grad_norm": 2.109375, "learning_rate": 6.297996729598247e-06, "loss": 0.9718, "step": 6307 }, { "epoch": 1.2451962719572225, "grad_norm": 2.203125, "learning_rate": 6.2969790275011245e-06, "loss": 1.0739, "step": 6308 }, { "epoch": 1.2453961670123186, "grad_norm": 2.109375, "learning_rate": 6.295961267792332e-06, "loss": 0.9442, "step": 6309 }, { "epoch": 1.2455960620674147, "grad_norm": 2.0625, "learning_rate": 6.294943450517078e-06, "loss": 0.926, "step": 6310 }, { "epoch": 1.2457959571225108, "grad_norm": 2.078125, "learning_rate": 6.293925575720575e-06, "loss": 0.991, "step": 6311 }, { "epoch": 1.2459958521776067, "grad_norm": 2.078125, "learning_rate": 6.292907643448035e-06, "loss": 0.9396, "step": 6312 }, { "epoch": 1.2461957472327028, "grad_norm": 2.171875, "learning_rate": 6.291889653744677e-06, "loss": 1.037, "step": 6313 }, { "epoch": 1.2463956422877989, "grad_norm": 2.1875, "learning_rate": 6.290871606655718e-06, "loss": 0.9848, "step": 6314 }, { "epoch": 1.246595537342895, "grad_norm": 2.09375, "learning_rate": 6.2898535022263795e-06, "loss": 0.9971, "step": 6315 }, { "epoch": 1.246795432397991, "grad_norm": 2.09375, "learning_rate": 6.288835340501886e-06, "loss": 1.0538, "step": 6316 }, { "epoch": 1.2469953274530872, "grad_norm": 2.125, "learning_rate": 6.287817121527465e-06, "loss": 1.141, "step": 6317 }, { "epoch": 1.2471952225081833, "grad_norm": 2.234375, "learning_rate": 6.286798845348345e-06, "loss": 1.0421, "step": 6318 }, { "epoch": 1.2473951175632794, "grad_norm": 2.109375, "learning_rate": 6.285780512009758e-06, "loss": 0.9491, "step": 6319 }, { "epoch": 1.2475950126183752, "grad_norm": 2.140625, "learning_rate": 6.284762121556937e-06, "loss": 0.9259, "step": 6320 }, { "epoch": 1.2477949076734713, "grad_norm": 2.125, "learning_rate": 6.283743674035121e-06, "loss": 0.9349, "step": 6321 }, { "epoch": 1.2479948027285674, "grad_norm": 2.09375, "learning_rate": 6.282725169489547e-06, "loss": 0.8958, "step": 6322 }, { "epoch": 1.2481946977836635, "grad_norm": 2.0625, "learning_rate": 6.281706607965459e-06, "loss": 0.8789, "step": 6323 }, { "epoch": 1.2483945928387596, "grad_norm": 2.234375, "learning_rate": 6.2806879895081006e-06, "loss": 1.0401, "step": 6324 }, { "epoch": 1.2485944878938557, "grad_norm": 2.203125, "learning_rate": 6.279669314162717e-06, "loss": 0.9825, "step": 6325 }, { "epoch": 1.2487943829489518, "grad_norm": 2.03125, "learning_rate": 6.2786505819745604e-06, "loss": 0.8916, "step": 6326 }, { "epoch": 1.248994278004048, "grad_norm": 2.28125, "learning_rate": 6.277631792988882e-06, "loss": 0.985, "step": 6327 }, { "epoch": 1.249194173059144, "grad_norm": 2.28125, "learning_rate": 6.276612947250934e-06, "loss": 0.9907, "step": 6328 }, { "epoch": 1.2493940681142401, "grad_norm": 2.09375, "learning_rate": 6.275594044805976e-06, "loss": 1.0056, "step": 6329 }, { "epoch": 1.249593963169336, "grad_norm": 2.171875, "learning_rate": 6.274575085699267e-06, "loss": 0.9899, "step": 6330 }, { "epoch": 1.2497938582244321, "grad_norm": 2.0625, "learning_rate": 6.2735560699760676e-06, "loss": 0.9518, "step": 6331 }, { "epoch": 1.2499937532795282, "grad_norm": 2.15625, "learning_rate": 6.2725369976816455e-06, "loss": 1.0028, "step": 6332 }, { "epoch": 1.2501936483346243, "grad_norm": 2.125, "learning_rate": 6.271517868861266e-06, "loss": 0.9737, "step": 6333 }, { "epoch": 1.2503935433897204, "grad_norm": 2.140625, "learning_rate": 6.270498683560195e-06, "loss": 0.944, "step": 6334 }, { "epoch": 1.2505934384448165, "grad_norm": 2.25, "learning_rate": 6.269479441823712e-06, "loss": 0.9353, "step": 6335 }, { "epoch": 1.2507933334999126, "grad_norm": 2.171875, "learning_rate": 6.268460143697086e-06, "loss": 1.0613, "step": 6336 }, { "epoch": 1.2509932285550085, "grad_norm": 2.21875, "learning_rate": 6.267440789225596e-06, "loss": 0.9998, "step": 6337 }, { "epoch": 1.2511931236101046, "grad_norm": 2.046875, "learning_rate": 6.266421378454524e-06, "loss": 0.9825, "step": 6338 }, { "epoch": 1.2513930186652007, "grad_norm": 2.203125, "learning_rate": 6.265401911429147e-06, "loss": 0.8972, "step": 6339 }, { "epoch": 1.2515929137202968, "grad_norm": 2.296875, "learning_rate": 6.264382388194753e-06, "loss": 1.0687, "step": 6340 }, { "epoch": 1.251792808775393, "grad_norm": 2.28125, "learning_rate": 6.263362808796627e-06, "loss": 1.0331, "step": 6341 }, { "epoch": 1.251992703830489, "grad_norm": 2.1875, "learning_rate": 6.262343173280062e-06, "loss": 0.9854, "step": 6342 }, { "epoch": 1.252192598885585, "grad_norm": 2.1875, "learning_rate": 6.261323481690347e-06, "loss": 1.0489, "step": 6343 }, { "epoch": 1.2523924939406812, "grad_norm": 2.09375, "learning_rate": 6.260303734072778e-06, "loss": 0.9546, "step": 6344 }, { "epoch": 1.2525923889957773, "grad_norm": 2.109375, "learning_rate": 6.2592839304726515e-06, "loss": 1.035, "step": 6345 }, { "epoch": 1.2527922840508734, "grad_norm": 1.9921875, "learning_rate": 6.258264070935267e-06, "loss": 0.933, "step": 6346 }, { "epoch": 1.2529921791059695, "grad_norm": 2.140625, "learning_rate": 6.257244155505928e-06, "loss": 0.9164, "step": 6347 }, { "epoch": 1.2531920741610654, "grad_norm": 2.1875, "learning_rate": 6.256224184229936e-06, "loss": 0.984, "step": 6348 }, { "epoch": 1.2533919692161615, "grad_norm": 2.125, "learning_rate": 6.255204157152601e-06, "loss": 0.9565, "step": 6349 }, { "epoch": 1.2535918642712576, "grad_norm": 2.140625, "learning_rate": 6.254184074319231e-06, "loss": 1.0095, "step": 6350 }, { "epoch": 1.2537917593263537, "grad_norm": 2.09375, "learning_rate": 6.253163935775139e-06, "loss": 0.994, "step": 6351 }, { "epoch": 1.2539916543814498, "grad_norm": 2.140625, "learning_rate": 6.252143741565639e-06, "loss": 0.9789, "step": 6352 }, { "epoch": 1.2541915494365459, "grad_norm": 2.125, "learning_rate": 6.251123491736048e-06, "loss": 0.9684, "step": 6353 }, { "epoch": 1.2543914444916417, "grad_norm": 2.140625, "learning_rate": 6.250103186331684e-06, "loss": 1.0011, "step": 6354 }, { "epoch": 1.2545913395467378, "grad_norm": 2.09375, "learning_rate": 6.249082825397871e-06, "loss": 0.9764, "step": 6355 }, { "epoch": 1.254791234601834, "grad_norm": 2.109375, "learning_rate": 6.248062408979933e-06, "loss": 0.988, "step": 6356 }, { "epoch": 1.25499112965693, "grad_norm": 2.171875, "learning_rate": 6.247041937123194e-06, "loss": 0.992, "step": 6357 }, { "epoch": 1.2551910247120261, "grad_norm": 2.125, "learning_rate": 6.246021409872987e-06, "loss": 0.9474, "step": 6358 }, { "epoch": 1.2553909197671222, "grad_norm": 2.171875, "learning_rate": 6.2450008272746395e-06, "loss": 1.0386, "step": 6359 }, { "epoch": 1.2555908148222183, "grad_norm": 2.09375, "learning_rate": 6.24398018937349e-06, "loss": 0.9926, "step": 6360 }, { "epoch": 1.2557907098773144, "grad_norm": 2.140625, "learning_rate": 6.242959496214874e-06, "loss": 1.0452, "step": 6361 }, { "epoch": 1.2559906049324105, "grad_norm": 2.015625, "learning_rate": 6.241938747844129e-06, "loss": 0.9355, "step": 6362 }, { "epoch": 1.2561904999875066, "grad_norm": 2.125, "learning_rate": 6.240917944306597e-06, "loss": 0.9983, "step": 6363 }, { "epoch": 1.2563903950426027, "grad_norm": 2.15625, "learning_rate": 6.239897085647624e-06, "loss": 1.0291, "step": 6364 }, { "epoch": 1.2565902900976986, "grad_norm": 2.25, "learning_rate": 6.238876171912553e-06, "loss": 0.9773, "step": 6365 }, { "epoch": 1.2567901851527947, "grad_norm": 2.03125, "learning_rate": 6.237855203146737e-06, "loss": 0.9554, "step": 6366 }, { "epoch": 1.2569900802078908, "grad_norm": 2.34375, "learning_rate": 6.2368341793955235e-06, "loss": 0.9327, "step": 6367 }, { "epoch": 1.257189975262987, "grad_norm": 2.03125, "learning_rate": 6.235813100704267e-06, "loss": 0.9893, "step": 6368 }, { "epoch": 1.257389870318083, "grad_norm": 2.125, "learning_rate": 6.2347919671183265e-06, "loss": 1.056, "step": 6369 }, { "epoch": 1.2575897653731791, "grad_norm": 2.28125, "learning_rate": 6.233770778683058e-06, "loss": 0.9212, "step": 6370 }, { "epoch": 1.2577896604282752, "grad_norm": 1.9765625, "learning_rate": 6.2327495354438235e-06, "loss": 0.9166, "step": 6371 }, { "epoch": 1.257989555483371, "grad_norm": 2.21875, "learning_rate": 6.231728237445987e-06, "loss": 0.9113, "step": 6372 }, { "epoch": 1.2581894505384672, "grad_norm": 2.125, "learning_rate": 6.230706884734913e-06, "loss": 0.9421, "step": 6373 }, { "epoch": 1.2583893455935633, "grad_norm": 2.1875, "learning_rate": 6.2296854773559685e-06, "loss": 0.9322, "step": 6374 }, { "epoch": 1.2585892406486594, "grad_norm": 2.234375, "learning_rate": 6.22866401535453e-06, "loss": 1.1273, "step": 6375 }, { "epoch": 1.2587891357037555, "grad_norm": 2.359375, "learning_rate": 6.227642498775965e-06, "loss": 1.0321, "step": 6376 }, { "epoch": 1.2589890307588516, "grad_norm": 2.109375, "learning_rate": 6.226620927665652e-06, "loss": 0.9332, "step": 6377 }, { "epoch": 1.2591889258139477, "grad_norm": 2.140625, "learning_rate": 6.225599302068968e-06, "loss": 0.9504, "step": 6378 }, { "epoch": 1.2593888208690438, "grad_norm": 2.203125, "learning_rate": 6.224577622031294e-06, "loss": 0.9156, "step": 6379 }, { "epoch": 1.25958871592414, "grad_norm": 2.1875, "learning_rate": 6.223555887598013e-06, "loss": 0.9659, "step": 6380 }, { "epoch": 1.259788610979236, "grad_norm": 2.1875, "learning_rate": 6.22253409881451e-06, "loss": 0.9739, "step": 6381 }, { "epoch": 1.259988506034332, "grad_norm": 1.9609375, "learning_rate": 6.221512255726173e-06, "loss": 0.8863, "step": 6382 }, { "epoch": 1.260188401089428, "grad_norm": 2.25, "learning_rate": 6.2204903583783924e-06, "loss": 0.9899, "step": 6383 }, { "epoch": 1.260388296144524, "grad_norm": 2.140625, "learning_rate": 6.21946840681656e-06, "loss": 1.0136, "step": 6384 }, { "epoch": 1.2605881911996202, "grad_norm": 2.109375, "learning_rate": 6.218446401086071e-06, "loss": 1.0317, "step": 6385 }, { "epoch": 1.2607880862547163, "grad_norm": 2.25, "learning_rate": 6.217424341232324e-06, "loss": 1.0341, "step": 6386 }, { "epoch": 1.2609879813098124, "grad_norm": 2.03125, "learning_rate": 6.216402227300717e-06, "loss": 0.9551, "step": 6387 }, { "epoch": 1.2611878763649085, "grad_norm": 2.125, "learning_rate": 6.215380059336653e-06, "loss": 0.9913, "step": 6388 }, { "epoch": 1.2613877714200046, "grad_norm": 2.109375, "learning_rate": 6.214357837385538e-06, "loss": 0.8355, "step": 6389 }, { "epoch": 1.2615876664751005, "grad_norm": 2.0625, "learning_rate": 6.213335561492775e-06, "loss": 0.9165, "step": 6390 }, { "epoch": 1.2617875615301966, "grad_norm": 2.09375, "learning_rate": 6.212313231703777e-06, "loss": 0.9236, "step": 6391 }, { "epoch": 1.2619874565852927, "grad_norm": 2.15625, "learning_rate": 6.211290848063955e-06, "loss": 1.0185, "step": 6392 }, { "epoch": 1.2621873516403888, "grad_norm": 2.234375, "learning_rate": 6.210268410618723e-06, "loss": 1.0065, "step": 6393 }, { "epoch": 1.2623872466954849, "grad_norm": 2.078125, "learning_rate": 6.209245919413497e-06, "loss": 0.9274, "step": 6394 }, { "epoch": 1.262587141750581, "grad_norm": 2.0625, "learning_rate": 6.208223374493697e-06, "loss": 0.9656, "step": 6395 }, { "epoch": 1.262787036805677, "grad_norm": 2.0625, "learning_rate": 6.2072007759047426e-06, "loss": 0.9707, "step": 6396 }, { "epoch": 1.2629869318607732, "grad_norm": 2.140625, "learning_rate": 6.20617812369206e-06, "loss": 0.9844, "step": 6397 }, { "epoch": 1.2631868269158693, "grad_norm": 2.15625, "learning_rate": 6.205155417901074e-06, "loss": 0.9175, "step": 6398 }, { "epoch": 1.2633867219709654, "grad_norm": 2.515625, "learning_rate": 6.204132658577212e-06, "loss": 0.9726, "step": 6399 }, { "epoch": 1.2635866170260615, "grad_norm": 2.1875, "learning_rate": 6.203109845765907e-06, "loss": 0.9825, "step": 6400 }, { "epoch": 1.2637865120811573, "grad_norm": 2.28125, "learning_rate": 6.202086979512589e-06, "loss": 0.976, "step": 6401 }, { "epoch": 1.2639864071362534, "grad_norm": 2.09375, "learning_rate": 6.201064059862699e-06, "loss": 1.0102, "step": 6402 }, { "epoch": 1.2641863021913495, "grad_norm": 2.078125, "learning_rate": 6.20004108686167e-06, "loss": 0.9396, "step": 6403 }, { "epoch": 1.2643861972464456, "grad_norm": 2.234375, "learning_rate": 6.199018060554945e-06, "loss": 1.0043, "step": 6404 }, { "epoch": 1.2645860923015417, "grad_norm": 2.203125, "learning_rate": 6.197994980987964e-06, "loss": 0.9013, "step": 6405 }, { "epoch": 1.2647859873566378, "grad_norm": 2.203125, "learning_rate": 6.196971848206175e-06, "loss": 1.0221, "step": 6406 }, { "epoch": 1.2649858824117337, "grad_norm": 2.140625, "learning_rate": 6.195948662255024e-06, "loss": 0.8652, "step": 6407 }, { "epoch": 1.2651857774668298, "grad_norm": 2.078125, "learning_rate": 6.194925423179961e-06, "loss": 0.9215, "step": 6408 }, { "epoch": 1.265385672521926, "grad_norm": 2.078125, "learning_rate": 6.193902131026439e-06, "loss": 1.0518, "step": 6409 }, { "epoch": 1.265585567577022, "grad_norm": 2.125, "learning_rate": 6.192878785839911e-06, "loss": 0.9335, "step": 6410 }, { "epoch": 1.265785462632118, "grad_norm": 2.171875, "learning_rate": 6.191855387665836e-06, "loss": 1.0277, "step": 6411 }, { "epoch": 1.2659853576872142, "grad_norm": 2.234375, "learning_rate": 6.190831936549671e-06, "loss": 0.9709, "step": 6412 }, { "epoch": 1.2661852527423103, "grad_norm": 2.3125, "learning_rate": 6.189808432536879e-06, "loss": 1.0246, "step": 6413 }, { "epoch": 1.2663851477974064, "grad_norm": 2.109375, "learning_rate": 6.188784875672923e-06, "loss": 0.9244, "step": 6414 }, { "epoch": 1.2665850428525025, "grad_norm": 2.078125, "learning_rate": 6.1877612660032706e-06, "loss": 0.9935, "step": 6415 }, { "epoch": 1.2667849379075986, "grad_norm": 2.21875, "learning_rate": 6.1867376035733885e-06, "loss": 0.975, "step": 6416 }, { "epoch": 1.2669848329626947, "grad_norm": 2.078125, "learning_rate": 6.185713888428751e-06, "loss": 0.9996, "step": 6417 }, { "epoch": 1.2671847280177906, "grad_norm": 2.09375, "learning_rate": 6.1846901206148265e-06, "loss": 0.9165, "step": 6418 }, { "epoch": 1.2673846230728867, "grad_norm": 2.046875, "learning_rate": 6.183666300177095e-06, "loss": 0.92, "step": 6419 }, { "epoch": 1.2675845181279828, "grad_norm": 2.1875, "learning_rate": 6.182642427161033e-06, "loss": 0.9924, "step": 6420 }, { "epoch": 1.2677844131830789, "grad_norm": 2.375, "learning_rate": 6.181618501612119e-06, "loss": 1.0589, "step": 6421 }, { "epoch": 1.267984308238175, "grad_norm": 2.21875, "learning_rate": 6.180594523575838e-06, "loss": 0.9984, "step": 6422 }, { "epoch": 1.268184203293271, "grad_norm": 2.09375, "learning_rate": 6.179570493097675e-06, "loss": 0.9574, "step": 6423 }, { "epoch": 1.2683840983483672, "grad_norm": 2.28125, "learning_rate": 6.178546410223116e-06, "loss": 0.9633, "step": 6424 }, { "epoch": 1.268583993403463, "grad_norm": 2.0625, "learning_rate": 6.177522274997651e-06, "loss": 0.9466, "step": 6425 }, { "epoch": 1.2687838884585592, "grad_norm": 2.140625, "learning_rate": 6.176498087466775e-06, "loss": 0.938, "step": 6426 }, { "epoch": 1.2689837835136553, "grad_norm": 2.28125, "learning_rate": 6.1754738476759764e-06, "loss": 1.0154, "step": 6427 }, { "epoch": 1.2691836785687514, "grad_norm": 2.109375, "learning_rate": 6.174449555670757e-06, "loss": 0.9987, "step": 6428 }, { "epoch": 1.2693835736238475, "grad_norm": 2.171875, "learning_rate": 6.173425211496613e-06, "loss": 0.9349, "step": 6429 }, { "epoch": 1.2695834686789436, "grad_norm": 2.140625, "learning_rate": 6.1724008151990465e-06, "loss": 1.0085, "step": 6430 }, { "epoch": 1.2697833637340397, "grad_norm": 2.0, "learning_rate": 6.171376366823562e-06, "loss": 0.9886, "step": 6431 }, { "epoch": 1.2699832587891358, "grad_norm": 2.28125, "learning_rate": 6.1703518664156636e-06, "loss": 1.0036, "step": 6432 }, { "epoch": 1.2701831538442319, "grad_norm": 2.25, "learning_rate": 6.16932731402086e-06, "loss": 0.9389, "step": 6433 }, { "epoch": 1.270383048899328, "grad_norm": 2.3125, "learning_rate": 6.168302709684663e-06, "loss": 0.9765, "step": 6434 }, { "epoch": 1.270582943954424, "grad_norm": 2.234375, "learning_rate": 6.1672780534525825e-06, "loss": 1.0638, "step": 6435 }, { "epoch": 1.27078283900952, "grad_norm": 2.0625, "learning_rate": 6.166253345370137e-06, "loss": 0.9807, "step": 6436 }, { "epoch": 1.270982734064616, "grad_norm": 2.109375, "learning_rate": 6.165228585482842e-06, "loss": 0.9761, "step": 6437 }, { "epoch": 1.2711826291197121, "grad_norm": 2.0625, "learning_rate": 6.164203773836217e-06, "loss": 0.8968, "step": 6438 }, { "epoch": 1.2713825241748082, "grad_norm": 2.140625, "learning_rate": 6.163178910475786e-06, "loss": 0.9883, "step": 6439 }, { "epoch": 1.2715824192299043, "grad_norm": 2.09375, "learning_rate": 6.162153995447071e-06, "loss": 0.9872, "step": 6440 }, { "epoch": 1.2717823142850004, "grad_norm": 2.359375, "learning_rate": 6.1611290287956e-06, "loss": 1.0828, "step": 6441 }, { "epoch": 1.2719822093400963, "grad_norm": 2.171875, "learning_rate": 6.160104010566902e-06, "loss": 1.0257, "step": 6442 }, { "epoch": 1.2721821043951924, "grad_norm": 2.171875, "learning_rate": 6.159078940806507e-06, "loss": 0.9679, "step": 6443 }, { "epoch": 1.2723819994502885, "grad_norm": 2.140625, "learning_rate": 6.15805381955995e-06, "loss": 0.9412, "step": 6444 }, { "epoch": 1.2725818945053846, "grad_norm": 2.296875, "learning_rate": 6.157028646872765e-06, "loss": 1.0561, "step": 6445 }, { "epoch": 1.2727817895604807, "grad_norm": 2.140625, "learning_rate": 6.156003422790492e-06, "loss": 0.9577, "step": 6446 }, { "epoch": 1.2729816846155768, "grad_norm": 2.125, "learning_rate": 6.15497814735867e-06, "loss": 0.9266, "step": 6447 }, { "epoch": 1.273181579670673, "grad_norm": 2.09375, "learning_rate": 6.153952820622844e-06, "loss": 0.9729, "step": 6448 }, { "epoch": 1.273381474725769, "grad_norm": 2.125, "learning_rate": 6.1529274426285546e-06, "loss": 0.9299, "step": 6449 }, { "epoch": 1.2735813697808651, "grad_norm": 2.046875, "learning_rate": 6.151902013421351e-06, "loss": 1.0202, "step": 6450 }, { "epoch": 1.2737812648359612, "grad_norm": 2.109375, "learning_rate": 6.150876533046784e-06, "loss": 0.9122, "step": 6451 }, { "epoch": 1.2739811598910573, "grad_norm": 2.140625, "learning_rate": 6.149851001550404e-06, "loss": 0.8825, "step": 6452 }, { "epoch": 1.2741810549461532, "grad_norm": 2.171875, "learning_rate": 6.1488254189777665e-06, "loss": 0.9751, "step": 6453 }, { "epoch": 1.2743809500012493, "grad_norm": 2.203125, "learning_rate": 6.147799785374425e-06, "loss": 1.0231, "step": 6454 }, { "epoch": 1.2745808450563454, "grad_norm": 2.0625, "learning_rate": 6.1467741007859405e-06, "loss": 1.0558, "step": 6455 }, { "epoch": 1.2747807401114415, "grad_norm": 2.046875, "learning_rate": 6.145748365257873e-06, "loss": 0.9363, "step": 6456 }, { "epoch": 1.2749806351665376, "grad_norm": 2.15625, "learning_rate": 6.1447225788357845e-06, "loss": 0.9968, "step": 6457 }, { "epoch": 1.2751805302216337, "grad_norm": 2.09375, "learning_rate": 6.143696741565241e-06, "loss": 0.9579, "step": 6458 }, { "epoch": 1.2753804252767298, "grad_norm": 2.078125, "learning_rate": 6.142670853491813e-06, "loss": 0.9892, "step": 6459 }, { "epoch": 1.2755803203318257, "grad_norm": 2.203125, "learning_rate": 6.141644914661066e-06, "loss": 0.9936, "step": 6460 }, { "epoch": 1.2757802153869218, "grad_norm": 2.21875, "learning_rate": 6.140618925118574e-06, "loss": 1.0118, "step": 6461 }, { "epoch": 1.2759801104420179, "grad_norm": 2.140625, "learning_rate": 6.13959288490991e-06, "loss": 0.9798, "step": 6462 }, { "epoch": 1.276180005497114, "grad_norm": 2.046875, "learning_rate": 6.138566794080655e-06, "loss": 0.8971, "step": 6463 }, { "epoch": 1.27637990055221, "grad_norm": 2.15625, "learning_rate": 6.137540652676382e-06, "loss": 0.9741, "step": 6464 }, { "epoch": 1.2765797956073062, "grad_norm": 2.109375, "learning_rate": 6.136514460742675e-06, "loss": 0.9904, "step": 6465 }, { "epoch": 1.2767796906624023, "grad_norm": 2.078125, "learning_rate": 6.135488218325116e-06, "loss": 1.0251, "step": 6466 }, { "epoch": 1.2769795857174984, "grad_norm": 2.21875, "learning_rate": 6.134461925469293e-06, "loss": 1.0236, "step": 6467 }, { "epoch": 1.2771794807725945, "grad_norm": 2.015625, "learning_rate": 6.133435582220794e-06, "loss": 0.9359, "step": 6468 }, { "epoch": 1.2773793758276906, "grad_norm": 2.203125, "learning_rate": 6.132409188625205e-06, "loss": 1.0094, "step": 6469 }, { "epoch": 1.2775792708827867, "grad_norm": 2.140625, "learning_rate": 6.13138274472812e-06, "loss": 1.0281, "step": 6470 }, { "epoch": 1.2777791659378825, "grad_norm": 2.1875, "learning_rate": 6.130356250575137e-06, "loss": 1.0421, "step": 6471 }, { "epoch": 1.2779790609929786, "grad_norm": 2.171875, "learning_rate": 6.129329706211849e-06, "loss": 1.0368, "step": 6472 }, { "epoch": 1.2781789560480747, "grad_norm": 2.265625, "learning_rate": 6.1283031116838565e-06, "loss": 0.9902, "step": 6473 }, { "epoch": 1.2783788511031708, "grad_norm": 2.0625, "learning_rate": 6.127276467036759e-06, "loss": 0.9434, "step": 6474 }, { "epoch": 1.278578746158267, "grad_norm": 2.203125, "learning_rate": 6.126249772316161e-06, "loss": 0.9404, "step": 6475 }, { "epoch": 1.278778641213363, "grad_norm": 2.171875, "learning_rate": 6.1252230275676705e-06, "loss": 0.9842, "step": 6476 }, { "epoch": 1.278978536268459, "grad_norm": 2.15625, "learning_rate": 6.124196232836892e-06, "loss": 1.0444, "step": 6477 }, { "epoch": 1.279178431323555, "grad_norm": 2.140625, "learning_rate": 6.123169388169437e-06, "loss": 0.9739, "step": 6478 }, { "epoch": 1.2793783263786511, "grad_norm": 2.3125, "learning_rate": 6.122142493610918e-06, "loss": 1.0631, "step": 6479 }, { "epoch": 1.2795782214337472, "grad_norm": 2.0625, "learning_rate": 6.12111554920695e-06, "loss": 0.9143, "step": 6480 }, { "epoch": 1.2797781164888433, "grad_norm": 2.25, "learning_rate": 6.120088555003148e-06, "loss": 1.0243, "step": 6481 }, { "epoch": 1.2799780115439394, "grad_norm": 2.25, "learning_rate": 6.119061511045133e-06, "loss": 1.0501, "step": 6482 }, { "epoch": 1.2801779065990355, "grad_norm": 2.125, "learning_rate": 6.118034417378523e-06, "loss": 1.0083, "step": 6483 }, { "epoch": 1.2803778016541316, "grad_norm": 2.125, "learning_rate": 6.117007274048945e-06, "loss": 0.9384, "step": 6484 }, { "epoch": 1.2805776967092277, "grad_norm": 2.0, "learning_rate": 6.115980081102024e-06, "loss": 0.9705, "step": 6485 }, { "epoch": 1.2807775917643238, "grad_norm": 2.203125, "learning_rate": 6.114952838583386e-06, "loss": 1.0992, "step": 6486 }, { "epoch": 1.28097748681942, "grad_norm": 2.046875, "learning_rate": 6.1139255465386614e-06, "loss": 0.9487, "step": 6487 }, { "epoch": 1.2811773818745158, "grad_norm": 2.203125, "learning_rate": 6.1128982050134845e-06, "loss": 0.9714, "step": 6488 }, { "epoch": 1.281377276929612, "grad_norm": 2.171875, "learning_rate": 6.111870814053487e-06, "loss": 0.9044, "step": 6489 }, { "epoch": 1.281577171984708, "grad_norm": 2.125, "learning_rate": 6.110843373704307e-06, "loss": 0.9707, "step": 6490 }, { "epoch": 1.281777067039804, "grad_norm": 2.28125, "learning_rate": 6.109815884011583e-06, "loss": 0.9192, "step": 6491 }, { "epoch": 1.2819769620949002, "grad_norm": 2.03125, "learning_rate": 6.108788345020955e-06, "loss": 0.8727, "step": 6492 }, { "epoch": 1.2821768571499963, "grad_norm": 2.390625, "learning_rate": 6.10776075677807e-06, "loss": 1.0429, "step": 6493 }, { "epoch": 1.2823767522050924, "grad_norm": 2.25, "learning_rate": 6.106733119328568e-06, "loss": 1.0389, "step": 6494 }, { "epoch": 1.2825766472601883, "grad_norm": 2.140625, "learning_rate": 6.1057054327181e-06, "loss": 1.047, "step": 6495 }, { "epoch": 1.2827765423152844, "grad_norm": 2.03125, "learning_rate": 6.104677696992315e-06, "loss": 0.8999, "step": 6496 }, { "epoch": 1.2829764373703805, "grad_norm": 2.078125, "learning_rate": 6.103649912196864e-06, "loss": 0.9554, "step": 6497 }, { "epoch": 1.2831763324254766, "grad_norm": 2.234375, "learning_rate": 6.102622078377401e-06, "loss": 1.0413, "step": 6498 }, { "epoch": 1.2833762274805727, "grad_norm": 2.15625, "learning_rate": 6.101594195579585e-06, "loss": 1.0102, "step": 6499 }, { "epoch": 1.2835761225356688, "grad_norm": 2.046875, "learning_rate": 6.1005662638490715e-06, "loss": 0.9513, "step": 6500 }, { "epoch": 1.2837760175907649, "grad_norm": 2.1875, "learning_rate": 6.099538283231523e-06, "loss": 1.0219, "step": 6501 }, { "epoch": 1.283975912645861, "grad_norm": 2.203125, "learning_rate": 6.098510253772599e-06, "loss": 0.9539, "step": 6502 }, { "epoch": 1.284175807700957, "grad_norm": 2.15625, "learning_rate": 6.097482175517968e-06, "loss": 0.9664, "step": 6503 }, { "epoch": 1.2843757027560532, "grad_norm": 2.265625, "learning_rate": 6.096454048513297e-06, "loss": 0.9948, "step": 6504 }, { "epoch": 1.2845755978111493, "grad_norm": 2.046875, "learning_rate": 6.0954258728042535e-06, "loss": 0.9397, "step": 6505 }, { "epoch": 1.2847754928662452, "grad_norm": 2.1875, "learning_rate": 6.0943976484365095e-06, "loss": 1.0426, "step": 6506 }, { "epoch": 1.2849753879213412, "grad_norm": 2.140625, "learning_rate": 6.09336937545574e-06, "loss": 0.9307, "step": 6507 }, { "epoch": 1.2851752829764373, "grad_norm": 2.109375, "learning_rate": 6.092341053907618e-06, "loss": 0.9363, "step": 6508 }, { "epoch": 1.2853751780315334, "grad_norm": 2.078125, "learning_rate": 6.091312683837823e-06, "loss": 0.9501, "step": 6509 }, { "epoch": 1.2855750730866295, "grad_norm": 2.1875, "learning_rate": 6.090284265292038e-06, "loss": 0.9455, "step": 6510 }, { "epoch": 1.2857749681417256, "grad_norm": 2.234375, "learning_rate": 6.089255798315941e-06, "loss": 1.0456, "step": 6511 }, { "epoch": 1.2859748631968217, "grad_norm": 2.125, "learning_rate": 6.088227282955216e-06, "loss": 0.9585, "step": 6512 }, { "epoch": 1.2861747582519176, "grad_norm": 2.34375, "learning_rate": 6.0871987192555524e-06, "loss": 1.0309, "step": 6513 }, { "epoch": 1.2863746533070137, "grad_norm": 2.15625, "learning_rate": 6.086170107262638e-06, "loss": 0.9564, "step": 6514 }, { "epoch": 1.2865745483621098, "grad_norm": 2.203125, "learning_rate": 6.085141447022162e-06, "loss": 1.031, "step": 6515 }, { "epoch": 1.286774443417206, "grad_norm": 2.09375, "learning_rate": 6.084112738579821e-06, "loss": 1.0041, "step": 6516 }, { "epoch": 1.286974338472302, "grad_norm": 2.15625, "learning_rate": 6.083083981981307e-06, "loss": 1.0031, "step": 6517 }, { "epoch": 1.2871742335273981, "grad_norm": 2.125, "learning_rate": 6.0820551772723195e-06, "loss": 1.0031, "step": 6518 }, { "epoch": 1.2873741285824942, "grad_norm": 2.234375, "learning_rate": 6.081026324498554e-06, "loss": 0.9412, "step": 6519 }, { "epoch": 1.2875740236375903, "grad_norm": 2.171875, "learning_rate": 6.0799974237057155e-06, "loss": 1.1451, "step": 6520 }, { "epoch": 1.2877739186926864, "grad_norm": 2.1875, "learning_rate": 6.078968474939508e-06, "loss": 1.0042, "step": 6521 }, { "epoch": 1.2879738137477825, "grad_norm": 2.140625, "learning_rate": 6.0779394782456355e-06, "loss": 0.9027, "step": 6522 }, { "epoch": 1.2881737088028786, "grad_norm": 2.078125, "learning_rate": 6.076910433669806e-06, "loss": 0.9414, "step": 6523 }, { "epoch": 1.2883736038579745, "grad_norm": 2.15625, "learning_rate": 6.0758813412577315e-06, "loss": 1.0103, "step": 6524 }, { "epoch": 1.2885734989130706, "grad_norm": 2.265625, "learning_rate": 6.074852201055121e-06, "loss": 1.0374, "step": 6525 }, { "epoch": 1.2887733939681667, "grad_norm": 2.109375, "learning_rate": 6.0738230131076915e-06, "loss": 1.066, "step": 6526 }, { "epoch": 1.2889732890232628, "grad_norm": 2.3125, "learning_rate": 6.072793777461159e-06, "loss": 1.0472, "step": 6527 }, { "epoch": 1.289173184078359, "grad_norm": 2.203125, "learning_rate": 6.07176449416124e-06, "loss": 0.9168, "step": 6528 }, { "epoch": 1.289373079133455, "grad_norm": 2.125, "learning_rate": 6.070735163253656e-06, "loss": 0.9151, "step": 6529 }, { "epoch": 1.2895729741885509, "grad_norm": 2.140625, "learning_rate": 6.069705784784133e-06, "loss": 1.0055, "step": 6530 }, { "epoch": 1.289772869243647, "grad_norm": 2.03125, "learning_rate": 6.068676358798391e-06, "loss": 0.9039, "step": 6531 }, { "epoch": 1.289972764298743, "grad_norm": 2.203125, "learning_rate": 6.067646885342161e-06, "loss": 0.9601, "step": 6532 }, { "epoch": 1.2901726593538392, "grad_norm": 2.203125, "learning_rate": 6.066617364461169e-06, "loss": 0.8968, "step": 6533 }, { "epoch": 1.2903725544089353, "grad_norm": 2.015625, "learning_rate": 6.065587796201148e-06, "loss": 0.9923, "step": 6534 }, { "epoch": 1.2905724494640314, "grad_norm": 2.234375, "learning_rate": 6.0645581806078315e-06, "loss": 1.0246, "step": 6535 }, { "epoch": 1.2907723445191275, "grad_norm": 2.1875, "learning_rate": 6.063528517726953e-06, "loss": 0.9303, "step": 6536 }, { "epoch": 1.2909722395742236, "grad_norm": 2.234375, "learning_rate": 6.0624988076042525e-06, "loss": 0.9379, "step": 6537 }, { "epoch": 1.2911721346293197, "grad_norm": 2.046875, "learning_rate": 6.061469050285469e-06, "loss": 0.9697, "step": 6538 }, { "epoch": 1.2913720296844158, "grad_norm": 2.0625, "learning_rate": 6.060439245816342e-06, "loss": 0.9549, "step": 6539 }, { "epoch": 1.2915719247395119, "grad_norm": 2.0, "learning_rate": 6.059409394242618e-06, "loss": 0.9163, "step": 6540 }, { "epoch": 1.2917718197946078, "grad_norm": 2.171875, "learning_rate": 6.058379495610043e-06, "loss": 1.0255, "step": 6541 }, { "epoch": 1.2919717148497039, "grad_norm": 2.109375, "learning_rate": 6.057349549964362e-06, "loss": 0.9965, "step": 6542 }, { "epoch": 1.2921716099048, "grad_norm": 2.25, "learning_rate": 6.056319557351327e-06, "loss": 0.9442, "step": 6543 }, { "epoch": 1.292371504959896, "grad_norm": 2.15625, "learning_rate": 6.055289517816691e-06, "loss": 1.004, "step": 6544 }, { "epoch": 1.2925714000149922, "grad_norm": 2.265625, "learning_rate": 6.054259431406206e-06, "loss": 1.0521, "step": 6545 }, { "epoch": 1.2927712950700883, "grad_norm": 2.1875, "learning_rate": 6.0532292981656315e-06, "loss": 0.9739, "step": 6546 }, { "epoch": 1.2929711901251844, "grad_norm": 2.140625, "learning_rate": 6.0521991181407225e-06, "loss": 0.9814, "step": 6547 }, { "epoch": 1.2931710851802802, "grad_norm": 2.171875, "learning_rate": 6.051168891377242e-06, "loss": 0.9492, "step": 6548 }, { "epoch": 1.2933709802353763, "grad_norm": 3.0625, "learning_rate": 6.050138617920951e-06, "loss": 1.0261, "step": 6549 }, { "epoch": 1.2935708752904724, "grad_norm": 2.234375, "learning_rate": 6.049108297817614e-06, "loss": 1.0818, "step": 6550 }, { "epoch": 1.2937707703455685, "grad_norm": 2.171875, "learning_rate": 6.048077931112999e-06, "loss": 0.9663, "step": 6551 }, { "epoch": 1.2939706654006646, "grad_norm": 2.3125, "learning_rate": 6.047047517852875e-06, "loss": 1.0825, "step": 6552 }, { "epoch": 1.2941705604557607, "grad_norm": 2.109375, "learning_rate": 6.046017058083011e-06, "loss": 0.9238, "step": 6553 }, { "epoch": 1.2943704555108568, "grad_norm": 2.234375, "learning_rate": 6.0449865518491825e-06, "loss": 1.0634, "step": 6554 }, { "epoch": 1.294570350565953, "grad_norm": 2.09375, "learning_rate": 6.0439559991971615e-06, "loss": 0.9655, "step": 6555 }, { "epoch": 1.294770245621049, "grad_norm": 2.0625, "learning_rate": 6.042925400172729e-06, "loss": 0.971, "step": 6556 }, { "epoch": 1.2949701406761451, "grad_norm": 2.015625, "learning_rate": 6.041894754821659e-06, "loss": 1.0065, "step": 6557 }, { "epoch": 1.2951700357312412, "grad_norm": 2.328125, "learning_rate": 6.040864063189738e-06, "loss": 1.0393, "step": 6558 }, { "epoch": 1.295369930786337, "grad_norm": 2.140625, "learning_rate": 6.039833325322744e-06, "loss": 1.006, "step": 6559 }, { "epoch": 1.2955698258414332, "grad_norm": 2.1875, "learning_rate": 6.038802541266464e-06, "loss": 0.9824, "step": 6560 }, { "epoch": 1.2957697208965293, "grad_norm": 2.078125, "learning_rate": 6.037771711066688e-06, "loss": 0.8906, "step": 6561 }, { "epoch": 1.2959696159516254, "grad_norm": 2.203125, "learning_rate": 6.036740834769202e-06, "loss": 1.0133, "step": 6562 }, { "epoch": 1.2961695110067215, "grad_norm": 2.09375, "learning_rate": 6.035709912419801e-06, "loss": 0.9655, "step": 6563 }, { "epoch": 1.2963694060618176, "grad_norm": 2.15625, "learning_rate": 6.0346789440642736e-06, "loss": 0.9906, "step": 6564 }, { "epoch": 1.2965693011169135, "grad_norm": 2.203125, "learning_rate": 6.0336479297484195e-06, "loss": 1.0288, "step": 6565 }, { "epoch": 1.2967691961720096, "grad_norm": 2.3125, "learning_rate": 6.032616869518036e-06, "loss": 1.0503, "step": 6566 }, { "epoch": 1.2969690912271057, "grad_norm": 2.15625, "learning_rate": 6.031585763418919e-06, "loss": 0.9395, "step": 6567 }, { "epoch": 1.2971689862822018, "grad_norm": 2.171875, "learning_rate": 6.030554611496873e-06, "loss": 1.0274, "step": 6568 }, { "epoch": 1.2973688813372979, "grad_norm": 2.125, "learning_rate": 6.029523413797703e-06, "loss": 0.9283, "step": 6569 }, { "epoch": 1.297568776392394, "grad_norm": 2.046875, "learning_rate": 6.028492170367211e-06, "loss": 0.9313, "step": 6570 }, { "epoch": 1.29776867144749, "grad_norm": 2.25, "learning_rate": 6.027460881251208e-06, "loss": 1.0137, "step": 6571 }, { "epoch": 1.2979685665025862, "grad_norm": 2.203125, "learning_rate": 6.026429546495502e-06, "loss": 1.0137, "step": 6572 }, { "epoch": 1.2981684615576823, "grad_norm": 2.21875, "learning_rate": 6.025398166145905e-06, "loss": 0.9563, "step": 6573 }, { "epoch": 1.2983683566127784, "grad_norm": 2.1875, "learning_rate": 6.024366740248231e-06, "loss": 0.9904, "step": 6574 }, { "epoch": 1.2985682516678745, "grad_norm": 2.140625, "learning_rate": 6.023335268848296e-06, "loss": 0.8159, "step": 6575 }, { "epoch": 1.2987681467229704, "grad_norm": 2.390625, "learning_rate": 6.022303751991917e-06, "loss": 0.957, "step": 6576 }, { "epoch": 1.2989680417780665, "grad_norm": 2.046875, "learning_rate": 6.021272189724916e-06, "loss": 0.8763, "step": 6577 }, { "epoch": 1.2991679368331626, "grad_norm": 2.0, "learning_rate": 6.020240582093112e-06, "loss": 0.8495, "step": 6578 }, { "epoch": 1.2993678318882587, "grad_norm": 2.28125, "learning_rate": 6.019208929142329e-06, "loss": 0.9718, "step": 6579 }, { "epoch": 1.2995677269433548, "grad_norm": 2.15625, "learning_rate": 6.018177230918397e-06, "loss": 0.9856, "step": 6580 }, { "epoch": 1.2997676219984509, "grad_norm": 2.265625, "learning_rate": 6.017145487467139e-06, "loss": 1.1287, "step": 6581 }, { "epoch": 1.299967517053547, "grad_norm": 2.28125, "learning_rate": 6.016113698834388e-06, "loss": 0.9454, "step": 6582 }, { "epoch": 1.3001674121086428, "grad_norm": 2.296875, "learning_rate": 6.015081865065977e-06, "loss": 1.0867, "step": 6583 }, { "epoch": 1.300367307163739, "grad_norm": 2.15625, "learning_rate": 6.014049986207735e-06, "loss": 0.9523, "step": 6584 }, { "epoch": 1.300567202218835, "grad_norm": 2.171875, "learning_rate": 6.013018062305502e-06, "loss": 1.0047, "step": 6585 }, { "epoch": 1.3007670972739311, "grad_norm": 2.28125, "learning_rate": 6.011986093405115e-06, "loss": 0.9859, "step": 6586 }, { "epoch": 1.3009669923290272, "grad_norm": 2.09375, "learning_rate": 6.010954079552414e-06, "loss": 0.9844, "step": 6587 }, { "epoch": 1.3011668873841233, "grad_norm": 2.09375, "learning_rate": 6.009922020793241e-06, "loss": 0.9852, "step": 6588 }, { "epoch": 1.3013667824392194, "grad_norm": 2.171875, "learning_rate": 6.008889917173439e-06, "loss": 1.069, "step": 6589 }, { "epoch": 1.3015666774943155, "grad_norm": 2.203125, "learning_rate": 6.007857768738855e-06, "loss": 1.019, "step": 6590 }, { "epoch": 1.3017665725494116, "grad_norm": 2.0625, "learning_rate": 6.006825575535337e-06, "loss": 0.9025, "step": 6591 }, { "epoch": 1.3019664676045077, "grad_norm": 2.109375, "learning_rate": 6.005793337608733e-06, "loss": 0.9806, "step": 6592 }, { "epoch": 1.3021663626596038, "grad_norm": 2.15625, "learning_rate": 6.004761055004897e-06, "loss": 0.999, "step": 6593 }, { "epoch": 1.3023662577146997, "grad_norm": 2.0625, "learning_rate": 6.003728727769683e-06, "loss": 0.9815, "step": 6594 }, { "epoch": 1.3025661527697958, "grad_norm": 2.125, "learning_rate": 6.002696355948945e-06, "loss": 0.9643, "step": 6595 }, { "epoch": 1.302766047824892, "grad_norm": 2.078125, "learning_rate": 6.0016639395885424e-06, "loss": 0.9539, "step": 6596 }, { "epoch": 1.302965942879988, "grad_norm": 2.265625, "learning_rate": 6.000631478734336e-06, "loss": 1.0288, "step": 6597 }, { "epoch": 1.3031658379350841, "grad_norm": 2.171875, "learning_rate": 5.999598973432184e-06, "loss": 1.0162, "step": 6598 }, { "epoch": 1.3033657329901802, "grad_norm": 2.15625, "learning_rate": 5.9985664237279515e-06, "loss": 0.9597, "step": 6599 }, { "epoch": 1.303565628045276, "grad_norm": 2.203125, "learning_rate": 5.997533829667508e-06, "loss": 1.013, "step": 6600 }, { "epoch": 1.3037655231003722, "grad_norm": 2.140625, "learning_rate": 5.996501191296717e-06, "loss": 0.9786, "step": 6601 }, { "epoch": 1.3039654181554683, "grad_norm": 2.109375, "learning_rate": 5.995468508661451e-06, "loss": 0.9443, "step": 6602 }, { "epoch": 1.3041653132105644, "grad_norm": 2.140625, "learning_rate": 5.99443578180758e-06, "loss": 0.9783, "step": 6603 }, { "epoch": 1.3043652082656605, "grad_norm": 2.21875, "learning_rate": 5.993403010780975e-06, "loss": 0.9471, "step": 6604 }, { "epoch": 1.3045651033207566, "grad_norm": 2.3125, "learning_rate": 5.992370195627516e-06, "loss": 1.0114, "step": 6605 }, { "epoch": 1.3047649983758527, "grad_norm": 2.1875, "learning_rate": 5.99133733639308e-06, "loss": 1.0314, "step": 6606 }, { "epoch": 1.3049648934309488, "grad_norm": 2.140625, "learning_rate": 5.990304433123543e-06, "loss": 1.0279, "step": 6607 }, { "epoch": 1.305164788486045, "grad_norm": 2.140625, "learning_rate": 5.98927148586479e-06, "loss": 0.8717, "step": 6608 }, { "epoch": 1.305364683541141, "grad_norm": 2.109375, "learning_rate": 5.988238494662704e-06, "loss": 0.9683, "step": 6609 }, { "epoch": 1.305564578596237, "grad_norm": 2.25, "learning_rate": 5.987205459563168e-06, "loss": 1.0831, "step": 6610 }, { "epoch": 1.305764473651333, "grad_norm": 2.109375, "learning_rate": 5.986172380612073e-06, "loss": 0.979, "step": 6611 }, { "epoch": 1.305964368706429, "grad_norm": 2.078125, "learning_rate": 5.985139257855304e-06, "loss": 0.9041, "step": 6612 }, { "epoch": 1.3061642637615252, "grad_norm": 2.203125, "learning_rate": 5.984106091338756e-06, "loss": 0.9586, "step": 6613 }, { "epoch": 1.3063641588166213, "grad_norm": 2.09375, "learning_rate": 5.983072881108321e-06, "loss": 0.9586, "step": 6614 }, { "epoch": 1.3065640538717174, "grad_norm": 2.40625, "learning_rate": 5.982039627209891e-06, "loss": 0.9355, "step": 6615 }, { "epoch": 1.3067639489268135, "grad_norm": 2.40625, "learning_rate": 5.981006329689368e-06, "loss": 1.0287, "step": 6616 }, { "epoch": 1.3069638439819096, "grad_norm": 2.203125, "learning_rate": 5.979972988592648e-06, "loss": 1.0191, "step": 6617 }, { "epoch": 1.3071637390370054, "grad_norm": 2.171875, "learning_rate": 5.978939603965633e-06, "loss": 1.0091, "step": 6618 }, { "epoch": 1.3073636340921015, "grad_norm": 2.125, "learning_rate": 5.977906175854224e-06, "loss": 0.9668, "step": 6619 }, { "epoch": 1.3075635291471976, "grad_norm": 2.28125, "learning_rate": 5.976872704304328e-06, "loss": 0.9464, "step": 6620 }, { "epoch": 1.3077634242022937, "grad_norm": 2.09375, "learning_rate": 5.975839189361851e-06, "loss": 1.0497, "step": 6621 }, { "epoch": 1.3079633192573898, "grad_norm": 2.234375, "learning_rate": 5.974805631072702e-06, "loss": 1.0606, "step": 6622 }, { "epoch": 1.308163214312486, "grad_norm": 2.234375, "learning_rate": 5.97377202948279e-06, "loss": 1.0158, "step": 6623 }, { "epoch": 1.308363109367582, "grad_norm": 2.171875, "learning_rate": 5.9727383846380275e-06, "loss": 0.996, "step": 6624 }, { "epoch": 1.3085630044226781, "grad_norm": 2.203125, "learning_rate": 5.971704696584332e-06, "loss": 1.0327, "step": 6625 }, { "epoch": 1.3087628994777742, "grad_norm": 2.0625, "learning_rate": 5.9706709653676156e-06, "loss": 0.9993, "step": 6626 }, { "epoch": 1.3089627945328703, "grad_norm": 2.09375, "learning_rate": 5.9696371910337995e-06, "loss": 1.0207, "step": 6627 }, { "epoch": 1.3091626895879664, "grad_norm": 2.21875, "learning_rate": 5.968603373628803e-06, "loss": 0.9862, "step": 6628 }, { "epoch": 1.3093625846430623, "grad_norm": 2.046875, "learning_rate": 5.967569513198548e-06, "loss": 0.9262, "step": 6629 }, { "epoch": 1.3095624796981584, "grad_norm": 2.125, "learning_rate": 5.966535609788959e-06, "loss": 1.0114, "step": 6630 }, { "epoch": 1.3097623747532545, "grad_norm": 2.140625, "learning_rate": 5.965501663445961e-06, "loss": 0.9882, "step": 6631 }, { "epoch": 1.3099622698083506, "grad_norm": 2.109375, "learning_rate": 5.9644676742154814e-06, "loss": 0.9676, "step": 6632 }, { "epoch": 1.3101621648634467, "grad_norm": 2.15625, "learning_rate": 5.963433642143452e-06, "loss": 0.954, "step": 6633 }, { "epoch": 1.3103620599185428, "grad_norm": 2.09375, "learning_rate": 5.962399567275804e-06, "loss": 0.9889, "step": 6634 }, { "epoch": 1.310561954973639, "grad_norm": 2.09375, "learning_rate": 5.961365449658469e-06, "loss": 0.9578, "step": 6635 }, { "epoch": 1.3107618500287348, "grad_norm": 2.046875, "learning_rate": 5.960331289337383e-06, "loss": 0.9138, "step": 6636 }, { "epoch": 1.310961745083831, "grad_norm": 2.3125, "learning_rate": 5.9592970863584835e-06, "loss": 1.0682, "step": 6637 }, { "epoch": 1.311161640138927, "grad_norm": 2.140625, "learning_rate": 5.95826284076771e-06, "loss": 0.9996, "step": 6638 }, { "epoch": 1.311361535194023, "grad_norm": 2.1875, "learning_rate": 5.957228552611004e-06, "loss": 1.0233, "step": 6639 }, { "epoch": 1.3115614302491192, "grad_norm": 2.140625, "learning_rate": 5.956194221934307e-06, "loss": 0.9941, "step": 6640 }, { "epoch": 1.3117613253042153, "grad_norm": 2.171875, "learning_rate": 5.955159848783565e-06, "loss": 0.9846, "step": 6641 }, { "epoch": 1.3119612203593114, "grad_norm": 2.203125, "learning_rate": 5.954125433204726e-06, "loss": 0.9759, "step": 6642 }, { "epoch": 1.3121611154144075, "grad_norm": 2.140625, "learning_rate": 5.953090975243735e-06, "loss": 0.989, "step": 6643 }, { "epoch": 1.3123610104695036, "grad_norm": 2.296875, "learning_rate": 5.952056474946546e-06, "loss": 0.9784, "step": 6644 }, { "epoch": 1.3125609055245997, "grad_norm": 2.140625, "learning_rate": 5.9510219323591104e-06, "loss": 0.9401, "step": 6645 }, { "epoch": 1.3127608005796958, "grad_norm": 2.15625, "learning_rate": 5.94998734752738e-06, "loss": 0.9958, "step": 6646 }, { "epoch": 1.3129606956347917, "grad_norm": 2.125, "learning_rate": 5.9489527204973165e-06, "loss": 0.9949, "step": 6647 }, { "epoch": 1.3131605906898878, "grad_norm": 2.1875, "learning_rate": 5.9479180513148716e-06, "loss": 1.0382, "step": 6648 }, { "epoch": 1.3133604857449839, "grad_norm": 2.078125, "learning_rate": 5.94688334002601e-06, "loss": 1.0021, "step": 6649 }, { "epoch": 1.31356038080008, "grad_norm": 2.109375, "learning_rate": 5.94584858667669e-06, "loss": 0.8882, "step": 6650 }, { "epoch": 1.313760275855176, "grad_norm": 2.21875, "learning_rate": 5.944813791312878e-06, "loss": 0.9116, "step": 6651 }, { "epoch": 1.3139601709102722, "grad_norm": 2.125, "learning_rate": 5.943778953980538e-06, "loss": 1.0571, "step": 6652 }, { "epoch": 1.314160065965368, "grad_norm": 2.078125, "learning_rate": 5.942744074725638e-06, "loss": 0.9611, "step": 6653 }, { "epoch": 1.3143599610204642, "grad_norm": 2.125, "learning_rate": 5.941709153594146e-06, "loss": 0.9936, "step": 6654 }, { "epoch": 1.3145598560755603, "grad_norm": 2.203125, "learning_rate": 5.940674190632034e-06, "loss": 0.967, "step": 6655 }, { "epoch": 1.3147597511306564, "grad_norm": 2.125, "learning_rate": 5.939639185885276e-06, "loss": 0.9805, "step": 6656 }, { "epoch": 1.3149596461857525, "grad_norm": 2.140625, "learning_rate": 5.938604139399844e-06, "loss": 0.9485, "step": 6657 }, { "epoch": 1.3151595412408486, "grad_norm": 2.21875, "learning_rate": 5.937569051221716e-06, "loss": 0.9486, "step": 6658 }, { "epoch": 1.3153594362959447, "grad_norm": 2.15625, "learning_rate": 5.936533921396874e-06, "loss": 1.0129, "step": 6659 }, { "epoch": 1.3155593313510408, "grad_norm": 2.09375, "learning_rate": 5.935498749971293e-06, "loss": 1.0116, "step": 6660 }, { "epoch": 1.3157592264061369, "grad_norm": 2.15625, "learning_rate": 5.934463536990959e-06, "loss": 0.9824, "step": 6661 }, { "epoch": 1.315959121461233, "grad_norm": 2.15625, "learning_rate": 5.933428282501853e-06, "loss": 0.9549, "step": 6662 }, { "epoch": 1.316159016516329, "grad_norm": 2.0, "learning_rate": 5.9323929865499615e-06, "loss": 0.9275, "step": 6663 }, { "epoch": 1.316358911571425, "grad_norm": 2.125, "learning_rate": 5.931357649181275e-06, "loss": 0.9875, "step": 6664 }, { "epoch": 1.316558806626521, "grad_norm": 2.25, "learning_rate": 5.930322270441781e-06, "loss": 0.9468, "step": 6665 }, { "epoch": 1.3167587016816171, "grad_norm": 2.265625, "learning_rate": 5.929286850377471e-06, "loss": 0.9012, "step": 6666 }, { "epoch": 1.3169585967367132, "grad_norm": 2.375, "learning_rate": 5.928251389034338e-06, "loss": 0.958, "step": 6667 }, { "epoch": 1.3171584917918093, "grad_norm": 2.109375, "learning_rate": 5.927215886458377e-06, "loss": 0.9074, "step": 6668 }, { "epoch": 1.3173583868469054, "grad_norm": 2.171875, "learning_rate": 5.926180342695587e-06, "loss": 0.9661, "step": 6669 }, { "epoch": 1.3175582819020015, "grad_norm": 2.1875, "learning_rate": 5.925144757791964e-06, "loss": 0.9343, "step": 6670 }, { "epoch": 1.3177581769570974, "grad_norm": 2.09375, "learning_rate": 5.92410913179351e-06, "loss": 0.9184, "step": 6671 }, { "epoch": 1.3179580720121935, "grad_norm": 2.046875, "learning_rate": 5.923073464746225e-06, "loss": 0.9682, "step": 6672 }, { "epoch": 1.3181579670672896, "grad_norm": 2.15625, "learning_rate": 5.9220377566961175e-06, "loss": 1.0067, "step": 6673 }, { "epoch": 1.3183578621223857, "grad_norm": 2.109375, "learning_rate": 5.921002007689191e-06, "loss": 0.9897, "step": 6674 }, { "epoch": 1.3185577571774818, "grad_norm": 2.265625, "learning_rate": 5.919966217771454e-06, "loss": 0.8978, "step": 6675 }, { "epoch": 1.318757652232578, "grad_norm": 2.265625, "learning_rate": 5.9189303869889145e-06, "loss": 1.0046, "step": 6676 }, { "epoch": 1.318957547287674, "grad_norm": 2.140625, "learning_rate": 5.9178945153875856e-06, "loss": 0.981, "step": 6677 }, { "epoch": 1.31915744234277, "grad_norm": 2.140625, "learning_rate": 5.916858603013482e-06, "loss": 0.9779, "step": 6678 }, { "epoch": 1.3193573373978662, "grad_norm": 2.21875, "learning_rate": 5.915822649912616e-06, "loss": 1.0136, "step": 6679 }, { "epoch": 1.3195572324529623, "grad_norm": 2.171875, "learning_rate": 5.914786656131007e-06, "loss": 0.9407, "step": 6680 }, { "epoch": 1.3197571275080584, "grad_norm": 2.125, "learning_rate": 5.913750621714671e-06, "loss": 1.0132, "step": 6681 }, { "epoch": 1.3199570225631543, "grad_norm": 2.359375, "learning_rate": 5.91271454670963e-06, "loss": 0.975, "step": 6682 }, { "epoch": 1.3201569176182504, "grad_norm": 2.15625, "learning_rate": 5.911678431161907e-06, "loss": 0.9969, "step": 6683 }, { "epoch": 1.3203568126733465, "grad_norm": 2.109375, "learning_rate": 5.910642275117525e-06, "loss": 0.9714, "step": 6684 }, { "epoch": 1.3205567077284426, "grad_norm": 2.234375, "learning_rate": 5.9096060786225095e-06, "loss": 0.9532, "step": 6685 }, { "epoch": 1.3207566027835387, "grad_norm": 2.15625, "learning_rate": 5.90856984172289e-06, "loss": 0.9656, "step": 6686 }, { "epoch": 1.3209564978386348, "grad_norm": 2.078125, "learning_rate": 5.907533564464696e-06, "loss": 0.9187, "step": 6687 }, { "epoch": 1.3211563928937307, "grad_norm": 2.078125, "learning_rate": 5.9064972468939565e-06, "loss": 0.9709, "step": 6688 }, { "epoch": 1.3213562879488268, "grad_norm": 2.125, "learning_rate": 5.905460889056707e-06, "loss": 0.9907, "step": 6689 }, { "epoch": 1.3215561830039229, "grad_norm": 2.1875, "learning_rate": 5.904424490998981e-06, "loss": 0.9996, "step": 6690 }, { "epoch": 1.321756078059019, "grad_norm": 2.15625, "learning_rate": 5.903388052766817e-06, "loss": 0.9861, "step": 6691 }, { "epoch": 1.321955973114115, "grad_norm": 2.296875, "learning_rate": 5.902351574406251e-06, "loss": 1.0058, "step": 6692 }, { "epoch": 1.3221558681692112, "grad_norm": 2.234375, "learning_rate": 5.9013150559633245e-06, "loss": 1.1108, "step": 6693 }, { "epoch": 1.3223557632243073, "grad_norm": 2.078125, "learning_rate": 5.900278497484079e-06, "loss": 0.9455, "step": 6694 }, { "epoch": 1.3225556582794034, "grad_norm": 2.125, "learning_rate": 5.89924189901456e-06, "loss": 0.9911, "step": 6695 }, { "epoch": 1.3227555533344995, "grad_norm": 2.265625, "learning_rate": 5.898205260600812e-06, "loss": 0.8872, "step": 6696 }, { "epoch": 1.3229554483895956, "grad_norm": 2.171875, "learning_rate": 5.897168582288881e-06, "loss": 0.8304, "step": 6697 }, { "epoch": 1.3231553434446917, "grad_norm": 2.09375, "learning_rate": 5.8961318641248185e-06, "loss": 0.9498, "step": 6698 }, { "epoch": 1.3233552384997875, "grad_norm": 2.25, "learning_rate": 5.895095106154674e-06, "loss": 0.9766, "step": 6699 }, { "epoch": 1.3235551335548836, "grad_norm": 2.125, "learning_rate": 5.8940583084245e-06, "loss": 0.9235, "step": 6700 }, { "epoch": 1.3237550286099797, "grad_norm": 2.078125, "learning_rate": 5.893021470980352e-06, "loss": 0.9945, "step": 6701 }, { "epoch": 1.3239549236650758, "grad_norm": 2.203125, "learning_rate": 5.891984593868285e-06, "loss": 1.0195, "step": 6702 }, { "epoch": 1.324154818720172, "grad_norm": 2.3125, "learning_rate": 5.890947677134358e-06, "loss": 1.0657, "step": 6703 }, { "epoch": 1.324354713775268, "grad_norm": 2.3125, "learning_rate": 5.889910720824631e-06, "loss": 1.0126, "step": 6704 }, { "epoch": 1.3245546088303641, "grad_norm": 2.171875, "learning_rate": 5.888873724985163e-06, "loss": 0.9802, "step": 6705 }, { "epoch": 1.32475450388546, "grad_norm": 2.171875, "learning_rate": 5.887836689662021e-06, "loss": 1.0151, "step": 6706 }, { "epoch": 1.324954398940556, "grad_norm": 2.203125, "learning_rate": 5.886799614901267e-06, "loss": 1.0367, "step": 6707 }, { "epoch": 1.3251542939956522, "grad_norm": 2.15625, "learning_rate": 5.885762500748968e-06, "loss": 0.99, "step": 6708 }, { "epoch": 1.3253541890507483, "grad_norm": 2.1875, "learning_rate": 5.8847253472511956e-06, "loss": 1.0279, "step": 6709 }, { "epoch": 1.3255540841058444, "grad_norm": 2.265625, "learning_rate": 5.883688154454016e-06, "loss": 0.9555, "step": 6710 }, { "epoch": 1.3257539791609405, "grad_norm": 2.1875, "learning_rate": 5.8826509224035046e-06, "loss": 0.9638, "step": 6711 }, { "epoch": 1.3259538742160366, "grad_norm": 2.078125, "learning_rate": 5.881613651145732e-06, "loss": 0.9608, "step": 6712 }, { "epoch": 1.3261537692711327, "grad_norm": 2.1875, "learning_rate": 5.880576340726776e-06, "loss": 1.0941, "step": 6713 }, { "epoch": 1.3263536643262288, "grad_norm": 2.15625, "learning_rate": 5.8795389911927126e-06, "loss": 1.0361, "step": 6714 }, { "epoch": 1.326553559381325, "grad_norm": 2.0625, "learning_rate": 5.8785016025896216e-06, "loss": 0.9403, "step": 6715 }, { "epoch": 1.326753454436421, "grad_norm": 2.140625, "learning_rate": 5.877464174963582e-06, "loss": 0.9459, "step": 6716 }, { "epoch": 1.3269533494915169, "grad_norm": 2.1875, "learning_rate": 5.876426708360678e-06, "loss": 1.0179, "step": 6717 }, { "epoch": 1.327153244546613, "grad_norm": 2.359375, "learning_rate": 5.875389202826994e-06, "loss": 1.0085, "step": 6718 }, { "epoch": 1.327353139601709, "grad_norm": 2.15625, "learning_rate": 5.874351658408614e-06, "loss": 0.9004, "step": 6719 }, { "epoch": 1.3275530346568052, "grad_norm": 2.125, "learning_rate": 5.873314075151628e-06, "loss": 1.0149, "step": 6720 }, { "epoch": 1.3277529297119013, "grad_norm": 2.203125, "learning_rate": 5.872276453102122e-06, "loss": 1.0591, "step": 6721 }, { "epoch": 1.3279528247669974, "grad_norm": 2.203125, "learning_rate": 5.8712387923061905e-06, "loss": 1.0282, "step": 6722 }, { "epoch": 1.3281527198220933, "grad_norm": 2.15625, "learning_rate": 5.870201092809926e-06, "loss": 0.917, "step": 6723 }, { "epoch": 1.3283526148771894, "grad_norm": 2.109375, "learning_rate": 5.8691633546594206e-06, "loss": 0.9753, "step": 6724 }, { "epoch": 1.3285525099322855, "grad_norm": 2.109375, "learning_rate": 5.868125577900772e-06, "loss": 0.9051, "step": 6725 }, { "epoch": 1.3287524049873816, "grad_norm": 2.0625, "learning_rate": 5.867087762580079e-06, "loss": 0.913, "step": 6726 }, { "epoch": 1.3289523000424777, "grad_norm": 2.1875, "learning_rate": 5.866049908743439e-06, "loss": 1.0433, "step": 6727 }, { "epoch": 1.3291521950975738, "grad_norm": 2.09375, "learning_rate": 5.865012016436954e-06, "loss": 1.0055, "step": 6728 }, { "epoch": 1.3293520901526699, "grad_norm": 2.109375, "learning_rate": 5.863974085706729e-06, "loss": 0.9556, "step": 6729 }, { "epoch": 1.329551985207766, "grad_norm": 2.34375, "learning_rate": 5.862936116598866e-06, "loss": 0.9934, "step": 6730 }, { "epoch": 1.329751880262862, "grad_norm": 2.015625, "learning_rate": 5.861898109159473e-06, "loss": 0.8601, "step": 6731 }, { "epoch": 1.3299517753179582, "grad_norm": 2.265625, "learning_rate": 5.860860063434659e-06, "loss": 1.0253, "step": 6732 }, { "epoch": 1.3301516703730543, "grad_norm": 2.015625, "learning_rate": 5.859821979470532e-06, "loss": 0.8942, "step": 6733 }, { "epoch": 1.3303515654281501, "grad_norm": 2.15625, "learning_rate": 5.858783857313205e-06, "loss": 1.0331, "step": 6734 }, { "epoch": 1.3305514604832462, "grad_norm": 2.171875, "learning_rate": 5.857745697008792e-06, "loss": 0.9352, "step": 6735 }, { "epoch": 1.3307513555383423, "grad_norm": 2.21875, "learning_rate": 5.856707498603404e-06, "loss": 1.0373, "step": 6736 }, { "epoch": 1.3309512505934384, "grad_norm": 2.203125, "learning_rate": 5.855669262143163e-06, "loss": 0.9652, "step": 6737 }, { "epoch": 1.3311511456485345, "grad_norm": 2.125, "learning_rate": 5.854630987674184e-06, "loss": 0.8334, "step": 6738 }, { "epoch": 1.3313510407036306, "grad_norm": 2.15625, "learning_rate": 5.853592675242587e-06, "loss": 0.8961, "step": 6739 }, { "epoch": 1.3315509357587267, "grad_norm": 2.203125, "learning_rate": 5.852554324894495e-06, "loss": 0.9398, "step": 6740 }, { "epoch": 1.3317508308138226, "grad_norm": 2.1875, "learning_rate": 5.851515936676031e-06, "loss": 1.0227, "step": 6741 }, { "epoch": 1.3319507258689187, "grad_norm": 2.078125, "learning_rate": 5.85047751063332e-06, "loss": 0.9559, "step": 6742 }, { "epoch": 1.3321506209240148, "grad_norm": 2.34375, "learning_rate": 5.84943904681249e-06, "loss": 1.0473, "step": 6743 }, { "epoch": 1.332350515979111, "grad_norm": 2.234375, "learning_rate": 5.848400545259667e-06, "loss": 0.9805, "step": 6744 }, { "epoch": 1.332550411034207, "grad_norm": 2.15625, "learning_rate": 5.847362006020982e-06, "loss": 1.0385, "step": 6745 }, { "epoch": 1.3327503060893031, "grad_norm": 2.015625, "learning_rate": 5.846323429142569e-06, "loss": 0.879, "step": 6746 }, { "epoch": 1.3329502011443992, "grad_norm": 2.171875, "learning_rate": 5.845284814670556e-06, "loss": 0.9384, "step": 6747 }, { "epoch": 1.3331500961994953, "grad_norm": 2.171875, "learning_rate": 5.844246162651085e-06, "loss": 1.0013, "step": 6748 }, { "epoch": 1.3333499912545914, "grad_norm": 2.21875, "learning_rate": 5.843207473130289e-06, "loss": 1.0776, "step": 6749 }, { "epoch": 1.3335498863096875, "grad_norm": 2.09375, "learning_rate": 5.842168746154306e-06, "loss": 1.0119, "step": 6750 }, { "epoch": 1.3337497813647836, "grad_norm": 2.125, "learning_rate": 5.841129981769279e-06, "loss": 0.9853, "step": 6751 }, { "epoch": 1.3339496764198795, "grad_norm": 2.03125, "learning_rate": 5.840091180021348e-06, "loss": 0.8704, "step": 6752 }, { "epoch": 1.3341495714749756, "grad_norm": 2.171875, "learning_rate": 5.839052340956654e-06, "loss": 0.9847, "step": 6753 }, { "epoch": 1.3343494665300717, "grad_norm": 2.171875, "learning_rate": 5.838013464621347e-06, "loss": 0.998, "step": 6754 }, { "epoch": 1.3345493615851678, "grad_norm": 2.21875, "learning_rate": 5.836974551061571e-06, "loss": 1.0238, "step": 6755 }, { "epoch": 1.334749256640264, "grad_norm": 2.125, "learning_rate": 5.835935600323473e-06, "loss": 1.0121, "step": 6756 }, { "epoch": 1.33494915169536, "grad_norm": 2.109375, "learning_rate": 5.834896612453209e-06, "loss": 0.9981, "step": 6757 }, { "epoch": 1.335149046750456, "grad_norm": 2.28125, "learning_rate": 5.8338575874969235e-06, "loss": 0.937, "step": 6758 }, { "epoch": 1.335348941805552, "grad_norm": 2.140625, "learning_rate": 5.832818525500773e-06, "loss": 0.9186, "step": 6759 }, { "epoch": 1.335548836860648, "grad_norm": 2.25, "learning_rate": 5.831779426510914e-06, "loss": 1.0156, "step": 6760 }, { "epoch": 1.3357487319157442, "grad_norm": 2.171875, "learning_rate": 5.830740290573501e-06, "loss": 0.9893, "step": 6761 }, { "epoch": 1.3359486269708403, "grad_norm": 2.25, "learning_rate": 5.829701117734692e-06, "loss": 1.0315, "step": 6762 }, { "epoch": 1.3361485220259364, "grad_norm": 2.109375, "learning_rate": 5.828661908040649e-06, "loss": 0.9556, "step": 6763 }, { "epoch": 1.3363484170810325, "grad_norm": 2.15625, "learning_rate": 5.827622661537533e-06, "loss": 0.928, "step": 6764 }, { "epoch": 1.3365483121361286, "grad_norm": 2.21875, "learning_rate": 5.826583378271506e-06, "loss": 1.0471, "step": 6765 }, { "epoch": 1.3367482071912247, "grad_norm": 2.171875, "learning_rate": 5.825544058288735e-06, "loss": 1.0082, "step": 6766 }, { "epoch": 1.3369481022463208, "grad_norm": 2.125, "learning_rate": 5.824504701635383e-06, "loss": 1.0759, "step": 6767 }, { "epoch": 1.3371479973014169, "grad_norm": 2.046875, "learning_rate": 5.823465308357621e-06, "loss": 0.99, "step": 6768 }, { "epoch": 1.337347892356513, "grad_norm": 2.0625, "learning_rate": 5.822425878501618e-06, "loss": 1.0142, "step": 6769 }, { "epoch": 1.3375477874116088, "grad_norm": 2.1875, "learning_rate": 5.821386412113546e-06, "loss": 0.9711, "step": 6770 }, { "epoch": 1.337747682466705, "grad_norm": 2.234375, "learning_rate": 5.820346909239576e-06, "loss": 1.06, "step": 6771 }, { "epoch": 1.337947577521801, "grad_norm": 2.1875, "learning_rate": 5.819307369925884e-06, "loss": 0.9441, "step": 6772 }, { "epoch": 1.3381474725768971, "grad_norm": 2.15625, "learning_rate": 5.8182677942186485e-06, "loss": 0.9636, "step": 6773 }, { "epoch": 1.3383473676319932, "grad_norm": 2.125, "learning_rate": 5.817228182164043e-06, "loss": 0.8985, "step": 6774 }, { "epoch": 1.3385472626870893, "grad_norm": 2.09375, "learning_rate": 5.816188533808249e-06, "loss": 1.0072, "step": 6775 }, { "epoch": 1.3387471577421852, "grad_norm": 2.328125, "learning_rate": 5.815148849197447e-06, "loss": 0.993, "step": 6776 }, { "epoch": 1.3389470527972813, "grad_norm": 2.171875, "learning_rate": 5.814109128377822e-06, "loss": 0.9225, "step": 6777 }, { "epoch": 1.3391469478523774, "grad_norm": 2.078125, "learning_rate": 5.8130693713955545e-06, "loss": 0.8882, "step": 6778 }, { "epoch": 1.3393468429074735, "grad_norm": 2.109375, "learning_rate": 5.8120295782968325e-06, "loss": 0.8944, "step": 6779 }, { "epoch": 1.3395467379625696, "grad_norm": 2.140625, "learning_rate": 5.810989749127844e-06, "loss": 0.921, "step": 6780 }, { "epoch": 1.3397466330176657, "grad_norm": 2.09375, "learning_rate": 5.809949883934778e-06, "loss": 0.9186, "step": 6781 }, { "epoch": 1.3399465280727618, "grad_norm": 2.1875, "learning_rate": 5.808909982763825e-06, "loss": 0.9882, "step": 6782 }, { "epoch": 1.340146423127858, "grad_norm": 2.265625, "learning_rate": 5.807870045661176e-06, "loss": 0.9886, "step": 6783 }, { "epoch": 1.340346318182954, "grad_norm": 2.125, "learning_rate": 5.806830072673025e-06, "loss": 0.9413, "step": 6784 }, { "epoch": 1.3405462132380501, "grad_norm": 2.15625, "learning_rate": 5.80579006384557e-06, "loss": 0.9557, "step": 6785 }, { "epoch": 1.3407461082931462, "grad_norm": 2.21875, "learning_rate": 5.804750019225005e-06, "loss": 1.0898, "step": 6786 }, { "epoch": 1.340946003348242, "grad_norm": 2.140625, "learning_rate": 5.8037099388575305e-06, "loss": 0.9668, "step": 6787 }, { "epoch": 1.3411458984033382, "grad_norm": 2.234375, "learning_rate": 5.802669822789349e-06, "loss": 0.9791, "step": 6788 }, { "epoch": 1.3413457934584343, "grad_norm": 2.28125, "learning_rate": 5.801629671066657e-06, "loss": 1.0095, "step": 6789 }, { "epoch": 1.3415456885135304, "grad_norm": 2.171875, "learning_rate": 5.80058948373566e-06, "loss": 1.0419, "step": 6790 }, { "epoch": 1.3417455835686265, "grad_norm": 2.1875, "learning_rate": 5.799549260842565e-06, "loss": 1.065, "step": 6791 }, { "epoch": 1.3419454786237226, "grad_norm": 2.125, "learning_rate": 5.7985090024335766e-06, "loss": 0.9716, "step": 6792 }, { "epoch": 1.3421453736788187, "grad_norm": 2.1875, "learning_rate": 5.797468708554903e-06, "loss": 0.963, "step": 6793 }, { "epoch": 1.3423452687339146, "grad_norm": 2.296875, "learning_rate": 5.796428379252756e-06, "loss": 0.9844, "step": 6794 }, { "epoch": 1.3425451637890107, "grad_norm": 2.0625, "learning_rate": 5.795388014573345e-06, "loss": 0.9513, "step": 6795 }, { "epoch": 1.3427450588441068, "grad_norm": 2.21875, "learning_rate": 5.794347614562884e-06, "loss": 1.0168, "step": 6796 }, { "epoch": 1.3429449538992029, "grad_norm": 2.109375, "learning_rate": 5.793307179267585e-06, "loss": 1.0011, "step": 6797 }, { "epoch": 1.343144848954299, "grad_norm": 2.0625, "learning_rate": 5.792266708733667e-06, "loss": 0.9518, "step": 6798 }, { "epoch": 1.343344744009395, "grad_norm": 2.109375, "learning_rate": 5.791226203007346e-06, "loss": 1.0294, "step": 6799 }, { "epoch": 1.3435446390644912, "grad_norm": 2.0625, "learning_rate": 5.790185662134842e-06, "loss": 0.9154, "step": 6800 }, { "epoch": 1.3437445341195873, "grad_norm": 2.125, "learning_rate": 5.789145086162374e-06, "loss": 0.9629, "step": 6801 }, { "epoch": 1.3439444291746834, "grad_norm": 2.203125, "learning_rate": 5.788104475136168e-06, "loss": 0.9986, "step": 6802 }, { "epoch": 1.3441443242297795, "grad_norm": 2.078125, "learning_rate": 5.7870638291024436e-06, "loss": 1.0083, "step": 6803 }, { "epoch": 1.3443442192848756, "grad_norm": 2.3125, "learning_rate": 5.786023148107429e-06, "loss": 1.0264, "step": 6804 }, { "epoch": 1.3445441143399715, "grad_norm": 2.078125, "learning_rate": 5.7849824321973505e-06, "loss": 0.8659, "step": 6805 }, { "epoch": 1.3447440093950676, "grad_norm": 2.234375, "learning_rate": 5.783941681418435e-06, "loss": 0.9921, "step": 6806 }, { "epoch": 1.3449439044501637, "grad_norm": 2.203125, "learning_rate": 5.782900895816913e-06, "loss": 0.9895, "step": 6807 }, { "epoch": 1.3451437995052598, "grad_norm": 2.078125, "learning_rate": 5.781860075439019e-06, "loss": 1.0123, "step": 6808 }, { "epoch": 1.3453436945603559, "grad_norm": 2.140625, "learning_rate": 5.780819220330982e-06, "loss": 0.9928, "step": 6809 }, { "epoch": 1.345543589615452, "grad_norm": 2.15625, "learning_rate": 5.77977833053904e-06, "loss": 0.9065, "step": 6810 }, { "epoch": 1.3457434846705478, "grad_norm": 2.265625, "learning_rate": 5.778737406109427e-06, "loss": 0.9993, "step": 6811 }, { "epoch": 1.345943379725644, "grad_norm": 2.0625, "learning_rate": 5.777696447088381e-06, "loss": 0.9916, "step": 6812 }, { "epoch": 1.34614327478074, "grad_norm": 2.125, "learning_rate": 5.7766554535221435e-06, "loss": 0.9238, "step": 6813 }, { "epoch": 1.3463431698358361, "grad_norm": 2.234375, "learning_rate": 5.775614425456953e-06, "loss": 0.9169, "step": 6814 }, { "epoch": 1.3465430648909322, "grad_norm": 2.125, "learning_rate": 5.774573362939051e-06, "loss": 0.8883, "step": 6815 }, { "epoch": 1.3467429599460283, "grad_norm": 2.0625, "learning_rate": 5.773532266014686e-06, "loss": 0.8809, "step": 6816 }, { "epoch": 1.3469428550011244, "grad_norm": 2.203125, "learning_rate": 5.772491134730097e-06, "loss": 0.9436, "step": 6817 }, { "epoch": 1.3471427500562205, "grad_norm": 2.140625, "learning_rate": 5.771449969131536e-06, "loss": 0.8461, "step": 6818 }, { "epoch": 1.3473426451113166, "grad_norm": 2.203125, "learning_rate": 5.770408769265252e-06, "loss": 1.0222, "step": 6819 }, { "epoch": 1.3475425401664127, "grad_norm": 2.09375, "learning_rate": 5.7693675351774905e-06, "loss": 0.9295, "step": 6820 }, { "epoch": 1.3477424352215088, "grad_norm": 2.125, "learning_rate": 5.768326266914505e-06, "loss": 0.9204, "step": 6821 }, { "epoch": 1.3479423302766047, "grad_norm": 2.15625, "learning_rate": 5.767284964522549e-06, "loss": 1.0075, "step": 6822 }, { "epoch": 1.3481422253317008, "grad_norm": 2.03125, "learning_rate": 5.766243628047876e-06, "loss": 0.8984, "step": 6823 }, { "epoch": 1.348342120386797, "grad_norm": 2.15625, "learning_rate": 5.765202257536744e-06, "loss": 0.8898, "step": 6824 }, { "epoch": 1.348542015441893, "grad_norm": 2.296875, "learning_rate": 5.76416085303541e-06, "loss": 1.1051, "step": 6825 }, { "epoch": 1.348741910496989, "grad_norm": 2.09375, "learning_rate": 5.7631194145901315e-06, "loss": 0.9619, "step": 6826 }, { "epoch": 1.3489418055520852, "grad_norm": 2.109375, "learning_rate": 5.76207794224717e-06, "loss": 0.9741, "step": 6827 }, { "epoch": 1.3491417006071813, "grad_norm": 2.15625, "learning_rate": 5.761036436052788e-06, "loss": 1.0536, "step": 6828 }, { "epoch": 1.3493415956622772, "grad_norm": 2.0625, "learning_rate": 5.75999489605325e-06, "loss": 0.9193, "step": 6829 }, { "epoch": 1.3495414907173733, "grad_norm": 2.171875, "learning_rate": 5.7589533222948176e-06, "loss": 0.9696, "step": 6830 }, { "epoch": 1.3497413857724694, "grad_norm": 2.078125, "learning_rate": 5.757911714823761e-06, "loss": 0.9368, "step": 6831 }, { "epoch": 1.3499412808275655, "grad_norm": 2.1875, "learning_rate": 5.756870073686347e-06, "loss": 1.0816, "step": 6832 }, { "epoch": 1.3501411758826616, "grad_norm": 2.140625, "learning_rate": 5.755828398928845e-06, "loss": 1.013, "step": 6833 }, { "epoch": 1.3503410709377577, "grad_norm": 2.0625, "learning_rate": 5.754786690597527e-06, "loss": 0.931, "step": 6834 }, { "epoch": 1.3505409659928538, "grad_norm": 2.125, "learning_rate": 5.753744948738663e-06, "loss": 1.0375, "step": 6835 }, { "epoch": 1.3507408610479499, "grad_norm": 2.359375, "learning_rate": 5.75270317339853e-06, "loss": 0.9461, "step": 6836 }, { "epoch": 1.350940756103046, "grad_norm": 2.125, "learning_rate": 5.751661364623403e-06, "loss": 0.9873, "step": 6837 }, { "epoch": 1.351140651158142, "grad_norm": 2.34375, "learning_rate": 5.750619522459558e-06, "loss": 1.1163, "step": 6838 }, { "epoch": 1.3513405462132382, "grad_norm": 2.140625, "learning_rate": 5.749577646953274e-06, "loss": 1.0067, "step": 6839 }, { "epoch": 1.351540441268334, "grad_norm": 1.9921875, "learning_rate": 5.74853573815083e-06, "loss": 0.8746, "step": 6840 }, { "epoch": 1.3517403363234302, "grad_norm": 2.15625, "learning_rate": 5.74749379609851e-06, "loss": 0.9809, "step": 6841 }, { "epoch": 1.3519402313785263, "grad_norm": 2.15625, "learning_rate": 5.746451820842595e-06, "loss": 0.9222, "step": 6842 }, { "epoch": 1.3521401264336224, "grad_norm": 2.140625, "learning_rate": 5.7454098124293675e-06, "loss": 0.9275, "step": 6843 }, { "epoch": 1.3523400214887185, "grad_norm": 2.203125, "learning_rate": 5.744367770905119e-06, "loss": 1.0583, "step": 6844 }, { "epoch": 1.3525399165438146, "grad_norm": 2.140625, "learning_rate": 5.743325696316132e-06, "loss": 0.9374, "step": 6845 }, { "epoch": 1.3527398115989104, "grad_norm": 2.109375, "learning_rate": 5.742283588708697e-06, "loss": 1.0193, "step": 6846 }, { "epoch": 1.3529397066540065, "grad_norm": 2.21875, "learning_rate": 5.741241448129105e-06, "loss": 1.0164, "step": 6847 }, { "epoch": 1.3531396017091026, "grad_norm": 2.125, "learning_rate": 5.740199274623647e-06, "loss": 0.954, "step": 6848 }, { "epoch": 1.3533394967641987, "grad_norm": 2.109375, "learning_rate": 5.7391570682386165e-06, "loss": 0.9195, "step": 6849 }, { "epoch": 1.3535393918192948, "grad_norm": 2.203125, "learning_rate": 5.738114829020307e-06, "loss": 1.0024, "step": 6850 }, { "epoch": 1.353739286874391, "grad_norm": 2.234375, "learning_rate": 5.737072557015016e-06, "loss": 1.0244, "step": 6851 }, { "epoch": 1.353939181929487, "grad_norm": 2.171875, "learning_rate": 5.73603025226904e-06, "loss": 0.9202, "step": 6852 }, { "epoch": 1.3541390769845831, "grad_norm": 2.125, "learning_rate": 5.7349879148286804e-06, "loss": 1.0214, "step": 6853 }, { "epoch": 1.3543389720396792, "grad_norm": 2.109375, "learning_rate": 5.733945544740235e-06, "loss": 0.8318, "step": 6854 }, { "epoch": 1.3545388670947753, "grad_norm": 2.140625, "learning_rate": 5.732903142050008e-06, "loss": 0.9818, "step": 6855 }, { "epoch": 1.3547387621498714, "grad_norm": 2.21875, "learning_rate": 5.731860706804301e-06, "loss": 0.9657, "step": 6856 }, { "epoch": 1.3549386572049673, "grad_norm": 2.171875, "learning_rate": 5.7308182390494185e-06, "loss": 1.0145, "step": 6857 }, { "epoch": 1.3551385522600634, "grad_norm": 2.078125, "learning_rate": 5.729775738831669e-06, "loss": 0.9326, "step": 6858 }, { "epoch": 1.3553384473151595, "grad_norm": 2.21875, "learning_rate": 5.728733206197359e-06, "loss": 1.0115, "step": 6859 }, { "epoch": 1.3555383423702556, "grad_norm": 1.96875, "learning_rate": 5.727690641192797e-06, "loss": 0.9357, "step": 6860 }, { "epoch": 1.3557382374253517, "grad_norm": 2.140625, "learning_rate": 5.726648043864296e-06, "loss": 1.0141, "step": 6861 }, { "epoch": 1.3559381324804478, "grad_norm": 2.1875, "learning_rate": 5.725605414258165e-06, "loss": 0.988, "step": 6862 }, { "epoch": 1.356138027535544, "grad_norm": 2.328125, "learning_rate": 5.724562752420719e-06, "loss": 0.9958, "step": 6863 }, { "epoch": 1.3563379225906398, "grad_norm": 2.15625, "learning_rate": 5.723520058398275e-06, "loss": 1.0179, "step": 6864 }, { "epoch": 1.3565378176457359, "grad_norm": 2.09375, "learning_rate": 5.7224773322371466e-06, "loss": 0.935, "step": 6865 }, { "epoch": 1.356737712700832, "grad_norm": 1.9453125, "learning_rate": 5.721434573983651e-06, "loss": 0.8396, "step": 6866 }, { "epoch": 1.356937607755928, "grad_norm": 2.171875, "learning_rate": 5.720391783684109e-06, "loss": 0.9771, "step": 6867 }, { "epoch": 1.3571375028110242, "grad_norm": 2.0, "learning_rate": 5.7193489613848414e-06, "loss": 0.8618, "step": 6868 }, { "epoch": 1.3573373978661203, "grad_norm": 2.15625, "learning_rate": 5.718306107132169e-06, "loss": 0.9778, "step": 6869 }, { "epoch": 1.3575372929212164, "grad_norm": 2.140625, "learning_rate": 5.717263220972418e-06, "loss": 0.9241, "step": 6870 }, { "epoch": 1.3577371879763125, "grad_norm": 2.109375, "learning_rate": 5.716220302951909e-06, "loss": 1.0834, "step": 6871 }, { "epoch": 1.3579370830314086, "grad_norm": 2.15625, "learning_rate": 5.715177353116972e-06, "loss": 0.9576, "step": 6872 }, { "epoch": 1.3581369780865047, "grad_norm": 2.140625, "learning_rate": 5.714134371513932e-06, "loss": 0.9635, "step": 6873 }, { "epoch": 1.3583368731416008, "grad_norm": 2.34375, "learning_rate": 5.71309135818912e-06, "loss": 0.9623, "step": 6874 }, { "epoch": 1.3585367681966967, "grad_norm": 2.140625, "learning_rate": 5.712048313188867e-06, "loss": 0.9739, "step": 6875 }, { "epoch": 1.3587366632517928, "grad_norm": 2.265625, "learning_rate": 5.711005236559503e-06, "loss": 1.0418, "step": 6876 }, { "epoch": 1.3589365583068889, "grad_norm": 2.203125, "learning_rate": 5.7099621283473625e-06, "loss": 0.9763, "step": 6877 }, { "epoch": 1.359136453361985, "grad_norm": 2.125, "learning_rate": 5.708918988598781e-06, "loss": 1.0662, "step": 6878 }, { "epoch": 1.359336348417081, "grad_norm": 2.140625, "learning_rate": 5.707875817360092e-06, "loss": 0.848, "step": 6879 }, { "epoch": 1.3595362434721772, "grad_norm": 2.078125, "learning_rate": 5.706832614677637e-06, "loss": 1.0198, "step": 6880 }, { "epoch": 1.3597361385272733, "grad_norm": 2.21875, "learning_rate": 5.7057893805977525e-06, "loss": 1.109, "step": 6881 }, { "epoch": 1.3599360335823691, "grad_norm": 2.15625, "learning_rate": 5.704746115166777e-06, "loss": 0.9673, "step": 6882 }, { "epoch": 1.3601359286374652, "grad_norm": 2.34375, "learning_rate": 5.703702818431055e-06, "loss": 1.0013, "step": 6883 }, { "epoch": 1.3603358236925613, "grad_norm": 2.234375, "learning_rate": 5.70265949043693e-06, "loss": 1.0541, "step": 6884 }, { "epoch": 1.3605357187476574, "grad_norm": 2.171875, "learning_rate": 5.701616131230745e-06, "loss": 0.9834, "step": 6885 }, { "epoch": 1.3607356138027535, "grad_norm": 2.125, "learning_rate": 5.700572740858847e-06, "loss": 0.9302, "step": 6886 }, { "epoch": 1.3609355088578496, "grad_norm": 2.09375, "learning_rate": 5.699529319367581e-06, "loss": 1.0564, "step": 6887 }, { "epoch": 1.3611354039129457, "grad_norm": 2.234375, "learning_rate": 5.698485866803298e-06, "loss": 1.0996, "step": 6888 }, { "epoch": 1.3613352989680418, "grad_norm": 2.21875, "learning_rate": 5.6974423832123494e-06, "loss": 0.9551, "step": 6889 }, { "epoch": 1.361535194023138, "grad_norm": 2.171875, "learning_rate": 5.696398868641082e-06, "loss": 0.9805, "step": 6890 }, { "epoch": 1.361735089078234, "grad_norm": 2.140625, "learning_rate": 5.695355323135852e-06, "loss": 1.0134, "step": 6891 }, { "epoch": 1.3619349841333301, "grad_norm": 2.140625, "learning_rate": 5.694311746743013e-06, "loss": 0.9429, "step": 6892 }, { "epoch": 1.362134879188426, "grad_norm": 2.046875, "learning_rate": 5.693268139508921e-06, "loss": 0.8925, "step": 6893 }, { "epoch": 1.3623347742435221, "grad_norm": 2.421875, "learning_rate": 5.6922245014799316e-06, "loss": 1.0224, "step": 6894 }, { "epoch": 1.3625346692986182, "grad_norm": 2.3125, "learning_rate": 5.6911808327024035e-06, "loss": 1.0771, "step": 6895 }, { "epoch": 1.3627345643537143, "grad_norm": 2.125, "learning_rate": 5.6901371332226975e-06, "loss": 0.9291, "step": 6896 }, { "epoch": 1.3629344594088104, "grad_norm": 2.125, "learning_rate": 5.689093403087173e-06, "loss": 1.0084, "step": 6897 }, { "epoch": 1.3631343544639065, "grad_norm": 2.28125, "learning_rate": 5.6880496423421936e-06, "loss": 1.0037, "step": 6898 }, { "epoch": 1.3633342495190024, "grad_norm": 2.265625, "learning_rate": 5.687005851034122e-06, "loss": 1.0394, "step": 6899 }, { "epoch": 1.3635341445740985, "grad_norm": 2.109375, "learning_rate": 5.685962029209325e-06, "loss": 1.0239, "step": 6900 }, { "epoch": 1.3637340396291946, "grad_norm": 2.34375, "learning_rate": 5.684918176914167e-06, "loss": 1.044, "step": 6901 }, { "epoch": 1.3639339346842907, "grad_norm": 2.09375, "learning_rate": 5.683874294195017e-06, "loss": 0.9391, "step": 6902 }, { "epoch": 1.3641338297393868, "grad_norm": 2.015625, "learning_rate": 5.682830381098243e-06, "loss": 0.9159, "step": 6903 }, { "epoch": 1.364333724794483, "grad_norm": 2.046875, "learning_rate": 5.681786437670217e-06, "loss": 0.9494, "step": 6904 }, { "epoch": 1.364533619849579, "grad_norm": 2.15625, "learning_rate": 5.680742463957311e-06, "loss": 0.9288, "step": 6905 }, { "epoch": 1.364733514904675, "grad_norm": 2.15625, "learning_rate": 5.679698460005897e-06, "loss": 0.982, "step": 6906 }, { "epoch": 1.3649334099597712, "grad_norm": 2.09375, "learning_rate": 5.678654425862349e-06, "loss": 0.9884, "step": 6907 }, { "epoch": 1.3651333050148673, "grad_norm": 2.1875, "learning_rate": 5.677610361573045e-06, "loss": 0.978, "step": 6908 }, { "epoch": 1.3653332000699634, "grad_norm": 2.171875, "learning_rate": 5.6765662671843615e-06, "loss": 1.0478, "step": 6909 }, { "epoch": 1.3655330951250593, "grad_norm": 2.078125, "learning_rate": 5.675522142742675e-06, "loss": 0.971, "step": 6910 }, { "epoch": 1.3657329901801554, "grad_norm": 2.28125, "learning_rate": 5.674477988294369e-06, "loss": 1.0029, "step": 6911 }, { "epoch": 1.3659328852352515, "grad_norm": 2.234375, "learning_rate": 5.6734338038858225e-06, "loss": 0.9451, "step": 6912 }, { "epoch": 1.3661327802903476, "grad_norm": 2.1875, "learning_rate": 5.672389589563417e-06, "loss": 0.9869, "step": 6913 }, { "epoch": 1.3663326753454437, "grad_norm": 2.171875, "learning_rate": 5.671345345373538e-06, "loss": 1.0203, "step": 6914 }, { "epoch": 1.3665325704005398, "grad_norm": 2.171875, "learning_rate": 5.6703010713625715e-06, "loss": 1.0001, "step": 6915 }, { "epoch": 1.3667324654556359, "grad_norm": 2.078125, "learning_rate": 5.669256767576903e-06, "loss": 0.9559, "step": 6916 }, { "epoch": 1.3669323605107317, "grad_norm": 2.25, "learning_rate": 5.6682124340629195e-06, "loss": 0.9698, "step": 6917 }, { "epoch": 1.3671322555658278, "grad_norm": 2.25, "learning_rate": 5.667168070867012e-06, "loss": 1.0643, "step": 6918 }, { "epoch": 1.367332150620924, "grad_norm": 2.15625, "learning_rate": 5.666123678035569e-06, "loss": 1.0569, "step": 6919 }, { "epoch": 1.36753204567602, "grad_norm": 2.21875, "learning_rate": 5.665079255614984e-06, "loss": 0.9833, "step": 6920 }, { "epoch": 1.3677319407311161, "grad_norm": 2.078125, "learning_rate": 5.664034803651649e-06, "loss": 0.9298, "step": 6921 }, { "epoch": 1.3679318357862122, "grad_norm": 2.21875, "learning_rate": 5.662990322191959e-06, "loss": 1.0319, "step": 6922 }, { "epoch": 1.3681317308413083, "grad_norm": 2.171875, "learning_rate": 5.66194581128231e-06, "loss": 1.0111, "step": 6923 }, { "epoch": 1.3683316258964044, "grad_norm": 2.453125, "learning_rate": 5.660901270969098e-06, "loss": 1.0736, "step": 6924 }, { "epoch": 1.3685315209515005, "grad_norm": 2.171875, "learning_rate": 5.659856701298724e-06, "loss": 1.0559, "step": 6925 }, { "epoch": 1.3687314160065966, "grad_norm": 2.09375, "learning_rate": 5.658812102317583e-06, "loss": 0.9378, "step": 6926 }, { "epoch": 1.3689313110616927, "grad_norm": 2.21875, "learning_rate": 5.657767474072082e-06, "loss": 0.8943, "step": 6927 }, { "epoch": 1.3691312061167886, "grad_norm": 2.203125, "learning_rate": 5.656722816608619e-06, "loss": 1.0167, "step": 6928 }, { "epoch": 1.3693311011718847, "grad_norm": 2.125, "learning_rate": 5.655678129973597e-06, "loss": 1.0068, "step": 6929 }, { "epoch": 1.3695309962269808, "grad_norm": 2.15625, "learning_rate": 5.654633414213424e-06, "loss": 0.9642, "step": 6930 }, { "epoch": 1.369730891282077, "grad_norm": 2.09375, "learning_rate": 5.6535886693745044e-06, "loss": 0.8641, "step": 6931 }, { "epoch": 1.369930786337173, "grad_norm": 2.1875, "learning_rate": 5.652543895503246e-06, "loss": 1.0691, "step": 6932 }, { "epoch": 1.3701306813922691, "grad_norm": 2.09375, "learning_rate": 5.651499092646056e-06, "loss": 0.9777, "step": 6933 }, { "epoch": 1.370330576447365, "grad_norm": 2.203125, "learning_rate": 5.650454260849349e-06, "loss": 0.9519, "step": 6934 }, { "epoch": 1.370530471502461, "grad_norm": 2.0625, "learning_rate": 5.64940940015953e-06, "loss": 0.8727, "step": 6935 }, { "epoch": 1.3707303665575572, "grad_norm": 2.109375, "learning_rate": 5.648364510623016e-06, "loss": 0.9795, "step": 6936 }, { "epoch": 1.3709302616126533, "grad_norm": 2.078125, "learning_rate": 5.64731959228622e-06, "loss": 0.8466, "step": 6937 }, { "epoch": 1.3711301566677494, "grad_norm": 2.140625, "learning_rate": 5.646274645195556e-06, "loss": 1.0228, "step": 6938 }, { "epoch": 1.3713300517228455, "grad_norm": 2.234375, "learning_rate": 5.645229669397443e-06, "loss": 0.9517, "step": 6939 }, { "epoch": 1.3715299467779416, "grad_norm": 2.390625, "learning_rate": 5.644184664938296e-06, "loss": 0.995, "step": 6940 }, { "epoch": 1.3717298418330377, "grad_norm": 2.109375, "learning_rate": 5.643139631864534e-06, "loss": 0.9714, "step": 6941 }, { "epoch": 1.3719297368881338, "grad_norm": 2.078125, "learning_rate": 5.642094570222579e-06, "loss": 1.0516, "step": 6942 }, { "epoch": 1.37212963194323, "grad_norm": 2.1875, "learning_rate": 5.641049480058853e-06, "loss": 1.0417, "step": 6943 }, { "epoch": 1.372329526998326, "grad_norm": 2.3125, "learning_rate": 5.640004361419776e-06, "loss": 1.0357, "step": 6944 }, { "epoch": 1.3725294220534219, "grad_norm": 2.09375, "learning_rate": 5.638959214351775e-06, "loss": 0.9773, "step": 6945 }, { "epoch": 1.372729317108518, "grad_norm": 2.234375, "learning_rate": 5.637914038901273e-06, "loss": 1.0186, "step": 6946 }, { "epoch": 1.372929212163614, "grad_norm": 2.203125, "learning_rate": 5.6368688351146975e-06, "loss": 1.0735, "step": 6947 }, { "epoch": 1.3731291072187102, "grad_norm": 2.328125, "learning_rate": 5.635823603038476e-06, "loss": 1.0468, "step": 6948 }, { "epoch": 1.3733290022738063, "grad_norm": 2.09375, "learning_rate": 5.634778342719038e-06, "loss": 1.0165, "step": 6949 }, { "epoch": 1.3735288973289024, "grad_norm": 2.015625, "learning_rate": 5.633733054202814e-06, "loss": 0.9065, "step": 6950 }, { "epoch": 1.3737287923839985, "grad_norm": 2.046875, "learning_rate": 5.632687737536236e-06, "loss": 0.8383, "step": 6951 }, { "epoch": 1.3739286874390944, "grad_norm": 2.1875, "learning_rate": 5.631642392765736e-06, "loss": 0.9439, "step": 6952 }, { "epoch": 1.3741285824941905, "grad_norm": 2.234375, "learning_rate": 5.630597019937748e-06, "loss": 1.0172, "step": 6953 }, { "epoch": 1.3743284775492866, "grad_norm": 2.21875, "learning_rate": 5.6295516190987075e-06, "loss": 0.996, "step": 6954 }, { "epoch": 1.3745283726043827, "grad_norm": 2.21875, "learning_rate": 5.628506190295052e-06, "loss": 1.0551, "step": 6955 }, { "epoch": 1.3747282676594788, "grad_norm": 2.21875, "learning_rate": 5.627460733573219e-06, "loss": 0.9798, "step": 6956 }, { "epoch": 1.3749281627145749, "grad_norm": 2.140625, "learning_rate": 5.626415248979646e-06, "loss": 1.0186, "step": 6957 }, { "epoch": 1.375128057769671, "grad_norm": 2.1875, "learning_rate": 5.625369736560776e-06, "loss": 0.9548, "step": 6958 }, { "epoch": 1.375327952824767, "grad_norm": 2.203125, "learning_rate": 5.624324196363048e-06, "loss": 0.9933, "step": 6959 }, { "epoch": 1.3755278478798632, "grad_norm": 2.171875, "learning_rate": 5.623278628432907e-06, "loss": 1.0, "step": 6960 }, { "epoch": 1.3757277429349593, "grad_norm": 2.28125, "learning_rate": 5.622233032816795e-06, "loss": 1.0621, "step": 6961 }, { "epoch": 1.3759276379900554, "grad_norm": 2.28125, "learning_rate": 5.6211874095611585e-06, "loss": 0.9978, "step": 6962 }, { "epoch": 1.3761275330451512, "grad_norm": 1.984375, "learning_rate": 5.620141758712443e-06, "loss": 0.9002, "step": 6963 }, { "epoch": 1.3763274281002473, "grad_norm": 2.125, "learning_rate": 5.619096080317097e-06, "loss": 0.9789, "step": 6964 }, { "epoch": 1.3765273231553434, "grad_norm": 2.328125, "learning_rate": 5.618050374421569e-06, "loss": 1.0389, "step": 6965 }, { "epoch": 1.3767272182104395, "grad_norm": 2.125, "learning_rate": 5.61700464107231e-06, "loss": 0.9557, "step": 6966 }, { "epoch": 1.3769271132655356, "grad_norm": 2.0625, "learning_rate": 5.6159588803157705e-06, "loss": 0.9106, "step": 6967 }, { "epoch": 1.3771270083206317, "grad_norm": 2.09375, "learning_rate": 5.614913092198404e-06, "loss": 0.9686, "step": 6968 }, { "epoch": 1.3773269033757276, "grad_norm": 2.125, "learning_rate": 5.613867276766662e-06, "loss": 0.9622, "step": 6969 }, { "epoch": 1.3775267984308237, "grad_norm": 2.53125, "learning_rate": 5.612821434067003e-06, "loss": 0.9964, "step": 6970 }, { "epoch": 1.3777266934859198, "grad_norm": 2.234375, "learning_rate": 5.61177556414588e-06, "loss": 0.9981, "step": 6971 }, { "epoch": 1.377926588541016, "grad_norm": 2.125, "learning_rate": 5.610729667049751e-06, "loss": 0.9791, "step": 6972 }, { "epoch": 1.378126483596112, "grad_norm": 1.9296875, "learning_rate": 5.609683742825078e-06, "loss": 0.9303, "step": 6973 }, { "epoch": 1.378326378651208, "grad_norm": 2.25, "learning_rate": 5.608637791518318e-06, "loss": 1.0528, "step": 6974 }, { "epoch": 1.3785262737063042, "grad_norm": 2.21875, "learning_rate": 5.60759181317593e-06, "loss": 0.934, "step": 6975 }, { "epoch": 1.3787261687614003, "grad_norm": 2.203125, "learning_rate": 5.60654580784438e-06, "loss": 0.9334, "step": 6976 }, { "epoch": 1.3789260638164964, "grad_norm": 2.421875, "learning_rate": 5.60549977557013e-06, "loss": 0.946, "step": 6977 }, { "epoch": 1.3791259588715925, "grad_norm": 2.21875, "learning_rate": 5.604453716399643e-06, "loss": 0.9288, "step": 6978 }, { "epoch": 1.3793258539266886, "grad_norm": 2.171875, "learning_rate": 5.60340763037939e-06, "loss": 0.9337, "step": 6979 }, { "epoch": 1.3795257489817845, "grad_norm": 2.09375, "learning_rate": 5.602361517555831e-06, "loss": 0.8842, "step": 6980 }, { "epoch": 1.3797256440368806, "grad_norm": 2.28125, "learning_rate": 5.601315377975439e-06, "loss": 1.0394, "step": 6981 }, { "epoch": 1.3799255390919767, "grad_norm": 2.15625, "learning_rate": 5.600269211684683e-06, "loss": 1.0313, "step": 6982 }, { "epoch": 1.3801254341470728, "grad_norm": 2.25, "learning_rate": 5.599223018730031e-06, "loss": 1.0387, "step": 6983 }, { "epoch": 1.3803253292021689, "grad_norm": 2.125, "learning_rate": 5.5981767991579575e-06, "loss": 1.0096, "step": 6984 }, { "epoch": 1.380525224257265, "grad_norm": 2.234375, "learning_rate": 5.597130553014934e-06, "loss": 1.0312, "step": 6985 }, { "epoch": 1.380725119312361, "grad_norm": 2.1875, "learning_rate": 5.596084280347435e-06, "loss": 1.0451, "step": 6986 }, { "epoch": 1.380925014367457, "grad_norm": 2.15625, "learning_rate": 5.595037981201939e-06, "loss": 0.9005, "step": 6987 }, { "epoch": 1.381124909422553, "grad_norm": 2.125, "learning_rate": 5.5939916556249155e-06, "loss": 1.0645, "step": 6988 }, { "epoch": 1.3813248044776492, "grad_norm": 2.171875, "learning_rate": 5.592945303662848e-06, "loss": 0.9322, "step": 6989 }, { "epoch": 1.3815246995327453, "grad_norm": 2.140625, "learning_rate": 5.591898925362213e-06, "loss": 0.9669, "step": 6990 }, { "epoch": 1.3817245945878414, "grad_norm": 2.234375, "learning_rate": 5.590852520769491e-06, "loss": 0.993, "step": 6991 }, { "epoch": 1.3819244896429375, "grad_norm": 2.140625, "learning_rate": 5.589806089931163e-06, "loss": 1.0358, "step": 6992 }, { "epoch": 1.3821243846980336, "grad_norm": 2.09375, "learning_rate": 5.588759632893712e-06, "loss": 0.9827, "step": 6993 }, { "epoch": 1.3823242797531297, "grad_norm": 2.140625, "learning_rate": 5.587713149703619e-06, "loss": 0.9969, "step": 6994 }, { "epoch": 1.3825241748082258, "grad_norm": 2.15625, "learning_rate": 5.586666640407373e-06, "loss": 0.9349, "step": 6995 }, { "epoch": 1.3827240698633219, "grad_norm": 2.21875, "learning_rate": 5.585620105051457e-06, "loss": 1.069, "step": 6996 }, { "epoch": 1.382923964918418, "grad_norm": 2.09375, "learning_rate": 5.584573543682358e-06, "loss": 0.945, "step": 6997 }, { "epoch": 1.3831238599735138, "grad_norm": 2.140625, "learning_rate": 5.583526956346564e-06, "loss": 0.9837, "step": 6998 }, { "epoch": 1.38332375502861, "grad_norm": 2.265625, "learning_rate": 5.582480343090566e-06, "loss": 0.8857, "step": 6999 }, { "epoch": 1.383523650083706, "grad_norm": 2.046875, "learning_rate": 5.581433703960853e-06, "loss": 0.887, "step": 7000 }, { "epoch": 1.3837235451388021, "grad_norm": 2.09375, "learning_rate": 5.580387039003918e-06, "loss": 0.9843, "step": 7001 }, { "epoch": 1.3839234401938982, "grad_norm": 2.09375, "learning_rate": 5.579340348266251e-06, "loss": 0.9901, "step": 7002 }, { "epoch": 1.3841233352489943, "grad_norm": 2.15625, "learning_rate": 5.578293631794348e-06, "loss": 0.9512, "step": 7003 }, { "epoch": 1.3843232303040904, "grad_norm": 2.171875, "learning_rate": 5.5772468896347045e-06, "loss": 0.9057, "step": 7004 }, { "epoch": 1.3845231253591863, "grad_norm": 2.15625, "learning_rate": 5.576200121833816e-06, "loss": 0.9077, "step": 7005 }, { "epoch": 1.3847230204142824, "grad_norm": 2.125, "learning_rate": 5.575153328438178e-06, "loss": 1.0334, "step": 7006 }, { "epoch": 1.3849229154693785, "grad_norm": 2.09375, "learning_rate": 5.574106509494292e-06, "loss": 0.9105, "step": 7007 }, { "epoch": 1.3851228105244746, "grad_norm": 2.265625, "learning_rate": 5.573059665048656e-06, "loss": 1.0605, "step": 7008 }, { "epoch": 1.3853227055795707, "grad_norm": 2.171875, "learning_rate": 5.57201279514777e-06, "loss": 1.0571, "step": 7009 }, { "epoch": 1.3855226006346668, "grad_norm": 2.09375, "learning_rate": 5.570965899838138e-06, "loss": 0.9358, "step": 7010 }, { "epoch": 1.385722495689763, "grad_norm": 2.21875, "learning_rate": 5.56991897916626e-06, "loss": 0.9903, "step": 7011 }, { "epoch": 1.385922390744859, "grad_norm": 2.21875, "learning_rate": 5.5688720331786425e-06, "loss": 1.0151, "step": 7012 }, { "epoch": 1.3861222857999551, "grad_norm": 2.125, "learning_rate": 5.567825061921791e-06, "loss": 0.9319, "step": 7013 }, { "epoch": 1.3863221808550512, "grad_norm": 2.3125, "learning_rate": 5.56677806544221e-06, "loss": 0.9802, "step": 7014 }, { "epoch": 1.3865220759101473, "grad_norm": 2.1875, "learning_rate": 5.565731043786409e-06, "loss": 0.9866, "step": 7015 }, { "epoch": 1.3867219709652432, "grad_norm": 2.25, "learning_rate": 5.564683997000893e-06, "loss": 1.0267, "step": 7016 }, { "epoch": 1.3869218660203393, "grad_norm": 2.25, "learning_rate": 5.5636369251321765e-06, "loss": 1.038, "step": 7017 }, { "epoch": 1.3871217610754354, "grad_norm": 2.34375, "learning_rate": 5.5625898282267675e-06, "loss": 0.9183, "step": 7018 }, { "epoch": 1.3873216561305315, "grad_norm": 2.171875, "learning_rate": 5.561542706331178e-06, "loss": 1.0668, "step": 7019 }, { "epoch": 1.3875215511856276, "grad_norm": 2.28125, "learning_rate": 5.560495559491922e-06, "loss": 1.0296, "step": 7020 }, { "epoch": 1.3877214462407237, "grad_norm": 2.078125, "learning_rate": 5.559448387755513e-06, "loss": 0.9755, "step": 7021 }, { "epoch": 1.3879213412958196, "grad_norm": 2.15625, "learning_rate": 5.558401191168465e-06, "loss": 1.003, "step": 7022 }, { "epoch": 1.3881212363509157, "grad_norm": 2.1875, "learning_rate": 5.557353969777297e-06, "loss": 0.9762, "step": 7023 }, { "epoch": 1.3883211314060118, "grad_norm": 2.265625, "learning_rate": 5.556306723628526e-06, "loss": 0.9916, "step": 7024 }, { "epoch": 1.3885210264611079, "grad_norm": 2.140625, "learning_rate": 5.555259452768668e-06, "loss": 0.9925, "step": 7025 }, { "epoch": 1.388720921516204, "grad_norm": 2.125, "learning_rate": 5.5542121572442446e-06, "loss": 0.9753, "step": 7026 }, { "epoch": 1.3889208165713, "grad_norm": 2.09375, "learning_rate": 5.553164837101778e-06, "loss": 0.971, "step": 7027 }, { "epoch": 1.3891207116263962, "grad_norm": 2.046875, "learning_rate": 5.552117492387786e-06, "loss": 0.8416, "step": 7028 }, { "epoch": 1.3893206066814923, "grad_norm": 2.125, "learning_rate": 5.551070123148795e-06, "loss": 0.9155, "step": 7029 }, { "epoch": 1.3895205017365884, "grad_norm": 2.15625, "learning_rate": 5.5500227294313295e-06, "loss": 1.0296, "step": 7030 }, { "epoch": 1.3897203967916845, "grad_norm": 2.203125, "learning_rate": 5.548975311281911e-06, "loss": 1.0358, "step": 7031 }, { "epoch": 1.3899202918467806, "grad_norm": 2.21875, "learning_rate": 5.547927868747069e-06, "loss": 1.0207, "step": 7032 }, { "epoch": 1.3901201869018764, "grad_norm": 2.046875, "learning_rate": 5.546880401873329e-06, "loss": 0.8628, "step": 7033 }, { "epoch": 1.3903200819569725, "grad_norm": 2.125, "learning_rate": 5.54583291070722e-06, "loss": 0.9888, "step": 7034 }, { "epoch": 1.3905199770120686, "grad_norm": 2.0, "learning_rate": 5.544785395295273e-06, "loss": 0.9659, "step": 7035 }, { "epoch": 1.3907198720671647, "grad_norm": 2.078125, "learning_rate": 5.543737855684015e-06, "loss": 0.9611, "step": 7036 }, { "epoch": 1.3909197671222608, "grad_norm": 2.25, "learning_rate": 5.54269029191998e-06, "loss": 0.8994, "step": 7037 }, { "epoch": 1.391119662177357, "grad_norm": 2.109375, "learning_rate": 5.541642704049701e-06, "loss": 0.9457, "step": 7038 }, { "epoch": 1.391319557232453, "grad_norm": 2.078125, "learning_rate": 5.540595092119709e-06, "loss": 0.9801, "step": 7039 }, { "epoch": 1.391519452287549, "grad_norm": 2.125, "learning_rate": 5.539547456176543e-06, "loss": 0.965, "step": 7040 }, { "epoch": 1.391719347342645, "grad_norm": 2.171875, "learning_rate": 5.538499796266735e-06, "loss": 0.9907, "step": 7041 }, { "epoch": 1.3919192423977411, "grad_norm": 2.078125, "learning_rate": 5.537452112436824e-06, "loss": 0.9189, "step": 7042 }, { "epoch": 1.3921191374528372, "grad_norm": 2.078125, "learning_rate": 5.536404404733348e-06, "loss": 0.9072, "step": 7043 }, { "epoch": 1.3923190325079333, "grad_norm": 2.140625, "learning_rate": 5.535356673202845e-06, "loss": 0.9507, "step": 7044 }, { "epoch": 1.3925189275630294, "grad_norm": 2.171875, "learning_rate": 5.534308917891856e-06, "loss": 0.9909, "step": 7045 }, { "epoch": 1.3927188226181255, "grad_norm": 2.1875, "learning_rate": 5.533261138846922e-06, "loss": 0.9652, "step": 7046 }, { "epoch": 1.3929187176732216, "grad_norm": 2.21875, "learning_rate": 5.532213336114586e-06, "loss": 0.9624, "step": 7047 }, { "epoch": 1.3931186127283177, "grad_norm": 2.171875, "learning_rate": 5.531165509741388e-06, "loss": 0.9854, "step": 7048 }, { "epoch": 1.3933185077834138, "grad_norm": 2.234375, "learning_rate": 5.5301176597738785e-06, "loss": 1.0033, "step": 7049 }, { "epoch": 1.39351840283851, "grad_norm": 2.0625, "learning_rate": 5.529069786258596e-06, "loss": 0.9078, "step": 7050 }, { "epoch": 1.3937182978936058, "grad_norm": 2.125, "learning_rate": 5.5280218892420925e-06, "loss": 0.9249, "step": 7051 }, { "epoch": 1.393918192948702, "grad_norm": 2.15625, "learning_rate": 5.526973968770911e-06, "loss": 1.0552, "step": 7052 }, { "epoch": 1.394118088003798, "grad_norm": 2.078125, "learning_rate": 5.525926024891603e-06, "loss": 1.0302, "step": 7053 }, { "epoch": 1.394317983058894, "grad_norm": 2.25, "learning_rate": 5.524878057650717e-06, "loss": 1.002, "step": 7054 }, { "epoch": 1.3945178781139902, "grad_norm": 2.03125, "learning_rate": 5.5238300670948044e-06, "loss": 0.9046, "step": 7055 }, { "epoch": 1.3947177731690863, "grad_norm": 2.125, "learning_rate": 5.522782053270414e-06, "loss": 1.0101, "step": 7056 }, { "epoch": 1.3949176682241822, "grad_norm": 2.046875, "learning_rate": 5.521734016224103e-06, "loss": 1.0308, "step": 7057 }, { "epoch": 1.3951175632792783, "grad_norm": 2.3125, "learning_rate": 5.5206859560024215e-06, "loss": 1.0799, "step": 7058 }, { "epoch": 1.3953174583343744, "grad_norm": 2.09375, "learning_rate": 5.519637872651926e-06, "loss": 0.932, "step": 7059 }, { "epoch": 1.3955173533894705, "grad_norm": 2.28125, "learning_rate": 5.518589766219173e-06, "loss": 1.0846, "step": 7060 }, { "epoch": 1.3957172484445666, "grad_norm": 2.109375, "learning_rate": 5.517541636750715e-06, "loss": 0.9484, "step": 7061 }, { "epoch": 1.3959171434996627, "grad_norm": 2.140625, "learning_rate": 5.516493484293114e-06, "loss": 1.0155, "step": 7062 }, { "epoch": 1.3961170385547588, "grad_norm": 2.140625, "learning_rate": 5.515445308892928e-06, "loss": 0.9161, "step": 7063 }, { "epoch": 1.3963169336098549, "grad_norm": 2.203125, "learning_rate": 5.514397110596716e-06, "loss": 0.9939, "step": 7064 }, { "epoch": 1.396516828664951, "grad_norm": 2.125, "learning_rate": 5.513348889451039e-06, "loss": 0.903, "step": 7065 }, { "epoch": 1.396716723720047, "grad_norm": 2.203125, "learning_rate": 5.512300645502462e-06, "loss": 0.9597, "step": 7066 }, { "epoch": 1.3969166187751432, "grad_norm": 2.125, "learning_rate": 5.511252378797542e-06, "loss": 0.9626, "step": 7067 }, { "epoch": 1.397116513830239, "grad_norm": 2.1875, "learning_rate": 5.510204089382847e-06, "loss": 1.0461, "step": 7068 }, { "epoch": 1.3973164088853351, "grad_norm": 2.078125, "learning_rate": 5.509155777304941e-06, "loss": 0.9781, "step": 7069 }, { "epoch": 1.3975163039404312, "grad_norm": 2.40625, "learning_rate": 5.508107442610388e-06, "loss": 1.0899, "step": 7070 }, { "epoch": 1.3977161989955273, "grad_norm": 2.09375, "learning_rate": 5.507059085345759e-06, "loss": 0.9567, "step": 7071 }, { "epoch": 1.3979160940506234, "grad_norm": 2.109375, "learning_rate": 5.506010705557618e-06, "loss": 0.9734, "step": 7072 }, { "epoch": 1.3981159891057195, "grad_norm": 2.125, "learning_rate": 5.5049623032925355e-06, "loss": 0.8962, "step": 7073 }, { "epoch": 1.3983158841608156, "grad_norm": 2.21875, "learning_rate": 5.503913878597082e-06, "loss": 0.9915, "step": 7074 }, { "epoch": 1.3985157792159115, "grad_norm": 2.203125, "learning_rate": 5.502865431517828e-06, "loss": 0.966, "step": 7075 }, { "epoch": 1.3987156742710076, "grad_norm": 2.265625, "learning_rate": 5.501816962101345e-06, "loss": 0.973, "step": 7076 }, { "epoch": 1.3989155693261037, "grad_norm": 2.25, "learning_rate": 5.500768470394207e-06, "loss": 1.0737, "step": 7077 }, { "epoch": 1.3991154643811998, "grad_norm": 2.078125, "learning_rate": 5.499719956442985e-06, "loss": 0.867, "step": 7078 }, { "epoch": 1.399315359436296, "grad_norm": 2.125, "learning_rate": 5.498671420294257e-06, "loss": 0.9767, "step": 7079 }, { "epoch": 1.399515254491392, "grad_norm": 2.21875, "learning_rate": 5.497622861994598e-06, "loss": 1.0266, "step": 7080 }, { "epoch": 1.3997151495464881, "grad_norm": 2.0625, "learning_rate": 5.496574281590585e-06, "loss": 0.9242, "step": 7081 }, { "epoch": 1.3999150446015842, "grad_norm": 2.015625, "learning_rate": 5.4955256791287946e-06, "loss": 0.9276, "step": 7082 }, { "epoch": 1.4001149396566803, "grad_norm": 2.296875, "learning_rate": 5.494477054655808e-06, "loss": 0.9677, "step": 7083 }, { "epoch": 1.4003148347117764, "grad_norm": 2.1875, "learning_rate": 5.493428408218202e-06, "loss": 0.9379, "step": 7084 }, { "epoch": 1.4005147297668725, "grad_norm": 2.203125, "learning_rate": 5.492379739862559e-06, "loss": 0.9599, "step": 7085 }, { "epoch": 1.4007146248219684, "grad_norm": 2.296875, "learning_rate": 5.491331049635461e-06, "loss": 1.0505, "step": 7086 }, { "epoch": 1.4009145198770645, "grad_norm": 2.109375, "learning_rate": 5.490282337583489e-06, "loss": 0.9282, "step": 7087 }, { "epoch": 1.4011144149321606, "grad_norm": 2.109375, "learning_rate": 5.489233603753228e-06, "loss": 0.8507, "step": 7088 }, { "epoch": 1.4013143099872567, "grad_norm": 2.15625, "learning_rate": 5.488184848191265e-06, "loss": 0.9696, "step": 7089 }, { "epoch": 1.4015142050423528, "grad_norm": 2.296875, "learning_rate": 5.48713607094418e-06, "loss": 0.9663, "step": 7090 }, { "epoch": 1.401714100097449, "grad_norm": 2.078125, "learning_rate": 5.486087272058566e-06, "loss": 0.987, "step": 7091 }, { "epoch": 1.4019139951525448, "grad_norm": 2.171875, "learning_rate": 5.485038451581004e-06, "loss": 0.9364, "step": 7092 }, { "epoch": 1.4021138902076409, "grad_norm": 2.03125, "learning_rate": 5.4839896095580865e-06, "loss": 0.9397, "step": 7093 }, { "epoch": 1.402313785262737, "grad_norm": 2.171875, "learning_rate": 5.4829407460364045e-06, "loss": 0.9994, "step": 7094 }, { "epoch": 1.402513680317833, "grad_norm": 2.046875, "learning_rate": 5.481891861062545e-06, "loss": 0.9481, "step": 7095 }, { "epoch": 1.4027135753729292, "grad_norm": 2.25, "learning_rate": 5.480842954683099e-06, "loss": 0.9541, "step": 7096 }, { "epoch": 1.4029134704280253, "grad_norm": 2.171875, "learning_rate": 5.479794026944663e-06, "loss": 1.041, "step": 7097 }, { "epoch": 1.4031133654831214, "grad_norm": 2.25, "learning_rate": 5.478745077893827e-06, "loss": 1.0423, "step": 7098 }, { "epoch": 1.4033132605382175, "grad_norm": 2.15625, "learning_rate": 5.477696107577184e-06, "loss": 1.0669, "step": 7099 }, { "epoch": 1.4035131555933136, "grad_norm": 2.09375, "learning_rate": 5.476647116041332e-06, "loss": 0.9499, "step": 7100 }, { "epoch": 1.4037130506484097, "grad_norm": 2.171875, "learning_rate": 5.4755981033328655e-06, "loss": 1.0964, "step": 7101 }, { "epoch": 1.4039129457035058, "grad_norm": 2.109375, "learning_rate": 5.474549069498381e-06, "loss": 0.8984, "step": 7102 }, { "epoch": 1.4041128407586017, "grad_norm": 2.15625, "learning_rate": 5.473500014584481e-06, "loss": 0.9057, "step": 7103 }, { "epoch": 1.4043127358136978, "grad_norm": 2.140625, "learning_rate": 5.472450938637758e-06, "loss": 0.9872, "step": 7104 }, { "epoch": 1.4045126308687939, "grad_norm": 2.125, "learning_rate": 5.471401841704816e-06, "loss": 0.9796, "step": 7105 }, { "epoch": 1.40471252592389, "grad_norm": 2.171875, "learning_rate": 5.470352723832254e-06, "loss": 1.0426, "step": 7106 }, { "epoch": 1.404912420978986, "grad_norm": 2.265625, "learning_rate": 5.4693035850666734e-06, "loss": 1.0271, "step": 7107 }, { "epoch": 1.4051123160340822, "grad_norm": 2.171875, "learning_rate": 5.46825442545468e-06, "loss": 0.9901, "step": 7108 }, { "epoch": 1.4053122110891783, "grad_norm": 2.234375, "learning_rate": 5.467205245042873e-06, "loss": 0.9917, "step": 7109 }, { "epoch": 1.4055121061442741, "grad_norm": 2.09375, "learning_rate": 5.466156043877859e-06, "loss": 1.0446, "step": 7110 }, { "epoch": 1.4057120011993702, "grad_norm": 2.171875, "learning_rate": 5.465106822006244e-06, "loss": 0.9722, "step": 7111 }, { "epoch": 1.4059118962544663, "grad_norm": 2.125, "learning_rate": 5.4640575794746335e-06, "loss": 1.0074, "step": 7112 }, { "epoch": 1.4061117913095624, "grad_norm": 2.296875, "learning_rate": 5.463008316329636e-06, "loss": 1.0497, "step": 7113 }, { "epoch": 1.4063116863646585, "grad_norm": 2.140625, "learning_rate": 5.461959032617857e-06, "loss": 1.0138, "step": 7114 }, { "epoch": 1.4065115814197546, "grad_norm": 2.40625, "learning_rate": 5.4609097283859084e-06, "loss": 0.9636, "step": 7115 }, { "epoch": 1.4067114764748507, "grad_norm": 2.15625, "learning_rate": 5.459860403680397e-06, "loss": 0.9554, "step": 7116 }, { "epoch": 1.4069113715299468, "grad_norm": 2.171875, "learning_rate": 5.458811058547937e-06, "loss": 1.0052, "step": 7117 }, { "epoch": 1.407111266585043, "grad_norm": 2.1875, "learning_rate": 5.457761693035139e-06, "loss": 1.023, "step": 7118 }, { "epoch": 1.407311161640139, "grad_norm": 2.15625, "learning_rate": 5.456712307188614e-06, "loss": 1.0921, "step": 7119 }, { "epoch": 1.4075110566952351, "grad_norm": 2.203125, "learning_rate": 5.4556629010549785e-06, "loss": 0.9256, "step": 7120 }, { "epoch": 1.407710951750331, "grad_norm": 2.046875, "learning_rate": 5.454613474680844e-06, "loss": 0.854, "step": 7121 }, { "epoch": 1.407910846805427, "grad_norm": 2.125, "learning_rate": 5.45356402811283e-06, "loss": 0.9923, "step": 7122 }, { "epoch": 1.4081107418605232, "grad_norm": 2.171875, "learning_rate": 5.452514561397549e-06, "loss": 0.9985, "step": 7123 }, { "epoch": 1.4083106369156193, "grad_norm": 2.15625, "learning_rate": 5.451465074581619e-06, "loss": 1.0087, "step": 7124 }, { "epoch": 1.4085105319707154, "grad_norm": 2.125, "learning_rate": 5.450415567711659e-06, "loss": 0.9521, "step": 7125 }, { "epoch": 1.4087104270258115, "grad_norm": 2.15625, "learning_rate": 5.449366040834287e-06, "loss": 0.9508, "step": 7126 }, { "epoch": 1.4089103220809076, "grad_norm": 2.15625, "learning_rate": 5.448316493996124e-06, "loss": 1.0183, "step": 7127 }, { "epoch": 1.4091102171360035, "grad_norm": 2.234375, "learning_rate": 5.447266927243792e-06, "loss": 1.042, "step": 7128 }, { "epoch": 1.4093101121910996, "grad_norm": 2.15625, "learning_rate": 5.446217340623909e-06, "loss": 1.0149, "step": 7129 }, { "epoch": 1.4095100072461957, "grad_norm": 2.203125, "learning_rate": 5.445167734183099e-06, "loss": 0.9286, "step": 7130 }, { "epoch": 1.4097099023012918, "grad_norm": 2.40625, "learning_rate": 5.444118107967987e-06, "loss": 1.0246, "step": 7131 }, { "epoch": 1.4099097973563879, "grad_norm": 2.609375, "learning_rate": 5.443068462025195e-06, "loss": 1.0767, "step": 7132 }, { "epoch": 1.410109692411484, "grad_norm": 2.1875, "learning_rate": 5.442018796401349e-06, "loss": 0.9486, "step": 7133 }, { "epoch": 1.41030958746658, "grad_norm": 2.1875, "learning_rate": 5.440969111143076e-06, "loss": 0.9949, "step": 7134 }, { "epoch": 1.4105094825216762, "grad_norm": 2.109375, "learning_rate": 5.439919406297002e-06, "loss": 0.8934, "step": 7135 }, { "epoch": 1.4107093775767723, "grad_norm": 2.140625, "learning_rate": 5.438869681909757e-06, "loss": 0.9253, "step": 7136 }, { "epoch": 1.4109092726318684, "grad_norm": 2.109375, "learning_rate": 5.437819938027964e-06, "loss": 0.8921, "step": 7137 }, { "epoch": 1.4111091676869643, "grad_norm": 2.125, "learning_rate": 5.436770174698257e-06, "loss": 1.0573, "step": 7138 }, { "epoch": 1.4113090627420604, "grad_norm": 1.984375, "learning_rate": 5.435720391967267e-06, "loss": 0.9288, "step": 7139 }, { "epoch": 1.4115089577971565, "grad_norm": 2.21875, "learning_rate": 5.434670589881623e-06, "loss": 0.9548, "step": 7140 }, { "epoch": 1.4117088528522526, "grad_norm": 2.203125, "learning_rate": 5.433620768487957e-06, "loss": 0.94, "step": 7141 }, { "epoch": 1.4119087479073487, "grad_norm": 2.109375, "learning_rate": 5.432570927832906e-06, "loss": 0.9368, "step": 7142 }, { "epoch": 1.4121086429624448, "grad_norm": 2.1875, "learning_rate": 5.431521067963097e-06, "loss": 0.9632, "step": 7143 }, { "epoch": 1.4123085380175409, "grad_norm": 2.125, "learning_rate": 5.430471188925169e-06, "loss": 0.9206, "step": 7144 }, { "epoch": 1.4125084330726367, "grad_norm": 2.25, "learning_rate": 5.4294212907657584e-06, "loss": 0.9516, "step": 7145 }, { "epoch": 1.4127083281277328, "grad_norm": 2.1875, "learning_rate": 5.428371373531498e-06, "loss": 0.9555, "step": 7146 }, { "epoch": 1.412908223182829, "grad_norm": 2.15625, "learning_rate": 5.427321437269027e-06, "loss": 0.9503, "step": 7147 }, { "epoch": 1.413108118237925, "grad_norm": 2.15625, "learning_rate": 5.4262714820249855e-06, "loss": 0.9878, "step": 7148 }, { "epoch": 1.4133080132930211, "grad_norm": 2.28125, "learning_rate": 5.425221507846008e-06, "loss": 1.0085, "step": 7149 }, { "epoch": 1.4135079083481172, "grad_norm": 2.34375, "learning_rate": 5.424171514778738e-06, "loss": 1.0855, "step": 7150 }, { "epoch": 1.4137078034032133, "grad_norm": 2.1875, "learning_rate": 5.423121502869814e-06, "loss": 1.0379, "step": 7151 }, { "epoch": 1.4139076984583094, "grad_norm": 2.359375, "learning_rate": 5.422071472165877e-06, "loss": 0.9778, "step": 7152 }, { "epoch": 1.4141075935134055, "grad_norm": 2.171875, "learning_rate": 5.421021422713573e-06, "loss": 0.9182, "step": 7153 }, { "epoch": 1.4143074885685016, "grad_norm": 2.15625, "learning_rate": 5.41997135455954e-06, "loss": 1.0276, "step": 7154 }, { "epoch": 1.4145073836235977, "grad_norm": 2.15625, "learning_rate": 5.418921267750425e-06, "loss": 0.9951, "step": 7155 }, { "epoch": 1.4147072786786936, "grad_norm": 2.234375, "learning_rate": 5.417871162332872e-06, "loss": 0.9143, "step": 7156 }, { "epoch": 1.4149071737337897, "grad_norm": 2.125, "learning_rate": 5.416821038353526e-06, "loss": 0.9185, "step": 7157 }, { "epoch": 1.4151070687888858, "grad_norm": 2.203125, "learning_rate": 5.415770895859034e-06, "loss": 0.9611, "step": 7158 }, { "epoch": 1.415306963843982, "grad_norm": 2.328125, "learning_rate": 5.4147207348960466e-06, "loss": 0.9433, "step": 7159 }, { "epoch": 1.415506858899078, "grad_norm": 2.296875, "learning_rate": 5.413670555511204e-06, "loss": 0.8962, "step": 7160 }, { "epoch": 1.4157067539541741, "grad_norm": 2.15625, "learning_rate": 5.412620357751161e-06, "loss": 0.9701, "step": 7161 }, { "epoch": 1.4159066490092702, "grad_norm": 2.15625, "learning_rate": 5.411570141662567e-06, "loss": 0.9616, "step": 7162 }, { "epoch": 1.416106544064366, "grad_norm": 2.34375, "learning_rate": 5.41051990729207e-06, "loss": 0.9964, "step": 7163 }, { "epoch": 1.4163064391194622, "grad_norm": 2.15625, "learning_rate": 5.409469654686323e-06, "loss": 0.9548, "step": 7164 }, { "epoch": 1.4165063341745583, "grad_norm": 2.265625, "learning_rate": 5.408419383891978e-06, "loss": 1.064, "step": 7165 }, { "epoch": 1.4167062292296544, "grad_norm": 2.109375, "learning_rate": 5.407369094955685e-06, "loss": 0.9294, "step": 7166 }, { "epoch": 1.4169061242847505, "grad_norm": 2.125, "learning_rate": 5.406318787924104e-06, "loss": 0.9913, "step": 7167 }, { "epoch": 1.4171060193398466, "grad_norm": 2.1875, "learning_rate": 5.4052684628438836e-06, "loss": 0.9951, "step": 7168 }, { "epoch": 1.4173059143949427, "grad_norm": 2.1875, "learning_rate": 5.404218119761682e-06, "loss": 1.0213, "step": 7169 }, { "epoch": 1.4175058094500388, "grad_norm": 2.15625, "learning_rate": 5.403167758724155e-06, "loss": 0.9688, "step": 7170 }, { "epoch": 1.417705704505135, "grad_norm": 2.296875, "learning_rate": 5.402117379777958e-06, "loss": 0.982, "step": 7171 }, { "epoch": 1.417905599560231, "grad_norm": 2.3125, "learning_rate": 5.401066982969751e-06, "loss": 0.9972, "step": 7172 }, { "epoch": 1.418105494615327, "grad_norm": 2.390625, "learning_rate": 5.400016568346192e-06, "loss": 0.9878, "step": 7173 }, { "epoch": 1.418305389670423, "grad_norm": 2.21875, "learning_rate": 5.398966135953938e-06, "loss": 0.9699, "step": 7174 }, { "epoch": 1.418505284725519, "grad_norm": 2.109375, "learning_rate": 5.397915685839652e-06, "loss": 0.9125, "step": 7175 }, { "epoch": 1.4187051797806152, "grad_norm": 2.140625, "learning_rate": 5.396865218049995e-06, "loss": 0.9142, "step": 7176 }, { "epoch": 1.4189050748357113, "grad_norm": 2.296875, "learning_rate": 5.395814732631625e-06, "loss": 0.9923, "step": 7177 }, { "epoch": 1.4191049698908074, "grad_norm": 2.265625, "learning_rate": 5.394764229631207e-06, "loss": 0.976, "step": 7178 }, { "epoch": 1.4193048649459035, "grad_norm": 2.21875, "learning_rate": 5.393713709095406e-06, "loss": 0.894, "step": 7179 }, { "epoch": 1.4195047600009993, "grad_norm": 2.140625, "learning_rate": 5.3926631710708835e-06, "loss": 0.956, "step": 7180 }, { "epoch": 1.4197046550560954, "grad_norm": 2.015625, "learning_rate": 5.3916126156043045e-06, "loss": 0.8711, "step": 7181 }, { "epoch": 1.4199045501111915, "grad_norm": 2.15625, "learning_rate": 5.3905620427423344e-06, "loss": 0.9661, "step": 7182 }, { "epoch": 1.4201044451662876, "grad_norm": 2.1875, "learning_rate": 5.38951145253164e-06, "loss": 0.963, "step": 7183 }, { "epoch": 1.4203043402213837, "grad_norm": 2.1875, "learning_rate": 5.388460845018889e-06, "loss": 1.0755, "step": 7184 }, { "epoch": 1.4205042352764798, "grad_norm": 2.21875, "learning_rate": 5.3874102202507485e-06, "loss": 0.934, "step": 7185 }, { "epoch": 1.420704130331576, "grad_norm": 2.046875, "learning_rate": 5.386359578273888e-06, "loss": 0.9272, "step": 7186 }, { "epoch": 1.420904025386672, "grad_norm": 2.09375, "learning_rate": 5.385308919134976e-06, "loss": 0.9468, "step": 7187 }, { "epoch": 1.4211039204417681, "grad_norm": 2.203125, "learning_rate": 5.384258242880682e-06, "loss": 1.0303, "step": 7188 }, { "epoch": 1.4213038154968642, "grad_norm": 2.359375, "learning_rate": 5.3832075495576794e-06, "loss": 0.9291, "step": 7189 }, { "epoch": 1.4215037105519603, "grad_norm": 2.25, "learning_rate": 5.382156839212639e-06, "loss": 0.9736, "step": 7190 }, { "epoch": 1.4217036056070562, "grad_norm": 2.234375, "learning_rate": 5.381106111892231e-06, "loss": 0.9767, "step": 7191 }, { "epoch": 1.4219035006621523, "grad_norm": 2.25, "learning_rate": 5.38005536764313e-06, "loss": 1.0116, "step": 7192 }, { "epoch": 1.4221033957172484, "grad_norm": 2.21875, "learning_rate": 5.3790046065120116e-06, "loss": 0.9492, "step": 7193 }, { "epoch": 1.4223032907723445, "grad_norm": 2.109375, "learning_rate": 5.377953828545548e-06, "loss": 0.9302, "step": 7194 }, { "epoch": 1.4225031858274406, "grad_norm": 2.140625, "learning_rate": 5.376903033790416e-06, "loss": 1.0104, "step": 7195 }, { "epoch": 1.4227030808825367, "grad_norm": 2.15625, "learning_rate": 5.375852222293292e-06, "loss": 0.9255, "step": 7196 }, { "epoch": 1.4229029759376328, "grad_norm": 2.125, "learning_rate": 5.374801394100851e-06, "loss": 0.9987, "step": 7197 }, { "epoch": 1.4231028709927287, "grad_norm": 2.140625, "learning_rate": 5.373750549259773e-06, "loss": 0.9395, "step": 7198 }, { "epoch": 1.4233027660478248, "grad_norm": 2.359375, "learning_rate": 5.372699687816736e-06, "loss": 0.8662, "step": 7199 }, { "epoch": 1.423502661102921, "grad_norm": 2.359375, "learning_rate": 5.3716488098184175e-06, "loss": 0.9063, "step": 7200 }, { "epoch": 1.423702556158017, "grad_norm": 2.203125, "learning_rate": 5.3705979153115e-06, "loss": 0.9422, "step": 7201 }, { "epoch": 1.423902451213113, "grad_norm": 2.25, "learning_rate": 5.369547004342661e-06, "loss": 0.9789, "step": 7202 }, { "epoch": 1.4241023462682092, "grad_norm": 2.078125, "learning_rate": 5.368496076958584e-06, "loss": 0.9639, "step": 7203 }, { "epoch": 1.4243022413233053, "grad_norm": 2.0625, "learning_rate": 5.367445133205952e-06, "loss": 0.9358, "step": 7204 }, { "epoch": 1.4245021363784014, "grad_norm": 2.203125, "learning_rate": 5.366394173131445e-06, "loss": 0.9495, "step": 7205 }, { "epoch": 1.4247020314334975, "grad_norm": 2.046875, "learning_rate": 5.365343196781749e-06, "loss": 0.813, "step": 7206 }, { "epoch": 1.4249019264885936, "grad_norm": 2.09375, "learning_rate": 5.364292204203548e-06, "loss": 1.0009, "step": 7207 }, { "epoch": 1.4251018215436897, "grad_norm": 2.046875, "learning_rate": 5.363241195443524e-06, "loss": 1.0062, "step": 7208 }, { "epoch": 1.4253017165987856, "grad_norm": 2.046875, "learning_rate": 5.362190170548365e-06, "loss": 0.9212, "step": 7209 }, { "epoch": 1.4255016116538817, "grad_norm": 2.0625, "learning_rate": 5.3611391295647585e-06, "loss": 0.8932, "step": 7210 }, { "epoch": 1.4257015067089778, "grad_norm": 2.234375, "learning_rate": 5.36008807253939e-06, "loss": 0.9705, "step": 7211 }, { "epoch": 1.4259014017640739, "grad_norm": 2.09375, "learning_rate": 5.359036999518948e-06, "loss": 0.9085, "step": 7212 }, { "epoch": 1.42610129681917, "grad_norm": 2.28125, "learning_rate": 5.35798591055012e-06, "loss": 0.9985, "step": 7213 }, { "epoch": 1.426301191874266, "grad_norm": 2.28125, "learning_rate": 5.356934805679597e-06, "loss": 0.8962, "step": 7214 }, { "epoch": 1.426501086929362, "grad_norm": 2.28125, "learning_rate": 5.355883684954068e-06, "loss": 0.9946, "step": 7215 }, { "epoch": 1.426700981984458, "grad_norm": 2.234375, "learning_rate": 5.354832548420222e-06, "loss": 1.0097, "step": 7216 }, { "epoch": 1.4269008770395541, "grad_norm": 2.15625, "learning_rate": 5.3537813961247546e-06, "loss": 0.9663, "step": 7217 }, { "epoch": 1.4271007720946502, "grad_norm": 2.140625, "learning_rate": 5.352730228114354e-06, "loss": 0.9505, "step": 7218 }, { "epoch": 1.4273006671497463, "grad_norm": 2.25, "learning_rate": 5.351679044435714e-06, "loss": 1.0214, "step": 7219 }, { "epoch": 1.4275005622048424, "grad_norm": 2.25, "learning_rate": 5.35062784513553e-06, "loss": 1.0575, "step": 7220 }, { "epoch": 1.4277004572599385, "grad_norm": 2.25, "learning_rate": 5.3495766302604945e-06, "loss": 0.9601, "step": 7221 }, { "epoch": 1.4279003523150346, "grad_norm": 2.015625, "learning_rate": 5.348525399857301e-06, "loss": 0.9333, "step": 7222 }, { "epoch": 1.4281002473701307, "grad_norm": 2.109375, "learning_rate": 5.347474153972647e-06, "loss": 1.025, "step": 7223 }, { "epoch": 1.4283001424252268, "grad_norm": 2.15625, "learning_rate": 5.346422892653229e-06, "loss": 0.9268, "step": 7224 }, { "epoch": 1.428500037480323, "grad_norm": 2.25, "learning_rate": 5.345371615945742e-06, "loss": 0.971, "step": 7225 }, { "epoch": 1.4286999325354188, "grad_norm": 2.265625, "learning_rate": 5.344320323896886e-06, "loss": 0.9422, "step": 7226 }, { "epoch": 1.428899827590515, "grad_norm": 2.203125, "learning_rate": 5.343269016553355e-06, "loss": 0.9879, "step": 7227 }, { "epoch": 1.429099722645611, "grad_norm": 2.140625, "learning_rate": 5.342217693961853e-06, "loss": 0.9466, "step": 7228 }, { "epoch": 1.4292996177007071, "grad_norm": 2.125, "learning_rate": 5.341166356169079e-06, "loss": 0.9764, "step": 7229 }, { "epoch": 1.4294995127558032, "grad_norm": 2.15625, "learning_rate": 5.34011500322173e-06, "loss": 0.9754, "step": 7230 }, { "epoch": 1.4296994078108993, "grad_norm": 2.1875, "learning_rate": 5.339063635166508e-06, "loss": 1.0708, "step": 7231 }, { "epoch": 1.4298993028659954, "grad_norm": 2.140625, "learning_rate": 5.338012252050118e-06, "loss": 1.0137, "step": 7232 }, { "epoch": 1.4300991979210913, "grad_norm": 2.03125, "learning_rate": 5.336960853919259e-06, "loss": 0.9512, "step": 7233 }, { "epoch": 1.4302990929761874, "grad_norm": 1.9921875, "learning_rate": 5.335909440820635e-06, "loss": 0.861, "step": 7234 }, { "epoch": 1.4304989880312835, "grad_norm": 2.296875, "learning_rate": 5.334858012800948e-06, "loss": 1.0068, "step": 7235 }, { "epoch": 1.4306988830863796, "grad_norm": 2.15625, "learning_rate": 5.333806569906904e-06, "loss": 0.959, "step": 7236 }, { "epoch": 1.4308987781414757, "grad_norm": 2.046875, "learning_rate": 5.3327551121852095e-06, "loss": 0.8968, "step": 7237 }, { "epoch": 1.4310986731965718, "grad_norm": 2.03125, "learning_rate": 5.331703639682568e-06, "loss": 0.949, "step": 7238 }, { "epoch": 1.431298568251668, "grad_norm": 2.125, "learning_rate": 5.330652152445686e-06, "loss": 0.9953, "step": 7239 }, { "epoch": 1.431498463306764, "grad_norm": 2.09375, "learning_rate": 5.329600650521272e-06, "loss": 0.9063, "step": 7240 }, { "epoch": 1.43169835836186, "grad_norm": 2.21875, "learning_rate": 5.32854913395603e-06, "loss": 0.9747, "step": 7241 }, { "epoch": 1.4318982534169562, "grad_norm": 2.171875, "learning_rate": 5.327497602796671e-06, "loss": 1.0319, "step": 7242 }, { "epoch": 1.4320981484720523, "grad_norm": 2.21875, "learning_rate": 5.326446057089905e-06, "loss": 0.9916, "step": 7243 }, { "epoch": 1.4322980435271482, "grad_norm": 2.234375, "learning_rate": 5.325394496882439e-06, "loss": 0.9394, "step": 7244 }, { "epoch": 1.4324979385822443, "grad_norm": 2.203125, "learning_rate": 5.324342922220983e-06, "loss": 0.9727, "step": 7245 }, { "epoch": 1.4326978336373404, "grad_norm": 2.09375, "learning_rate": 5.323291333152251e-06, "loss": 1.0183, "step": 7246 }, { "epoch": 1.4328977286924365, "grad_norm": 2.140625, "learning_rate": 5.322239729722951e-06, "loss": 0.9756, "step": 7247 }, { "epoch": 1.4330976237475326, "grad_norm": 2.046875, "learning_rate": 5.321188111979797e-06, "loss": 0.9, "step": 7248 }, { "epoch": 1.4332975188026287, "grad_norm": 2.046875, "learning_rate": 5.320136479969502e-06, "loss": 0.892, "step": 7249 }, { "epoch": 1.4334974138577246, "grad_norm": 2.09375, "learning_rate": 5.319084833738779e-06, "loss": 0.9366, "step": 7250 }, { "epoch": 1.4336973089128207, "grad_norm": 2.1875, "learning_rate": 5.318033173334341e-06, "loss": 0.9634, "step": 7251 }, { "epoch": 1.4338972039679168, "grad_norm": 2.1875, "learning_rate": 5.316981498802905e-06, "loss": 0.8643, "step": 7252 }, { "epoch": 1.4340970990230129, "grad_norm": 2.078125, "learning_rate": 5.315929810191183e-06, "loss": 0.977, "step": 7253 }, { "epoch": 1.434296994078109, "grad_norm": 2.125, "learning_rate": 5.3148781075458924e-06, "loss": 0.9984, "step": 7254 }, { "epoch": 1.434496889133205, "grad_norm": 2.203125, "learning_rate": 5.313826390913751e-06, "loss": 0.9491, "step": 7255 }, { "epoch": 1.4346967841883012, "grad_norm": 2.21875, "learning_rate": 5.312774660341473e-06, "loss": 1.0773, "step": 7256 }, { "epoch": 1.4348966792433973, "grad_norm": 2.1875, "learning_rate": 5.311722915875781e-06, "loss": 1.0079, "step": 7257 }, { "epoch": 1.4350965742984934, "grad_norm": 2.265625, "learning_rate": 5.310671157563387e-06, "loss": 1.0116, "step": 7258 }, { "epoch": 1.4352964693535895, "grad_norm": 2.15625, "learning_rate": 5.309619385451016e-06, "loss": 1.018, "step": 7259 }, { "epoch": 1.4354963644086856, "grad_norm": 2.203125, "learning_rate": 5.308567599585384e-06, "loss": 0.965, "step": 7260 }, { "epoch": 1.4356962594637814, "grad_norm": 2.234375, "learning_rate": 5.307515800013212e-06, "loss": 1.0097, "step": 7261 }, { "epoch": 1.4358961545188775, "grad_norm": 2.125, "learning_rate": 5.3064639867812205e-06, "loss": 1.0474, "step": 7262 }, { "epoch": 1.4360960495739736, "grad_norm": 2.046875, "learning_rate": 5.305412159936133e-06, "loss": 0.9374, "step": 7263 }, { "epoch": 1.4362959446290697, "grad_norm": 2.15625, "learning_rate": 5.3043603195246684e-06, "loss": 0.9425, "step": 7264 }, { "epoch": 1.4364958396841658, "grad_norm": 2.125, "learning_rate": 5.303308465593552e-06, "loss": 0.9616, "step": 7265 }, { "epoch": 1.436695734739262, "grad_norm": 2.125, "learning_rate": 5.3022565981895045e-06, "loss": 0.994, "step": 7266 }, { "epoch": 1.436895629794358, "grad_norm": 2.234375, "learning_rate": 5.301204717359253e-06, "loss": 1.0014, "step": 7267 }, { "epoch": 1.437095524849454, "grad_norm": 2.203125, "learning_rate": 5.300152823149519e-06, "loss": 1.0367, "step": 7268 }, { "epoch": 1.43729541990455, "grad_norm": 2.15625, "learning_rate": 5.299100915607029e-06, "loss": 0.8804, "step": 7269 }, { "epoch": 1.437495314959646, "grad_norm": 2.09375, "learning_rate": 5.298048994778508e-06, "loss": 0.8828, "step": 7270 }, { "epoch": 1.4376952100147422, "grad_norm": 2.125, "learning_rate": 5.296997060710684e-06, "loss": 0.8776, "step": 7271 }, { "epoch": 1.4378951050698383, "grad_norm": 2.203125, "learning_rate": 5.29594511345028e-06, "loss": 0.98, "step": 7272 }, { "epoch": 1.4380950001249344, "grad_norm": 2.265625, "learning_rate": 5.294893153044027e-06, "loss": 1.0607, "step": 7273 }, { "epoch": 1.4382948951800305, "grad_norm": 2.34375, "learning_rate": 5.2938411795386516e-06, "loss": 0.9884, "step": 7274 }, { "epoch": 1.4384947902351266, "grad_norm": 2.265625, "learning_rate": 5.292789192980882e-06, "loss": 1.0028, "step": 7275 }, { "epoch": 1.4386946852902227, "grad_norm": 2.234375, "learning_rate": 5.291737193417448e-06, "loss": 0.9676, "step": 7276 }, { "epoch": 1.4388945803453188, "grad_norm": 2.234375, "learning_rate": 5.290685180895078e-06, "loss": 0.9076, "step": 7277 }, { "epoch": 1.439094475400415, "grad_norm": 2.15625, "learning_rate": 5.289633155460504e-06, "loss": 0.9082, "step": 7278 }, { "epoch": 1.4392943704555108, "grad_norm": 2.109375, "learning_rate": 5.288581117160457e-06, "loss": 0.9861, "step": 7279 }, { "epoch": 1.4394942655106069, "grad_norm": 2.296875, "learning_rate": 5.287529066041666e-06, "loss": 0.9693, "step": 7280 }, { "epoch": 1.439694160565703, "grad_norm": 2.1875, "learning_rate": 5.286477002150866e-06, "loss": 0.9659, "step": 7281 }, { "epoch": 1.439894055620799, "grad_norm": 2.03125, "learning_rate": 5.285424925534788e-06, "loss": 0.8919, "step": 7282 }, { "epoch": 1.4400939506758952, "grad_norm": 2.1875, "learning_rate": 5.284372836240166e-06, "loss": 0.9426, "step": 7283 }, { "epoch": 1.4402938457309913, "grad_norm": 2.15625, "learning_rate": 5.283320734313732e-06, "loss": 0.9294, "step": 7284 }, { "epoch": 1.4404937407860874, "grad_norm": 2.1875, "learning_rate": 5.282268619802221e-06, "loss": 1.0199, "step": 7285 }, { "epoch": 1.4406936358411833, "grad_norm": 2.109375, "learning_rate": 5.281216492752368e-06, "loss": 1.0224, "step": 7286 }, { "epoch": 1.4408935308962794, "grad_norm": 2.1875, "learning_rate": 5.280164353210908e-06, "loss": 0.851, "step": 7287 }, { "epoch": 1.4410934259513755, "grad_norm": 2.1875, "learning_rate": 5.279112201224579e-06, "loss": 1.0081, "step": 7288 }, { "epoch": 1.4412933210064716, "grad_norm": 2.0625, "learning_rate": 5.278060036840115e-06, "loss": 0.9771, "step": 7289 }, { "epoch": 1.4414932160615677, "grad_norm": 2.15625, "learning_rate": 5.2770078601042526e-06, "loss": 0.9269, "step": 7290 }, { "epoch": 1.4416931111166638, "grad_norm": 2.0625, "learning_rate": 5.275955671063733e-06, "loss": 0.9333, "step": 7291 }, { "epoch": 1.4418930061717599, "grad_norm": 2.15625, "learning_rate": 5.27490346976529e-06, "loss": 0.9411, "step": 7292 }, { "epoch": 1.442092901226856, "grad_norm": 2.28125, "learning_rate": 5.273851256255664e-06, "loss": 1.0253, "step": 7293 }, { "epoch": 1.442292796281952, "grad_norm": 2.28125, "learning_rate": 5.272799030581597e-06, "loss": 0.947, "step": 7294 }, { "epoch": 1.4424926913370482, "grad_norm": 2.078125, "learning_rate": 5.271746792789824e-06, "loss": 0.9798, "step": 7295 }, { "epoch": 1.4426925863921443, "grad_norm": 2.125, "learning_rate": 5.270694542927089e-06, "loss": 0.947, "step": 7296 }, { "epoch": 1.4428924814472401, "grad_norm": 2.171875, "learning_rate": 5.269642281040129e-06, "loss": 0.8852, "step": 7297 }, { "epoch": 1.4430923765023362, "grad_norm": 2.15625, "learning_rate": 5.26859000717569e-06, "loss": 0.9008, "step": 7298 }, { "epoch": 1.4432922715574323, "grad_norm": 2.125, "learning_rate": 5.26753772138051e-06, "loss": 1.0346, "step": 7299 }, { "epoch": 1.4434921666125284, "grad_norm": 2.265625, "learning_rate": 5.266485423701335e-06, "loss": 1.0262, "step": 7300 }, { "epoch": 1.4436920616676245, "grad_norm": 2.171875, "learning_rate": 5.265433114184903e-06, "loss": 1.0945, "step": 7301 }, { "epoch": 1.4438919567227206, "grad_norm": 2.109375, "learning_rate": 5.264380792877963e-06, "loss": 0.9568, "step": 7302 }, { "epoch": 1.4440918517778165, "grad_norm": 2.15625, "learning_rate": 5.263328459827256e-06, "loss": 0.9913, "step": 7303 }, { "epoch": 1.4442917468329126, "grad_norm": 2.28125, "learning_rate": 5.262276115079526e-06, "loss": 1.01, "step": 7304 }, { "epoch": 1.4444916418880087, "grad_norm": 2.0625, "learning_rate": 5.26122375868152e-06, "loss": 0.9188, "step": 7305 }, { "epoch": 1.4446915369431048, "grad_norm": 2.15625, "learning_rate": 5.260171390679981e-06, "loss": 1.0598, "step": 7306 }, { "epoch": 1.444891431998201, "grad_norm": 2.25, "learning_rate": 5.2591190111216574e-06, "loss": 0.9469, "step": 7307 }, { "epoch": 1.445091327053297, "grad_norm": 2.125, "learning_rate": 5.258066620053295e-06, "loss": 0.918, "step": 7308 }, { "epoch": 1.4452912221083931, "grad_norm": 2.34375, "learning_rate": 5.257014217521641e-06, "loss": 1.0795, "step": 7309 }, { "epoch": 1.4454911171634892, "grad_norm": 2.234375, "learning_rate": 5.255961803573444e-06, "loss": 1.0236, "step": 7310 }, { "epoch": 1.4456910122185853, "grad_norm": 2.25, "learning_rate": 5.254909378255448e-06, "loss": 1.0371, "step": 7311 }, { "epoch": 1.4458909072736814, "grad_norm": 2.0625, "learning_rate": 5.2538569416144065e-06, "loss": 0.8564, "step": 7312 }, { "epoch": 1.4460908023287775, "grad_norm": 2.3125, "learning_rate": 5.252804493697068e-06, "loss": 1.0691, "step": 7313 }, { "epoch": 1.4462906973838734, "grad_norm": 2.25, "learning_rate": 5.251752034550181e-06, "loss": 1.0646, "step": 7314 }, { "epoch": 1.4464905924389695, "grad_norm": 2.21875, "learning_rate": 5.250699564220493e-06, "loss": 0.9689, "step": 7315 }, { "epoch": 1.4466904874940656, "grad_norm": 2.21875, "learning_rate": 5.24964708275476e-06, "loss": 1.0675, "step": 7316 }, { "epoch": 1.4468903825491617, "grad_norm": 2.09375, "learning_rate": 5.248594590199727e-06, "loss": 0.9403, "step": 7317 }, { "epoch": 1.4470902776042578, "grad_norm": 2.03125, "learning_rate": 5.247542086602151e-06, "loss": 0.8519, "step": 7318 }, { "epoch": 1.447290172659354, "grad_norm": 2.171875, "learning_rate": 5.2464895720087816e-06, "loss": 0.908, "step": 7319 }, { "epoch": 1.44749006771445, "grad_norm": 2.140625, "learning_rate": 5.245437046466371e-06, "loss": 0.9291, "step": 7320 }, { "epoch": 1.4476899627695459, "grad_norm": 2.234375, "learning_rate": 5.244384510021673e-06, "loss": 0.9848, "step": 7321 }, { "epoch": 1.447889857824642, "grad_norm": 2.140625, "learning_rate": 5.243331962721443e-06, "loss": 0.9379, "step": 7322 }, { "epoch": 1.448089752879738, "grad_norm": 1.90625, "learning_rate": 5.242279404612431e-06, "loss": 0.8042, "step": 7323 }, { "epoch": 1.4482896479348342, "grad_norm": 2.125, "learning_rate": 5.241226835741395e-06, "loss": 0.943, "step": 7324 }, { "epoch": 1.4484895429899303, "grad_norm": 2.015625, "learning_rate": 5.240174256155088e-06, "loss": 0.8862, "step": 7325 }, { "epoch": 1.4486894380450264, "grad_norm": 2.1875, "learning_rate": 5.239121665900266e-06, "loss": 0.9228, "step": 7326 }, { "epoch": 1.4488893331001225, "grad_norm": 2.140625, "learning_rate": 5.238069065023686e-06, "loss": 1.0576, "step": 7327 }, { "epoch": 1.4490892281552186, "grad_norm": 2.1875, "learning_rate": 5.237016453572103e-06, "loss": 0.8776, "step": 7328 }, { "epoch": 1.4492891232103147, "grad_norm": 2.078125, "learning_rate": 5.235963831592275e-06, "loss": 1.0057, "step": 7329 }, { "epoch": 1.4494890182654108, "grad_norm": 2.3125, "learning_rate": 5.23491119913096e-06, "loss": 1.0676, "step": 7330 }, { "epoch": 1.4496889133205069, "grad_norm": 2.140625, "learning_rate": 5.233858556234913e-06, "loss": 0.9775, "step": 7331 }, { "epoch": 1.4498888083756027, "grad_norm": 2.28125, "learning_rate": 5.232805902950894e-06, "loss": 1.0474, "step": 7332 }, { "epoch": 1.4500887034306988, "grad_norm": 2.109375, "learning_rate": 5.231753239325662e-06, "loss": 0.9413, "step": 7333 }, { "epoch": 1.450288598485795, "grad_norm": 2.203125, "learning_rate": 5.230700565405975e-06, "loss": 0.9317, "step": 7334 }, { "epoch": 1.450488493540891, "grad_norm": 2.25, "learning_rate": 5.229647881238594e-06, "loss": 1.0024, "step": 7335 }, { "epoch": 1.4506883885959871, "grad_norm": 2.203125, "learning_rate": 5.228595186870279e-06, "loss": 0.9927, "step": 7336 }, { "epoch": 1.4508882836510832, "grad_norm": 2.1875, "learning_rate": 5.227542482347789e-06, "loss": 0.9571, "step": 7337 }, { "epoch": 1.4510881787061791, "grad_norm": 2.125, "learning_rate": 5.226489767717887e-06, "loss": 0.9475, "step": 7338 }, { "epoch": 1.4512880737612752, "grad_norm": 2.140625, "learning_rate": 5.225437043027335e-06, "loss": 1.0594, "step": 7339 }, { "epoch": 1.4514879688163713, "grad_norm": 2.046875, "learning_rate": 5.224384308322892e-06, "loss": 0.9649, "step": 7340 }, { "epoch": 1.4516878638714674, "grad_norm": 2.046875, "learning_rate": 5.2233315636513225e-06, "loss": 1.0089, "step": 7341 }, { "epoch": 1.4518877589265635, "grad_norm": 2.125, "learning_rate": 5.222278809059389e-06, "loss": 1.0211, "step": 7342 }, { "epoch": 1.4520876539816596, "grad_norm": 2.046875, "learning_rate": 5.221226044593855e-06, "loss": 0.8659, "step": 7343 }, { "epoch": 1.4522875490367557, "grad_norm": 2.15625, "learning_rate": 5.220173270301483e-06, "loss": 0.9456, "step": 7344 }, { "epoch": 1.4524874440918518, "grad_norm": 2.140625, "learning_rate": 5.219120486229038e-06, "loss": 0.9313, "step": 7345 }, { "epoch": 1.452687339146948, "grad_norm": 2.125, "learning_rate": 5.2180676924232846e-06, "loss": 0.9382, "step": 7346 }, { "epoch": 1.452887234202044, "grad_norm": 2.140625, "learning_rate": 5.217014888930988e-06, "loss": 0.9936, "step": 7347 }, { "epoch": 1.4530871292571401, "grad_norm": 2.125, "learning_rate": 5.215962075798912e-06, "loss": 1.0392, "step": 7348 }, { "epoch": 1.453287024312236, "grad_norm": 2.203125, "learning_rate": 5.214909253073823e-06, "loss": 1.0348, "step": 7349 }, { "epoch": 1.453486919367332, "grad_norm": 2.140625, "learning_rate": 5.21385642080249e-06, "loss": 0.8826, "step": 7350 }, { "epoch": 1.4536868144224282, "grad_norm": 2.234375, "learning_rate": 5.212803579031676e-06, "loss": 0.9775, "step": 7351 }, { "epoch": 1.4538867094775243, "grad_norm": 1.96875, "learning_rate": 5.21175072780815e-06, "loss": 0.8595, "step": 7352 }, { "epoch": 1.4540866045326204, "grad_norm": 2.359375, "learning_rate": 5.2106978671786815e-06, "loss": 0.9054, "step": 7353 }, { "epoch": 1.4542864995877165, "grad_norm": 2.171875, "learning_rate": 5.209644997190033e-06, "loss": 0.9559, "step": 7354 }, { "epoch": 1.4544863946428126, "grad_norm": 2.09375, "learning_rate": 5.2085921178889785e-06, "loss": 0.9998, "step": 7355 }, { "epoch": 1.4546862896979085, "grad_norm": 2.203125, "learning_rate": 5.207539229322284e-06, "loss": 1.0, "step": 7356 }, { "epoch": 1.4548861847530046, "grad_norm": 2.125, "learning_rate": 5.206486331536718e-06, "loss": 1.0222, "step": 7357 }, { "epoch": 1.4550860798081007, "grad_norm": 2.203125, "learning_rate": 5.205433424579052e-06, "loss": 0.9159, "step": 7358 }, { "epoch": 1.4552859748631968, "grad_norm": 2.140625, "learning_rate": 5.204380508496055e-06, "loss": 0.9389, "step": 7359 }, { "epoch": 1.4554858699182929, "grad_norm": 2.203125, "learning_rate": 5.203327583334499e-06, "loss": 1.0703, "step": 7360 }, { "epoch": 1.455685764973389, "grad_norm": 2.140625, "learning_rate": 5.202274649141152e-06, "loss": 0.9504, "step": 7361 }, { "epoch": 1.455885660028485, "grad_norm": 2.28125, "learning_rate": 5.2012217059627865e-06, "loss": 1.0219, "step": 7362 }, { "epoch": 1.4560855550835812, "grad_norm": 2.09375, "learning_rate": 5.200168753846174e-06, "loss": 0.9764, "step": 7363 }, { "epoch": 1.4562854501386773, "grad_norm": 2.125, "learning_rate": 5.199115792838089e-06, "loss": 1.0029, "step": 7364 }, { "epoch": 1.4564853451937734, "grad_norm": 2.265625, "learning_rate": 5.1980628229852995e-06, "loss": 0.9554, "step": 7365 }, { "epoch": 1.4566852402488695, "grad_norm": 2.3125, "learning_rate": 5.197009844334581e-06, "loss": 0.9349, "step": 7366 }, { "epoch": 1.4568851353039654, "grad_norm": 2.109375, "learning_rate": 5.195956856932707e-06, "loss": 0.9816, "step": 7367 }, { "epoch": 1.4570850303590615, "grad_norm": 2.1875, "learning_rate": 5.194903860826449e-06, "loss": 1.0072, "step": 7368 }, { "epoch": 1.4572849254141576, "grad_norm": 2.109375, "learning_rate": 5.193850856062585e-06, "loss": 0.9783, "step": 7369 }, { "epoch": 1.4574848204692537, "grad_norm": 2.078125, "learning_rate": 5.192797842687883e-06, "loss": 0.8432, "step": 7370 }, { "epoch": 1.4576847155243498, "grad_norm": 2.125, "learning_rate": 5.191744820749124e-06, "loss": 0.8715, "step": 7371 }, { "epoch": 1.4578846105794458, "grad_norm": 2.234375, "learning_rate": 5.1906917902930795e-06, "loss": 0.9895, "step": 7372 }, { "epoch": 1.4580845056345417, "grad_norm": 2.21875, "learning_rate": 5.189638751366527e-06, "loss": 0.9933, "step": 7373 }, { "epoch": 1.4582844006896378, "grad_norm": 2.15625, "learning_rate": 5.1885857040162406e-06, "loss": 1.0211, "step": 7374 }, { "epoch": 1.458484295744734, "grad_norm": 2.296875, "learning_rate": 5.187532648288997e-06, "loss": 0.9895, "step": 7375 }, { "epoch": 1.45868419079983, "grad_norm": 2.140625, "learning_rate": 5.186479584231575e-06, "loss": 1.005, "step": 7376 }, { "epoch": 1.4588840858549261, "grad_norm": 2.140625, "learning_rate": 5.185426511890749e-06, "loss": 1.0098, "step": 7377 }, { "epoch": 1.4590839809100222, "grad_norm": 2.296875, "learning_rate": 5.184373431313297e-06, "loss": 0.9266, "step": 7378 }, { "epoch": 1.4592838759651183, "grad_norm": 1.96875, "learning_rate": 5.183320342545995e-06, "loss": 0.9569, "step": 7379 }, { "epoch": 1.4594837710202144, "grad_norm": 2.046875, "learning_rate": 5.1822672456356245e-06, "loss": 0.9165, "step": 7380 }, { "epoch": 1.4596836660753105, "grad_norm": 2.109375, "learning_rate": 5.181214140628964e-06, "loss": 0.9835, "step": 7381 }, { "epoch": 1.4598835611304066, "grad_norm": 2.15625, "learning_rate": 5.180161027572788e-06, "loss": 0.9744, "step": 7382 }, { "epoch": 1.4600834561855027, "grad_norm": 2.140625, "learning_rate": 5.179107906513879e-06, "loss": 0.974, "step": 7383 }, { "epoch": 1.4602833512405986, "grad_norm": 2.109375, "learning_rate": 5.178054777499018e-06, "loss": 0.9033, "step": 7384 }, { "epoch": 1.4604832462956947, "grad_norm": 2.0625, "learning_rate": 5.177001640574981e-06, "loss": 0.9548, "step": 7385 }, { "epoch": 1.4606831413507908, "grad_norm": 2.078125, "learning_rate": 5.1759484957885505e-06, "loss": 0.9575, "step": 7386 }, { "epoch": 1.460883036405887, "grad_norm": 2.265625, "learning_rate": 5.1748953431865064e-06, "loss": 1.0753, "step": 7387 }, { "epoch": 1.461082931460983, "grad_norm": 2.15625, "learning_rate": 5.1738421828156295e-06, "loss": 0.988, "step": 7388 }, { "epoch": 1.461282826516079, "grad_norm": 2.125, "learning_rate": 5.172789014722702e-06, "loss": 0.9311, "step": 7389 }, { "epoch": 1.4614827215711752, "grad_norm": 2.140625, "learning_rate": 5.171735838954505e-06, "loss": 0.9836, "step": 7390 }, { "epoch": 1.461682616626271, "grad_norm": 2.09375, "learning_rate": 5.17068265555782e-06, "loss": 0.9558, "step": 7391 }, { "epoch": 1.4618825116813672, "grad_norm": 2.203125, "learning_rate": 5.169629464579431e-06, "loss": 0.9883, "step": 7392 }, { "epoch": 1.4620824067364633, "grad_norm": 2.078125, "learning_rate": 5.168576266066118e-06, "loss": 0.9813, "step": 7393 }, { "epoch": 1.4622823017915594, "grad_norm": 2.109375, "learning_rate": 5.1675230600646645e-06, "loss": 0.898, "step": 7394 }, { "epoch": 1.4624821968466555, "grad_norm": 2.171875, "learning_rate": 5.166469846621856e-06, "loss": 0.9725, "step": 7395 }, { "epoch": 1.4626820919017516, "grad_norm": 2.265625, "learning_rate": 5.165416625784474e-06, "loss": 0.9968, "step": 7396 }, { "epoch": 1.4628819869568477, "grad_norm": 2.328125, "learning_rate": 5.164363397599302e-06, "loss": 0.9762, "step": 7397 }, { "epoch": 1.4630818820119438, "grad_norm": 2.109375, "learning_rate": 5.163310162113128e-06, "loss": 1.0231, "step": 7398 }, { "epoch": 1.4632817770670399, "grad_norm": 2.234375, "learning_rate": 5.162256919372732e-06, "loss": 1.0243, "step": 7399 }, { "epoch": 1.463481672122136, "grad_norm": 2.078125, "learning_rate": 5.1612036694249025e-06, "loss": 0.9186, "step": 7400 }, { "epoch": 1.463681567177232, "grad_norm": 2.171875, "learning_rate": 5.160150412316421e-06, "loss": 0.9296, "step": 7401 }, { "epoch": 1.463881462232328, "grad_norm": 2.15625, "learning_rate": 5.159097148094076e-06, "loss": 0.8964, "step": 7402 }, { "epoch": 1.464081357287424, "grad_norm": 2.171875, "learning_rate": 5.158043876804655e-06, "loss": 1.0496, "step": 7403 }, { "epoch": 1.4642812523425202, "grad_norm": 2.1875, "learning_rate": 5.15699059849494e-06, "loss": 0.9805, "step": 7404 }, { "epoch": 1.4644811473976163, "grad_norm": 2.109375, "learning_rate": 5.155937313211719e-06, "loss": 0.8961, "step": 7405 }, { "epoch": 1.4646810424527124, "grad_norm": 2.109375, "learning_rate": 5.15488402100178e-06, "loss": 0.9394, "step": 7406 }, { "epoch": 1.4648809375078085, "grad_norm": 2.296875, "learning_rate": 5.15383072191191e-06, "loss": 0.9571, "step": 7407 }, { "epoch": 1.4650808325629046, "grad_norm": 2.078125, "learning_rate": 5.152777415988894e-06, "loss": 0.9525, "step": 7408 }, { "epoch": 1.4652807276180004, "grad_norm": 2.109375, "learning_rate": 5.151724103279524e-06, "loss": 0.9849, "step": 7409 }, { "epoch": 1.4654806226730965, "grad_norm": 2.140625, "learning_rate": 5.150670783830584e-06, "loss": 0.9647, "step": 7410 }, { "epoch": 1.4656805177281926, "grad_norm": 2.375, "learning_rate": 5.149617457688862e-06, "loss": 0.9923, "step": 7411 }, { "epoch": 1.4658804127832887, "grad_norm": 2.359375, "learning_rate": 5.148564124901152e-06, "loss": 0.9533, "step": 7412 }, { "epoch": 1.4660803078383848, "grad_norm": 2.0625, "learning_rate": 5.147510785514237e-06, "loss": 0.8886, "step": 7413 }, { "epoch": 1.466280202893481, "grad_norm": 2.25, "learning_rate": 5.14645743957491e-06, "loss": 1.0243, "step": 7414 }, { "epoch": 1.466480097948577, "grad_norm": 2.265625, "learning_rate": 5.145404087129959e-06, "loss": 1.0198, "step": 7415 }, { "epoch": 1.4666799930036731, "grad_norm": 2.046875, "learning_rate": 5.144350728226173e-06, "loss": 0.9834, "step": 7416 }, { "epoch": 1.4668798880587692, "grad_norm": 2.171875, "learning_rate": 5.143297362910344e-06, "loss": 1.0275, "step": 7417 }, { "epoch": 1.4670797831138653, "grad_norm": 5.59375, "learning_rate": 5.142243991229261e-06, "loss": 0.9838, "step": 7418 }, { "epoch": 1.4672796781689614, "grad_norm": 2.359375, "learning_rate": 5.141190613229714e-06, "loss": 0.9877, "step": 7419 }, { "epoch": 1.4674795732240573, "grad_norm": 2.15625, "learning_rate": 5.1401372289584975e-06, "loss": 0.9014, "step": 7420 }, { "epoch": 1.4676794682791534, "grad_norm": 2.109375, "learning_rate": 5.139083838462398e-06, "loss": 0.9056, "step": 7421 }, { "epoch": 1.4678793633342495, "grad_norm": 2.25, "learning_rate": 5.138030441788211e-06, "loss": 0.9881, "step": 7422 }, { "epoch": 1.4680792583893456, "grad_norm": 2.125, "learning_rate": 5.136977038982727e-06, "loss": 0.8494, "step": 7423 }, { "epoch": 1.4682791534444417, "grad_norm": 2.171875, "learning_rate": 5.135923630092736e-06, "loss": 1.0048, "step": 7424 }, { "epoch": 1.4684790484995378, "grad_norm": 2.234375, "learning_rate": 5.134870215165032e-06, "loss": 1.0214, "step": 7425 }, { "epoch": 1.4686789435546337, "grad_norm": 2.109375, "learning_rate": 5.133816794246408e-06, "loss": 0.9724, "step": 7426 }, { "epoch": 1.4688788386097298, "grad_norm": 2.21875, "learning_rate": 5.132763367383655e-06, "loss": 1.0002, "step": 7427 }, { "epoch": 1.4690787336648259, "grad_norm": 2.125, "learning_rate": 5.131709934623568e-06, "loss": 0.9988, "step": 7428 }, { "epoch": 1.469278628719922, "grad_norm": 2.28125, "learning_rate": 5.130656496012941e-06, "loss": 0.8107, "step": 7429 }, { "epoch": 1.469478523775018, "grad_norm": 2.125, "learning_rate": 5.129603051598565e-06, "loss": 1.0389, "step": 7430 }, { "epoch": 1.4696784188301142, "grad_norm": 2.265625, "learning_rate": 5.128549601427237e-06, "loss": 1.0429, "step": 7431 }, { "epoch": 1.4698783138852103, "grad_norm": 2.3125, "learning_rate": 5.127496145545747e-06, "loss": 1.1101, "step": 7432 }, { "epoch": 1.4700782089403064, "grad_norm": 2.203125, "learning_rate": 5.126442684000893e-06, "loss": 0.9468, "step": 7433 }, { "epoch": 1.4702781039954025, "grad_norm": 2.125, "learning_rate": 5.125389216839469e-06, "loss": 0.9814, "step": 7434 }, { "epoch": 1.4704779990504986, "grad_norm": 2.203125, "learning_rate": 5.124335744108269e-06, "loss": 0.9914, "step": 7435 }, { "epoch": 1.4706778941055947, "grad_norm": 2.25, "learning_rate": 5.123282265854088e-06, "loss": 0.9354, "step": 7436 }, { "epoch": 1.4708777891606906, "grad_norm": 2.078125, "learning_rate": 5.122228782123723e-06, "loss": 1.0049, "step": 7437 }, { "epoch": 1.4710776842157867, "grad_norm": 2.140625, "learning_rate": 5.121175292963968e-06, "loss": 0.9465, "step": 7438 }, { "epoch": 1.4712775792708828, "grad_norm": 2.28125, "learning_rate": 5.120121798421619e-06, "loss": 0.9718, "step": 7439 }, { "epoch": 1.4714774743259789, "grad_norm": 2.0625, "learning_rate": 5.119068298543474e-06, "loss": 0.9102, "step": 7440 }, { "epoch": 1.471677369381075, "grad_norm": 2.046875, "learning_rate": 5.1180147933763255e-06, "loss": 0.9933, "step": 7441 }, { "epoch": 1.471877264436171, "grad_norm": 2.109375, "learning_rate": 5.116961282966975e-06, "loss": 0.9802, "step": 7442 }, { "epoch": 1.4720771594912672, "grad_norm": 2.0625, "learning_rate": 5.1159077673622145e-06, "loss": 0.9478, "step": 7443 }, { "epoch": 1.472277054546363, "grad_norm": 2.34375, "learning_rate": 5.114854246608845e-06, "loss": 1.0779, "step": 7444 }, { "epoch": 1.4724769496014591, "grad_norm": 2.234375, "learning_rate": 5.113800720753662e-06, "loss": 1.0184, "step": 7445 }, { "epoch": 1.4726768446565552, "grad_norm": 2.1875, "learning_rate": 5.112747189843462e-06, "loss": 1.0157, "step": 7446 }, { "epoch": 1.4728767397116513, "grad_norm": 2.1875, "learning_rate": 5.111693653925045e-06, "loss": 1.0297, "step": 7447 }, { "epoch": 1.4730766347667474, "grad_norm": 2.171875, "learning_rate": 5.110640113045208e-06, "loss": 0.9191, "step": 7448 }, { "epoch": 1.4732765298218435, "grad_norm": 2.15625, "learning_rate": 5.109586567250747e-06, "loss": 0.9916, "step": 7449 }, { "epoch": 1.4734764248769396, "grad_norm": 2.203125, "learning_rate": 5.108533016588464e-06, "loss": 0.9522, "step": 7450 }, { "epoch": 1.4736763199320357, "grad_norm": 2.21875, "learning_rate": 5.107479461105156e-06, "loss": 0.9472, "step": 7451 }, { "epoch": 1.4738762149871318, "grad_norm": 2.171875, "learning_rate": 5.106425900847621e-06, "loss": 0.9726, "step": 7452 }, { "epoch": 1.474076110042228, "grad_norm": 2.125, "learning_rate": 5.10537233586266e-06, "loss": 0.9501, "step": 7453 }, { "epoch": 1.474276005097324, "grad_norm": 2.15625, "learning_rate": 5.104318766197072e-06, "loss": 0.9535, "step": 7454 }, { "epoch": 1.47447590015242, "grad_norm": 2.1875, "learning_rate": 5.103265191897654e-06, "loss": 0.9887, "step": 7455 }, { "epoch": 1.474675795207516, "grad_norm": 2.09375, "learning_rate": 5.102211613011207e-06, "loss": 0.964, "step": 7456 }, { "epoch": 1.4748756902626121, "grad_norm": 2.0625, "learning_rate": 5.101158029584531e-06, "loss": 0.9412, "step": 7457 }, { "epoch": 1.4750755853177082, "grad_norm": 2.1875, "learning_rate": 5.100104441664426e-06, "loss": 1.0356, "step": 7458 }, { "epoch": 1.4752754803728043, "grad_norm": 2.453125, "learning_rate": 5.099050849297694e-06, "loss": 0.9289, "step": 7459 }, { "epoch": 1.4754753754279004, "grad_norm": 2.15625, "learning_rate": 5.097997252531133e-06, "loss": 0.9428, "step": 7460 }, { "epoch": 1.4756752704829963, "grad_norm": 2.078125, "learning_rate": 5.096943651411543e-06, "loss": 0.98, "step": 7461 }, { "epoch": 1.4758751655380924, "grad_norm": 2.203125, "learning_rate": 5.095890045985729e-06, "loss": 0.9712, "step": 7462 }, { "epoch": 1.4760750605931885, "grad_norm": 2.09375, "learning_rate": 5.094836436300486e-06, "loss": 0.9804, "step": 7463 }, { "epoch": 1.4762749556482846, "grad_norm": 2.1875, "learning_rate": 5.093782822402621e-06, "loss": 0.9727, "step": 7464 }, { "epoch": 1.4764748507033807, "grad_norm": 2.265625, "learning_rate": 5.0927292043389335e-06, "loss": 1.0436, "step": 7465 }, { "epoch": 1.4766747457584768, "grad_norm": 2.125, "learning_rate": 5.091675582156224e-06, "loss": 1.0495, "step": 7466 }, { "epoch": 1.476874640813573, "grad_norm": 2.203125, "learning_rate": 5.090621955901294e-06, "loss": 0.9945, "step": 7467 }, { "epoch": 1.477074535868669, "grad_norm": 2.09375, "learning_rate": 5.0895683256209485e-06, "loss": 0.9012, "step": 7468 }, { "epoch": 1.477274430923765, "grad_norm": 2.109375, "learning_rate": 5.088514691361985e-06, "loss": 0.8838, "step": 7469 }, { "epoch": 1.4774743259788612, "grad_norm": 2.140625, "learning_rate": 5.08746105317121e-06, "loss": 0.9647, "step": 7470 }, { "epoch": 1.4776742210339573, "grad_norm": 2.21875, "learning_rate": 5.086407411095424e-06, "loss": 1.0347, "step": 7471 }, { "epoch": 1.4778741160890532, "grad_norm": 2.078125, "learning_rate": 5.08535376518143e-06, "loss": 0.9503, "step": 7472 }, { "epoch": 1.4780740111441493, "grad_norm": 2.171875, "learning_rate": 5.084300115476031e-06, "loss": 1.0924, "step": 7473 }, { "epoch": 1.4782739061992454, "grad_norm": 2.171875, "learning_rate": 5.083246462026029e-06, "loss": 0.9997, "step": 7474 }, { "epoch": 1.4784738012543415, "grad_norm": 2.15625, "learning_rate": 5.082192804878229e-06, "loss": 0.9579, "step": 7475 }, { "epoch": 1.4786736963094376, "grad_norm": 2.15625, "learning_rate": 5.081139144079433e-06, "loss": 1.0236, "step": 7476 }, { "epoch": 1.4788735913645337, "grad_norm": 2.046875, "learning_rate": 5.080085479676445e-06, "loss": 0.9185, "step": 7477 }, { "epoch": 1.4790734864196298, "grad_norm": 2.109375, "learning_rate": 5.079031811716068e-06, "loss": 0.9896, "step": 7478 }, { "epoch": 1.4792733814747256, "grad_norm": 2.265625, "learning_rate": 5.077978140245108e-06, "loss": 1.0319, "step": 7479 }, { "epoch": 1.4794732765298217, "grad_norm": 2.140625, "learning_rate": 5.076924465310365e-06, "loss": 0.9257, "step": 7480 }, { "epoch": 1.4796731715849178, "grad_norm": 2.03125, "learning_rate": 5.075870786958646e-06, "loss": 0.8942, "step": 7481 }, { "epoch": 1.479873066640014, "grad_norm": 2.125, "learning_rate": 5.0748171052367565e-06, "loss": 0.9094, "step": 7482 }, { "epoch": 1.48007296169511, "grad_norm": 2.046875, "learning_rate": 5.073763420191498e-06, "loss": 0.9146, "step": 7483 }, { "epoch": 1.4802728567502061, "grad_norm": 2.078125, "learning_rate": 5.072709731869677e-06, "loss": 0.9358, "step": 7484 }, { "epoch": 1.4804727518053022, "grad_norm": 2.296875, "learning_rate": 5.071656040318098e-06, "loss": 0.9649, "step": 7485 }, { "epoch": 1.4806726468603983, "grad_norm": 2.109375, "learning_rate": 5.070602345583564e-06, "loss": 0.9764, "step": 7486 }, { "epoch": 1.4808725419154944, "grad_norm": 2.203125, "learning_rate": 5.069548647712881e-06, "loss": 0.9947, "step": 7487 }, { "epoch": 1.4810724369705905, "grad_norm": 2.140625, "learning_rate": 5.0684949467528555e-06, "loss": 1.0016, "step": 7488 }, { "epoch": 1.4812723320256866, "grad_norm": 2.21875, "learning_rate": 5.06744124275029e-06, "loss": 1.041, "step": 7489 }, { "epoch": 1.4814722270807825, "grad_norm": 2.125, "learning_rate": 5.066387535751995e-06, "loss": 0.9154, "step": 7490 }, { "epoch": 1.4816721221358786, "grad_norm": 2.1875, "learning_rate": 5.06533382580477e-06, "loss": 1.0142, "step": 7491 }, { "epoch": 1.4818720171909747, "grad_norm": 2.234375, "learning_rate": 5.064280112955424e-06, "loss": 1.034, "step": 7492 }, { "epoch": 1.4820719122460708, "grad_norm": 2.015625, "learning_rate": 5.063226397250763e-06, "loss": 0.9688, "step": 7493 }, { "epoch": 1.482271807301167, "grad_norm": 2.265625, "learning_rate": 5.062172678737591e-06, "loss": 1.0058, "step": 7494 }, { "epoch": 1.482471702356263, "grad_norm": 2.296875, "learning_rate": 5.061118957462716e-06, "loss": 0.992, "step": 7495 }, { "epoch": 1.482671597411359, "grad_norm": 2.109375, "learning_rate": 5.060065233472944e-06, "loss": 1.0407, "step": 7496 }, { "epoch": 1.482871492466455, "grad_norm": 2.21875, "learning_rate": 5.059011506815079e-06, "loss": 1.036, "step": 7497 }, { "epoch": 1.483071387521551, "grad_norm": 2.09375, "learning_rate": 5.0579577775359314e-06, "loss": 0.8635, "step": 7498 }, { "epoch": 1.4832712825766472, "grad_norm": 2.1875, "learning_rate": 5.0569040456823045e-06, "loss": 0.9879, "step": 7499 }, { "epoch": 1.4834711776317433, "grad_norm": 2.1875, "learning_rate": 5.055850311301006e-06, "loss": 0.9769, "step": 7500 }, { "epoch": 1.4834711776317433, "eval_loss": 0.9009870290756226, "eval_runtime": 593.6995, "eval_samples_per_second": 3.601, "eval_steps_per_second": 3.601, "step": 7500 }, { "epoch": 1.4836710726868394, "grad_norm": 2.21875, "learning_rate": 5.054796574438842e-06, "loss": 1.067, "step": 7501 }, { "epoch": 1.4838709677419355, "grad_norm": 2.15625, "learning_rate": 5.053742835142621e-06, "loss": 0.8663, "step": 7502 }, { "epoch": 1.4840708627970316, "grad_norm": 2.109375, "learning_rate": 5.052689093459148e-06, "loss": 0.9994, "step": 7503 }, { "epoch": 1.4842707578521277, "grad_norm": 2.1875, "learning_rate": 5.051635349435232e-06, "loss": 1.0215, "step": 7504 }, { "epoch": 1.4844706529072238, "grad_norm": 2.109375, "learning_rate": 5.050581603117679e-06, "loss": 0.9514, "step": 7505 }, { "epoch": 1.48467054796232, "grad_norm": 2.15625, "learning_rate": 5.049527854553296e-06, "loss": 0.9356, "step": 7506 }, { "epoch": 1.4848704430174158, "grad_norm": 2.1875, "learning_rate": 5.0484741037888915e-06, "loss": 0.996, "step": 7507 }, { "epoch": 1.4850703380725119, "grad_norm": 2.171875, "learning_rate": 5.047420350871273e-06, "loss": 1.0457, "step": 7508 }, { "epoch": 1.485270233127608, "grad_norm": 2.203125, "learning_rate": 5.046366595847246e-06, "loss": 0.9795, "step": 7509 }, { "epoch": 1.485470128182704, "grad_norm": 2.140625, "learning_rate": 5.045312838763622e-06, "loss": 0.9604, "step": 7510 }, { "epoch": 1.4856700232378002, "grad_norm": 2.1875, "learning_rate": 5.044259079667204e-06, "loss": 0.9675, "step": 7511 }, { "epoch": 1.4858699182928963, "grad_norm": 2.171875, "learning_rate": 5.043205318604804e-06, "loss": 0.9365, "step": 7512 }, { "epoch": 1.4860698133479924, "grad_norm": 2.046875, "learning_rate": 5.0421515556232285e-06, "loss": 0.9545, "step": 7513 }, { "epoch": 1.4862697084030883, "grad_norm": 2.1875, "learning_rate": 5.041097790769285e-06, "loss": 1.0955, "step": 7514 }, { "epoch": 1.4864696034581844, "grad_norm": 2.265625, "learning_rate": 5.040044024089782e-06, "loss": 0.9641, "step": 7515 }, { "epoch": 1.4866694985132805, "grad_norm": 2.171875, "learning_rate": 5.0389902556315286e-06, "loss": 1.0313, "step": 7516 }, { "epoch": 1.4868693935683766, "grad_norm": 2.140625, "learning_rate": 5.037936485441332e-06, "loss": 0.9403, "step": 7517 }, { "epoch": 1.4870692886234727, "grad_norm": 2.125, "learning_rate": 5.0368827135659996e-06, "loss": 1.0066, "step": 7518 }, { "epoch": 1.4872691836785688, "grad_norm": 2.21875, "learning_rate": 5.035828940052342e-06, "loss": 1.1098, "step": 7519 }, { "epoch": 1.4874690787336649, "grad_norm": 2.1875, "learning_rate": 5.0347751649471655e-06, "loss": 1.0444, "step": 7520 }, { "epoch": 1.487668973788761, "grad_norm": 2.0625, "learning_rate": 5.033721388297281e-06, "loss": 0.91, "step": 7521 }, { "epoch": 1.487868868843857, "grad_norm": 2.046875, "learning_rate": 5.0326676101494955e-06, "loss": 0.917, "step": 7522 }, { "epoch": 1.4880687638989532, "grad_norm": 2.171875, "learning_rate": 5.031613830550618e-06, "loss": 0.9712, "step": 7523 }, { "epoch": 1.4882686589540493, "grad_norm": 2.109375, "learning_rate": 5.0305600495474586e-06, "loss": 0.9923, "step": 7524 }, { "epoch": 1.4884685540091451, "grad_norm": 2.125, "learning_rate": 5.029506267186823e-06, "loss": 0.9984, "step": 7525 }, { "epoch": 1.4886684490642412, "grad_norm": 2.171875, "learning_rate": 5.028452483515524e-06, "loss": 1.0691, "step": 7526 }, { "epoch": 1.4888683441193373, "grad_norm": 2.125, "learning_rate": 5.027398698580369e-06, "loss": 0.9605, "step": 7527 }, { "epoch": 1.4890682391744334, "grad_norm": 2.171875, "learning_rate": 5.026344912428166e-06, "loss": 0.9946, "step": 7528 }, { "epoch": 1.4892681342295295, "grad_norm": 2.296875, "learning_rate": 5.025291125105725e-06, "loss": 0.9633, "step": 7529 }, { "epoch": 1.4894680292846256, "grad_norm": 2.078125, "learning_rate": 5.024237336659855e-06, "loss": 0.9967, "step": 7530 }, { "epoch": 1.4896679243397217, "grad_norm": 2.078125, "learning_rate": 5.023183547137364e-06, "loss": 1.0632, "step": 7531 }, { "epoch": 1.4898678193948176, "grad_norm": 2.078125, "learning_rate": 5.022129756585063e-06, "loss": 0.9735, "step": 7532 }, { "epoch": 1.4900677144499137, "grad_norm": 2.25, "learning_rate": 5.021075965049762e-06, "loss": 1.0718, "step": 7533 }, { "epoch": 1.4902676095050098, "grad_norm": 2.21875, "learning_rate": 5.0200221725782675e-06, "loss": 1.0503, "step": 7534 }, { "epoch": 1.490467504560106, "grad_norm": 1.9609375, "learning_rate": 5.0189683792173905e-06, "loss": 0.9196, "step": 7535 }, { "epoch": 1.490667399615202, "grad_norm": 2.25, "learning_rate": 5.01791458501394e-06, "loss": 0.9358, "step": 7536 }, { "epoch": 1.490867294670298, "grad_norm": 2.09375, "learning_rate": 5.0168607900147244e-06, "loss": 0.9498, "step": 7537 }, { "epoch": 1.4910671897253942, "grad_norm": 2.125, "learning_rate": 5.015806994266556e-06, "loss": 0.9967, "step": 7538 }, { "epoch": 1.4912670847804903, "grad_norm": 2.203125, "learning_rate": 5.014753197816243e-06, "loss": 0.9562, "step": 7539 }, { "epoch": 1.4914669798355864, "grad_norm": 2.21875, "learning_rate": 5.013699400710592e-06, "loss": 1.0169, "step": 7540 }, { "epoch": 1.4916668748906825, "grad_norm": 2.09375, "learning_rate": 5.0126456029964175e-06, "loss": 1.0031, "step": 7541 }, { "epoch": 1.4918667699457786, "grad_norm": 2.1875, "learning_rate": 5.011591804720526e-06, "loss": 0.9976, "step": 7542 }, { "epoch": 1.4920666650008745, "grad_norm": 2.234375, "learning_rate": 5.010538005929728e-06, "loss": 0.9954, "step": 7543 }, { "epoch": 1.4922665600559706, "grad_norm": 2.40625, "learning_rate": 5.009484206670831e-06, "loss": 1.028, "step": 7544 }, { "epoch": 1.4924664551110667, "grad_norm": 2.140625, "learning_rate": 5.008430406990649e-06, "loss": 0.9847, "step": 7545 }, { "epoch": 1.4926663501661628, "grad_norm": 2.40625, "learning_rate": 5.007376606935989e-06, "loss": 1.0274, "step": 7546 }, { "epoch": 1.4928662452212589, "grad_norm": 2.296875, "learning_rate": 5.0063228065536595e-06, "loss": 0.9803, "step": 7547 }, { "epoch": 1.493066140276355, "grad_norm": 2.109375, "learning_rate": 5.005269005890473e-06, "loss": 0.9394, "step": 7548 }, { "epoch": 1.4932660353314509, "grad_norm": 2.203125, "learning_rate": 5.004215204993237e-06, "loss": 0.9289, "step": 7549 }, { "epoch": 1.493465930386547, "grad_norm": 2.109375, "learning_rate": 5.003161403908762e-06, "loss": 0.9714, "step": 7550 }, { "epoch": 1.493665825441643, "grad_norm": 2.140625, "learning_rate": 5.002107602683858e-06, "loss": 0.8793, "step": 7551 }, { "epoch": 1.4938657204967392, "grad_norm": 2.234375, "learning_rate": 5.0010538013653345e-06, "loss": 1.0672, "step": 7552 }, { "epoch": 1.4940656155518353, "grad_norm": 2.28125, "learning_rate": 5e-06, "loss": 1.0445, "step": 7553 }, { "epoch": 1.4942655106069314, "grad_norm": 2.265625, "learning_rate": 4.998946198634667e-06, "loss": 0.9237, "step": 7554 }, { "epoch": 1.4944654056620275, "grad_norm": 2.171875, "learning_rate": 4.997892397316144e-06, "loss": 0.9672, "step": 7555 }, { "epoch": 1.4946653007171236, "grad_norm": 2.21875, "learning_rate": 4.99683859609124e-06, "loss": 1.0201, "step": 7556 }, { "epoch": 1.4948651957722197, "grad_norm": 2.140625, "learning_rate": 4.995784795006764e-06, "loss": 0.9943, "step": 7557 }, { "epoch": 1.4950650908273158, "grad_norm": 2.109375, "learning_rate": 4.994730994109529e-06, "loss": 0.9561, "step": 7558 }, { "epoch": 1.4952649858824119, "grad_norm": 2.15625, "learning_rate": 4.993677193446341e-06, "loss": 0.99, "step": 7559 }, { "epoch": 1.4954648809375077, "grad_norm": 2.1875, "learning_rate": 4.9926233930640136e-06, "loss": 0.9834, "step": 7560 }, { "epoch": 1.4956647759926038, "grad_norm": 2.125, "learning_rate": 4.991569593009352e-06, "loss": 0.9143, "step": 7561 }, { "epoch": 1.4958646710477, "grad_norm": 2.203125, "learning_rate": 4.990515793329169e-06, "loss": 1.0046, "step": 7562 }, { "epoch": 1.496064566102796, "grad_norm": 2.140625, "learning_rate": 4.9894619940702735e-06, "loss": 0.946, "step": 7563 }, { "epoch": 1.4962644611578921, "grad_norm": 2.3125, "learning_rate": 4.988408195279475e-06, "loss": 1.0447, "step": 7564 }, { "epoch": 1.4964643562129882, "grad_norm": 2.421875, "learning_rate": 4.9873543970035825e-06, "loss": 1.0406, "step": 7565 }, { "epoch": 1.4966642512680843, "grad_norm": 2.21875, "learning_rate": 4.9863005992894085e-06, "loss": 1.0905, "step": 7566 }, { "epoch": 1.4968641463231802, "grad_norm": 2.15625, "learning_rate": 4.985246802183758e-06, "loss": 1.0414, "step": 7567 }, { "epoch": 1.4970640413782763, "grad_norm": 2.09375, "learning_rate": 4.9841930057334446e-06, "loss": 0.9065, "step": 7568 }, { "epoch": 1.4972639364333724, "grad_norm": 2.078125, "learning_rate": 4.983139209985276e-06, "loss": 0.9596, "step": 7569 }, { "epoch": 1.4974638314884685, "grad_norm": 2.296875, "learning_rate": 4.982085414986062e-06, "loss": 1.0192, "step": 7570 }, { "epoch": 1.4976637265435646, "grad_norm": 2.1875, "learning_rate": 4.981031620782612e-06, "loss": 0.9386, "step": 7571 }, { "epoch": 1.4978636215986607, "grad_norm": 2.234375, "learning_rate": 4.979977827421735e-06, "loss": 0.899, "step": 7572 }, { "epoch": 1.4980635166537568, "grad_norm": 2.25, "learning_rate": 4.97892403495024e-06, "loss": 1.084, "step": 7573 }, { "epoch": 1.498263411708853, "grad_norm": 2.109375, "learning_rate": 4.977870243414938e-06, "loss": 0.9711, "step": 7574 }, { "epoch": 1.498463306763949, "grad_norm": 2.171875, "learning_rate": 4.976816452862638e-06, "loss": 0.978, "step": 7575 }, { "epoch": 1.498663201819045, "grad_norm": 2.21875, "learning_rate": 4.975762663340148e-06, "loss": 1.0126, "step": 7576 }, { "epoch": 1.4988630968741412, "grad_norm": 2.203125, "learning_rate": 4.974708874894276e-06, "loss": 1.0988, "step": 7577 }, { "epoch": 1.499062991929237, "grad_norm": 2.25, "learning_rate": 4.973655087571836e-06, "loss": 0.9493, "step": 7578 }, { "epoch": 1.4992628869843332, "grad_norm": 2.1875, "learning_rate": 4.972601301419632e-06, "loss": 0.9735, "step": 7579 }, { "epoch": 1.4994627820394293, "grad_norm": 2.265625, "learning_rate": 4.971547516484477e-06, "loss": 0.9846, "step": 7580 }, { "epoch": 1.4996626770945254, "grad_norm": 2.125, "learning_rate": 4.970493732813178e-06, "loss": 0.9615, "step": 7581 }, { "epoch": 1.4998625721496215, "grad_norm": 2.265625, "learning_rate": 4.969439950452543e-06, "loss": 1.1397, "step": 7582 }, { "epoch": 1.5000624672047174, "grad_norm": 2.234375, "learning_rate": 4.968386169449384e-06, "loss": 0.8919, "step": 7583 }, { "epoch": 1.5002623622598135, "grad_norm": 2.109375, "learning_rate": 4.967332389850506e-06, "loss": 0.9603, "step": 7584 }, { "epoch": 1.5004622573149096, "grad_norm": 2.03125, "learning_rate": 4.96627861170272e-06, "loss": 0.8555, "step": 7585 }, { "epoch": 1.5006621523700057, "grad_norm": 2.328125, "learning_rate": 4.965224835052836e-06, "loss": 0.9833, "step": 7586 }, { "epoch": 1.5008620474251018, "grad_norm": 2.234375, "learning_rate": 4.96417105994766e-06, "loss": 0.9636, "step": 7587 }, { "epoch": 1.5010619424801979, "grad_norm": 2.125, "learning_rate": 4.963117286434003e-06, "loss": 0.9915, "step": 7588 }, { "epoch": 1.501261837535294, "grad_norm": 2.28125, "learning_rate": 4.962063514558671e-06, "loss": 0.9548, "step": 7589 }, { "epoch": 1.50146173259039, "grad_norm": 1.984375, "learning_rate": 4.961009744368472e-06, "loss": 0.8947, "step": 7590 }, { "epoch": 1.5016616276454862, "grad_norm": 2.09375, "learning_rate": 4.959955975910219e-06, "loss": 0.9332, "step": 7591 }, { "epoch": 1.5018615227005823, "grad_norm": 2.1875, "learning_rate": 4.958902209230716e-06, "loss": 1.0048, "step": 7592 }, { "epoch": 1.5020614177556784, "grad_norm": 2.25, "learning_rate": 4.9578484443767714e-06, "loss": 0.9865, "step": 7593 }, { "epoch": 1.5022613128107745, "grad_norm": 2.09375, "learning_rate": 4.956794681395197e-06, "loss": 0.954, "step": 7594 }, { "epoch": 1.5024612078658706, "grad_norm": 2.203125, "learning_rate": 4.955740920332796e-06, "loss": 0.9588, "step": 7595 }, { "epoch": 1.5026611029209664, "grad_norm": 2.03125, "learning_rate": 4.954687161236379e-06, "loss": 0.9834, "step": 7596 }, { "epoch": 1.5028609979760625, "grad_norm": 2.09375, "learning_rate": 4.953633404152754e-06, "loss": 0.9794, "step": 7597 }, { "epoch": 1.5030608930311586, "grad_norm": 2.09375, "learning_rate": 4.952579649128728e-06, "loss": 0.9282, "step": 7598 }, { "epoch": 1.5032607880862547, "grad_norm": 2.234375, "learning_rate": 4.951525896211109e-06, "loss": 0.9748, "step": 7599 }, { "epoch": 1.5034606831413508, "grad_norm": 2.109375, "learning_rate": 4.950472145446706e-06, "loss": 0.992, "step": 7600 }, { "epoch": 1.5036605781964467, "grad_norm": 2.296875, "learning_rate": 4.9494183968823235e-06, "loss": 0.9436, "step": 7601 }, { "epoch": 1.5038604732515428, "grad_norm": 2.296875, "learning_rate": 4.9483646505647705e-06, "loss": 0.9967, "step": 7602 }, { "epoch": 1.504060368306639, "grad_norm": 2.203125, "learning_rate": 4.947310906540854e-06, "loss": 0.9897, "step": 7603 }, { "epoch": 1.504260263361735, "grad_norm": 2.1875, "learning_rate": 4.946257164857382e-06, "loss": 1.0052, "step": 7604 }, { "epoch": 1.5044601584168311, "grad_norm": 2.09375, "learning_rate": 4.94520342556116e-06, "loss": 0.9114, "step": 7605 }, { "epoch": 1.5046600534719272, "grad_norm": 2.1875, "learning_rate": 4.944149688698997e-06, "loss": 0.9928, "step": 7606 }, { "epoch": 1.5048599485270233, "grad_norm": 2.25, "learning_rate": 4.943095954317698e-06, "loss": 0.91, "step": 7607 }, { "epoch": 1.5050598435821194, "grad_norm": 2.1875, "learning_rate": 4.94204222246407e-06, "loss": 0.9799, "step": 7608 }, { "epoch": 1.5052597386372155, "grad_norm": 2.125, "learning_rate": 4.940988493184922e-06, "loss": 0.8856, "step": 7609 }, { "epoch": 1.5054596336923116, "grad_norm": 2.296875, "learning_rate": 4.939934766527058e-06, "loss": 1.062, "step": 7610 }, { "epoch": 1.5056595287474077, "grad_norm": 2.390625, "learning_rate": 4.938881042537286e-06, "loss": 0.9589, "step": 7611 }, { "epoch": 1.5058594238025038, "grad_norm": 2.125, "learning_rate": 4.93782732126241e-06, "loss": 0.982, "step": 7612 }, { "epoch": 1.5060593188576, "grad_norm": 2.15625, "learning_rate": 4.936773602749238e-06, "loss": 1.015, "step": 7613 }, { "epoch": 1.5062592139126958, "grad_norm": 2.140625, "learning_rate": 4.9357198870445775e-06, "loss": 0.98, "step": 7614 }, { "epoch": 1.506459108967792, "grad_norm": 2.125, "learning_rate": 4.934666174195231e-06, "loss": 0.9862, "step": 7615 }, { "epoch": 1.506659004022888, "grad_norm": 2.15625, "learning_rate": 4.933612464248006e-06, "loss": 1.0111, "step": 7616 }, { "epoch": 1.506858899077984, "grad_norm": 2.15625, "learning_rate": 4.932558757249711e-06, "loss": 0.9228, "step": 7617 }, { "epoch": 1.50705879413308, "grad_norm": 2.421875, "learning_rate": 4.931505053247146e-06, "loss": 1.0451, "step": 7618 }, { "epoch": 1.507258689188176, "grad_norm": 2.3125, "learning_rate": 4.930451352287121e-06, "loss": 1.0573, "step": 7619 }, { "epoch": 1.5074585842432722, "grad_norm": 2.140625, "learning_rate": 4.929397654416438e-06, "loss": 0.9944, "step": 7620 }, { "epoch": 1.5076584792983683, "grad_norm": 2.21875, "learning_rate": 4.928343959681904e-06, "loss": 0.9962, "step": 7621 }, { "epoch": 1.5078583743534644, "grad_norm": 2.046875, "learning_rate": 4.927290268130325e-06, "loss": 0.8596, "step": 7622 }, { "epoch": 1.5080582694085605, "grad_norm": 2.1875, "learning_rate": 4.9262365798085024e-06, "loss": 1.0626, "step": 7623 }, { "epoch": 1.5082581644636566, "grad_norm": 2.0625, "learning_rate": 4.925182894763243e-06, "loss": 0.9334, "step": 7624 }, { "epoch": 1.5084580595187527, "grad_norm": 2.109375, "learning_rate": 4.924129213041353e-06, "loss": 1.0244, "step": 7625 }, { "epoch": 1.5086579545738488, "grad_norm": 2.171875, "learning_rate": 4.923075534689635e-06, "loss": 1.0285, "step": 7626 }, { "epoch": 1.5088578496289449, "grad_norm": 2.0, "learning_rate": 4.922021859754894e-06, "loss": 0.9209, "step": 7627 }, { "epoch": 1.509057744684041, "grad_norm": 2.15625, "learning_rate": 4.920968188283932e-06, "loss": 1.0255, "step": 7628 }, { "epoch": 1.509257639739137, "grad_norm": 2.125, "learning_rate": 4.919914520323555e-06, "loss": 0.8351, "step": 7629 }, { "epoch": 1.5094575347942332, "grad_norm": 2.140625, "learning_rate": 4.918860855920568e-06, "loss": 0.9118, "step": 7630 }, { "epoch": 1.5096574298493293, "grad_norm": 2.125, "learning_rate": 4.917807195121773e-06, "loss": 0.983, "step": 7631 }, { "epoch": 1.5098573249044251, "grad_norm": 2.1875, "learning_rate": 4.916753537973973e-06, "loss": 0.991, "step": 7632 }, { "epoch": 1.5100572199595212, "grad_norm": 2.140625, "learning_rate": 4.915699884523971e-06, "loss": 1.0175, "step": 7633 }, { "epoch": 1.5102571150146173, "grad_norm": 2.265625, "learning_rate": 4.914646234818573e-06, "loss": 1.0203, "step": 7634 }, { "epoch": 1.5104570100697134, "grad_norm": 2.234375, "learning_rate": 4.913592588904578e-06, "loss": 0.8762, "step": 7635 }, { "epoch": 1.5106569051248093, "grad_norm": 2.109375, "learning_rate": 4.912538946828792e-06, "loss": 0.9023, "step": 7636 }, { "epoch": 1.5108568001799054, "grad_norm": 2.171875, "learning_rate": 4.9114853086380165e-06, "loss": 0.9672, "step": 7637 }, { "epoch": 1.5110566952350015, "grad_norm": 2.078125, "learning_rate": 4.910431674379054e-06, "loss": 0.9385, "step": 7638 }, { "epoch": 1.5112565902900976, "grad_norm": 2.140625, "learning_rate": 4.909378044098708e-06, "loss": 0.8887, "step": 7639 }, { "epoch": 1.5114564853451937, "grad_norm": 2.234375, "learning_rate": 4.908324417843779e-06, "loss": 0.9445, "step": 7640 }, { "epoch": 1.5116563804002898, "grad_norm": 2.078125, "learning_rate": 4.907270795661068e-06, "loss": 1.0104, "step": 7641 }, { "epoch": 1.511856275455386, "grad_norm": 2.203125, "learning_rate": 4.906217177597381e-06, "loss": 1.0569, "step": 7642 }, { "epoch": 1.512056170510482, "grad_norm": 2.1875, "learning_rate": 4.905163563699515e-06, "loss": 1.0554, "step": 7643 }, { "epoch": 1.5122560655655781, "grad_norm": 2.28125, "learning_rate": 4.904109954014273e-06, "loss": 1.0148, "step": 7644 }, { "epoch": 1.5124559606206742, "grad_norm": 2.046875, "learning_rate": 4.9030563485884585e-06, "loss": 0.8805, "step": 7645 }, { "epoch": 1.5126558556757703, "grad_norm": 2.21875, "learning_rate": 4.902002747468869e-06, "loss": 1.0549, "step": 7646 }, { "epoch": 1.5128557507308664, "grad_norm": 2.234375, "learning_rate": 4.900949150702308e-06, "loss": 1.0335, "step": 7647 }, { "epoch": 1.5130556457859625, "grad_norm": 2.15625, "learning_rate": 4.8998955583355755e-06, "loss": 0.935, "step": 7648 }, { "epoch": 1.5132555408410584, "grad_norm": 2.15625, "learning_rate": 4.89884197041547e-06, "loss": 1.0581, "step": 7649 }, { "epoch": 1.5134554358961545, "grad_norm": 1.8984375, "learning_rate": 4.897788386988796e-06, "loss": 0.8837, "step": 7650 }, { "epoch": 1.5136553309512506, "grad_norm": 2.390625, "learning_rate": 4.896734808102348e-06, "loss": 0.9951, "step": 7651 }, { "epoch": 1.5138552260063467, "grad_norm": 2.265625, "learning_rate": 4.89568123380293e-06, "loss": 0.9621, "step": 7652 }, { "epoch": 1.5140551210614426, "grad_norm": 2.109375, "learning_rate": 4.8946276641373406e-06, "loss": 0.9108, "step": 7653 }, { "epoch": 1.5142550161165387, "grad_norm": 2.171875, "learning_rate": 4.893574099152379e-06, "loss": 1.0131, "step": 7654 }, { "epoch": 1.5144549111716348, "grad_norm": 2.15625, "learning_rate": 4.892520538894844e-06, "loss": 0.9307, "step": 7655 }, { "epoch": 1.5146548062267309, "grad_norm": 1.96875, "learning_rate": 4.891466983411537e-06, "loss": 0.8952, "step": 7656 }, { "epoch": 1.514854701281827, "grad_norm": 2.5, "learning_rate": 4.890413432749253e-06, "loss": 1.0152, "step": 7657 }, { "epoch": 1.515054596336923, "grad_norm": 2.09375, "learning_rate": 4.889359886954794e-06, "loss": 0.9754, "step": 7658 }, { "epoch": 1.5152544913920192, "grad_norm": 2.0625, "learning_rate": 4.888306346074956e-06, "loss": 0.8848, "step": 7659 }, { "epoch": 1.5154543864471153, "grad_norm": 2.28125, "learning_rate": 4.887252810156538e-06, "loss": 1.0255, "step": 7660 }, { "epoch": 1.5156542815022114, "grad_norm": 2.109375, "learning_rate": 4.8861992792463396e-06, "loss": 0.927, "step": 7661 }, { "epoch": 1.5158541765573075, "grad_norm": 2.125, "learning_rate": 4.885145753391158e-06, "loss": 1.0345, "step": 7662 }, { "epoch": 1.5160540716124036, "grad_norm": 2.0625, "learning_rate": 4.884092232637787e-06, "loss": 0.9341, "step": 7663 }, { "epoch": 1.5162539666674997, "grad_norm": 2.234375, "learning_rate": 4.883038717033028e-06, "loss": 1.0379, "step": 7664 }, { "epoch": 1.5164538617225958, "grad_norm": 2.078125, "learning_rate": 4.881985206623676e-06, "loss": 0.8957, "step": 7665 }, { "epoch": 1.5166537567776919, "grad_norm": 2.09375, "learning_rate": 4.8809317014565286e-06, "loss": 1.0101, "step": 7666 }, { "epoch": 1.5168536518327878, "grad_norm": 2.046875, "learning_rate": 4.879878201578382e-06, "loss": 0.9537, "step": 7667 }, { "epoch": 1.5170535468878839, "grad_norm": 2.46875, "learning_rate": 4.878824707036034e-06, "loss": 0.9919, "step": 7668 }, { "epoch": 1.51725344194298, "grad_norm": 2.171875, "learning_rate": 4.877771217876279e-06, "loss": 0.9453, "step": 7669 }, { "epoch": 1.517453336998076, "grad_norm": 2.234375, "learning_rate": 4.876717734145914e-06, "loss": 1.0619, "step": 7670 }, { "epoch": 1.517653232053172, "grad_norm": 2.203125, "learning_rate": 4.875664255891733e-06, "loss": 0.915, "step": 7671 }, { "epoch": 1.517853127108268, "grad_norm": 2.21875, "learning_rate": 4.874610783160532e-06, "loss": 0.9857, "step": 7672 }, { "epoch": 1.5180530221633641, "grad_norm": 2.21875, "learning_rate": 4.873557315999108e-06, "loss": 1.0185, "step": 7673 }, { "epoch": 1.5182529172184602, "grad_norm": 2.09375, "learning_rate": 4.872503854454254e-06, "loss": 0.9332, "step": 7674 }, { "epoch": 1.5184528122735563, "grad_norm": 2.203125, "learning_rate": 4.871450398572764e-06, "loss": 0.9758, "step": 7675 }, { "epoch": 1.5186527073286524, "grad_norm": 2.25, "learning_rate": 4.870396948401436e-06, "loss": 0.994, "step": 7676 }, { "epoch": 1.5188526023837485, "grad_norm": 2.265625, "learning_rate": 4.86934350398706e-06, "loss": 1.0261, "step": 7677 }, { "epoch": 1.5190524974388446, "grad_norm": 2.09375, "learning_rate": 4.868290065376433e-06, "loss": 0.909, "step": 7678 }, { "epoch": 1.5192523924939407, "grad_norm": 2.1875, "learning_rate": 4.8672366326163466e-06, "loss": 0.9689, "step": 7679 }, { "epoch": 1.5194522875490368, "grad_norm": 2.046875, "learning_rate": 4.866183205753593e-06, "loss": 0.9509, "step": 7680 }, { "epoch": 1.519652182604133, "grad_norm": 2.0, "learning_rate": 4.86512978483497e-06, "loss": 0.8929, "step": 7681 }, { "epoch": 1.519852077659229, "grad_norm": 2.1875, "learning_rate": 4.864076369907265e-06, "loss": 0.9983, "step": 7682 }, { "epoch": 1.5200519727143251, "grad_norm": 2.1875, "learning_rate": 4.863022961017275e-06, "loss": 1.0158, "step": 7683 }, { "epoch": 1.520251867769421, "grad_norm": 2.234375, "learning_rate": 4.86196955821179e-06, "loss": 1.0297, "step": 7684 }, { "epoch": 1.520451762824517, "grad_norm": 2.34375, "learning_rate": 4.860916161537602e-06, "loss": 1.0098, "step": 7685 }, { "epoch": 1.5206516578796132, "grad_norm": 2.140625, "learning_rate": 4.859862771041503e-06, "loss": 0.9938, "step": 7686 }, { "epoch": 1.5208515529347093, "grad_norm": 2.0625, "learning_rate": 4.858809386770286e-06, "loss": 0.9878, "step": 7687 }, { "epoch": 1.5210514479898054, "grad_norm": 2.078125, "learning_rate": 4.85775600877074e-06, "loss": 0.9156, "step": 7688 }, { "epoch": 1.5212513430449013, "grad_norm": 2.125, "learning_rate": 4.856702637089657e-06, "loss": 0.9137, "step": 7689 }, { "epoch": 1.5214512380999974, "grad_norm": 2.421875, "learning_rate": 4.8556492717738275e-06, "loss": 1.0033, "step": 7690 }, { "epoch": 1.5216511331550935, "grad_norm": 2.1875, "learning_rate": 4.854595912870041e-06, "loss": 0.9716, "step": 7691 }, { "epoch": 1.5218510282101896, "grad_norm": 2.09375, "learning_rate": 4.8535425604250915e-06, "loss": 0.9526, "step": 7692 }, { "epoch": 1.5220509232652857, "grad_norm": 2.125, "learning_rate": 4.8524892144857655e-06, "loss": 0.9911, "step": 7693 }, { "epoch": 1.5222508183203818, "grad_norm": 2.25, "learning_rate": 4.851435875098851e-06, "loss": 0.9205, "step": 7694 }, { "epoch": 1.5224507133754779, "grad_norm": 2.046875, "learning_rate": 4.850382542311139e-06, "loss": 0.918, "step": 7695 }, { "epoch": 1.522650608430574, "grad_norm": 2.15625, "learning_rate": 4.849329216169419e-06, "loss": 1.0557, "step": 7696 }, { "epoch": 1.52285050348567, "grad_norm": 2.21875, "learning_rate": 4.848275896720478e-06, "loss": 1.0096, "step": 7697 }, { "epoch": 1.5230503985407662, "grad_norm": 2.234375, "learning_rate": 4.847222584011107e-06, "loss": 1.0347, "step": 7698 }, { "epoch": 1.5232502935958623, "grad_norm": 2.1875, "learning_rate": 4.846169278088092e-06, "loss": 0.9796, "step": 7699 }, { "epoch": 1.5234501886509584, "grad_norm": 2.265625, "learning_rate": 4.845115978998221e-06, "loss": 0.9395, "step": 7700 }, { "epoch": 1.5236500837060545, "grad_norm": 2.21875, "learning_rate": 4.844062686788282e-06, "loss": 1.0408, "step": 7701 }, { "epoch": 1.5238499787611504, "grad_norm": 2.078125, "learning_rate": 4.843009401505062e-06, "loss": 1.001, "step": 7702 }, { "epoch": 1.5240498738162465, "grad_norm": 2.140625, "learning_rate": 4.841956123195347e-06, "loss": 0.9442, "step": 7703 }, { "epoch": 1.5242497688713426, "grad_norm": 2.21875, "learning_rate": 4.8409028519059246e-06, "loss": 0.9601, "step": 7704 }, { "epoch": 1.5244496639264387, "grad_norm": 2.1875, "learning_rate": 4.8398495876835795e-06, "loss": 0.933, "step": 7705 }, { "epoch": 1.5246495589815345, "grad_norm": 2.15625, "learning_rate": 4.838796330575099e-06, "loss": 1.0609, "step": 7706 }, { "epoch": 1.5248494540366306, "grad_norm": 2.1875, "learning_rate": 4.837743080627269e-06, "loss": 0.8928, "step": 7707 }, { "epoch": 1.5250493490917267, "grad_norm": 2.15625, "learning_rate": 4.836689837886874e-06, "loss": 0.917, "step": 7708 }, { "epoch": 1.5252492441468228, "grad_norm": 2.171875, "learning_rate": 4.835636602400699e-06, "loss": 0.9897, "step": 7709 }, { "epoch": 1.525449139201919, "grad_norm": 2.078125, "learning_rate": 4.834583374215528e-06, "loss": 0.9326, "step": 7710 }, { "epoch": 1.525649034257015, "grad_norm": 2.125, "learning_rate": 4.833530153378145e-06, "loss": 0.9718, "step": 7711 }, { "epoch": 1.5258489293121111, "grad_norm": 2.1875, "learning_rate": 4.832476939935337e-06, "loss": 1.0237, "step": 7712 }, { "epoch": 1.5260488243672072, "grad_norm": 2.171875, "learning_rate": 4.831423733933884e-06, "loss": 0.9722, "step": 7713 }, { "epoch": 1.5262487194223033, "grad_norm": 2.09375, "learning_rate": 4.83037053542057e-06, "loss": 0.9436, "step": 7714 }, { "epoch": 1.5264486144773994, "grad_norm": 2.078125, "learning_rate": 4.829317344442181e-06, "loss": 0.9367, "step": 7715 }, { "epoch": 1.5266485095324955, "grad_norm": 2.15625, "learning_rate": 4.828264161045496e-06, "loss": 0.9521, "step": 7716 }, { "epoch": 1.5268484045875916, "grad_norm": 2.15625, "learning_rate": 4.827210985277299e-06, "loss": 1.0105, "step": 7717 }, { "epoch": 1.5270482996426877, "grad_norm": 2.140625, "learning_rate": 4.8261578171843705e-06, "loss": 0.9382, "step": 7718 }, { "epoch": 1.5272481946977836, "grad_norm": 2.203125, "learning_rate": 4.825104656813494e-06, "loss": 0.9781, "step": 7719 }, { "epoch": 1.5274480897528797, "grad_norm": 2.140625, "learning_rate": 4.82405150421145e-06, "loss": 1.0679, "step": 7720 }, { "epoch": 1.5276479848079758, "grad_norm": 2.203125, "learning_rate": 4.82299835942502e-06, "loss": 0.9701, "step": 7721 }, { "epoch": 1.527847879863072, "grad_norm": 2.125, "learning_rate": 4.821945222500983e-06, "loss": 0.9713, "step": 7722 }, { "epoch": 1.528047774918168, "grad_norm": 2.0625, "learning_rate": 4.8208920934861206e-06, "loss": 0.9887, "step": 7723 }, { "epoch": 1.528247669973264, "grad_norm": 2.09375, "learning_rate": 4.819838972427214e-06, "loss": 0.9757, "step": 7724 }, { "epoch": 1.52844756502836, "grad_norm": 2.15625, "learning_rate": 4.818785859371039e-06, "loss": 0.9716, "step": 7725 }, { "epoch": 1.528647460083456, "grad_norm": 2.03125, "learning_rate": 4.817732754364376e-06, "loss": 0.8541, "step": 7726 }, { "epoch": 1.5288473551385522, "grad_norm": 2.171875, "learning_rate": 4.8166796574540065e-06, "loss": 0.9456, "step": 7727 }, { "epoch": 1.5290472501936483, "grad_norm": 2.171875, "learning_rate": 4.815626568686706e-06, "loss": 0.9526, "step": 7728 }, { "epoch": 1.5292471452487444, "grad_norm": 2.078125, "learning_rate": 4.8145734881092545e-06, "loss": 1.0064, "step": 7729 }, { "epoch": 1.5294470403038405, "grad_norm": 2.03125, "learning_rate": 4.813520415768427e-06, "loss": 0.9533, "step": 7730 }, { "epoch": 1.5296469353589366, "grad_norm": 2.1875, "learning_rate": 4.812467351711004e-06, "loss": 1.0011, "step": 7731 }, { "epoch": 1.5298468304140327, "grad_norm": 2.328125, "learning_rate": 4.811414295983761e-06, "loss": 0.9818, "step": 7732 }, { "epoch": 1.5300467254691288, "grad_norm": 2.1875, "learning_rate": 4.810361248633475e-06, "loss": 1.009, "step": 7733 }, { "epoch": 1.5302466205242249, "grad_norm": 2.15625, "learning_rate": 4.809308209706921e-06, "loss": 0.9351, "step": 7734 }, { "epoch": 1.530446515579321, "grad_norm": 2.28125, "learning_rate": 4.808255179250878e-06, "loss": 0.9453, "step": 7735 }, { "epoch": 1.530646410634417, "grad_norm": 2.109375, "learning_rate": 4.807202157312118e-06, "loss": 0.8974, "step": 7736 }, { "epoch": 1.530846305689513, "grad_norm": 2.09375, "learning_rate": 4.806149143937417e-06, "loss": 0.9417, "step": 7737 }, { "epoch": 1.531046200744609, "grad_norm": 2.09375, "learning_rate": 4.8050961391735516e-06, "loss": 0.8945, "step": 7738 }, { "epoch": 1.5312460957997052, "grad_norm": 2.203125, "learning_rate": 4.804043143067295e-06, "loss": 1.0163, "step": 7739 }, { "epoch": 1.5314459908548013, "grad_norm": 2.375, "learning_rate": 4.802990155665421e-06, "loss": 0.964, "step": 7740 }, { "epoch": 1.5316458859098971, "grad_norm": 2.078125, "learning_rate": 4.801937177014702e-06, "loss": 0.9406, "step": 7741 }, { "epoch": 1.5318457809649932, "grad_norm": 2.109375, "learning_rate": 4.800884207161913e-06, "loss": 0.9723, "step": 7742 }, { "epoch": 1.5320456760200893, "grad_norm": 2.265625, "learning_rate": 4.799831246153827e-06, "loss": 0.9697, "step": 7743 }, { "epoch": 1.5322455710751854, "grad_norm": 2.03125, "learning_rate": 4.798778294037214e-06, "loss": 0.919, "step": 7744 }, { "epoch": 1.5324454661302815, "grad_norm": 2.109375, "learning_rate": 4.7977253508588496e-06, "loss": 0.9584, "step": 7745 }, { "epoch": 1.5326453611853776, "grad_norm": 2.1875, "learning_rate": 4.796672416665503e-06, "loss": 1.0064, "step": 7746 }, { "epoch": 1.5328452562404737, "grad_norm": 2.0625, "learning_rate": 4.795619491503946e-06, "loss": 0.9836, "step": 7747 }, { "epoch": 1.5330451512955698, "grad_norm": 2.25, "learning_rate": 4.794566575420949e-06, "loss": 1.0584, "step": 7748 }, { "epoch": 1.533245046350666, "grad_norm": 2.171875, "learning_rate": 4.793513668463283e-06, "loss": 0.9774, "step": 7749 }, { "epoch": 1.533444941405762, "grad_norm": 3.0, "learning_rate": 4.792460770677716e-06, "loss": 0.956, "step": 7750 }, { "epoch": 1.5336448364608581, "grad_norm": 2.171875, "learning_rate": 4.791407882111022e-06, "loss": 1.0692, "step": 7751 }, { "epoch": 1.5338447315159542, "grad_norm": 2.078125, "learning_rate": 4.790355002809967e-06, "loss": 0.9051, "step": 7752 }, { "epoch": 1.5340446265710503, "grad_norm": 2.109375, "learning_rate": 4.789302132821319e-06, "loss": 1.0195, "step": 7753 }, { "epoch": 1.5342445216261462, "grad_norm": 2.171875, "learning_rate": 4.78824927219185e-06, "loss": 0.966, "step": 7754 }, { "epoch": 1.5344444166812423, "grad_norm": 2.140625, "learning_rate": 4.787196420968326e-06, "loss": 1.0108, "step": 7755 }, { "epoch": 1.5346443117363384, "grad_norm": 2.109375, "learning_rate": 4.7861435791975124e-06, "loss": 0.9161, "step": 7756 }, { "epoch": 1.5348442067914345, "grad_norm": 2.171875, "learning_rate": 4.785090746926178e-06, "loss": 0.9542, "step": 7757 }, { "epoch": 1.5350441018465306, "grad_norm": 2.28125, "learning_rate": 4.784037924201091e-06, "loss": 1.0465, "step": 7758 }, { "epoch": 1.5352439969016265, "grad_norm": 2.1875, "learning_rate": 4.782985111069015e-06, "loss": 1.057, "step": 7759 }, { "epoch": 1.5354438919567226, "grad_norm": 2.109375, "learning_rate": 4.781932307576718e-06, "loss": 0.9282, "step": 7760 }, { "epoch": 1.5356437870118187, "grad_norm": 2.40625, "learning_rate": 4.780879513770964e-06, "loss": 1.0559, "step": 7761 }, { "epoch": 1.5358436820669148, "grad_norm": 2.078125, "learning_rate": 4.779826729698519e-06, "loss": 1.0127, "step": 7762 }, { "epoch": 1.536043577122011, "grad_norm": 2.140625, "learning_rate": 4.778773955406148e-06, "loss": 1.0193, "step": 7763 }, { "epoch": 1.536243472177107, "grad_norm": 2.125, "learning_rate": 4.777721190940612e-06, "loss": 0.9641, "step": 7764 }, { "epoch": 1.536443367232203, "grad_norm": 2.296875, "learning_rate": 4.776668436348678e-06, "loss": 0.9669, "step": 7765 }, { "epoch": 1.5366432622872992, "grad_norm": 2.15625, "learning_rate": 4.77561569167711e-06, "loss": 0.9652, "step": 7766 }, { "epoch": 1.5368431573423953, "grad_norm": 2.15625, "learning_rate": 4.7745629569726664e-06, "loss": 0.9537, "step": 7767 }, { "epoch": 1.5370430523974914, "grad_norm": 2.265625, "learning_rate": 4.7735102322821145e-06, "loss": 1.0082, "step": 7768 }, { "epoch": 1.5372429474525875, "grad_norm": 2.03125, "learning_rate": 4.772457517652212e-06, "loss": 0.8862, "step": 7769 }, { "epoch": 1.5374428425076836, "grad_norm": 2.1875, "learning_rate": 4.771404813129722e-06, "loss": 0.9674, "step": 7770 }, { "epoch": 1.5376427375627797, "grad_norm": 2.25, "learning_rate": 4.7703521187614075e-06, "loss": 0.9746, "step": 7771 }, { "epoch": 1.5378426326178756, "grad_norm": 2.40625, "learning_rate": 4.769299434594026e-06, "loss": 0.964, "step": 7772 }, { "epoch": 1.5380425276729717, "grad_norm": 2.046875, "learning_rate": 4.76824676067434e-06, "loss": 0.9537, "step": 7773 }, { "epoch": 1.5382424227280678, "grad_norm": 2.171875, "learning_rate": 4.767194097049108e-06, "loss": 0.9013, "step": 7774 }, { "epoch": 1.5384423177831639, "grad_norm": 2.171875, "learning_rate": 4.766141443765088e-06, "loss": 0.9636, "step": 7775 }, { "epoch": 1.5386422128382597, "grad_norm": 2.15625, "learning_rate": 4.765088800869041e-06, "loss": 0.9825, "step": 7776 }, { "epoch": 1.5388421078933558, "grad_norm": 2.265625, "learning_rate": 4.764036168407726e-06, "loss": 0.9128, "step": 7777 }, { "epoch": 1.539042002948452, "grad_norm": 2.140625, "learning_rate": 4.762983546427897e-06, "loss": 0.9874, "step": 7778 }, { "epoch": 1.539241898003548, "grad_norm": 2.0625, "learning_rate": 4.761930934976315e-06, "loss": 0.9349, "step": 7779 }, { "epoch": 1.5394417930586441, "grad_norm": 2.140625, "learning_rate": 4.760878334099734e-06, "loss": 0.9253, "step": 7780 }, { "epoch": 1.5396416881137402, "grad_norm": 2.078125, "learning_rate": 4.759825743844912e-06, "loss": 0.9425, "step": 7781 }, { "epoch": 1.5398415831688363, "grad_norm": 2.09375, "learning_rate": 4.758773164258606e-06, "loss": 0.98, "step": 7782 }, { "epoch": 1.5400414782239324, "grad_norm": 2.0625, "learning_rate": 4.75772059538757e-06, "loss": 1.0028, "step": 7783 }, { "epoch": 1.5402413732790285, "grad_norm": 2.171875, "learning_rate": 4.7566680372785575e-06, "loss": 0.9386, "step": 7784 }, { "epoch": 1.5404412683341246, "grad_norm": 2.390625, "learning_rate": 4.755615489978328e-06, "loss": 1.011, "step": 7785 }, { "epoch": 1.5406411633892207, "grad_norm": 2.078125, "learning_rate": 4.7545629535336315e-06, "loss": 1.0177, "step": 7786 }, { "epoch": 1.5408410584443168, "grad_norm": 2.15625, "learning_rate": 4.753510427991221e-06, "loss": 0.9757, "step": 7787 }, { "epoch": 1.541040953499413, "grad_norm": 2.078125, "learning_rate": 4.752457913397851e-06, "loss": 0.967, "step": 7788 }, { "epoch": 1.541240848554509, "grad_norm": 2.09375, "learning_rate": 4.751405409800275e-06, "loss": 0.8561, "step": 7789 }, { "epoch": 1.541440743609605, "grad_norm": 2.28125, "learning_rate": 4.750352917245244e-06, "loss": 0.9998, "step": 7790 }, { "epoch": 1.541640638664701, "grad_norm": 2.15625, "learning_rate": 4.74930043577951e-06, "loss": 0.9589, "step": 7791 }, { "epoch": 1.5418405337197971, "grad_norm": 2.1875, "learning_rate": 4.748247965449822e-06, "loss": 0.9633, "step": 7792 }, { "epoch": 1.5420404287748932, "grad_norm": 2.203125, "learning_rate": 4.747195506302933e-06, "loss": 0.9434, "step": 7793 }, { "epoch": 1.542240323829989, "grad_norm": 2.125, "learning_rate": 4.746143058385594e-06, "loss": 0.9524, "step": 7794 }, { "epoch": 1.5424402188850852, "grad_norm": 2.171875, "learning_rate": 4.745090621744552e-06, "loss": 0.9472, "step": 7795 }, { "epoch": 1.5426401139401813, "grad_norm": 2.125, "learning_rate": 4.744038196426558e-06, "loss": 1.007, "step": 7796 }, { "epoch": 1.5428400089952774, "grad_norm": 2.078125, "learning_rate": 4.742985782478361e-06, "loss": 0.9061, "step": 7797 }, { "epoch": 1.5430399040503735, "grad_norm": 2.15625, "learning_rate": 4.741933379946706e-06, "loss": 1.0119, "step": 7798 }, { "epoch": 1.5432397991054696, "grad_norm": 2.046875, "learning_rate": 4.740880988878345e-06, "loss": 0.9332, "step": 7799 }, { "epoch": 1.5434396941605657, "grad_norm": 2.140625, "learning_rate": 4.739828609320021e-06, "loss": 0.8967, "step": 7800 }, { "epoch": 1.5436395892156618, "grad_norm": 2.1875, "learning_rate": 4.738776241318482e-06, "loss": 0.8662, "step": 7801 }, { "epoch": 1.543839484270758, "grad_norm": 2.125, "learning_rate": 4.737723884920476e-06, "loss": 0.9848, "step": 7802 }, { "epoch": 1.544039379325854, "grad_norm": 2.1875, "learning_rate": 4.736671540172746e-06, "loss": 1.0055, "step": 7803 }, { "epoch": 1.54423927438095, "grad_norm": 2.015625, "learning_rate": 4.735619207122038e-06, "loss": 0.9009, "step": 7804 }, { "epoch": 1.5444391694360462, "grad_norm": 2.1875, "learning_rate": 4.734566885815098e-06, "loss": 1.0093, "step": 7805 }, { "epoch": 1.5446390644911423, "grad_norm": 2.15625, "learning_rate": 4.733514576298666e-06, "loss": 0.9394, "step": 7806 }, { "epoch": 1.5448389595462382, "grad_norm": 2.296875, "learning_rate": 4.732462278619491e-06, "loss": 1.0647, "step": 7807 }, { "epoch": 1.5450388546013343, "grad_norm": 2.140625, "learning_rate": 4.731409992824311e-06, "loss": 0.9244, "step": 7808 }, { "epoch": 1.5452387496564304, "grad_norm": 2.078125, "learning_rate": 4.730357718959871e-06, "loss": 0.9751, "step": 7809 }, { "epoch": 1.5454386447115265, "grad_norm": 2.125, "learning_rate": 4.729305457072913e-06, "loss": 0.9699, "step": 7810 }, { "epoch": 1.5456385397666226, "grad_norm": 2.09375, "learning_rate": 4.728253207210176e-06, "loss": 0.9998, "step": 7811 }, { "epoch": 1.5458384348217185, "grad_norm": 2.09375, "learning_rate": 4.727200969418404e-06, "loss": 0.9604, "step": 7812 }, { "epoch": 1.5460383298768146, "grad_norm": 2.21875, "learning_rate": 4.726148743744336e-06, "loss": 1.0473, "step": 7813 }, { "epoch": 1.5462382249319107, "grad_norm": 2.4375, "learning_rate": 4.72509653023471e-06, "loss": 0.9781, "step": 7814 }, { "epoch": 1.5464381199870068, "grad_norm": 2.046875, "learning_rate": 4.724044328936268e-06, "loss": 0.9372, "step": 7815 }, { "epoch": 1.5466380150421029, "grad_norm": 2.109375, "learning_rate": 4.722992139895748e-06, "loss": 0.9463, "step": 7816 }, { "epoch": 1.546837910097199, "grad_norm": 1.9921875, "learning_rate": 4.7219399631598875e-06, "loss": 0.888, "step": 7817 }, { "epoch": 1.547037805152295, "grad_norm": 2.125, "learning_rate": 4.720887798775424e-06, "loss": 0.978, "step": 7818 }, { "epoch": 1.5472377002073912, "grad_norm": 2.09375, "learning_rate": 4.719835646789094e-06, "loss": 0.9491, "step": 7819 }, { "epoch": 1.5474375952624873, "grad_norm": 2.140625, "learning_rate": 4.718783507247634e-06, "loss": 0.9715, "step": 7820 }, { "epoch": 1.5476374903175834, "grad_norm": 2.203125, "learning_rate": 4.717731380197782e-06, "loss": 0.9565, "step": 7821 }, { "epoch": 1.5478373853726795, "grad_norm": 2.21875, "learning_rate": 4.716679265686271e-06, "loss": 0.9697, "step": 7822 }, { "epoch": 1.5480372804277756, "grad_norm": 2.15625, "learning_rate": 4.715627163759837e-06, "loss": 0.9604, "step": 7823 }, { "epoch": 1.5482371754828717, "grad_norm": 2.109375, "learning_rate": 4.714575074465213e-06, "loss": 1.0961, "step": 7824 }, { "epoch": 1.5484370705379675, "grad_norm": 2.125, "learning_rate": 4.7135229978491354e-06, "loss": 1.0226, "step": 7825 }, { "epoch": 1.5486369655930636, "grad_norm": 2.171875, "learning_rate": 4.7124709339583345e-06, "loss": 0.9901, "step": 7826 }, { "epoch": 1.5488368606481597, "grad_norm": 2.046875, "learning_rate": 4.711418882839544e-06, "loss": 0.9758, "step": 7827 }, { "epoch": 1.5490367557032558, "grad_norm": 2.1875, "learning_rate": 4.710366844539497e-06, "loss": 1.0121, "step": 7828 }, { "epoch": 1.5492366507583517, "grad_norm": 2.15625, "learning_rate": 4.7093148191049224e-06, "loss": 0.8907, "step": 7829 }, { "epoch": 1.5494365458134478, "grad_norm": 2.234375, "learning_rate": 4.708262806582554e-06, "loss": 0.9393, "step": 7830 }, { "epoch": 1.549636440868544, "grad_norm": 2.25, "learning_rate": 4.70721080701912e-06, "loss": 1.0503, "step": 7831 }, { "epoch": 1.54983633592364, "grad_norm": 2.203125, "learning_rate": 4.706158820461349e-06, "loss": 0.9926, "step": 7832 }, { "epoch": 1.550036230978736, "grad_norm": 2.21875, "learning_rate": 4.705106846955975e-06, "loss": 1.0444, "step": 7833 }, { "epoch": 1.5502361260338322, "grad_norm": 2.09375, "learning_rate": 4.704054886549721e-06, "loss": 0.8473, "step": 7834 }, { "epoch": 1.5504360210889283, "grad_norm": 2.109375, "learning_rate": 4.703002939289317e-06, "loss": 1.0376, "step": 7835 }, { "epoch": 1.5506359161440244, "grad_norm": 2.125, "learning_rate": 4.7019510052214924e-06, "loss": 1.0081, "step": 7836 }, { "epoch": 1.5508358111991205, "grad_norm": 2.125, "learning_rate": 4.7008990843929714e-06, "loss": 0.9817, "step": 7837 }, { "epoch": 1.5510357062542166, "grad_norm": 2.28125, "learning_rate": 4.699847176850482e-06, "loss": 1.0341, "step": 7838 }, { "epoch": 1.5512356013093127, "grad_norm": 2.140625, "learning_rate": 4.698795282640748e-06, "loss": 1.0323, "step": 7839 }, { "epoch": 1.5514354963644088, "grad_norm": 2.15625, "learning_rate": 4.6977434018104955e-06, "loss": 1.0385, "step": 7840 }, { "epoch": 1.551635391419505, "grad_norm": 2.1875, "learning_rate": 4.69669153440645e-06, "loss": 1.0385, "step": 7841 }, { "epoch": 1.5518352864746008, "grad_norm": 2.203125, "learning_rate": 4.695639680475332e-06, "loss": 0.8897, "step": 7842 }, { "epoch": 1.5520351815296969, "grad_norm": 2.15625, "learning_rate": 4.694587840063868e-06, "loss": 0.9869, "step": 7843 }, { "epoch": 1.552235076584793, "grad_norm": 2.140625, "learning_rate": 4.69353601321878e-06, "loss": 1.0172, "step": 7844 }, { "epoch": 1.552434971639889, "grad_norm": 2.0625, "learning_rate": 4.692484199986789e-06, "loss": 0.9036, "step": 7845 }, { "epoch": 1.5526348666949852, "grad_norm": 2.171875, "learning_rate": 4.691432400414617e-06, "loss": 0.9853, "step": 7846 }, { "epoch": 1.552834761750081, "grad_norm": 2.171875, "learning_rate": 4.690380614548986e-06, "loss": 1.0161, "step": 7847 }, { "epoch": 1.5530346568051772, "grad_norm": 2.171875, "learning_rate": 4.6893288424366145e-06, "loss": 0.9438, "step": 7848 }, { "epoch": 1.5532345518602733, "grad_norm": 2.109375, "learning_rate": 4.688277084124222e-06, "loss": 0.9814, "step": 7849 }, { "epoch": 1.5534344469153694, "grad_norm": 2.0625, "learning_rate": 4.687225339658529e-06, "loss": 0.954, "step": 7850 }, { "epoch": 1.5536343419704655, "grad_norm": 2.25, "learning_rate": 4.686173609086251e-06, "loss": 1.0238, "step": 7851 }, { "epoch": 1.5538342370255616, "grad_norm": 2.0625, "learning_rate": 4.685121892454109e-06, "loss": 0.9797, "step": 7852 }, { "epoch": 1.5540341320806577, "grad_norm": 2.078125, "learning_rate": 4.68407018980882e-06, "loss": 0.9091, "step": 7853 }, { "epoch": 1.5542340271357538, "grad_norm": 2.15625, "learning_rate": 4.683018501197098e-06, "loss": 0.9587, "step": 7854 }, { "epoch": 1.5544339221908499, "grad_norm": 2.078125, "learning_rate": 4.6819668266656594e-06, "loss": 0.8859, "step": 7855 }, { "epoch": 1.554633817245946, "grad_norm": 2.21875, "learning_rate": 4.680915166261223e-06, "loss": 1.0847, "step": 7856 }, { "epoch": 1.554833712301042, "grad_norm": 2.28125, "learning_rate": 4.679863520030499e-06, "loss": 1.0824, "step": 7857 }, { "epoch": 1.5550336073561382, "grad_norm": 2.25, "learning_rate": 4.678811888020204e-06, "loss": 0.9992, "step": 7858 }, { "epoch": 1.5552335024112343, "grad_norm": 2.171875, "learning_rate": 4.6777602702770505e-06, "loss": 0.9977, "step": 7859 }, { "epoch": 1.5554333974663301, "grad_norm": 2.203125, "learning_rate": 4.67670866684775e-06, "loss": 1.0028, "step": 7860 }, { "epoch": 1.5556332925214262, "grad_norm": 2.046875, "learning_rate": 4.675657077779019e-06, "loss": 0.894, "step": 7861 }, { "epoch": 1.5558331875765223, "grad_norm": 2.125, "learning_rate": 4.674605503117563e-06, "loss": 1.0168, "step": 7862 }, { "epoch": 1.5560330826316184, "grad_norm": 2.046875, "learning_rate": 4.673553942910097e-06, "loss": 0.9088, "step": 7863 }, { "epoch": 1.5562329776867143, "grad_norm": 2.0625, "learning_rate": 4.67250239720333e-06, "loss": 0.8533, "step": 7864 }, { "epoch": 1.5564328727418104, "grad_norm": 2.203125, "learning_rate": 4.6714508660439715e-06, "loss": 1.0101, "step": 7865 }, { "epoch": 1.5566327677969065, "grad_norm": 2.140625, "learning_rate": 4.67039934947873e-06, "loss": 0.9974, "step": 7866 }, { "epoch": 1.5568326628520026, "grad_norm": 2.015625, "learning_rate": 4.669347847554315e-06, "loss": 0.896, "step": 7867 }, { "epoch": 1.5570325579070987, "grad_norm": 2.109375, "learning_rate": 4.6682963603174326e-06, "loss": 0.9481, "step": 7868 }, { "epoch": 1.5572324529621948, "grad_norm": 2.15625, "learning_rate": 4.667244887814791e-06, "loss": 1.0765, "step": 7869 }, { "epoch": 1.557432348017291, "grad_norm": 2.140625, "learning_rate": 4.666193430093096e-06, "loss": 1.0258, "step": 7870 }, { "epoch": 1.557632243072387, "grad_norm": 2.125, "learning_rate": 4.665141987199052e-06, "loss": 0.938, "step": 7871 }, { "epoch": 1.5578321381274831, "grad_norm": 2.203125, "learning_rate": 4.664090559179367e-06, "loss": 1.0506, "step": 7872 }, { "epoch": 1.5580320331825792, "grad_norm": 2.234375, "learning_rate": 4.663039146080742e-06, "loss": 0.999, "step": 7873 }, { "epoch": 1.5582319282376753, "grad_norm": 2.234375, "learning_rate": 4.661987747949882e-06, "loss": 0.9825, "step": 7874 }, { "epoch": 1.5584318232927714, "grad_norm": 2.265625, "learning_rate": 4.660936364833492e-06, "loss": 0.8881, "step": 7875 }, { "epoch": 1.5586317183478675, "grad_norm": 2.296875, "learning_rate": 4.659884996778271e-06, "loss": 1.021, "step": 7876 }, { "epoch": 1.5588316134029634, "grad_norm": 2.21875, "learning_rate": 4.658833643830923e-06, "loss": 0.9397, "step": 7877 }, { "epoch": 1.5590315084580595, "grad_norm": 2.125, "learning_rate": 4.657782306038148e-06, "loss": 0.9747, "step": 7878 }, { "epoch": 1.5592314035131556, "grad_norm": 2.140625, "learning_rate": 4.6567309834466465e-06, "loss": 1.0115, "step": 7879 }, { "epoch": 1.5594312985682517, "grad_norm": 2.171875, "learning_rate": 4.6556796761031175e-06, "loss": 0.9302, "step": 7880 }, { "epoch": 1.5596311936233478, "grad_norm": 2.203125, "learning_rate": 4.654628384054261e-06, "loss": 1.0368, "step": 7881 }, { "epoch": 1.5598310886784437, "grad_norm": 2.15625, "learning_rate": 4.653577107346774e-06, "loss": 0.9975, "step": 7882 }, { "epoch": 1.5600309837335398, "grad_norm": 2.125, "learning_rate": 4.652525846027355e-06, "loss": 0.9942, "step": 7883 }, { "epoch": 1.5602308787886359, "grad_norm": 2.140625, "learning_rate": 4.651474600142701e-06, "loss": 0.8976, "step": 7884 }, { "epoch": 1.560430773843732, "grad_norm": 2.3125, "learning_rate": 4.650423369739508e-06, "loss": 0.9892, "step": 7885 }, { "epoch": 1.560630668898828, "grad_norm": 2.09375, "learning_rate": 4.649372154864471e-06, "loss": 0.9206, "step": 7886 }, { "epoch": 1.5608305639539242, "grad_norm": 2.140625, "learning_rate": 4.648320955564287e-06, "loss": 0.9585, "step": 7887 }, { "epoch": 1.5610304590090203, "grad_norm": 2.203125, "learning_rate": 4.647269771885648e-06, "loss": 0.9131, "step": 7888 }, { "epoch": 1.5612303540641164, "grad_norm": 2.1875, "learning_rate": 4.646218603875248e-06, "loss": 1.0652, "step": 7889 }, { "epoch": 1.5614302491192125, "grad_norm": 2.140625, "learning_rate": 4.645167451579779e-06, "loss": 1.0153, "step": 7890 }, { "epoch": 1.5616301441743086, "grad_norm": 2.265625, "learning_rate": 4.644116315045933e-06, "loss": 0.965, "step": 7891 }, { "epoch": 1.5618300392294047, "grad_norm": 2.140625, "learning_rate": 4.643065194320405e-06, "loss": 0.9428, "step": 7892 }, { "epoch": 1.5620299342845008, "grad_norm": 2.0625, "learning_rate": 4.642014089449881e-06, "loss": 0.9131, "step": 7893 }, { "epoch": 1.5622298293395969, "grad_norm": 2.203125, "learning_rate": 4.6409630004810535e-06, "loss": 0.9232, "step": 7894 }, { "epoch": 1.5624297243946927, "grad_norm": 2.34375, "learning_rate": 4.639911927460612e-06, "loss": 1.0809, "step": 7895 }, { "epoch": 1.5626296194497888, "grad_norm": 2.125, "learning_rate": 4.638860870435243e-06, "loss": 0.9853, "step": 7896 }, { "epoch": 1.562829514504885, "grad_norm": 2.140625, "learning_rate": 4.637809829451637e-06, "loss": 0.9465, "step": 7897 }, { "epoch": 1.563029409559981, "grad_norm": 2.09375, "learning_rate": 4.636758804556478e-06, "loss": 0.8824, "step": 7898 }, { "epoch": 1.563229304615077, "grad_norm": 2.203125, "learning_rate": 4.635707795796454e-06, "loss": 1.1537, "step": 7899 }, { "epoch": 1.563429199670173, "grad_norm": 2.25, "learning_rate": 4.634656803218252e-06, "loss": 0.9672, "step": 7900 }, { "epoch": 1.5636290947252691, "grad_norm": 2.0625, "learning_rate": 4.633605826868556e-06, "loss": 0.9607, "step": 7901 }, { "epoch": 1.5638289897803652, "grad_norm": 2.140625, "learning_rate": 4.632554866794048e-06, "loss": 0.951, "step": 7902 }, { "epoch": 1.5640288848354613, "grad_norm": 2.3125, "learning_rate": 4.631503923041416e-06, "loss": 0.9414, "step": 7903 }, { "epoch": 1.5642287798905574, "grad_norm": 2.046875, "learning_rate": 4.63045299565734e-06, "loss": 0.9421, "step": 7904 }, { "epoch": 1.5644286749456535, "grad_norm": 2.125, "learning_rate": 4.629402084688501e-06, "loss": 0.9926, "step": 7905 }, { "epoch": 1.5646285700007496, "grad_norm": 2.109375, "learning_rate": 4.628351190181583e-06, "loss": 0.9994, "step": 7906 }, { "epoch": 1.5648284650558457, "grad_norm": 2.328125, "learning_rate": 4.627300312183265e-06, "loss": 1.1636, "step": 7907 }, { "epoch": 1.5650283601109418, "grad_norm": 2.09375, "learning_rate": 4.6262494507402275e-06, "loss": 0.9685, "step": 7908 }, { "epoch": 1.565228255166038, "grad_norm": 2.0, "learning_rate": 4.625198605899152e-06, "loss": 0.9123, "step": 7909 }, { "epoch": 1.565428150221134, "grad_norm": 2.046875, "learning_rate": 4.624147777706711e-06, "loss": 0.9561, "step": 7910 }, { "epoch": 1.5656280452762301, "grad_norm": 2.171875, "learning_rate": 4.623096966209586e-06, "loss": 0.9164, "step": 7911 }, { "epoch": 1.5658279403313262, "grad_norm": 2.296875, "learning_rate": 4.622046171454454e-06, "loss": 0.9847, "step": 7912 }, { "epoch": 1.566027835386422, "grad_norm": 2.140625, "learning_rate": 4.620995393487991e-06, "loss": 0.9813, "step": 7913 }, { "epoch": 1.5662277304415182, "grad_norm": 2.046875, "learning_rate": 4.619944632356871e-06, "loss": 0.9009, "step": 7914 }, { "epoch": 1.5664276254966143, "grad_norm": 2.109375, "learning_rate": 4.618893888107772e-06, "loss": 0.9058, "step": 7915 }, { "epoch": 1.5666275205517104, "grad_norm": 2.140625, "learning_rate": 4.617843160787364e-06, "loss": 0.9579, "step": 7916 }, { "epoch": 1.5668274156068063, "grad_norm": 2.203125, "learning_rate": 4.616792450442321e-06, "loss": 0.9935, "step": 7917 }, { "epoch": 1.5670273106619024, "grad_norm": 2.140625, "learning_rate": 4.6157417571193184e-06, "loss": 0.8968, "step": 7918 }, { "epoch": 1.5672272057169985, "grad_norm": 2.15625, "learning_rate": 4.6146910808650255e-06, "loss": 0.9871, "step": 7919 }, { "epoch": 1.5674271007720946, "grad_norm": 2.171875, "learning_rate": 4.613640421726114e-06, "loss": 0.938, "step": 7920 }, { "epoch": 1.5676269958271907, "grad_norm": 2.1875, "learning_rate": 4.612589779749252e-06, "loss": 0.9615, "step": 7921 }, { "epoch": 1.5678268908822868, "grad_norm": 2.21875, "learning_rate": 4.611539154981112e-06, "loss": 0.948, "step": 7922 }, { "epoch": 1.5680267859373829, "grad_norm": 3.046875, "learning_rate": 4.610488547468361e-06, "loss": 0.984, "step": 7923 }, { "epoch": 1.568226680992479, "grad_norm": 2.171875, "learning_rate": 4.609437957257667e-06, "loss": 1.0223, "step": 7924 }, { "epoch": 1.568426576047575, "grad_norm": 2.125, "learning_rate": 4.608387384395697e-06, "loss": 0.9318, "step": 7925 }, { "epoch": 1.5686264711026712, "grad_norm": 2.203125, "learning_rate": 4.607336828929119e-06, "loss": 0.9919, "step": 7926 }, { "epoch": 1.5688263661577673, "grad_norm": 2.328125, "learning_rate": 4.606286290904595e-06, "loss": 1.0524, "step": 7927 }, { "epoch": 1.5690262612128634, "grad_norm": 2.25, "learning_rate": 4.605235770368794e-06, "loss": 0.9626, "step": 7928 }, { "epoch": 1.5692261562679595, "grad_norm": 2.5, "learning_rate": 4.604185267368376e-06, "loss": 0.9993, "step": 7929 }, { "epoch": 1.5694260513230553, "grad_norm": 2.0625, "learning_rate": 4.603134781950007e-06, "loss": 0.9542, "step": 7930 }, { "epoch": 1.5696259463781514, "grad_norm": 2.171875, "learning_rate": 4.6020843141603484e-06, "loss": 0.952, "step": 7931 }, { "epoch": 1.5698258414332475, "grad_norm": 2.3125, "learning_rate": 4.601033864046062e-06, "loss": 0.9748, "step": 7932 }, { "epoch": 1.5700257364883436, "grad_norm": 2.171875, "learning_rate": 4.599983431653809e-06, "loss": 1.0249, "step": 7933 }, { "epoch": 1.5702256315434397, "grad_norm": 2.171875, "learning_rate": 4.59893301703025e-06, "loss": 0.9893, "step": 7934 }, { "epoch": 1.5704255265985356, "grad_norm": 2.203125, "learning_rate": 4.597882620222043e-06, "loss": 0.9827, "step": 7935 }, { "epoch": 1.5706254216536317, "grad_norm": 2.21875, "learning_rate": 4.596832241275846e-06, "loss": 1.1081, "step": 7936 }, { "epoch": 1.5708253167087278, "grad_norm": 2.25, "learning_rate": 4.595781880238319e-06, "loss": 0.9389, "step": 7937 }, { "epoch": 1.571025211763824, "grad_norm": 2.234375, "learning_rate": 4.594731537156117e-06, "loss": 0.9455, "step": 7938 }, { "epoch": 1.57122510681892, "grad_norm": 2.234375, "learning_rate": 4.593681212075898e-06, "loss": 0.9311, "step": 7939 }, { "epoch": 1.5714250018740161, "grad_norm": 2.15625, "learning_rate": 4.592630905044317e-06, "loss": 0.9819, "step": 7940 }, { "epoch": 1.5716248969291122, "grad_norm": 2.25, "learning_rate": 4.591580616108026e-06, "loss": 1.0248, "step": 7941 }, { "epoch": 1.5718247919842083, "grad_norm": 2.046875, "learning_rate": 4.59053034531368e-06, "loss": 0.9329, "step": 7942 }, { "epoch": 1.5720246870393044, "grad_norm": 2.15625, "learning_rate": 4.589480092707933e-06, "loss": 1.0111, "step": 7943 }, { "epoch": 1.5722245820944005, "grad_norm": 2.203125, "learning_rate": 4.5884298583374356e-06, "loss": 0.9432, "step": 7944 }, { "epoch": 1.5724244771494966, "grad_norm": 2.15625, "learning_rate": 4.5873796422488405e-06, "loss": 1.0184, "step": 7945 }, { "epoch": 1.5726243722045927, "grad_norm": 2.046875, "learning_rate": 4.586329444488798e-06, "loss": 0.9221, "step": 7946 }, { "epoch": 1.5728242672596888, "grad_norm": 2.328125, "learning_rate": 4.585279265103957e-06, "loss": 1.0654, "step": 7947 }, { "epoch": 1.5730241623147847, "grad_norm": 2.171875, "learning_rate": 4.584229104140967e-06, "loss": 1.043, "step": 7948 }, { "epoch": 1.5732240573698808, "grad_norm": 2.15625, "learning_rate": 4.583178961646475e-06, "loss": 0.8936, "step": 7949 }, { "epoch": 1.573423952424977, "grad_norm": 2.234375, "learning_rate": 4.58212883766713e-06, "loss": 0.9949, "step": 7950 }, { "epoch": 1.573623847480073, "grad_norm": 2.109375, "learning_rate": 4.581078732249577e-06, "loss": 0.9255, "step": 7951 }, { "epoch": 1.5738237425351689, "grad_norm": 2.25, "learning_rate": 4.580028645440462e-06, "loss": 0.9435, "step": 7952 }, { "epoch": 1.574023637590265, "grad_norm": 2.453125, "learning_rate": 4.5789785772864295e-06, "loss": 1.0914, "step": 7953 }, { "epoch": 1.574223532645361, "grad_norm": 2.21875, "learning_rate": 4.577928527834124e-06, "loss": 1.0328, "step": 7954 }, { "epoch": 1.5744234277004572, "grad_norm": 2.109375, "learning_rate": 4.576878497130188e-06, "loss": 0.9528, "step": 7955 }, { "epoch": 1.5746233227555533, "grad_norm": 2.09375, "learning_rate": 4.575828485221263e-06, "loss": 0.9652, "step": 7956 }, { "epoch": 1.5748232178106494, "grad_norm": 2.03125, "learning_rate": 4.574778492153993e-06, "loss": 0.8972, "step": 7957 }, { "epoch": 1.5750231128657455, "grad_norm": 2.25, "learning_rate": 4.573728517975016e-06, "loss": 0.9544, "step": 7958 }, { "epoch": 1.5752230079208416, "grad_norm": 2.171875, "learning_rate": 4.5726785627309736e-06, "loss": 0.9876, "step": 7959 }, { "epoch": 1.5754229029759377, "grad_norm": 2.03125, "learning_rate": 4.571628626468503e-06, "loss": 0.9727, "step": 7960 }, { "epoch": 1.5756227980310338, "grad_norm": 2.171875, "learning_rate": 4.570578709234243e-06, "loss": 1.0302, "step": 7961 }, { "epoch": 1.5758226930861299, "grad_norm": 2.078125, "learning_rate": 4.5695288110748324e-06, "loss": 0.9296, "step": 7962 }, { "epoch": 1.576022588141226, "grad_norm": 2.046875, "learning_rate": 4.568478932036904e-06, "loss": 0.9386, "step": 7963 }, { "epoch": 1.576222483196322, "grad_norm": 2.109375, "learning_rate": 4.567429072167095e-06, "loss": 0.9132, "step": 7964 }, { "epoch": 1.576422378251418, "grad_norm": 2.09375, "learning_rate": 4.566379231512042e-06, "loss": 0.9991, "step": 7965 }, { "epoch": 1.576622273306514, "grad_norm": 2.171875, "learning_rate": 4.565329410118377e-06, "loss": 0.927, "step": 7966 }, { "epoch": 1.5768221683616102, "grad_norm": 2.234375, "learning_rate": 4.564279608032734e-06, "loss": 0.9367, "step": 7967 }, { "epoch": 1.5770220634167063, "grad_norm": 2.28125, "learning_rate": 4.563229825301743e-06, "loss": 1.0996, "step": 7968 }, { "epoch": 1.5772219584718024, "grad_norm": 2.15625, "learning_rate": 4.562180061972036e-06, "loss": 0.9513, "step": 7969 }, { "epoch": 1.5774218535268982, "grad_norm": 2.21875, "learning_rate": 4.561130318090246e-06, "loss": 1.0707, "step": 7970 }, { "epoch": 1.5776217485819943, "grad_norm": 2.296875, "learning_rate": 4.560080593703e-06, "loss": 0.9936, "step": 7971 }, { "epoch": 1.5778216436370904, "grad_norm": 2.125, "learning_rate": 4.5590308888569256e-06, "loss": 0.952, "step": 7972 }, { "epoch": 1.5780215386921865, "grad_norm": 2.265625, "learning_rate": 4.557981203598652e-06, "loss": 0.9375, "step": 7973 }, { "epoch": 1.5782214337472826, "grad_norm": 2.125, "learning_rate": 4.556931537974808e-06, "loss": 0.9408, "step": 7974 }, { "epoch": 1.5784213288023787, "grad_norm": 2.25, "learning_rate": 4.555881892032016e-06, "loss": 1.037, "step": 7975 }, { "epoch": 1.5786212238574748, "grad_norm": 2.15625, "learning_rate": 4.554832265816903e-06, "loss": 0.9225, "step": 7976 }, { "epoch": 1.578821118912571, "grad_norm": 2.109375, "learning_rate": 4.553782659376094e-06, "loss": 0.9012, "step": 7977 }, { "epoch": 1.579021013967667, "grad_norm": 2.203125, "learning_rate": 4.552733072756211e-06, "loss": 0.994, "step": 7978 }, { "epoch": 1.5792209090227631, "grad_norm": 2.265625, "learning_rate": 4.551683506003878e-06, "loss": 0.9952, "step": 7979 }, { "epoch": 1.5794208040778592, "grad_norm": 2.25, "learning_rate": 4.5506339591657145e-06, "loss": 0.9756, "step": 7980 }, { "epoch": 1.5796206991329553, "grad_norm": 2.046875, "learning_rate": 4.549584432288343e-06, "loss": 0.9691, "step": 7981 }, { "epoch": 1.5798205941880514, "grad_norm": 2.21875, "learning_rate": 4.548534925418383e-06, "loss": 1.0193, "step": 7982 }, { "epoch": 1.5800204892431473, "grad_norm": 2.296875, "learning_rate": 4.547485438602453e-06, "loss": 1.0499, "step": 7983 }, { "epoch": 1.5802203842982434, "grad_norm": 2.265625, "learning_rate": 4.546435971887172e-06, "loss": 0.9644, "step": 7984 }, { "epoch": 1.5804202793533395, "grad_norm": 2.21875, "learning_rate": 4.545386525319157e-06, "loss": 0.9267, "step": 7985 }, { "epoch": 1.5806201744084356, "grad_norm": 2.140625, "learning_rate": 4.544337098945023e-06, "loss": 1.0208, "step": 7986 }, { "epoch": 1.5808200694635315, "grad_norm": 2.15625, "learning_rate": 4.543287692811388e-06, "loss": 0.903, "step": 7987 }, { "epoch": 1.5810199645186276, "grad_norm": 2.125, "learning_rate": 4.542238306964863e-06, "loss": 0.9197, "step": 7988 }, { "epoch": 1.5812198595737237, "grad_norm": 2.28125, "learning_rate": 4.5411889414520634e-06, "loss": 1.0371, "step": 7989 }, { "epoch": 1.5814197546288198, "grad_norm": 2.140625, "learning_rate": 4.540139596319604e-06, "loss": 0.985, "step": 7990 }, { "epoch": 1.5816196496839159, "grad_norm": 2.109375, "learning_rate": 4.539090271614094e-06, "loss": 0.8718, "step": 7991 }, { "epoch": 1.581819544739012, "grad_norm": 1.96875, "learning_rate": 4.538040967382144e-06, "loss": 0.9993, "step": 7992 }, { "epoch": 1.582019439794108, "grad_norm": 2.203125, "learning_rate": 4.536991683670366e-06, "loss": 1.0313, "step": 7993 }, { "epoch": 1.5822193348492042, "grad_norm": 2.109375, "learning_rate": 4.5359424205253665e-06, "loss": 1.0063, "step": 7994 }, { "epoch": 1.5824192299043003, "grad_norm": 2.03125, "learning_rate": 4.534893177993756e-06, "loss": 1.0112, "step": 7995 }, { "epoch": 1.5826191249593964, "grad_norm": 2.125, "learning_rate": 4.533843956122142e-06, "loss": 0.9603, "step": 7996 }, { "epoch": 1.5828190200144925, "grad_norm": 2.046875, "learning_rate": 4.532794754957128e-06, "loss": 0.9179, "step": 7997 }, { "epoch": 1.5830189150695886, "grad_norm": 2.078125, "learning_rate": 4.531745574545322e-06, "loss": 0.9546, "step": 7998 }, { "epoch": 1.5832188101246847, "grad_norm": 2.171875, "learning_rate": 4.5306964149333265e-06, "loss": 0.9534, "step": 7999 }, { "epoch": 1.5834187051797806, "grad_norm": 2.328125, "learning_rate": 4.529647276167747e-06, "loss": 0.9989, "step": 8000 }, { "epoch": 1.5836186002348767, "grad_norm": 2.15625, "learning_rate": 4.5285981582951855e-06, "loss": 0.9472, "step": 8001 }, { "epoch": 1.5838184952899728, "grad_norm": 2.171875, "learning_rate": 4.527549061362245e-06, "loss": 0.98, "step": 8002 }, { "epoch": 1.5840183903450689, "grad_norm": 2.28125, "learning_rate": 4.526499985415522e-06, "loss": 1.0411, "step": 8003 }, { "epoch": 1.584218285400165, "grad_norm": 2.109375, "learning_rate": 4.5254509305016195e-06, "loss": 0.9677, "step": 8004 }, { "epoch": 1.5844181804552608, "grad_norm": 2.140625, "learning_rate": 4.524401896667137e-06, "loss": 0.952, "step": 8005 }, { "epoch": 1.584618075510357, "grad_norm": 2.03125, "learning_rate": 4.52335288395867e-06, "loss": 0.8664, "step": 8006 }, { "epoch": 1.584817970565453, "grad_norm": 2.046875, "learning_rate": 4.522303892422818e-06, "loss": 0.9897, "step": 8007 }, { "epoch": 1.5850178656205491, "grad_norm": 2.296875, "learning_rate": 4.521254922106176e-06, "loss": 0.9461, "step": 8008 }, { "epoch": 1.5852177606756452, "grad_norm": 2.125, "learning_rate": 4.520205973055339e-06, "loss": 0.9887, "step": 8009 }, { "epoch": 1.5854176557307413, "grad_norm": 2.21875, "learning_rate": 4.519157045316902e-06, "loss": 0.9829, "step": 8010 }, { "epoch": 1.5856175507858374, "grad_norm": 2.078125, "learning_rate": 4.518108138937457e-06, "loss": 0.9488, "step": 8011 }, { "epoch": 1.5858174458409335, "grad_norm": 2.234375, "learning_rate": 4.517059253963596e-06, "loss": 0.8656, "step": 8012 }, { "epoch": 1.5860173408960296, "grad_norm": 2.03125, "learning_rate": 4.516010390441914e-06, "loss": 0.8928, "step": 8013 }, { "epoch": 1.5862172359511257, "grad_norm": 2.109375, "learning_rate": 4.5149615484189965e-06, "loss": 0.8735, "step": 8014 }, { "epoch": 1.5864171310062218, "grad_norm": 2.203125, "learning_rate": 4.513912727941437e-06, "loss": 0.9873, "step": 8015 }, { "epoch": 1.586617026061318, "grad_norm": 2.046875, "learning_rate": 4.512863929055821e-06, "loss": 0.9418, "step": 8016 }, { "epoch": 1.586816921116414, "grad_norm": 2.109375, "learning_rate": 4.511815151808737e-06, "loss": 0.9767, "step": 8017 }, { "epoch": 1.58701681617151, "grad_norm": 2.125, "learning_rate": 4.510766396246773e-06, "loss": 1.0002, "step": 8018 }, { "epoch": 1.587216711226606, "grad_norm": 2.15625, "learning_rate": 4.5097176624165125e-06, "loss": 0.9911, "step": 8019 }, { "epoch": 1.5874166062817021, "grad_norm": 2.09375, "learning_rate": 4.508668950364541e-06, "loss": 0.8995, "step": 8020 }, { "epoch": 1.5876165013367982, "grad_norm": 2.25, "learning_rate": 4.507620260137443e-06, "loss": 1.0113, "step": 8021 }, { "epoch": 1.587816396391894, "grad_norm": 2.171875, "learning_rate": 4.5065715917818e-06, "loss": 0.9905, "step": 8022 }, { "epoch": 1.5880162914469902, "grad_norm": 2.078125, "learning_rate": 4.5055229453441935e-06, "loss": 0.9579, "step": 8023 }, { "epoch": 1.5882161865020863, "grad_norm": 2.1875, "learning_rate": 4.504474320871206e-06, "loss": 1.0448, "step": 8024 }, { "epoch": 1.5884160815571824, "grad_norm": 2.0625, "learning_rate": 4.503425718409416e-06, "loss": 0.9604, "step": 8025 }, { "epoch": 1.5886159766122785, "grad_norm": 2.125, "learning_rate": 4.502377138005402e-06, "loss": 1.0344, "step": 8026 }, { "epoch": 1.5888158716673746, "grad_norm": 2.171875, "learning_rate": 4.501328579705744e-06, "loss": 1.0418, "step": 8027 }, { "epoch": 1.5890157667224707, "grad_norm": 2.078125, "learning_rate": 4.500280043557015e-06, "loss": 0.9852, "step": 8028 }, { "epoch": 1.5892156617775668, "grad_norm": 2.0625, "learning_rate": 4.499231529605795e-06, "loss": 0.8487, "step": 8029 }, { "epoch": 1.589415556832663, "grad_norm": 2.140625, "learning_rate": 4.498183037898656e-06, "loss": 1.0181, "step": 8030 }, { "epoch": 1.589615451887759, "grad_norm": 2.109375, "learning_rate": 4.497134568482172e-06, "loss": 0.9443, "step": 8031 }, { "epoch": 1.589815346942855, "grad_norm": 2.28125, "learning_rate": 4.496086121402919e-06, "loss": 1.0473, "step": 8032 }, { "epoch": 1.5900152419979512, "grad_norm": 2.1875, "learning_rate": 4.495037696707467e-06, "loss": 0.9742, "step": 8033 }, { "epoch": 1.5902151370530473, "grad_norm": 2.265625, "learning_rate": 4.493989294442385e-06, "loss": 1.0032, "step": 8034 }, { "epoch": 1.5904150321081434, "grad_norm": 2.03125, "learning_rate": 4.492940914654244e-06, "loss": 0.9712, "step": 8035 }, { "epoch": 1.5906149271632393, "grad_norm": 2.125, "learning_rate": 4.491892557389614e-06, "loss": 1.0108, "step": 8036 }, { "epoch": 1.5908148222183354, "grad_norm": 2.21875, "learning_rate": 4.4908442226950625e-06, "loss": 1.0197, "step": 8037 }, { "epoch": 1.5910147172734315, "grad_norm": 2.046875, "learning_rate": 4.489795910617156e-06, "loss": 0.998, "step": 8038 }, { "epoch": 1.5912146123285276, "grad_norm": 2.203125, "learning_rate": 4.488747621202461e-06, "loss": 0.8976, "step": 8039 }, { "epoch": 1.5914145073836234, "grad_norm": 2.171875, "learning_rate": 4.487699354497541e-06, "loss": 0.9769, "step": 8040 }, { "epoch": 1.5916144024387195, "grad_norm": 2.265625, "learning_rate": 4.4866511105489625e-06, "loss": 0.9799, "step": 8041 }, { "epoch": 1.5918142974938156, "grad_norm": 1.9296875, "learning_rate": 4.485602889403286e-06, "loss": 0.7528, "step": 8042 }, { "epoch": 1.5920141925489117, "grad_norm": 2.25, "learning_rate": 4.484554691107073e-06, "loss": 0.9563, "step": 8043 }, { "epoch": 1.5922140876040078, "grad_norm": 2.125, "learning_rate": 4.4835065157068875e-06, "loss": 0.9822, "step": 8044 }, { "epoch": 1.592413982659104, "grad_norm": 2.15625, "learning_rate": 4.4824583632492866e-06, "loss": 1.0397, "step": 8045 }, { "epoch": 1.5926138777142, "grad_norm": 2.296875, "learning_rate": 4.48141023378083e-06, "loss": 1.0909, "step": 8046 }, { "epoch": 1.5928137727692961, "grad_norm": 2.125, "learning_rate": 4.480362127348075e-06, "loss": 0.9063, "step": 8047 }, { "epoch": 1.5930136678243922, "grad_norm": 2.0, "learning_rate": 4.479314043997579e-06, "loss": 0.9052, "step": 8048 }, { "epoch": 1.5932135628794883, "grad_norm": 2.125, "learning_rate": 4.478265983775899e-06, "loss": 0.9554, "step": 8049 }, { "epoch": 1.5934134579345844, "grad_norm": 2.15625, "learning_rate": 4.4772179467295865e-06, "loss": 0.919, "step": 8050 }, { "epoch": 1.5936133529896805, "grad_norm": 2.15625, "learning_rate": 4.476169932905197e-06, "loss": 0.9487, "step": 8051 }, { "epoch": 1.5938132480447766, "grad_norm": 2.15625, "learning_rate": 4.475121942349285e-06, "loss": 1.0724, "step": 8052 }, { "epoch": 1.5940131430998725, "grad_norm": 2.078125, "learning_rate": 4.474073975108398e-06, "loss": 0.9217, "step": 8053 }, { "epoch": 1.5942130381549686, "grad_norm": 2.359375, "learning_rate": 4.47302603122909e-06, "loss": 1.0427, "step": 8054 }, { "epoch": 1.5944129332100647, "grad_norm": 2.09375, "learning_rate": 4.47197811075791e-06, "loss": 0.8782, "step": 8055 }, { "epoch": 1.5946128282651608, "grad_norm": 2.09375, "learning_rate": 4.470930213741405e-06, "loss": 0.9156, "step": 8056 }, { "epoch": 1.594812723320257, "grad_norm": 2.15625, "learning_rate": 4.469882340226124e-06, "loss": 1.0549, "step": 8057 }, { "epoch": 1.5950126183753528, "grad_norm": 2.1875, "learning_rate": 4.468834490258612e-06, "loss": 0.9348, "step": 8058 }, { "epoch": 1.595212513430449, "grad_norm": 2.109375, "learning_rate": 4.467786663885415e-06, "loss": 0.959, "step": 8059 }, { "epoch": 1.595412408485545, "grad_norm": 2.15625, "learning_rate": 4.4667388611530785e-06, "loss": 1.0165, "step": 8060 }, { "epoch": 1.595612303540641, "grad_norm": 2.15625, "learning_rate": 4.465691082108145e-06, "loss": 0.9735, "step": 8061 }, { "epoch": 1.5958121985957372, "grad_norm": 2.3125, "learning_rate": 4.464643326797155e-06, "loss": 1.0059, "step": 8062 }, { "epoch": 1.5960120936508333, "grad_norm": 2.171875, "learning_rate": 4.463595595266653e-06, "loss": 1.0127, "step": 8063 }, { "epoch": 1.5962119887059294, "grad_norm": 2.3125, "learning_rate": 4.4625478875631785e-06, "loss": 0.9852, "step": 8064 }, { "epoch": 1.5964118837610255, "grad_norm": 2.25, "learning_rate": 4.4615002037332675e-06, "loss": 1.0747, "step": 8065 }, { "epoch": 1.5966117788161216, "grad_norm": 2.1875, "learning_rate": 4.460452543823459e-06, "loss": 1.091, "step": 8066 }, { "epoch": 1.5968116738712177, "grad_norm": 2.15625, "learning_rate": 4.459404907880293e-06, "loss": 0.9338, "step": 8067 }, { "epoch": 1.5970115689263138, "grad_norm": 2.125, "learning_rate": 4.458357295950302e-06, "loss": 0.9632, "step": 8068 }, { "epoch": 1.59721146398141, "grad_norm": 2.375, "learning_rate": 4.457309708080022e-06, "loss": 0.9838, "step": 8069 }, { "epoch": 1.597411359036506, "grad_norm": 2.125, "learning_rate": 4.456262144315987e-06, "loss": 0.9952, "step": 8070 }, { "epoch": 1.5976112540916019, "grad_norm": 2.09375, "learning_rate": 4.455214604704729e-06, "loss": 0.9566, "step": 8071 }, { "epoch": 1.597811149146698, "grad_norm": 2.109375, "learning_rate": 4.454167089292781e-06, "loss": 0.9163, "step": 8072 }, { "epoch": 1.598011044201794, "grad_norm": 2.109375, "learning_rate": 4.453119598126672e-06, "loss": 0.8876, "step": 8073 }, { "epoch": 1.5982109392568902, "grad_norm": 2.203125, "learning_rate": 4.452072131252932e-06, "loss": 1.0028, "step": 8074 }, { "epoch": 1.598410834311986, "grad_norm": 2.1875, "learning_rate": 4.45102468871809e-06, "loss": 0.947, "step": 8075 }, { "epoch": 1.5986107293670822, "grad_norm": 2.109375, "learning_rate": 4.449977270568672e-06, "loss": 0.9892, "step": 8076 }, { "epoch": 1.5988106244221783, "grad_norm": 1.984375, "learning_rate": 4.4489298768512065e-06, "loss": 0.9009, "step": 8077 }, { "epoch": 1.5990105194772744, "grad_norm": 2.078125, "learning_rate": 4.4478825076122155e-06, "loss": 0.9217, "step": 8078 }, { "epoch": 1.5992104145323705, "grad_norm": 2.0625, "learning_rate": 4.446835162898224e-06, "loss": 0.9853, "step": 8079 }, { "epoch": 1.5994103095874665, "grad_norm": 2.3125, "learning_rate": 4.445787842755756e-06, "loss": 1.0672, "step": 8080 }, { "epoch": 1.5996102046425626, "grad_norm": 2.125, "learning_rate": 4.444740547231334e-06, "loss": 0.9832, "step": 8081 }, { "epoch": 1.5998100996976587, "grad_norm": 2.1875, "learning_rate": 4.443693276371476e-06, "loss": 0.9773, "step": 8082 }, { "epoch": 1.6000099947527548, "grad_norm": 2.109375, "learning_rate": 4.442646030222704e-06, "loss": 0.9381, "step": 8083 }, { "epoch": 1.600209889807851, "grad_norm": 2.171875, "learning_rate": 4.441598808831536e-06, "loss": 1.1051, "step": 8084 }, { "epoch": 1.600409784862947, "grad_norm": 2.390625, "learning_rate": 4.440551612244489e-06, "loss": 0.9364, "step": 8085 }, { "epoch": 1.6006096799180431, "grad_norm": 2.296875, "learning_rate": 4.4395044405080805e-06, "loss": 1.0837, "step": 8086 }, { "epoch": 1.6008095749731392, "grad_norm": 2.046875, "learning_rate": 4.438457293668823e-06, "loss": 0.9065, "step": 8087 }, { "epoch": 1.6010094700282351, "grad_norm": 2.1875, "learning_rate": 4.437410171773234e-06, "loss": 0.9178, "step": 8088 }, { "epoch": 1.6012093650833312, "grad_norm": 2.171875, "learning_rate": 4.436363074867824e-06, "loss": 0.9614, "step": 8089 }, { "epoch": 1.6014092601384273, "grad_norm": 2.125, "learning_rate": 4.435316002999107e-06, "loss": 0.9914, "step": 8090 }, { "epoch": 1.6016091551935234, "grad_norm": 2.140625, "learning_rate": 4.434268956213593e-06, "loss": 0.9617, "step": 8091 }, { "epoch": 1.6018090502486195, "grad_norm": 2.15625, "learning_rate": 4.43322193455779e-06, "loss": 1.0329, "step": 8092 }, { "epoch": 1.6020089453037154, "grad_norm": 2.140625, "learning_rate": 4.432174938078209e-06, "loss": 0.9898, "step": 8093 }, { "epoch": 1.6022088403588115, "grad_norm": 2.203125, "learning_rate": 4.431127966821358e-06, "loss": 1.0177, "step": 8094 }, { "epoch": 1.6024087354139076, "grad_norm": 2.078125, "learning_rate": 4.4300810208337425e-06, "loss": 0.9509, "step": 8095 }, { "epoch": 1.6026086304690037, "grad_norm": 2.265625, "learning_rate": 4.4290341001618645e-06, "loss": 0.9996, "step": 8096 }, { "epoch": 1.6028085255240998, "grad_norm": 2.296875, "learning_rate": 4.4279872048522316e-06, "loss": 1.0083, "step": 8097 }, { "epoch": 1.603008420579196, "grad_norm": 2.1875, "learning_rate": 4.426940334951347e-06, "loss": 0.8973, "step": 8098 }, { "epoch": 1.603208315634292, "grad_norm": 2.125, "learning_rate": 4.42589349050571e-06, "loss": 0.9218, "step": 8099 }, { "epoch": 1.603408210689388, "grad_norm": 2.328125, "learning_rate": 4.424846671561824e-06, "loss": 0.9154, "step": 8100 }, { "epoch": 1.6036081057444842, "grad_norm": 2.03125, "learning_rate": 4.423799878166187e-06, "loss": 1.0048, "step": 8101 }, { "epoch": 1.6038080007995803, "grad_norm": 2.0625, "learning_rate": 4.422753110365297e-06, "loss": 1.0094, "step": 8102 }, { "epoch": 1.6040078958546764, "grad_norm": 2.1875, "learning_rate": 4.4217063682056534e-06, "loss": 1.0047, "step": 8103 }, { "epoch": 1.6042077909097725, "grad_norm": 2.203125, "learning_rate": 4.420659651733751e-06, "loss": 0.9212, "step": 8104 }, { "epoch": 1.6044076859648686, "grad_norm": 2.03125, "learning_rate": 4.419612960996083e-06, "loss": 0.935, "step": 8105 }, { "epoch": 1.6046075810199645, "grad_norm": 2.140625, "learning_rate": 4.418566296039148e-06, "loss": 0.9701, "step": 8106 }, { "epoch": 1.6048074760750606, "grad_norm": 2.265625, "learning_rate": 4.417519656909435e-06, "loss": 0.9098, "step": 8107 }, { "epoch": 1.6050073711301567, "grad_norm": 2.125, "learning_rate": 4.416473043653437e-06, "loss": 0.9055, "step": 8108 }, { "epoch": 1.6052072661852528, "grad_norm": 2.171875, "learning_rate": 4.415426456317644e-06, "loss": 0.9648, "step": 8109 }, { "epoch": 1.6054071612403487, "grad_norm": 2.0, "learning_rate": 4.4143798949485445e-06, "loss": 0.929, "step": 8110 }, { "epoch": 1.6056070562954448, "grad_norm": 2.125, "learning_rate": 4.4133333595926285e-06, "loss": 0.9921, "step": 8111 }, { "epoch": 1.6058069513505409, "grad_norm": 2.296875, "learning_rate": 4.4122868502963815e-06, "loss": 0.8996, "step": 8112 }, { "epoch": 1.606006846405637, "grad_norm": 2.140625, "learning_rate": 4.411240367106289e-06, "loss": 1.0227, "step": 8113 }, { "epoch": 1.606206741460733, "grad_norm": 2.296875, "learning_rate": 4.410193910068838e-06, "loss": 1.0014, "step": 8114 }, { "epoch": 1.6064066365158292, "grad_norm": 2.21875, "learning_rate": 4.40914747923051e-06, "loss": 0.9274, "step": 8115 }, { "epoch": 1.6066065315709253, "grad_norm": 2.15625, "learning_rate": 4.4081010746377875e-06, "loss": 0.9208, "step": 8116 }, { "epoch": 1.6068064266260214, "grad_norm": 2.25, "learning_rate": 4.407054696337153e-06, "loss": 1.0311, "step": 8117 }, { "epoch": 1.6070063216811175, "grad_norm": 2.1875, "learning_rate": 4.4060083443750844e-06, "loss": 0.9027, "step": 8118 }, { "epoch": 1.6072062167362136, "grad_norm": 2.171875, "learning_rate": 4.404962018798064e-06, "loss": 0.9151, "step": 8119 }, { "epoch": 1.6074061117913097, "grad_norm": 2.078125, "learning_rate": 4.403915719652565e-06, "loss": 0.9553, "step": 8120 }, { "epoch": 1.6076060068464058, "grad_norm": 2.15625, "learning_rate": 4.402869446985066e-06, "loss": 0.9868, "step": 8121 }, { "epoch": 1.6078059019015019, "grad_norm": 2.1875, "learning_rate": 4.401823200842043e-06, "loss": 1.0076, "step": 8122 }, { "epoch": 1.6080057969565977, "grad_norm": 2.15625, "learning_rate": 4.400776981269969e-06, "loss": 1.0536, "step": 8123 }, { "epoch": 1.6082056920116938, "grad_norm": 2.21875, "learning_rate": 4.399730788315317e-06, "loss": 0.9728, "step": 8124 }, { "epoch": 1.60840558706679, "grad_norm": 2.0625, "learning_rate": 4.398684622024562e-06, "loss": 0.9305, "step": 8125 }, { "epoch": 1.608605482121886, "grad_norm": 2.21875, "learning_rate": 4.397638482444171e-06, "loss": 1.0283, "step": 8126 }, { "epoch": 1.6088053771769821, "grad_norm": 2.0625, "learning_rate": 4.396592369620613e-06, "loss": 0.9209, "step": 8127 }, { "epoch": 1.609005272232078, "grad_norm": 2.203125, "learning_rate": 4.395546283600359e-06, "loss": 1.0647, "step": 8128 }, { "epoch": 1.609205167287174, "grad_norm": 2.34375, "learning_rate": 4.394500224429873e-06, "loss": 0.9947, "step": 8129 }, { "epoch": 1.6094050623422702, "grad_norm": 2.203125, "learning_rate": 4.393454192155621e-06, "loss": 0.9625, "step": 8130 }, { "epoch": 1.6096049573973663, "grad_norm": 2.140625, "learning_rate": 4.392408186824072e-06, "loss": 0.9379, "step": 8131 }, { "epoch": 1.6098048524524624, "grad_norm": 2.171875, "learning_rate": 4.391362208481685e-06, "loss": 1.0019, "step": 8132 }, { "epoch": 1.6100047475075585, "grad_norm": 2.046875, "learning_rate": 4.3903162571749234e-06, "loss": 0.8508, "step": 8133 }, { "epoch": 1.6102046425626546, "grad_norm": 2.09375, "learning_rate": 4.38927033295025e-06, "loss": 0.9127, "step": 8134 }, { "epoch": 1.6104045376177507, "grad_norm": 2.140625, "learning_rate": 4.388224435854121e-06, "loss": 0.9434, "step": 8135 }, { "epoch": 1.6106044326728468, "grad_norm": 2.046875, "learning_rate": 4.3871785659329985e-06, "loss": 0.9151, "step": 8136 }, { "epoch": 1.610804327727943, "grad_norm": 2.125, "learning_rate": 4.386132723233339e-06, "loss": 0.994, "step": 8137 }, { "epoch": 1.611004222783039, "grad_norm": 2.03125, "learning_rate": 4.385086907801598e-06, "loss": 0.932, "step": 8138 }, { "epoch": 1.611204117838135, "grad_norm": 2.171875, "learning_rate": 4.384041119684231e-06, "loss": 1.012, "step": 8139 }, { "epoch": 1.6114040128932312, "grad_norm": 2.15625, "learning_rate": 4.382995358927691e-06, "loss": 0.9277, "step": 8140 }, { "epoch": 1.611603907948327, "grad_norm": 2.0625, "learning_rate": 4.3819496255784314e-06, "loss": 0.9755, "step": 8141 }, { "epoch": 1.6118038030034232, "grad_norm": 2.328125, "learning_rate": 4.380903919682904e-06, "loss": 1.0356, "step": 8142 }, { "epoch": 1.6120036980585193, "grad_norm": 2.15625, "learning_rate": 4.379858241287558e-06, "loss": 0.979, "step": 8143 }, { "epoch": 1.6122035931136154, "grad_norm": 2.171875, "learning_rate": 4.378812590438843e-06, "loss": 0.9252, "step": 8144 }, { "epoch": 1.6124034881687113, "grad_norm": 2.171875, "learning_rate": 4.377766967183206e-06, "loss": 0.9043, "step": 8145 }, { "epoch": 1.6126033832238074, "grad_norm": 2.234375, "learning_rate": 4.376721371567094e-06, "loss": 0.9587, "step": 8146 }, { "epoch": 1.6128032782789035, "grad_norm": 2.15625, "learning_rate": 4.375675803636953e-06, "loss": 1.0145, "step": 8147 }, { "epoch": 1.6130031733339996, "grad_norm": 2.078125, "learning_rate": 4.374630263439225e-06, "loss": 0.9653, "step": 8148 }, { "epoch": 1.6132030683890957, "grad_norm": 2.25, "learning_rate": 4.3735847510203536e-06, "loss": 0.9276, "step": 8149 }, { "epoch": 1.6134029634441918, "grad_norm": 2.125, "learning_rate": 4.372539266426783e-06, "loss": 0.9764, "step": 8150 }, { "epoch": 1.6136028584992879, "grad_norm": 2.265625, "learning_rate": 4.371493809704948e-06, "loss": 1.016, "step": 8151 }, { "epoch": 1.613802753554384, "grad_norm": 2.125, "learning_rate": 4.3704483809012925e-06, "loss": 1.0168, "step": 8152 }, { "epoch": 1.61400264860948, "grad_norm": 2.203125, "learning_rate": 4.369402980062253e-06, "loss": 0.987, "step": 8153 }, { "epoch": 1.6142025436645762, "grad_norm": 2.140625, "learning_rate": 4.368357607234265e-06, "loss": 0.9511, "step": 8154 }, { "epoch": 1.6144024387196723, "grad_norm": 2.25, "learning_rate": 4.367312262463764e-06, "loss": 0.9389, "step": 8155 }, { "epoch": 1.6146023337747684, "grad_norm": 2.125, "learning_rate": 4.366266945797187e-06, "loss": 1.0277, "step": 8156 }, { "epoch": 1.6148022288298645, "grad_norm": 2.28125, "learning_rate": 4.3652216572809645e-06, "loss": 1.0851, "step": 8157 }, { "epoch": 1.6150021238849606, "grad_norm": 2.078125, "learning_rate": 4.3641763969615255e-06, "loss": 0.9183, "step": 8158 }, { "epoch": 1.6152020189400564, "grad_norm": 2.15625, "learning_rate": 4.363131164885306e-06, "loss": 0.9552, "step": 8159 }, { "epoch": 1.6154019139951525, "grad_norm": 2.171875, "learning_rate": 4.36208596109873e-06, "loss": 1.0006, "step": 8160 }, { "epoch": 1.6156018090502486, "grad_norm": 2.3125, "learning_rate": 4.361040785648227e-06, "loss": 1.0061, "step": 8161 }, { "epoch": 1.6158017041053447, "grad_norm": 2.015625, "learning_rate": 4.359995638580226e-06, "loss": 0.962, "step": 8162 }, { "epoch": 1.6160015991604406, "grad_norm": 2.25, "learning_rate": 4.358950519941149e-06, "loss": 1.0112, "step": 8163 }, { "epoch": 1.6162014942155367, "grad_norm": 2.171875, "learning_rate": 4.357905429777422e-06, "loss": 0.9687, "step": 8164 }, { "epoch": 1.6164013892706328, "grad_norm": 2.265625, "learning_rate": 4.356860368135468e-06, "loss": 1.0494, "step": 8165 }, { "epoch": 1.616601284325729, "grad_norm": 2.28125, "learning_rate": 4.3558153350617065e-06, "loss": 0.9395, "step": 8166 }, { "epoch": 1.616801179380825, "grad_norm": 2.0, "learning_rate": 4.354770330602559e-06, "loss": 0.9022, "step": 8167 }, { "epoch": 1.6170010744359211, "grad_norm": 2.28125, "learning_rate": 4.353725354804445e-06, "loss": 0.9848, "step": 8168 }, { "epoch": 1.6172009694910172, "grad_norm": 2.203125, "learning_rate": 4.3526804077137816e-06, "loss": 1.0458, "step": 8169 }, { "epoch": 1.6174008645461133, "grad_norm": 2.140625, "learning_rate": 4.351635489376986e-06, "loss": 0.9763, "step": 8170 }, { "epoch": 1.6176007596012094, "grad_norm": 2.125, "learning_rate": 4.350590599840472e-06, "loss": 0.9329, "step": 8171 }, { "epoch": 1.6178006546563055, "grad_norm": 2.15625, "learning_rate": 4.349545739150654e-06, "loss": 0.8845, "step": 8172 }, { "epoch": 1.6180005497114016, "grad_norm": 2.125, "learning_rate": 4.3485009073539445e-06, "loss": 0.9565, "step": 8173 }, { "epoch": 1.6182004447664977, "grad_norm": 2.125, "learning_rate": 4.3474561044967555e-06, "loss": 1.0195, "step": 8174 }, { "epoch": 1.6184003398215938, "grad_norm": 2.234375, "learning_rate": 4.346411330625496e-06, "loss": 0.9583, "step": 8175 }, { "epoch": 1.6186002348766897, "grad_norm": 2.296875, "learning_rate": 4.345366585786577e-06, "loss": 1.0182, "step": 8176 }, { "epoch": 1.6188001299317858, "grad_norm": 2.1875, "learning_rate": 4.344321870026404e-06, "loss": 1.0077, "step": 8177 }, { "epoch": 1.619000024986882, "grad_norm": 2.125, "learning_rate": 4.343277183391384e-06, "loss": 0.9333, "step": 8178 }, { "epoch": 1.619199920041978, "grad_norm": 2.0625, "learning_rate": 4.342232525927919e-06, "loss": 0.8814, "step": 8179 }, { "epoch": 1.619399815097074, "grad_norm": 2.328125, "learning_rate": 4.341187897682416e-06, "loss": 0.9667, "step": 8180 }, { "epoch": 1.61959971015217, "grad_norm": 2.203125, "learning_rate": 4.3401432987012775e-06, "loss": 1.0133, "step": 8181 }, { "epoch": 1.619799605207266, "grad_norm": 2.296875, "learning_rate": 4.339098729030902e-06, "loss": 0.913, "step": 8182 }, { "epoch": 1.6199995002623622, "grad_norm": 2.078125, "learning_rate": 4.3380541887176904e-06, "loss": 1.0063, "step": 8183 }, { "epoch": 1.6201993953174583, "grad_norm": 2.171875, "learning_rate": 4.337009677808042e-06, "loss": 0.9996, "step": 8184 }, { "epoch": 1.6203992903725544, "grad_norm": 2.25, "learning_rate": 4.335965196348352e-06, "loss": 0.9258, "step": 8185 }, { "epoch": 1.6205991854276505, "grad_norm": 2.03125, "learning_rate": 4.334920744385017e-06, "loss": 0.9185, "step": 8186 }, { "epoch": 1.6207990804827466, "grad_norm": 2.234375, "learning_rate": 4.3338763219644335e-06, "loss": 1.129, "step": 8187 }, { "epoch": 1.6209989755378427, "grad_norm": 2.078125, "learning_rate": 4.332831929132991e-06, "loss": 0.9931, "step": 8188 }, { "epoch": 1.6211988705929388, "grad_norm": 2.015625, "learning_rate": 4.331787565937082e-06, "loss": 0.8996, "step": 8189 }, { "epoch": 1.6213987656480349, "grad_norm": 2.078125, "learning_rate": 4.330743232423101e-06, "loss": 0.956, "step": 8190 }, { "epoch": 1.621598660703131, "grad_norm": 2.125, "learning_rate": 4.32969892863743e-06, "loss": 0.9862, "step": 8191 }, { "epoch": 1.621798555758227, "grad_norm": 2.21875, "learning_rate": 4.328654654626463e-06, "loss": 1.0203, "step": 8192 }, { "epoch": 1.6219984508133232, "grad_norm": 2.078125, "learning_rate": 4.3276104104365855e-06, "loss": 0.9044, "step": 8193 }, { "epoch": 1.622198345868419, "grad_norm": 2.03125, "learning_rate": 4.32656619611418e-06, "loss": 0.9614, "step": 8194 }, { "epoch": 1.6223982409235151, "grad_norm": 2.15625, "learning_rate": 4.3255220117056325e-06, "loss": 0.9691, "step": 8195 }, { "epoch": 1.6225981359786112, "grad_norm": 2.125, "learning_rate": 4.324477857257326e-06, "loss": 0.9605, "step": 8196 }, { "epoch": 1.6227980310337073, "grad_norm": 2.21875, "learning_rate": 4.323433732815641e-06, "loss": 1.0356, "step": 8197 }, { "epoch": 1.6229979260888032, "grad_norm": 2.109375, "learning_rate": 4.322389638426957e-06, "loss": 0.8856, "step": 8198 }, { "epoch": 1.6231978211438993, "grad_norm": 2.15625, "learning_rate": 4.321345574137652e-06, "loss": 0.9635, "step": 8199 }, { "epoch": 1.6233977161989954, "grad_norm": 2.15625, "learning_rate": 4.320301539994105e-06, "loss": 0.9722, "step": 8200 }, { "epoch": 1.6235976112540915, "grad_norm": 2.15625, "learning_rate": 4.319257536042692e-06, "loss": 0.9877, "step": 8201 }, { "epoch": 1.6237975063091876, "grad_norm": 2.21875, "learning_rate": 4.318213562329784e-06, "loss": 0.8835, "step": 8202 }, { "epoch": 1.6239974013642837, "grad_norm": 2.296875, "learning_rate": 4.317169618901758e-06, "loss": 0.9898, "step": 8203 }, { "epoch": 1.6241972964193798, "grad_norm": 2.125, "learning_rate": 4.316125705804986e-06, "loss": 0.8772, "step": 8204 }, { "epoch": 1.624397191474476, "grad_norm": 2.21875, "learning_rate": 4.315081823085835e-06, "loss": 1.0199, "step": 8205 }, { "epoch": 1.624597086529572, "grad_norm": 2.1875, "learning_rate": 4.3140379707906765e-06, "loss": 1.0246, "step": 8206 }, { "epoch": 1.6247969815846681, "grad_norm": 2.1875, "learning_rate": 4.312994148965879e-06, "loss": 0.912, "step": 8207 }, { "epoch": 1.6249968766397642, "grad_norm": 2.203125, "learning_rate": 4.311950357657807e-06, "loss": 0.9617, "step": 8208 }, { "epoch": 1.6251967716948603, "grad_norm": 2.1875, "learning_rate": 4.310906596912828e-06, "loss": 0.9523, "step": 8209 }, { "epoch": 1.6253966667499564, "grad_norm": 2.21875, "learning_rate": 4.309862866777303e-06, "loss": 0.9298, "step": 8210 }, { "epoch": 1.6255965618050523, "grad_norm": 2.109375, "learning_rate": 4.3088191672975965e-06, "loss": 0.9174, "step": 8211 }, { "epoch": 1.6257964568601484, "grad_norm": 2.046875, "learning_rate": 4.30777549852007e-06, "loss": 1.0472, "step": 8212 }, { "epoch": 1.6259963519152445, "grad_norm": 2.234375, "learning_rate": 4.30673186049108e-06, "loss": 1.0767, "step": 8213 }, { "epoch": 1.6261962469703406, "grad_norm": 2.09375, "learning_rate": 4.305688253256986e-06, "loss": 0.9619, "step": 8214 }, { "epoch": 1.6263961420254367, "grad_norm": 2.1875, "learning_rate": 4.304644676864149e-06, "loss": 1.0446, "step": 8215 }, { "epoch": 1.6265960370805326, "grad_norm": 2.15625, "learning_rate": 4.303601131358918e-06, "loss": 1.0276, "step": 8216 }, { "epoch": 1.6267959321356287, "grad_norm": 2.21875, "learning_rate": 4.302557616787652e-06, "loss": 0.953, "step": 8217 }, { "epoch": 1.6269958271907248, "grad_norm": 2.1875, "learning_rate": 4.3015141331967045e-06, "loss": 0.9412, "step": 8218 }, { "epoch": 1.6271957222458209, "grad_norm": 2.046875, "learning_rate": 4.300470680632421e-06, "loss": 0.9793, "step": 8219 }, { "epoch": 1.627395617300917, "grad_norm": 2.0, "learning_rate": 4.299427259141155e-06, "loss": 0.9653, "step": 8220 }, { "epoch": 1.627595512356013, "grad_norm": 2.1875, "learning_rate": 4.298383868769257e-06, "loss": 0.9435, "step": 8221 }, { "epoch": 1.6277954074111092, "grad_norm": 2.140625, "learning_rate": 4.297340509563072e-06, "loss": 0.9783, "step": 8222 }, { "epoch": 1.6279953024662053, "grad_norm": 2.1875, "learning_rate": 4.296297181568946e-06, "loss": 1.0285, "step": 8223 }, { "epoch": 1.6281951975213014, "grad_norm": 2.1875, "learning_rate": 4.295253884833225e-06, "loss": 0.9742, "step": 8224 }, { "epoch": 1.6283950925763975, "grad_norm": 2.203125, "learning_rate": 4.29421061940225e-06, "loss": 0.9601, "step": 8225 }, { "epoch": 1.6285949876314936, "grad_norm": 2.140625, "learning_rate": 4.293167385322364e-06, "loss": 0.9869, "step": 8226 }, { "epoch": 1.6287948826865897, "grad_norm": 2.125, "learning_rate": 4.292124182639909e-06, "loss": 0.9445, "step": 8227 }, { "epoch": 1.6289947777416858, "grad_norm": 2.21875, "learning_rate": 4.29108101140122e-06, "loss": 0.9883, "step": 8228 }, { "epoch": 1.6291946727967817, "grad_norm": 2.03125, "learning_rate": 4.290037871652639e-06, "loss": 0.9173, "step": 8229 }, { "epoch": 1.6293945678518778, "grad_norm": 2.203125, "learning_rate": 4.288994763440498e-06, "loss": 1.0868, "step": 8230 }, { "epoch": 1.6295944629069739, "grad_norm": 2.140625, "learning_rate": 4.2879516868111346e-06, "loss": 1.0022, "step": 8231 }, { "epoch": 1.62979435796207, "grad_norm": 2.0625, "learning_rate": 4.2869086418108815e-06, "loss": 0.9782, "step": 8232 }, { "epoch": 1.6299942530171658, "grad_norm": 2.15625, "learning_rate": 4.285865628486069e-06, "loss": 0.9706, "step": 8233 }, { "epoch": 1.630194148072262, "grad_norm": 2.140625, "learning_rate": 4.2848226468830295e-06, "loss": 0.9746, "step": 8234 }, { "epoch": 1.630394043127358, "grad_norm": 2.0625, "learning_rate": 4.283779697048093e-06, "loss": 0.9263, "step": 8235 }, { "epoch": 1.6305939381824541, "grad_norm": 2.328125, "learning_rate": 4.282736779027584e-06, "loss": 1.057, "step": 8236 }, { "epoch": 1.6307938332375502, "grad_norm": 2.078125, "learning_rate": 4.281693892867832e-06, "loss": 0.8657, "step": 8237 }, { "epoch": 1.6309937282926463, "grad_norm": 2.09375, "learning_rate": 4.28065103861516e-06, "loss": 0.9345, "step": 8238 }, { "epoch": 1.6311936233477424, "grad_norm": 2.234375, "learning_rate": 4.2796082163158914e-06, "loss": 0.9327, "step": 8239 }, { "epoch": 1.6313935184028385, "grad_norm": 2.1875, "learning_rate": 4.278565426016351e-06, "loss": 0.8978, "step": 8240 }, { "epoch": 1.6315934134579346, "grad_norm": 2.28125, "learning_rate": 4.277522667762855e-06, "loss": 0.9943, "step": 8241 }, { "epoch": 1.6317933085130307, "grad_norm": 2.09375, "learning_rate": 4.276479941601726e-06, "loss": 0.8993, "step": 8242 }, { "epoch": 1.6319932035681268, "grad_norm": 2.109375, "learning_rate": 4.275437247579281e-06, "loss": 1.0119, "step": 8243 }, { "epoch": 1.632193098623223, "grad_norm": 2.15625, "learning_rate": 4.274394585741835e-06, "loss": 1.0501, "step": 8244 }, { "epoch": 1.632392993678319, "grad_norm": 2.25, "learning_rate": 4.273351956135704e-06, "loss": 1.005, "step": 8245 }, { "epoch": 1.632592888733415, "grad_norm": 2.234375, "learning_rate": 4.272309358807203e-06, "loss": 1.0611, "step": 8246 }, { "epoch": 1.632792783788511, "grad_norm": 2.328125, "learning_rate": 4.271266793802641e-06, "loss": 1.0112, "step": 8247 }, { "epoch": 1.632992678843607, "grad_norm": 2.140625, "learning_rate": 4.270224261168332e-06, "loss": 1.0161, "step": 8248 }, { "epoch": 1.6331925738987032, "grad_norm": 2.1875, "learning_rate": 4.269181760950584e-06, "loss": 0.9708, "step": 8249 }, { "epoch": 1.6333924689537993, "grad_norm": 2.171875, "learning_rate": 4.268139293195702e-06, "loss": 0.9985, "step": 8250 }, { "epoch": 1.6335923640088952, "grad_norm": 2.171875, "learning_rate": 4.267096857949994e-06, "loss": 1.0267, "step": 8251 }, { "epoch": 1.6337922590639913, "grad_norm": 2.140625, "learning_rate": 4.266054455259767e-06, "loss": 0.9011, "step": 8252 }, { "epoch": 1.6339921541190874, "grad_norm": 2.171875, "learning_rate": 4.265012085171322e-06, "loss": 0.9273, "step": 8253 }, { "epoch": 1.6341920491741835, "grad_norm": 2.046875, "learning_rate": 4.263969747730961e-06, "loss": 0.8469, "step": 8254 }, { "epoch": 1.6343919442292796, "grad_norm": 2.234375, "learning_rate": 4.262927442984986e-06, "loss": 0.91, "step": 8255 }, { "epoch": 1.6345918392843757, "grad_norm": 2.15625, "learning_rate": 4.261885170979695e-06, "loss": 1.0107, "step": 8256 }, { "epoch": 1.6347917343394718, "grad_norm": 2.15625, "learning_rate": 4.260842931761385e-06, "loss": 0.9458, "step": 8257 }, { "epoch": 1.6349916293945679, "grad_norm": 2.109375, "learning_rate": 4.259800725376355e-06, "loss": 0.9503, "step": 8258 }, { "epoch": 1.635191524449664, "grad_norm": 2.09375, "learning_rate": 4.258758551870896e-06, "loss": 0.9429, "step": 8259 }, { "epoch": 1.63539141950476, "grad_norm": 2.21875, "learning_rate": 4.257716411291304e-06, "loss": 0.9112, "step": 8260 }, { "epoch": 1.6355913145598562, "grad_norm": 2.328125, "learning_rate": 4.256674303683869e-06, "loss": 1.0088, "step": 8261 }, { "epoch": 1.6357912096149523, "grad_norm": 2.109375, "learning_rate": 4.255632229094882e-06, "loss": 0.9294, "step": 8262 }, { "epoch": 1.6359911046700484, "grad_norm": 2.203125, "learning_rate": 4.254590187570633e-06, "loss": 0.9696, "step": 8263 }, { "epoch": 1.6361909997251443, "grad_norm": 2.296875, "learning_rate": 4.253548179157407e-06, "loss": 0.995, "step": 8264 }, { "epoch": 1.6363908947802404, "grad_norm": 2.171875, "learning_rate": 4.252506203901491e-06, "loss": 0.9733, "step": 8265 }, { "epoch": 1.6365907898353365, "grad_norm": 2.21875, "learning_rate": 4.251464261849171e-06, "loss": 0.9563, "step": 8266 }, { "epoch": 1.6367906848904326, "grad_norm": 2.234375, "learning_rate": 4.2504223530467275e-06, "loss": 1.0219, "step": 8267 }, { "epoch": 1.6369905799455284, "grad_norm": 2.234375, "learning_rate": 4.249380477540444e-06, "loss": 0.9858, "step": 8268 }, { "epoch": 1.6371904750006245, "grad_norm": 2.109375, "learning_rate": 4.248338635376599e-06, "loss": 0.9386, "step": 8269 }, { "epoch": 1.6373903700557206, "grad_norm": 2.25, "learning_rate": 4.247296826601471e-06, "loss": 1.0425, "step": 8270 }, { "epoch": 1.6375902651108167, "grad_norm": 2.34375, "learning_rate": 4.246255051261338e-06, "loss": 0.9948, "step": 8271 }, { "epoch": 1.6377901601659128, "grad_norm": 2.234375, "learning_rate": 4.245213309402475e-06, "loss": 0.9659, "step": 8272 }, { "epoch": 1.637990055221009, "grad_norm": 2.46875, "learning_rate": 4.244171601071156e-06, "loss": 1.1023, "step": 8273 }, { "epoch": 1.638189950276105, "grad_norm": 2.15625, "learning_rate": 4.243129926313654e-06, "loss": 0.9661, "step": 8274 }, { "epoch": 1.6383898453312011, "grad_norm": 2.125, "learning_rate": 4.242088285176239e-06, "loss": 0.9441, "step": 8275 }, { "epoch": 1.6385897403862972, "grad_norm": 2.171875, "learning_rate": 4.241046677705183e-06, "loss": 1.0038, "step": 8276 }, { "epoch": 1.6387896354413933, "grad_norm": 2.21875, "learning_rate": 4.240005103946751e-06, "loss": 0.9978, "step": 8277 }, { "epoch": 1.6389895304964894, "grad_norm": 2.109375, "learning_rate": 4.238963563947212e-06, "loss": 0.9425, "step": 8278 }, { "epoch": 1.6391894255515855, "grad_norm": 2.25, "learning_rate": 4.237922057752831e-06, "loss": 1.0408, "step": 8279 }, { "epoch": 1.6393893206066816, "grad_norm": 2.125, "learning_rate": 4.236880585409872e-06, "loss": 0.9364, "step": 8280 }, { "epoch": 1.6395892156617777, "grad_norm": 2.296875, "learning_rate": 4.235839146964592e-06, "loss": 0.9781, "step": 8281 }, { "epoch": 1.6397891107168736, "grad_norm": 2.25, "learning_rate": 4.234797742463258e-06, "loss": 1.0187, "step": 8282 }, { "epoch": 1.6399890057719697, "grad_norm": 2.15625, "learning_rate": 4.2337563719521254e-06, "loss": 0.9255, "step": 8283 }, { "epoch": 1.6401889008270658, "grad_norm": 2.1875, "learning_rate": 4.2327150354774536e-06, "loss": 0.9822, "step": 8284 }, { "epoch": 1.640388795882162, "grad_norm": 2.078125, "learning_rate": 4.231673733085497e-06, "loss": 0.9543, "step": 8285 }, { "epoch": 1.6405886909372578, "grad_norm": 2.09375, "learning_rate": 4.230632464822513e-06, "loss": 1.0042, "step": 8286 }, { "epoch": 1.6407885859923539, "grad_norm": 2.09375, "learning_rate": 4.229591230734751e-06, "loss": 0.8565, "step": 8287 }, { "epoch": 1.64098848104745, "grad_norm": 2.171875, "learning_rate": 4.228550030868465e-06, "loss": 0.9459, "step": 8288 }, { "epoch": 1.641188376102546, "grad_norm": 2.1875, "learning_rate": 4.227508865269904e-06, "loss": 1.0064, "step": 8289 }, { "epoch": 1.6413882711576422, "grad_norm": 2.25, "learning_rate": 4.226467733985316e-06, "loss": 0.999, "step": 8290 }, { "epoch": 1.6415881662127383, "grad_norm": 2.0625, "learning_rate": 4.22542663706095e-06, "loss": 0.8225, "step": 8291 }, { "epoch": 1.6417880612678344, "grad_norm": 2.109375, "learning_rate": 4.22438557454305e-06, "loss": 0.9228, "step": 8292 }, { "epoch": 1.6419879563229305, "grad_norm": 2.078125, "learning_rate": 4.223344546477858e-06, "loss": 0.9381, "step": 8293 }, { "epoch": 1.6421878513780266, "grad_norm": 2.109375, "learning_rate": 4.2223035529116205e-06, "loss": 1.005, "step": 8294 }, { "epoch": 1.6423877464331227, "grad_norm": 2.078125, "learning_rate": 4.2212625938905745e-06, "loss": 0.8917, "step": 8295 }, { "epoch": 1.6425876414882188, "grad_norm": 2.21875, "learning_rate": 4.220221669460962e-06, "loss": 1.0419, "step": 8296 }, { "epoch": 1.6427875365433149, "grad_norm": 2.078125, "learning_rate": 4.21918077966902e-06, "loss": 0.9797, "step": 8297 }, { "epoch": 1.642987431598411, "grad_norm": 2.109375, "learning_rate": 4.218139924560983e-06, "loss": 0.9871, "step": 8298 }, { "epoch": 1.6431873266535069, "grad_norm": 2.1875, "learning_rate": 4.217099104183089e-06, "loss": 0.9627, "step": 8299 }, { "epoch": 1.643387221708603, "grad_norm": 2.15625, "learning_rate": 4.216058318581567e-06, "loss": 0.9994, "step": 8300 }, { "epoch": 1.643587116763699, "grad_norm": 2.203125, "learning_rate": 4.215017567802651e-06, "loss": 0.9186, "step": 8301 }, { "epoch": 1.6437870118187952, "grad_norm": 2.25, "learning_rate": 4.213976851892573e-06, "loss": 0.9584, "step": 8302 }, { "epoch": 1.643986906873891, "grad_norm": 2.234375, "learning_rate": 4.212936170897557e-06, "loss": 1.0087, "step": 8303 }, { "epoch": 1.6441868019289871, "grad_norm": 2.21875, "learning_rate": 4.211895524863832e-06, "loss": 1.0012, "step": 8304 }, { "epoch": 1.6443866969840832, "grad_norm": 2.171875, "learning_rate": 4.210854913837625e-06, "loss": 0.9215, "step": 8305 }, { "epoch": 1.6445865920391793, "grad_norm": 2.140625, "learning_rate": 4.209814337865158e-06, "loss": 0.9373, "step": 8306 }, { "epoch": 1.6447864870942754, "grad_norm": 2.125, "learning_rate": 4.2087737969926545e-06, "loss": 0.8611, "step": 8307 }, { "epoch": 1.6449863821493715, "grad_norm": 2.03125, "learning_rate": 4.2077332912663335e-06, "loss": 0.8602, "step": 8308 }, { "epoch": 1.6451862772044676, "grad_norm": 2.1875, "learning_rate": 4.206692820732415e-06, "loss": 1.0522, "step": 8309 }, { "epoch": 1.6453861722595637, "grad_norm": 2.1875, "learning_rate": 4.205652385437118e-06, "loss": 0.9538, "step": 8310 }, { "epoch": 1.6455860673146598, "grad_norm": 2.234375, "learning_rate": 4.204611985426657e-06, "loss": 1.025, "step": 8311 }, { "epoch": 1.645785962369756, "grad_norm": 2.265625, "learning_rate": 4.203571620747246e-06, "loss": 1.0084, "step": 8312 }, { "epoch": 1.645985857424852, "grad_norm": 2.09375, "learning_rate": 4.202531291445098e-06, "loss": 0.9268, "step": 8313 }, { "epoch": 1.6461857524799481, "grad_norm": 2.15625, "learning_rate": 4.201490997566426e-06, "loss": 0.9529, "step": 8314 }, { "epoch": 1.6463856475350442, "grad_norm": 2.0625, "learning_rate": 4.200450739157437e-06, "loss": 1.0054, "step": 8315 }, { "epoch": 1.6465855425901403, "grad_norm": 2.0625, "learning_rate": 4.199410516264342e-06, "loss": 0.9759, "step": 8316 }, { "epoch": 1.6467854376452362, "grad_norm": 2.359375, "learning_rate": 4.198370328933346e-06, "loss": 1.0142, "step": 8317 }, { "epoch": 1.6469853327003323, "grad_norm": 2.109375, "learning_rate": 4.197330177210654e-06, "loss": 0.9481, "step": 8318 }, { "epoch": 1.6471852277554284, "grad_norm": 2.140625, "learning_rate": 4.19629006114247e-06, "loss": 0.9881, "step": 8319 }, { "epoch": 1.6473851228105245, "grad_norm": 2.234375, "learning_rate": 4.195249980774997e-06, "loss": 0.906, "step": 8320 }, { "epoch": 1.6475850178656204, "grad_norm": 2.21875, "learning_rate": 4.194209936154431e-06, "loss": 1.0061, "step": 8321 }, { "epoch": 1.6477849129207165, "grad_norm": 2.25, "learning_rate": 4.1931699273269765e-06, "loss": 0.9856, "step": 8322 }, { "epoch": 1.6479848079758126, "grad_norm": 2.296875, "learning_rate": 4.192129954338826e-06, "loss": 1.0957, "step": 8323 }, { "epoch": 1.6481847030309087, "grad_norm": 2.0625, "learning_rate": 4.191090017236177e-06, "loss": 0.9819, "step": 8324 }, { "epoch": 1.6483845980860048, "grad_norm": 2.375, "learning_rate": 4.190050116065224e-06, "loss": 1.0189, "step": 8325 }, { "epoch": 1.648584493141101, "grad_norm": 2.140625, "learning_rate": 4.1890102508721565e-06, "loss": 0.9359, "step": 8326 }, { "epoch": 1.648784388196197, "grad_norm": 2.234375, "learning_rate": 4.187970421703168e-06, "loss": 0.9377, "step": 8327 }, { "epoch": 1.648984283251293, "grad_norm": 2.171875, "learning_rate": 4.186930628604447e-06, "loss": 1.0538, "step": 8328 }, { "epoch": 1.6491841783063892, "grad_norm": 2.125, "learning_rate": 4.18589087162218e-06, "loss": 0.9485, "step": 8329 }, { "epoch": 1.6493840733614853, "grad_norm": 2.046875, "learning_rate": 4.184851150802554e-06, "loss": 0.9153, "step": 8330 }, { "epoch": 1.6495839684165814, "grad_norm": 2.28125, "learning_rate": 4.1838114661917525e-06, "loss": 1.0567, "step": 8331 }, { "epoch": 1.6497838634716775, "grad_norm": 2.046875, "learning_rate": 4.1827718178359586e-06, "loss": 0.9752, "step": 8332 }, { "epoch": 1.6499837585267736, "grad_norm": 2.078125, "learning_rate": 4.181732205781354e-06, "loss": 0.9755, "step": 8333 }, { "epoch": 1.6501836535818695, "grad_norm": 2.1875, "learning_rate": 4.180692630074116e-06, "loss": 1.0196, "step": 8334 }, { "epoch": 1.6503835486369656, "grad_norm": 2.125, "learning_rate": 4.179653090760424e-06, "loss": 0.8684, "step": 8335 }, { "epoch": 1.6505834436920617, "grad_norm": 2.171875, "learning_rate": 4.178613587886455e-06, "loss": 1.0431, "step": 8336 }, { "epoch": 1.6507833387471578, "grad_norm": 2.203125, "learning_rate": 4.177574121498382e-06, "loss": 0.9899, "step": 8337 }, { "epoch": 1.6509832338022539, "grad_norm": 2.15625, "learning_rate": 4.17653469164238e-06, "loss": 0.9906, "step": 8338 }, { "epoch": 1.6511831288573497, "grad_norm": 2.15625, "learning_rate": 4.175495298364618e-06, "loss": 0.9755, "step": 8339 }, { "epoch": 1.6513830239124458, "grad_norm": 2.125, "learning_rate": 4.174455941711266e-06, "loss": 0.99, "step": 8340 }, { "epoch": 1.651582918967542, "grad_norm": 2.1875, "learning_rate": 4.173416621728495e-06, "loss": 0.9646, "step": 8341 }, { "epoch": 1.651782814022638, "grad_norm": 2.359375, "learning_rate": 4.17237733846247e-06, "loss": 0.9579, "step": 8342 }, { "epoch": 1.6519827090777341, "grad_norm": 2.171875, "learning_rate": 4.1713380919593525e-06, "loss": 0.926, "step": 8343 }, { "epoch": 1.6521826041328302, "grad_norm": 2.1875, "learning_rate": 4.170298882265309e-06, "loss": 1.0695, "step": 8344 }, { "epoch": 1.6523824991879263, "grad_norm": 2.21875, "learning_rate": 4.169259709426502e-06, "loss": 0.988, "step": 8345 }, { "epoch": 1.6525823942430224, "grad_norm": 2.203125, "learning_rate": 4.168220573489088e-06, "loss": 0.9672, "step": 8346 }, { "epoch": 1.6527822892981185, "grad_norm": 2.25, "learning_rate": 4.167181474499228e-06, "loss": 0.9888, "step": 8347 }, { "epoch": 1.6529821843532146, "grad_norm": 2.125, "learning_rate": 4.166142412503078e-06, "loss": 1.0004, "step": 8348 }, { "epoch": 1.6531820794083107, "grad_norm": 2.09375, "learning_rate": 4.1651033875467935e-06, "loss": 0.8566, "step": 8349 }, { "epoch": 1.6533819744634068, "grad_norm": 2.28125, "learning_rate": 4.1640643996765275e-06, "loss": 1.0011, "step": 8350 }, { "epoch": 1.653581869518503, "grad_norm": 2.078125, "learning_rate": 4.163025448938431e-06, "loss": 0.9704, "step": 8351 }, { "epoch": 1.6537817645735988, "grad_norm": 2.109375, "learning_rate": 4.1619865353786535e-06, "loss": 0.9291, "step": 8352 }, { "epoch": 1.653981659628695, "grad_norm": 2.171875, "learning_rate": 4.160947659043347e-06, "loss": 0.9484, "step": 8353 }, { "epoch": 1.654181554683791, "grad_norm": 2.171875, "learning_rate": 4.159908819978654e-06, "loss": 0.9807, "step": 8354 }, { "epoch": 1.6543814497388871, "grad_norm": 2.046875, "learning_rate": 4.158870018230722e-06, "loss": 0.8579, "step": 8355 }, { "epoch": 1.654581344793983, "grad_norm": 2.078125, "learning_rate": 4.157831253845695e-06, "loss": 0.9327, "step": 8356 }, { "epoch": 1.654781239849079, "grad_norm": 2.25, "learning_rate": 4.156792526869712e-06, "loss": 0.9865, "step": 8357 }, { "epoch": 1.6549811349041752, "grad_norm": 2.0625, "learning_rate": 4.155753837348917e-06, "loss": 0.9142, "step": 8358 }, { "epoch": 1.6551810299592713, "grad_norm": 2.28125, "learning_rate": 4.154715185329445e-06, "loss": 0.9701, "step": 8359 }, { "epoch": 1.6553809250143674, "grad_norm": 2.109375, "learning_rate": 4.153676570857434e-06, "loss": 0.962, "step": 8360 }, { "epoch": 1.6555808200694635, "grad_norm": 2.71875, "learning_rate": 4.152637993979019e-06, "loss": 1.0536, "step": 8361 }, { "epoch": 1.6557807151245596, "grad_norm": 2.203125, "learning_rate": 4.151599454740335e-06, "loss": 1.0314, "step": 8362 }, { "epoch": 1.6559806101796557, "grad_norm": 2.078125, "learning_rate": 4.150560953187511e-06, "loss": 0.8813, "step": 8363 }, { "epoch": 1.6561805052347518, "grad_norm": 2.25, "learning_rate": 4.149522489366681e-06, "loss": 1.0078, "step": 8364 }, { "epoch": 1.656380400289848, "grad_norm": 2.234375, "learning_rate": 4.148484063323969e-06, "loss": 1.0463, "step": 8365 }, { "epoch": 1.656580295344944, "grad_norm": 1.984375, "learning_rate": 4.147445675105506e-06, "loss": 0.8978, "step": 8366 }, { "epoch": 1.65678019040004, "grad_norm": 2.09375, "learning_rate": 4.146407324757414e-06, "loss": 0.8991, "step": 8367 }, { "epoch": 1.6569800854551362, "grad_norm": 2.09375, "learning_rate": 4.145369012325816e-06, "loss": 0.9344, "step": 8368 }, { "epoch": 1.657179980510232, "grad_norm": 2.0625, "learning_rate": 4.1443307378568385e-06, "loss": 0.9609, "step": 8369 }, { "epoch": 1.6573798755653282, "grad_norm": 2.078125, "learning_rate": 4.143292501396596e-06, "loss": 1.0138, "step": 8370 }, { "epoch": 1.6575797706204243, "grad_norm": 2.3125, "learning_rate": 4.142254302991209e-06, "loss": 1.0597, "step": 8371 }, { "epoch": 1.6577796656755204, "grad_norm": 2.3125, "learning_rate": 4.141216142686795e-06, "loss": 0.9977, "step": 8372 }, { "epoch": 1.6579795607306165, "grad_norm": 2.140625, "learning_rate": 4.14017802052947e-06, "loss": 0.9488, "step": 8373 }, { "epoch": 1.6581794557857124, "grad_norm": 2.09375, "learning_rate": 4.139139936565343e-06, "loss": 0.976, "step": 8374 }, { "epoch": 1.6583793508408085, "grad_norm": 2.109375, "learning_rate": 4.138101890840528e-06, "loss": 0.915, "step": 8375 }, { "epoch": 1.6585792458959046, "grad_norm": 2.390625, "learning_rate": 4.137063883401137e-06, "loss": 0.9585, "step": 8376 }, { "epoch": 1.6587791409510007, "grad_norm": 2.15625, "learning_rate": 4.136025914293274e-06, "loss": 0.9513, "step": 8377 }, { "epoch": 1.6589790360060968, "grad_norm": 2.203125, "learning_rate": 4.1349879835630486e-06, "loss": 0.955, "step": 8378 }, { "epoch": 1.6591789310611929, "grad_norm": 2.265625, "learning_rate": 4.133950091256564e-06, "loss": 1.0102, "step": 8379 }, { "epoch": 1.659378826116289, "grad_norm": 2.171875, "learning_rate": 4.1329122374199234e-06, "loss": 0.9872, "step": 8380 }, { "epoch": 1.659578721171385, "grad_norm": 2.171875, "learning_rate": 4.13187442209923e-06, "loss": 0.9144, "step": 8381 }, { "epoch": 1.6597786162264812, "grad_norm": 2.234375, "learning_rate": 4.130836645340581e-06, "loss": 0.9445, "step": 8382 }, { "epoch": 1.6599785112815773, "grad_norm": 2.15625, "learning_rate": 4.129798907190076e-06, "loss": 0.9533, "step": 8383 }, { "epoch": 1.6601784063366734, "grad_norm": 2.203125, "learning_rate": 4.12876120769381e-06, "loss": 0.8938, "step": 8384 }, { "epoch": 1.6603783013917695, "grad_norm": 2.171875, "learning_rate": 4.127723546897879e-06, "loss": 0.9748, "step": 8385 }, { "epoch": 1.6605781964468656, "grad_norm": 2.125, "learning_rate": 4.126685924848373e-06, "loss": 0.9792, "step": 8386 }, { "epoch": 1.6607780915019614, "grad_norm": 2.109375, "learning_rate": 4.125648341591387e-06, "loss": 0.9547, "step": 8387 }, { "epoch": 1.6609779865570575, "grad_norm": 2.15625, "learning_rate": 4.124610797173008e-06, "loss": 0.9843, "step": 8388 }, { "epoch": 1.6611778816121536, "grad_norm": 2.25, "learning_rate": 4.123573291639323e-06, "loss": 0.9881, "step": 8389 }, { "epoch": 1.6613777766672497, "grad_norm": 2.234375, "learning_rate": 4.12253582503642e-06, "loss": 1.0074, "step": 8390 }, { "epoch": 1.6615776717223456, "grad_norm": 2.203125, "learning_rate": 4.12149839741038e-06, "loss": 0.9708, "step": 8391 }, { "epoch": 1.6617775667774417, "grad_norm": 2.203125, "learning_rate": 4.120461008807289e-06, "loss": 0.9391, "step": 8392 }, { "epoch": 1.6619774618325378, "grad_norm": 2.125, "learning_rate": 4.119423659273226e-06, "loss": 0.975, "step": 8393 }, { "epoch": 1.662177356887634, "grad_norm": 2.078125, "learning_rate": 4.1183863488542686e-06, "loss": 0.8772, "step": 8394 }, { "epoch": 1.66237725194273, "grad_norm": 2.1875, "learning_rate": 4.117349077596497e-06, "loss": 1.0748, "step": 8395 }, { "epoch": 1.662577146997826, "grad_norm": 2.15625, "learning_rate": 4.116311845545983e-06, "loss": 1.0191, "step": 8396 }, { "epoch": 1.6627770420529222, "grad_norm": 2.171875, "learning_rate": 4.115274652748806e-06, "loss": 0.9597, "step": 8397 }, { "epoch": 1.6629769371080183, "grad_norm": 2.21875, "learning_rate": 4.114237499251031e-06, "loss": 0.8679, "step": 8398 }, { "epoch": 1.6631768321631144, "grad_norm": 2.09375, "learning_rate": 4.113200385098733e-06, "loss": 0.9605, "step": 8399 }, { "epoch": 1.6633767272182105, "grad_norm": 2.078125, "learning_rate": 4.112163310337981e-06, "loss": 0.8945, "step": 8400 }, { "epoch": 1.6635766222733066, "grad_norm": 2.125, "learning_rate": 4.1111262750148375e-06, "loss": 0.8471, "step": 8401 }, { "epoch": 1.6637765173284027, "grad_norm": 2.171875, "learning_rate": 4.11008927917537e-06, "loss": 0.9585, "step": 8402 }, { "epoch": 1.6639764123834988, "grad_norm": 2.1875, "learning_rate": 4.109052322865643e-06, "loss": 1.0087, "step": 8403 }, { "epoch": 1.664176307438595, "grad_norm": 2.09375, "learning_rate": 4.1080154061317175e-06, "loss": 0.8538, "step": 8404 }, { "epoch": 1.6643762024936908, "grad_norm": 2.140625, "learning_rate": 4.1069785290196505e-06, "loss": 0.9302, "step": 8405 }, { "epoch": 1.6645760975487869, "grad_norm": 2.171875, "learning_rate": 4.105941691575502e-06, "loss": 1.0174, "step": 8406 }, { "epoch": 1.664775992603883, "grad_norm": 2.09375, "learning_rate": 4.104904893845328e-06, "loss": 1.0642, "step": 8407 }, { "epoch": 1.664975887658979, "grad_norm": 2.03125, "learning_rate": 4.103868135875184e-06, "loss": 0.8731, "step": 8408 }, { "epoch": 1.665175782714075, "grad_norm": 2.234375, "learning_rate": 4.102831417711121e-06, "loss": 1.0226, "step": 8409 }, { "epoch": 1.665375677769171, "grad_norm": 2.375, "learning_rate": 4.1017947393991905e-06, "loss": 1.0029, "step": 8410 }, { "epoch": 1.6655755728242672, "grad_norm": 2.203125, "learning_rate": 4.100758100985441e-06, "loss": 1.0093, "step": 8411 }, { "epoch": 1.6657754678793633, "grad_norm": 2.171875, "learning_rate": 4.099721502515922e-06, "loss": 1.0087, "step": 8412 }, { "epoch": 1.6659753629344594, "grad_norm": 2.109375, "learning_rate": 4.098684944036677e-06, "loss": 1.006, "step": 8413 }, { "epoch": 1.6661752579895555, "grad_norm": 2.21875, "learning_rate": 4.09764842559375e-06, "loss": 1.0076, "step": 8414 }, { "epoch": 1.6663751530446516, "grad_norm": 2.09375, "learning_rate": 4.096611947233185e-06, "loss": 0.9425, "step": 8415 }, { "epoch": 1.6665750480997477, "grad_norm": 2.046875, "learning_rate": 4.09557550900102e-06, "loss": 0.9662, "step": 8416 }, { "epoch": 1.6667749431548438, "grad_norm": 2.265625, "learning_rate": 4.094539110943295e-06, "loss": 1.0501, "step": 8417 }, { "epoch": 1.6669748382099399, "grad_norm": 2.203125, "learning_rate": 4.093502753106044e-06, "loss": 0.8633, "step": 8418 }, { "epoch": 1.667174733265036, "grad_norm": 2.203125, "learning_rate": 4.092466435535306e-06, "loss": 0.939, "step": 8419 }, { "epoch": 1.667374628320132, "grad_norm": 2.109375, "learning_rate": 4.091430158277112e-06, "loss": 1.0204, "step": 8420 }, { "epoch": 1.6675745233752282, "grad_norm": 2.3125, "learning_rate": 4.090393921377491e-06, "loss": 0.9776, "step": 8421 }, { "epoch": 1.667774418430324, "grad_norm": 2.171875, "learning_rate": 4.089357724882477e-06, "loss": 1.0552, "step": 8422 }, { "epoch": 1.6679743134854201, "grad_norm": 2.21875, "learning_rate": 4.088321568838095e-06, "loss": 0.9877, "step": 8423 }, { "epoch": 1.6681742085405162, "grad_norm": 2.15625, "learning_rate": 4.087285453290372e-06, "loss": 0.9206, "step": 8424 }, { "epoch": 1.6683741035956123, "grad_norm": 2.328125, "learning_rate": 4.08624937828533e-06, "loss": 0.9945, "step": 8425 }, { "epoch": 1.6685739986507082, "grad_norm": 2.125, "learning_rate": 4.085213343868995e-06, "loss": 0.9323, "step": 8426 }, { "epoch": 1.6687738937058043, "grad_norm": 2.109375, "learning_rate": 4.084177350087384e-06, "loss": 0.8495, "step": 8427 }, { "epoch": 1.6689737887609004, "grad_norm": 2.125, "learning_rate": 4.0831413969865195e-06, "loss": 0.9174, "step": 8428 }, { "epoch": 1.6691736838159965, "grad_norm": 2.046875, "learning_rate": 4.082105484612414e-06, "loss": 0.9647, "step": 8429 }, { "epoch": 1.6693735788710926, "grad_norm": 2.109375, "learning_rate": 4.0810696130110855e-06, "loss": 0.9339, "step": 8430 }, { "epoch": 1.6695734739261887, "grad_norm": 2.15625, "learning_rate": 4.080033782228547e-06, "loss": 0.9517, "step": 8431 }, { "epoch": 1.6697733689812848, "grad_norm": 2.09375, "learning_rate": 4.078997992310809e-06, "loss": 0.9333, "step": 8432 }, { "epoch": 1.669973264036381, "grad_norm": 2.4375, "learning_rate": 4.0779622433038825e-06, "loss": 1.0035, "step": 8433 }, { "epoch": 1.670173159091477, "grad_norm": 2.25, "learning_rate": 4.076926535253775e-06, "loss": 1.0483, "step": 8434 }, { "epoch": 1.670373054146573, "grad_norm": 2.203125, "learning_rate": 4.075890868206494e-06, "loss": 1.0404, "step": 8435 }, { "epoch": 1.6705729492016692, "grad_norm": 2.046875, "learning_rate": 4.074855242208039e-06, "loss": 0.94, "step": 8436 }, { "epoch": 1.6707728442567653, "grad_norm": 2.046875, "learning_rate": 4.0738196573044155e-06, "loss": 0.9711, "step": 8437 }, { "epoch": 1.6709727393118614, "grad_norm": 2.21875, "learning_rate": 4.072784113541625e-06, "loss": 0.953, "step": 8438 }, { "epoch": 1.6711726343669575, "grad_norm": 2.125, "learning_rate": 4.0717486109656636e-06, "loss": 0.9248, "step": 8439 }, { "epoch": 1.6713725294220534, "grad_norm": 2.21875, "learning_rate": 4.070713149622532e-06, "loss": 0.9457, "step": 8440 }, { "epoch": 1.6715724244771495, "grad_norm": 2.125, "learning_rate": 4.06967772955822e-06, "loss": 0.9245, "step": 8441 }, { "epoch": 1.6717723195322456, "grad_norm": 2.078125, "learning_rate": 4.068642350818726e-06, "loss": 0.9103, "step": 8442 }, { "epoch": 1.6719722145873417, "grad_norm": 2.1875, "learning_rate": 4.067607013450039e-06, "loss": 1.0159, "step": 8443 }, { "epoch": 1.6721721096424376, "grad_norm": 2.15625, "learning_rate": 4.066571717498149e-06, "loss": 0.9965, "step": 8444 }, { "epoch": 1.6723720046975337, "grad_norm": 2.21875, "learning_rate": 4.065536463009043e-06, "loss": 1.0675, "step": 8445 }, { "epoch": 1.6725718997526298, "grad_norm": 2.046875, "learning_rate": 4.064501250028708e-06, "loss": 0.9118, "step": 8446 }, { "epoch": 1.6727717948077259, "grad_norm": 2.203125, "learning_rate": 4.063466078603127e-06, "loss": 0.9403, "step": 8447 }, { "epoch": 1.672971689862822, "grad_norm": 2.125, "learning_rate": 4.062430948778285e-06, "loss": 0.9674, "step": 8448 }, { "epoch": 1.673171584917918, "grad_norm": 2.203125, "learning_rate": 4.061395860600157e-06, "loss": 0.933, "step": 8449 }, { "epoch": 1.6733714799730142, "grad_norm": 2.21875, "learning_rate": 4.060360814114725e-06, "loss": 1.0244, "step": 8450 }, { "epoch": 1.6735713750281103, "grad_norm": 2.21875, "learning_rate": 4.059325809367967e-06, "loss": 1.1114, "step": 8451 }, { "epoch": 1.6737712700832064, "grad_norm": 2.28125, "learning_rate": 4.058290846405856e-06, "loss": 1.0581, "step": 8452 }, { "epoch": 1.6739711651383025, "grad_norm": 2.09375, "learning_rate": 4.0572559252743635e-06, "loss": 0.8494, "step": 8453 }, { "epoch": 1.6741710601933986, "grad_norm": 2.140625, "learning_rate": 4.056221046019464e-06, "loss": 0.967, "step": 8454 }, { "epoch": 1.6743709552484947, "grad_norm": 2.34375, "learning_rate": 4.055186208687123e-06, "loss": 0.9342, "step": 8455 }, { "epoch": 1.6745708503035908, "grad_norm": 2.125, "learning_rate": 4.0541514133233115e-06, "loss": 0.8761, "step": 8456 }, { "epoch": 1.6747707453586866, "grad_norm": 2.234375, "learning_rate": 4.053116659973991e-06, "loss": 0.9926, "step": 8457 }, { "epoch": 1.6749706404137827, "grad_norm": 2.15625, "learning_rate": 4.052081948685128e-06, "loss": 0.9398, "step": 8458 }, { "epoch": 1.6751705354688788, "grad_norm": 2.140625, "learning_rate": 4.051047279502686e-06, "loss": 1.0457, "step": 8459 }, { "epoch": 1.675370430523975, "grad_norm": 2.1875, "learning_rate": 4.05001265247262e-06, "loss": 0.9247, "step": 8460 }, { "epoch": 1.675570325579071, "grad_norm": 2.21875, "learning_rate": 4.04897806764089e-06, "loss": 0.9881, "step": 8461 }, { "epoch": 1.675770220634167, "grad_norm": 2.140625, "learning_rate": 4.047943525053455e-06, "loss": 0.9475, "step": 8462 }, { "epoch": 1.675970115689263, "grad_norm": 2.140625, "learning_rate": 4.046909024756265e-06, "loss": 0.9269, "step": 8463 }, { "epoch": 1.6761700107443591, "grad_norm": 2.578125, "learning_rate": 4.045874566795275e-06, "loss": 1.0492, "step": 8464 }, { "epoch": 1.6763699057994552, "grad_norm": 2.15625, "learning_rate": 4.044840151216435e-06, "loss": 0.9486, "step": 8465 }, { "epoch": 1.6765698008545513, "grad_norm": 2.125, "learning_rate": 4.043805778065696e-06, "loss": 0.9758, "step": 8466 }, { "epoch": 1.6767696959096474, "grad_norm": 2.171875, "learning_rate": 4.042771447388998e-06, "loss": 0.8804, "step": 8467 }, { "epoch": 1.6769695909647435, "grad_norm": 2.171875, "learning_rate": 4.041737159232293e-06, "loss": 0.9541, "step": 8468 }, { "epoch": 1.6771694860198396, "grad_norm": 2.125, "learning_rate": 4.040702913641519e-06, "loss": 0.9378, "step": 8469 }, { "epoch": 1.6773693810749357, "grad_norm": 2.125, "learning_rate": 4.03966871066262e-06, "loss": 0.949, "step": 8470 }, { "epoch": 1.6775692761300318, "grad_norm": 2.09375, "learning_rate": 4.038634550341534e-06, "loss": 0.984, "step": 8471 }, { "epoch": 1.677769171185128, "grad_norm": 2.125, "learning_rate": 4.037600432724199e-06, "loss": 0.9495, "step": 8472 }, { "epoch": 1.677969066240224, "grad_norm": 2.296875, "learning_rate": 4.036566357856549e-06, "loss": 1.0034, "step": 8473 }, { "epoch": 1.6781689612953201, "grad_norm": 2.09375, "learning_rate": 4.035532325784519e-06, "loss": 0.912, "step": 8474 }, { "epoch": 1.678368856350416, "grad_norm": 2.140625, "learning_rate": 4.034498336554041e-06, "loss": 0.9738, "step": 8475 }, { "epoch": 1.678568751405512, "grad_norm": 2.09375, "learning_rate": 4.033464390211043e-06, "loss": 0.9138, "step": 8476 }, { "epoch": 1.6787686464606082, "grad_norm": 2.046875, "learning_rate": 4.032430486801454e-06, "loss": 1.0189, "step": 8477 }, { "epoch": 1.6789685415157043, "grad_norm": 2.3125, "learning_rate": 4.031396626371198e-06, "loss": 0.9922, "step": 8478 }, { "epoch": 1.6791684365708002, "grad_norm": 2.203125, "learning_rate": 4.030362808966202e-06, "loss": 1.0465, "step": 8479 }, { "epoch": 1.6793683316258963, "grad_norm": 2.140625, "learning_rate": 4.029329034632386e-06, "loss": 0.9951, "step": 8480 }, { "epoch": 1.6795682266809924, "grad_norm": 2.25, "learning_rate": 4.02829530341567e-06, "loss": 1.0458, "step": 8481 }, { "epoch": 1.6797681217360885, "grad_norm": 2.09375, "learning_rate": 4.027261615361973e-06, "loss": 0.9252, "step": 8482 }, { "epoch": 1.6799680167911846, "grad_norm": 2.21875, "learning_rate": 4.026227970517212e-06, "loss": 0.982, "step": 8483 }, { "epoch": 1.6801679118462807, "grad_norm": 2.046875, "learning_rate": 4.0251943689273e-06, "loss": 0.9143, "step": 8484 }, { "epoch": 1.6803678069013768, "grad_norm": 2.09375, "learning_rate": 4.02416081063815e-06, "loss": 0.9427, "step": 8485 }, { "epoch": 1.6805677019564729, "grad_norm": 2.328125, "learning_rate": 4.0231272956956725e-06, "loss": 1.0324, "step": 8486 }, { "epoch": 1.680767597011569, "grad_norm": 2.15625, "learning_rate": 4.022093824145777e-06, "loss": 0.9414, "step": 8487 }, { "epoch": 1.680967492066665, "grad_norm": 2.1875, "learning_rate": 4.021060396034369e-06, "loss": 0.9265, "step": 8488 }, { "epoch": 1.6811673871217612, "grad_norm": 2.140625, "learning_rate": 4.020027011407353e-06, "loss": 0.9412, "step": 8489 }, { "epoch": 1.6813672821768573, "grad_norm": 2.109375, "learning_rate": 4.018993670310633e-06, "loss": 0.9536, "step": 8490 }, { "epoch": 1.6815671772319534, "grad_norm": 2.203125, "learning_rate": 4.017960372790109e-06, "loss": 0.9672, "step": 8491 }, { "epoch": 1.6817670722870492, "grad_norm": 2.03125, "learning_rate": 4.01692711889168e-06, "loss": 0.8579, "step": 8492 }, { "epoch": 1.6819669673421453, "grad_norm": 2.171875, "learning_rate": 4.015893908661245e-06, "loss": 0.9421, "step": 8493 }, { "epoch": 1.6821668623972414, "grad_norm": 2.09375, "learning_rate": 4.014860742144696e-06, "loss": 0.9703, "step": 8494 }, { "epoch": 1.6823667574523375, "grad_norm": 2.046875, "learning_rate": 4.013827619387928e-06, "loss": 0.9083, "step": 8495 }, { "epoch": 1.6825666525074336, "grad_norm": 2.15625, "learning_rate": 4.012794540436832e-06, "loss": 0.9675, "step": 8496 }, { "epoch": 1.6827665475625295, "grad_norm": 2.1875, "learning_rate": 4.0117615053372986e-06, "loss": 1.0955, "step": 8497 }, { "epoch": 1.6829664426176256, "grad_norm": 2.140625, "learning_rate": 4.010728514135211e-06, "loss": 0.9257, "step": 8498 }, { "epoch": 1.6831663376727217, "grad_norm": 2.125, "learning_rate": 4.009695566876459e-06, "loss": 0.9684, "step": 8499 }, { "epoch": 1.6833662327278178, "grad_norm": 2.1875, "learning_rate": 4.008662663606923e-06, "loss": 0.9572, "step": 8500 }, { "epoch": 1.683566127782914, "grad_norm": 2.046875, "learning_rate": 4.007629804372486e-06, "loss": 0.9897, "step": 8501 }, { "epoch": 1.68376602283801, "grad_norm": 2.296875, "learning_rate": 4.006596989219027e-06, "loss": 0.9713, "step": 8502 }, { "epoch": 1.6839659178931061, "grad_norm": 2.03125, "learning_rate": 4.005564218192424e-06, "loss": 0.8463, "step": 8503 }, { "epoch": 1.6841658129482022, "grad_norm": 2.1875, "learning_rate": 4.004531491338551e-06, "loss": 0.9783, "step": 8504 }, { "epoch": 1.6843657080032983, "grad_norm": 2.15625, "learning_rate": 4.0034988087032845e-06, "loss": 0.993, "step": 8505 }, { "epoch": 1.6845656030583944, "grad_norm": 2.21875, "learning_rate": 4.002466170332493e-06, "loss": 0.8878, "step": 8506 }, { "epoch": 1.6847654981134905, "grad_norm": 2.140625, "learning_rate": 4.001433576272049e-06, "loss": 1.0262, "step": 8507 }, { "epoch": 1.6849653931685866, "grad_norm": 2.140625, "learning_rate": 4.000401026567818e-06, "loss": 0.9601, "step": 8508 }, { "epoch": 1.6851652882236827, "grad_norm": 2.109375, "learning_rate": 3.999368521265667e-06, "loss": 1.006, "step": 8509 }, { "epoch": 1.6853651832787786, "grad_norm": 2.1875, "learning_rate": 3.998336060411459e-06, "loss": 0.8779, "step": 8510 }, { "epoch": 1.6855650783338747, "grad_norm": 2.25, "learning_rate": 3.997303644051056e-06, "loss": 1.0226, "step": 8511 }, { "epoch": 1.6857649733889708, "grad_norm": 2.203125, "learning_rate": 3.9962712722303186e-06, "loss": 0.9109, "step": 8512 }, { "epoch": 1.685964868444067, "grad_norm": 2.203125, "learning_rate": 3.995238944995105e-06, "loss": 0.9653, "step": 8513 }, { "epoch": 1.6861647634991628, "grad_norm": 2.15625, "learning_rate": 3.994206662391269e-06, "loss": 0.9835, "step": 8514 }, { "epoch": 1.6863646585542589, "grad_norm": 2.1875, "learning_rate": 3.993174424464665e-06, "loss": 0.9628, "step": 8515 }, { "epoch": 1.686564553609355, "grad_norm": 2.078125, "learning_rate": 3.992142231261147e-06, "loss": 0.9007, "step": 8516 }, { "epoch": 1.686764448664451, "grad_norm": 2.359375, "learning_rate": 3.991110082826562e-06, "loss": 0.9254, "step": 8517 }, { "epoch": 1.6869643437195472, "grad_norm": 2.234375, "learning_rate": 3.990077979206761e-06, "loss": 1.0035, "step": 8518 }, { "epoch": 1.6871642387746433, "grad_norm": 2.109375, "learning_rate": 3.989045920447587e-06, "loss": 0.945, "step": 8519 }, { "epoch": 1.6873641338297394, "grad_norm": 2.171875, "learning_rate": 3.988013906594886e-06, "loss": 0.8934, "step": 8520 }, { "epoch": 1.6875640288848355, "grad_norm": 2.15625, "learning_rate": 3.9869819376944985e-06, "loss": 0.9832, "step": 8521 }, { "epoch": 1.6877639239399316, "grad_norm": 2.171875, "learning_rate": 3.985950013792265e-06, "loss": 0.931, "step": 8522 }, { "epoch": 1.6879638189950277, "grad_norm": 2.171875, "learning_rate": 3.984918134934024e-06, "loss": 0.9826, "step": 8523 }, { "epoch": 1.6881637140501238, "grad_norm": 2.171875, "learning_rate": 3.983886301165611e-06, "loss": 0.9445, "step": 8524 }, { "epoch": 1.6883636091052199, "grad_norm": 2.109375, "learning_rate": 3.9828545125328606e-06, "loss": 0.9148, "step": 8525 }, { "epoch": 1.688563504160316, "grad_norm": 2.25, "learning_rate": 3.9818227690816045e-06, "loss": 1.0161, "step": 8526 }, { "epoch": 1.6887633992154119, "grad_norm": 2.15625, "learning_rate": 3.98079107085767e-06, "loss": 0.9282, "step": 8527 }, { "epoch": 1.688963294270508, "grad_norm": 2.015625, "learning_rate": 3.979759417906891e-06, "loss": 0.9579, "step": 8528 }, { "epoch": 1.689163189325604, "grad_norm": 2.3125, "learning_rate": 3.978727810275087e-06, "loss": 0.9937, "step": 8529 }, { "epoch": 1.6893630843807002, "grad_norm": 2.15625, "learning_rate": 3.977696248008086e-06, "loss": 1.0588, "step": 8530 }, { "epoch": 1.6895629794357963, "grad_norm": 2.21875, "learning_rate": 3.976664731151707e-06, "loss": 1.0032, "step": 8531 }, { "epoch": 1.6897628744908921, "grad_norm": 2.25, "learning_rate": 3.975633259751771e-06, "loss": 1.0594, "step": 8532 }, { "epoch": 1.6899627695459882, "grad_norm": 2.1875, "learning_rate": 3.974601833854097e-06, "loss": 0.9511, "step": 8533 }, { "epoch": 1.6901626646010843, "grad_norm": 2.125, "learning_rate": 3.9735704535045e-06, "loss": 0.9048, "step": 8534 }, { "epoch": 1.6903625596561804, "grad_norm": 2.15625, "learning_rate": 3.9725391187487935e-06, "loss": 0.9463, "step": 8535 }, { "epoch": 1.6905624547112765, "grad_norm": 2.078125, "learning_rate": 3.97150782963279e-06, "loss": 0.9615, "step": 8536 }, { "epoch": 1.6907623497663726, "grad_norm": 2.203125, "learning_rate": 3.9704765862022985e-06, "loss": 0.9832, "step": 8537 }, { "epoch": 1.6909622448214687, "grad_norm": 2.203125, "learning_rate": 3.969445388503128e-06, "loss": 0.8743, "step": 8538 }, { "epoch": 1.6911621398765648, "grad_norm": 2.078125, "learning_rate": 3.968414236581083e-06, "loss": 0.8841, "step": 8539 }, { "epoch": 1.691362034931661, "grad_norm": 2.046875, "learning_rate": 3.967383130481966e-06, "loss": 0.9036, "step": 8540 }, { "epoch": 1.691561929986757, "grad_norm": 2.203125, "learning_rate": 3.966352070251582e-06, "loss": 0.8706, "step": 8541 }, { "epoch": 1.6917618250418531, "grad_norm": 2.140625, "learning_rate": 3.965321055935727e-06, "loss": 0.996, "step": 8542 }, { "epoch": 1.6919617200969492, "grad_norm": 2.109375, "learning_rate": 3.9642900875802e-06, "loss": 0.9702, "step": 8543 }, { "epoch": 1.6921616151520453, "grad_norm": 2.109375, "learning_rate": 3.9632591652307985e-06, "loss": 0.9192, "step": 8544 }, { "epoch": 1.6923615102071412, "grad_norm": 1.9921875, "learning_rate": 3.9622282889333135e-06, "loss": 0.9064, "step": 8545 }, { "epoch": 1.6925614052622373, "grad_norm": 2.015625, "learning_rate": 3.9611974587335375e-06, "loss": 0.9418, "step": 8546 }, { "epoch": 1.6927613003173334, "grad_norm": 2.234375, "learning_rate": 3.9601666746772586e-06, "loss": 0.973, "step": 8547 }, { "epoch": 1.6929611953724295, "grad_norm": 2.21875, "learning_rate": 3.959135936810265e-06, "loss": 1.0562, "step": 8548 }, { "epoch": 1.6931610904275254, "grad_norm": 2.25, "learning_rate": 3.958105245178342e-06, "loss": 0.9999, "step": 8549 }, { "epoch": 1.6933609854826215, "grad_norm": 2.15625, "learning_rate": 3.957074599827272e-06, "loss": 0.9098, "step": 8550 }, { "epoch": 1.6935608805377176, "grad_norm": 2.09375, "learning_rate": 3.956044000802838e-06, "loss": 0.9153, "step": 8551 }, { "epoch": 1.6937607755928137, "grad_norm": 2.25, "learning_rate": 3.955013448150818e-06, "loss": 1.0571, "step": 8552 }, { "epoch": 1.6939606706479098, "grad_norm": 2.03125, "learning_rate": 3.953982941916988e-06, "loss": 0.9183, "step": 8553 }, { "epoch": 1.6941605657030059, "grad_norm": 2.125, "learning_rate": 3.952952482147125e-06, "loss": 0.9679, "step": 8554 }, { "epoch": 1.694360460758102, "grad_norm": 2.109375, "learning_rate": 3.9519220688870004e-06, "loss": 0.9365, "step": 8555 }, { "epoch": 1.694560355813198, "grad_norm": 2.046875, "learning_rate": 3.950891702182386e-06, "loss": 0.8167, "step": 8556 }, { "epoch": 1.6947602508682942, "grad_norm": 2.171875, "learning_rate": 3.94986138207905e-06, "loss": 0.9672, "step": 8557 }, { "epoch": 1.6949601459233903, "grad_norm": 2.15625, "learning_rate": 3.948831108622759e-06, "loss": 1.0135, "step": 8558 }, { "epoch": 1.6951600409784864, "grad_norm": 2.09375, "learning_rate": 3.94780088185928e-06, "loss": 0.941, "step": 8559 }, { "epoch": 1.6953599360335825, "grad_norm": 2.171875, "learning_rate": 3.946770701834372e-06, "loss": 1.034, "step": 8560 }, { "epoch": 1.6955598310886786, "grad_norm": 2.359375, "learning_rate": 3.945740568593796e-06, "loss": 1.0687, "step": 8561 }, { "epoch": 1.6957597261437747, "grad_norm": 2.109375, "learning_rate": 3.944710482183312e-06, "loss": 0.9726, "step": 8562 }, { "epoch": 1.6959596211988706, "grad_norm": 2.25, "learning_rate": 3.943680442648675e-06, "loss": 1.0225, "step": 8563 }, { "epoch": 1.6961595162539667, "grad_norm": 2.109375, "learning_rate": 3.9426504500356415e-06, "loss": 0.9074, "step": 8564 }, { "epoch": 1.6963594113090628, "grad_norm": 2.078125, "learning_rate": 3.94162050438996e-06, "loss": 0.9779, "step": 8565 }, { "epoch": 1.6965593063641589, "grad_norm": 2.046875, "learning_rate": 3.940590605757383e-06, "loss": 1.0321, "step": 8566 }, { "epoch": 1.6967592014192547, "grad_norm": 2.15625, "learning_rate": 3.93956075418366e-06, "loss": 1.026, "step": 8567 }, { "epoch": 1.6969590964743508, "grad_norm": 2.1875, "learning_rate": 3.938530949714533e-06, "loss": 1.0925, "step": 8568 }, { "epoch": 1.697158991529447, "grad_norm": 2.15625, "learning_rate": 3.937501192395749e-06, "loss": 0.975, "step": 8569 }, { "epoch": 1.697358886584543, "grad_norm": 2.09375, "learning_rate": 3.936471482273048e-06, "loss": 0.8929, "step": 8570 }, { "epoch": 1.6975587816396391, "grad_norm": 2.046875, "learning_rate": 3.935441819392169e-06, "loss": 0.9929, "step": 8571 }, { "epoch": 1.6977586766947352, "grad_norm": 2.03125, "learning_rate": 3.934412203798853e-06, "loss": 0.8692, "step": 8572 }, { "epoch": 1.6979585717498313, "grad_norm": 2.125, "learning_rate": 3.9333826355388325e-06, "loss": 1.0455, "step": 8573 }, { "epoch": 1.6981584668049274, "grad_norm": 2.203125, "learning_rate": 3.93235311465784e-06, "loss": 1.0355, "step": 8574 }, { "epoch": 1.6983583618600235, "grad_norm": 2.109375, "learning_rate": 3.93132364120161e-06, "loss": 0.8788, "step": 8575 }, { "epoch": 1.6985582569151196, "grad_norm": 2.171875, "learning_rate": 3.930294215215868e-06, "loss": 1.0396, "step": 8576 }, { "epoch": 1.6987581519702157, "grad_norm": 2.140625, "learning_rate": 3.929264836746345e-06, "loss": 1.0266, "step": 8577 }, { "epoch": 1.6989580470253118, "grad_norm": 2.15625, "learning_rate": 3.928235505838762e-06, "loss": 1.0414, "step": 8578 }, { "epoch": 1.699157942080408, "grad_norm": 2.0625, "learning_rate": 3.927206222538843e-06, "loss": 0.8899, "step": 8579 }, { "epoch": 1.6993578371355038, "grad_norm": 2.046875, "learning_rate": 3.92617698689231e-06, "loss": 0.9934, "step": 8580 }, { "epoch": 1.6995577321906, "grad_norm": 2.0625, "learning_rate": 3.92514779894488e-06, "loss": 0.9783, "step": 8581 }, { "epoch": 1.699757627245696, "grad_norm": 2.1875, "learning_rate": 3.924118658742269e-06, "loss": 1.0363, "step": 8582 }, { "epoch": 1.6999575223007921, "grad_norm": 2.09375, "learning_rate": 3.923089566330195e-06, "loss": 1.0069, "step": 8583 }, { "epoch": 1.7001574173558882, "grad_norm": 2.171875, "learning_rate": 3.922060521754365e-06, "loss": 0.9617, "step": 8584 }, { "epoch": 1.700357312410984, "grad_norm": 2.0, "learning_rate": 3.921031525060493e-06, "loss": 0.8853, "step": 8585 }, { "epoch": 1.7005572074660802, "grad_norm": 2.359375, "learning_rate": 3.920002576294284e-06, "loss": 0.9252, "step": 8586 }, { "epoch": 1.7007571025211763, "grad_norm": 2.109375, "learning_rate": 3.918973675501446e-06, "loss": 0.9564, "step": 8587 }, { "epoch": 1.7009569975762724, "grad_norm": 2.234375, "learning_rate": 3.917944822727682e-06, "loss": 0.9961, "step": 8588 }, { "epoch": 1.7011568926313685, "grad_norm": 2.25, "learning_rate": 3.916916018018696e-06, "loss": 1.062, "step": 8589 }, { "epoch": 1.7013567876864646, "grad_norm": 2.0625, "learning_rate": 3.915887261420181e-06, "loss": 0.9978, "step": 8590 }, { "epoch": 1.7015566827415607, "grad_norm": 2.265625, "learning_rate": 3.9148585529778385e-06, "loss": 1.0687, "step": 8591 }, { "epoch": 1.7017565777966568, "grad_norm": 2.015625, "learning_rate": 3.913829892737364e-06, "loss": 0.8257, "step": 8592 }, { "epoch": 1.7019564728517529, "grad_norm": 2.09375, "learning_rate": 3.912801280744449e-06, "loss": 0.9383, "step": 8593 }, { "epoch": 1.702156367906849, "grad_norm": 2.09375, "learning_rate": 3.911772717044786e-06, "loss": 1.0214, "step": 8594 }, { "epoch": 1.702356262961945, "grad_norm": 2.0625, "learning_rate": 3.910744201684062e-06, "loss": 0.956, "step": 8595 }, { "epoch": 1.7025561580170412, "grad_norm": 2.125, "learning_rate": 3.909715734707964e-06, "loss": 0.9843, "step": 8596 }, { "epoch": 1.7027560530721373, "grad_norm": 2.140625, "learning_rate": 3.908687316162178e-06, "loss": 0.9228, "step": 8597 }, { "epoch": 1.7029559481272332, "grad_norm": 2.15625, "learning_rate": 3.907658946092383e-06, "loss": 0.963, "step": 8598 }, { "epoch": 1.7031558431823293, "grad_norm": 2.1875, "learning_rate": 3.906630624544261e-06, "loss": 0.9671, "step": 8599 }, { "epoch": 1.7033557382374254, "grad_norm": 2.0625, "learning_rate": 3.905602351563492e-06, "loss": 1.0168, "step": 8600 }, { "epoch": 1.7035556332925215, "grad_norm": 2.21875, "learning_rate": 3.904574127195747e-06, "loss": 0.9687, "step": 8601 }, { "epoch": 1.7037555283476173, "grad_norm": 2.15625, "learning_rate": 3.903545951486704e-06, "loss": 1.0447, "step": 8602 }, { "epoch": 1.7039554234027134, "grad_norm": 2.140625, "learning_rate": 3.902517824482033e-06, "loss": 0.9336, "step": 8603 }, { "epoch": 1.7041553184578095, "grad_norm": 2.046875, "learning_rate": 3.901489746227402e-06, "loss": 0.898, "step": 8604 }, { "epoch": 1.7043552135129056, "grad_norm": 2.109375, "learning_rate": 3.900461716768479e-06, "loss": 0.9529, "step": 8605 }, { "epoch": 1.7045551085680017, "grad_norm": 2.109375, "learning_rate": 3.89943373615093e-06, "loss": 0.8709, "step": 8606 }, { "epoch": 1.7047550036230978, "grad_norm": 2.078125, "learning_rate": 3.8984058044204164e-06, "loss": 1.0679, "step": 8607 }, { "epoch": 1.704954898678194, "grad_norm": 2.15625, "learning_rate": 3.8973779216226e-06, "loss": 0.9793, "step": 8608 }, { "epoch": 1.70515479373329, "grad_norm": 2.203125, "learning_rate": 3.8963500878031376e-06, "loss": 0.9541, "step": 8609 }, { "epoch": 1.7053546887883861, "grad_norm": 2.125, "learning_rate": 3.895322303007686e-06, "loss": 0.9899, "step": 8610 }, { "epoch": 1.7055545838434822, "grad_norm": 2.359375, "learning_rate": 3.894294567281901e-06, "loss": 0.979, "step": 8611 }, { "epoch": 1.7057544788985783, "grad_norm": 2.140625, "learning_rate": 3.893266880671433e-06, "loss": 0.9598, "step": 8612 }, { "epoch": 1.7059543739536744, "grad_norm": 2.328125, "learning_rate": 3.892239243221931e-06, "loss": 1.0535, "step": 8613 }, { "epoch": 1.7061542690087705, "grad_norm": 2.25, "learning_rate": 3.891211654979045e-06, "loss": 1.0797, "step": 8614 }, { "epoch": 1.7063541640638664, "grad_norm": 2.1875, "learning_rate": 3.890184115988418e-06, "loss": 1.0081, "step": 8615 }, { "epoch": 1.7065540591189625, "grad_norm": 2.140625, "learning_rate": 3.889156626295694e-06, "loss": 1.0177, "step": 8616 }, { "epoch": 1.7067539541740586, "grad_norm": 2.09375, "learning_rate": 3.888129185946514e-06, "loss": 1.0065, "step": 8617 }, { "epoch": 1.7069538492291547, "grad_norm": 1.9921875, "learning_rate": 3.887101794986516e-06, "loss": 0.9566, "step": 8618 }, { "epoch": 1.7071537442842508, "grad_norm": 2.25, "learning_rate": 3.886074453461339e-06, "loss": 0.9088, "step": 8619 }, { "epoch": 1.7073536393393467, "grad_norm": 2.21875, "learning_rate": 3.885047161416618e-06, "loss": 1.0895, "step": 8620 }, { "epoch": 1.7075535343944428, "grad_norm": 2.0625, "learning_rate": 3.884019918897979e-06, "loss": 0.9131, "step": 8621 }, { "epoch": 1.707753429449539, "grad_norm": 2.34375, "learning_rate": 3.882992725951057e-06, "loss": 1.0781, "step": 8622 }, { "epoch": 1.707953324504635, "grad_norm": 2.171875, "learning_rate": 3.881965582621479e-06, "loss": 0.9742, "step": 8623 }, { "epoch": 1.708153219559731, "grad_norm": 2.078125, "learning_rate": 3.88093848895487e-06, "loss": 0.9311, "step": 8624 }, { "epoch": 1.7083531146148272, "grad_norm": 2.484375, "learning_rate": 3.879911444996854e-06, "loss": 1.1179, "step": 8625 }, { "epoch": 1.7085530096699233, "grad_norm": 2.15625, "learning_rate": 3.878884450793053e-06, "loss": 0.9329, "step": 8626 }, { "epoch": 1.7087529047250194, "grad_norm": 2.328125, "learning_rate": 3.877857506389083e-06, "loss": 1.0512, "step": 8627 }, { "epoch": 1.7089527997801155, "grad_norm": 2.1875, "learning_rate": 3.876830611830565e-06, "loss": 1.0058, "step": 8628 }, { "epoch": 1.7091526948352116, "grad_norm": 2.28125, "learning_rate": 3.87580376716311e-06, "loss": 0.9564, "step": 8629 }, { "epoch": 1.7093525898903077, "grad_norm": 2.109375, "learning_rate": 3.874776972432331e-06, "loss": 0.9368, "step": 8630 }, { "epoch": 1.7095524849454038, "grad_norm": 2.0625, "learning_rate": 3.87375022768384e-06, "loss": 0.8779, "step": 8631 }, { "epoch": 1.7097523800005, "grad_norm": 2.15625, "learning_rate": 3.872723532963242e-06, "loss": 0.976, "step": 8632 }, { "epoch": 1.7099522750555958, "grad_norm": 2.140625, "learning_rate": 3.871696888316145e-06, "loss": 1.0064, "step": 8633 }, { "epoch": 1.7101521701106919, "grad_norm": 2.296875, "learning_rate": 3.870670293788153e-06, "loss": 0.9534, "step": 8634 }, { "epoch": 1.710352065165788, "grad_norm": 2.328125, "learning_rate": 3.8696437494248645e-06, "loss": 1.0084, "step": 8635 }, { "epoch": 1.710551960220884, "grad_norm": 2.140625, "learning_rate": 3.868617255271881e-06, "loss": 0.9585, "step": 8636 }, { "epoch": 1.71075185527598, "grad_norm": 2.1875, "learning_rate": 3.867590811374797e-06, "loss": 1.01, "step": 8637 }, { "epoch": 1.710951750331076, "grad_norm": 2.1875, "learning_rate": 3.866564417779208e-06, "loss": 0.9748, "step": 8638 }, { "epoch": 1.7111516453861721, "grad_norm": 2.0625, "learning_rate": 3.865538074530708e-06, "loss": 0.937, "step": 8639 }, { "epoch": 1.7113515404412682, "grad_norm": 2.21875, "learning_rate": 3.864511781674885e-06, "loss": 0.9954, "step": 8640 }, { "epoch": 1.7115514354963643, "grad_norm": 2.234375, "learning_rate": 3.863485539257326e-06, "loss": 0.9562, "step": 8641 }, { "epoch": 1.7117513305514604, "grad_norm": 2.265625, "learning_rate": 3.86245934732362e-06, "loss": 1.0433, "step": 8642 }, { "epoch": 1.7119512256065565, "grad_norm": 1.9765625, "learning_rate": 3.861433205919347e-06, "loss": 0.8788, "step": 8643 }, { "epoch": 1.7121511206616526, "grad_norm": 2.171875, "learning_rate": 3.86040711509009e-06, "loss": 0.9968, "step": 8644 }, { "epoch": 1.7123510157167487, "grad_norm": 2.21875, "learning_rate": 3.859381074881427e-06, "loss": 1.0523, "step": 8645 }, { "epoch": 1.7125509107718448, "grad_norm": 2.15625, "learning_rate": 3.858355085338935e-06, "loss": 0.9658, "step": 8646 }, { "epoch": 1.712750805826941, "grad_norm": 2.234375, "learning_rate": 3.857329146508188e-06, "loss": 0.907, "step": 8647 }, { "epoch": 1.712950700882037, "grad_norm": 2.15625, "learning_rate": 3.856303258434758e-06, "loss": 0.8811, "step": 8648 }, { "epoch": 1.7131505959371331, "grad_norm": 2.203125, "learning_rate": 3.8552774211642154e-06, "loss": 0.9952, "step": 8649 }, { "epoch": 1.713350490992229, "grad_norm": 2.203125, "learning_rate": 3.854251634742128e-06, "loss": 0.9703, "step": 8650 }, { "epoch": 1.7135503860473251, "grad_norm": 2.171875, "learning_rate": 3.853225899214062e-06, "loss": 0.9576, "step": 8651 }, { "epoch": 1.7137502811024212, "grad_norm": 2.328125, "learning_rate": 3.8522002146255765e-06, "loss": 0.9298, "step": 8652 }, { "epoch": 1.7139501761575173, "grad_norm": 2.171875, "learning_rate": 3.851174581022236e-06, "loss": 1.0168, "step": 8653 }, { "epoch": 1.7141500712126134, "grad_norm": 2.25, "learning_rate": 3.850148998449597e-06, "loss": 0.9716, "step": 8654 }, { "epoch": 1.7143499662677093, "grad_norm": 2.15625, "learning_rate": 3.849123466953217e-06, "loss": 1.003, "step": 8655 }, { "epoch": 1.7145498613228054, "grad_norm": 2.15625, "learning_rate": 3.84809798657865e-06, "loss": 0.9513, "step": 8656 }, { "epoch": 1.7147497563779015, "grad_norm": 2.0625, "learning_rate": 3.847072557371448e-06, "loss": 0.9694, "step": 8657 }, { "epoch": 1.7149496514329976, "grad_norm": 2.078125, "learning_rate": 3.846047179377159e-06, "loss": 0.9758, "step": 8658 }, { "epoch": 1.7151495464880937, "grad_norm": 2.078125, "learning_rate": 3.8450218526413315e-06, "loss": 0.9551, "step": 8659 }, { "epoch": 1.7153494415431898, "grad_norm": 2.046875, "learning_rate": 3.843996577209509e-06, "loss": 0.979, "step": 8660 }, { "epoch": 1.715549336598286, "grad_norm": 2.140625, "learning_rate": 3.842971353127235e-06, "loss": 1.0461, "step": 8661 }, { "epoch": 1.715749231653382, "grad_norm": 2.15625, "learning_rate": 3.841946180440052e-06, "loss": 1.0534, "step": 8662 }, { "epoch": 1.715949126708478, "grad_norm": 2.109375, "learning_rate": 3.840921059193494e-06, "loss": 0.9713, "step": 8663 }, { "epoch": 1.7161490217635742, "grad_norm": 2.171875, "learning_rate": 3.839895989433099e-06, "loss": 0.9854, "step": 8664 }, { "epoch": 1.7163489168186703, "grad_norm": 2.109375, "learning_rate": 3.838870971204401e-06, "loss": 1.0073, "step": 8665 }, { "epoch": 1.7165488118737664, "grad_norm": 2.203125, "learning_rate": 3.83784600455293e-06, "loss": 0.9942, "step": 8666 }, { "epoch": 1.7167487069288625, "grad_norm": 2.09375, "learning_rate": 3.836821089524216e-06, "loss": 0.9617, "step": 8667 }, { "epoch": 1.7169486019839584, "grad_norm": 2.15625, "learning_rate": 3.835796226163784e-06, "loss": 0.9522, "step": 8668 }, { "epoch": 1.7171484970390545, "grad_norm": 2.15625, "learning_rate": 3.834771414517159e-06, "loss": 0.9609, "step": 8669 }, { "epoch": 1.7173483920941506, "grad_norm": 2.453125, "learning_rate": 3.833746654629865e-06, "loss": 0.9881, "step": 8670 }, { "epoch": 1.7175482871492467, "grad_norm": 2.3125, "learning_rate": 3.832721946547418e-06, "loss": 0.917, "step": 8671 }, { "epoch": 1.7177481822043426, "grad_norm": 2.34375, "learning_rate": 3.831697290315339e-06, "loss": 1.0694, "step": 8672 }, { "epoch": 1.7179480772594387, "grad_norm": 2.1875, "learning_rate": 3.830672685979142e-06, "loss": 1.0193, "step": 8673 }, { "epoch": 1.7181479723145348, "grad_norm": 2.078125, "learning_rate": 3.829648133584338e-06, "loss": 0.895, "step": 8674 }, { "epoch": 1.7183478673696309, "grad_norm": 2.21875, "learning_rate": 3.82862363317644e-06, "loss": 0.9861, "step": 8675 }, { "epoch": 1.718547762424727, "grad_norm": 2.140625, "learning_rate": 3.827599184800954e-06, "loss": 0.9176, "step": 8676 }, { "epoch": 1.718747657479823, "grad_norm": 2.1875, "learning_rate": 3.826574788503387e-06, "loss": 0.9677, "step": 8677 }, { "epoch": 1.7189475525349192, "grad_norm": 2.265625, "learning_rate": 3.825550444329244e-06, "loss": 1.0463, "step": 8678 }, { "epoch": 1.7191474475900153, "grad_norm": 2.25, "learning_rate": 3.8245261523240235e-06, "loss": 1.0227, "step": 8679 }, { "epoch": 1.7193473426451114, "grad_norm": 2.109375, "learning_rate": 3.823501912533226e-06, "loss": 0.9353, "step": 8680 }, { "epoch": 1.7195472377002075, "grad_norm": 2.125, "learning_rate": 3.822477725002348e-06, "loss": 0.9909, "step": 8681 }, { "epoch": 1.7197471327553036, "grad_norm": 2.109375, "learning_rate": 3.821453589776886e-06, "loss": 0.9857, "step": 8682 }, { "epoch": 1.7199470278103997, "grad_norm": 2.234375, "learning_rate": 3.820429506902326e-06, "loss": 1.0048, "step": 8683 }, { "epoch": 1.7201469228654958, "grad_norm": 2.203125, "learning_rate": 3.819405476424164e-06, "loss": 0.9792, "step": 8684 }, { "epoch": 1.7203468179205919, "grad_norm": 2.3125, "learning_rate": 3.818381498387883e-06, "loss": 0.9946, "step": 8685 }, { "epoch": 1.7205467129756877, "grad_norm": 2.078125, "learning_rate": 3.81735757283897e-06, "loss": 0.9009, "step": 8686 }, { "epoch": 1.7207466080307838, "grad_norm": 2.15625, "learning_rate": 3.8163336998229075e-06, "loss": 0.9599, "step": 8687 }, { "epoch": 1.72094650308588, "grad_norm": 2.203125, "learning_rate": 3.815309879385176e-06, "loss": 0.9272, "step": 8688 }, { "epoch": 1.721146398140976, "grad_norm": 2.328125, "learning_rate": 3.8142861115712515e-06, "loss": 0.9367, "step": 8689 }, { "epoch": 1.721346293196072, "grad_norm": 2.171875, "learning_rate": 3.8132623964266136e-06, "loss": 1.0028, "step": 8690 }, { "epoch": 1.721546188251168, "grad_norm": 2.1875, "learning_rate": 3.8122387339967315e-06, "loss": 1.0069, "step": 8691 }, { "epoch": 1.721746083306264, "grad_norm": 2.1875, "learning_rate": 3.811215124327078e-06, "loss": 1.0578, "step": 8692 }, { "epoch": 1.7219459783613602, "grad_norm": 2.15625, "learning_rate": 3.810191567463123e-06, "loss": 0.9108, "step": 8693 }, { "epoch": 1.7221458734164563, "grad_norm": 2.15625, "learning_rate": 3.8091680634503303e-06, "loss": 0.9725, "step": 8694 }, { "epoch": 1.7223457684715524, "grad_norm": 2.078125, "learning_rate": 3.808144612334165e-06, "loss": 0.9575, "step": 8695 }, { "epoch": 1.7225456635266485, "grad_norm": 2.1875, "learning_rate": 3.80712121416009e-06, "loss": 1.0384, "step": 8696 }, { "epoch": 1.7227455585817446, "grad_norm": 2.359375, "learning_rate": 3.806097868973562e-06, "loss": 0.977, "step": 8697 }, { "epoch": 1.7229454536368407, "grad_norm": 2.125, "learning_rate": 3.8050745768200404e-06, "loss": 0.9893, "step": 8698 }, { "epoch": 1.7231453486919368, "grad_norm": 2.25, "learning_rate": 3.8040513377449774e-06, "loss": 0.9146, "step": 8699 }, { "epoch": 1.723345243747033, "grad_norm": 2.234375, "learning_rate": 3.803028151793826e-06, "loss": 1.0259, "step": 8700 }, { "epoch": 1.723545138802129, "grad_norm": 2.078125, "learning_rate": 3.802005019012038e-06, "loss": 0.9262, "step": 8701 }, { "epoch": 1.723745033857225, "grad_norm": 2.078125, "learning_rate": 3.800981939445057e-06, "loss": 0.9522, "step": 8702 }, { "epoch": 1.723944928912321, "grad_norm": 2.203125, "learning_rate": 3.7999589131383306e-06, "loss": 0.9703, "step": 8703 }, { "epoch": 1.724144823967417, "grad_norm": 2.25, "learning_rate": 3.798935940137303e-06, "loss": 1.141, "step": 8704 }, { "epoch": 1.7243447190225132, "grad_norm": 2.1875, "learning_rate": 3.7979130204874106e-06, "loss": 0.9866, "step": 8705 }, { "epoch": 1.7245446140776093, "grad_norm": 2.3125, "learning_rate": 3.7968901542340946e-06, "loss": 0.9721, "step": 8706 }, { "epoch": 1.7247445091327054, "grad_norm": 2.1875, "learning_rate": 3.795867341422789e-06, "loss": 1.0209, "step": 8707 }, { "epoch": 1.7249444041878013, "grad_norm": 2.125, "learning_rate": 3.7948445820989265e-06, "loss": 0.8676, "step": 8708 }, { "epoch": 1.7251442992428974, "grad_norm": 2.203125, "learning_rate": 3.793821876307941e-06, "loss": 0.9601, "step": 8709 }, { "epoch": 1.7253441942979935, "grad_norm": 2.125, "learning_rate": 3.792799224095257e-06, "loss": 0.991, "step": 8710 }, { "epoch": 1.7255440893530896, "grad_norm": 2.15625, "learning_rate": 3.791776625506303e-06, "loss": 0.983, "step": 8711 }, { "epoch": 1.7257439844081857, "grad_norm": 2.140625, "learning_rate": 3.7907540805865035e-06, "loss": 0.8535, "step": 8712 }, { "epoch": 1.7259438794632818, "grad_norm": 2.296875, "learning_rate": 3.7897315893812796e-06, "loss": 0.9902, "step": 8713 }, { "epoch": 1.7261437745183779, "grad_norm": 2.125, "learning_rate": 3.788709151936047e-06, "loss": 0.9822, "step": 8714 }, { "epoch": 1.726343669573474, "grad_norm": 2.046875, "learning_rate": 3.7876867682962244e-06, "loss": 0.9171, "step": 8715 }, { "epoch": 1.72654356462857, "grad_norm": 2.21875, "learning_rate": 3.786664438507227e-06, "loss": 0.9963, "step": 8716 }, { "epoch": 1.7267434596836662, "grad_norm": 2.046875, "learning_rate": 3.7856421626144645e-06, "loss": 0.866, "step": 8717 }, { "epoch": 1.7269433547387623, "grad_norm": 2.125, "learning_rate": 3.7846199406633493e-06, "loss": 0.9655, "step": 8718 }, { "epoch": 1.7271432497938584, "grad_norm": 2.109375, "learning_rate": 3.783597772699285e-06, "loss": 0.9487, "step": 8719 }, { "epoch": 1.7273431448489545, "grad_norm": 2.265625, "learning_rate": 3.782575658767678e-06, "loss": 0.9771, "step": 8720 }, { "epoch": 1.7275430399040503, "grad_norm": 2.234375, "learning_rate": 3.7815535989139307e-06, "loss": 0.953, "step": 8721 }, { "epoch": 1.7277429349591464, "grad_norm": 2.203125, "learning_rate": 3.7805315931834413e-06, "loss": 0.9501, "step": 8722 }, { "epoch": 1.7279428300142425, "grad_norm": 2.1875, "learning_rate": 3.779509641621609e-06, "loss": 0.9922, "step": 8723 }, { "epoch": 1.7281427250693386, "grad_norm": 2.203125, "learning_rate": 3.7784877442738283e-06, "loss": 1.0136, "step": 8724 }, { "epoch": 1.7283426201244345, "grad_norm": 2.234375, "learning_rate": 3.777465901185491e-06, "loss": 0.9256, "step": 8725 }, { "epoch": 1.7285425151795306, "grad_norm": 2.140625, "learning_rate": 3.776444112401989e-06, "loss": 0.9601, "step": 8726 }, { "epoch": 1.7287424102346267, "grad_norm": 2.140625, "learning_rate": 3.775422377968707e-06, "loss": 0.9693, "step": 8727 }, { "epoch": 1.7289423052897228, "grad_norm": 2.125, "learning_rate": 3.7744006979310333e-06, "loss": 0.9509, "step": 8728 }, { "epoch": 1.729142200344819, "grad_norm": 2.140625, "learning_rate": 3.77337907233435e-06, "loss": 1.0105, "step": 8729 }, { "epoch": 1.729342095399915, "grad_norm": 2.078125, "learning_rate": 3.7723575012240364e-06, "loss": 1.0143, "step": 8730 }, { "epoch": 1.7295419904550111, "grad_norm": 2.109375, "learning_rate": 3.771335984645471e-06, "loss": 0.9855, "step": 8731 }, { "epoch": 1.7297418855101072, "grad_norm": 2.15625, "learning_rate": 3.770314522644032e-06, "loss": 0.9882, "step": 8732 }, { "epoch": 1.7299417805652033, "grad_norm": 2.1875, "learning_rate": 3.7692931152650893e-06, "loss": 1.0563, "step": 8733 }, { "epoch": 1.7301416756202994, "grad_norm": 2.1875, "learning_rate": 3.7682717625540144e-06, "loss": 1.0002, "step": 8734 }, { "epoch": 1.7303415706753955, "grad_norm": 2.109375, "learning_rate": 3.7672504645561773e-06, "loss": 0.9918, "step": 8735 }, { "epoch": 1.7305414657304916, "grad_norm": 2.125, "learning_rate": 3.766229221316942e-06, "loss": 0.9311, "step": 8736 }, { "epoch": 1.7307413607855877, "grad_norm": 2.125, "learning_rate": 3.7652080328816744e-06, "loss": 0.968, "step": 8737 }, { "epoch": 1.7309412558406836, "grad_norm": 2.0625, "learning_rate": 3.7641868992957326e-06, "loss": 0.9163, "step": 8738 }, { "epoch": 1.7311411508957797, "grad_norm": 2.15625, "learning_rate": 3.763165820604477e-06, "loss": 1.006, "step": 8739 }, { "epoch": 1.7313410459508758, "grad_norm": 2.234375, "learning_rate": 3.7621447968532644e-06, "loss": 0.9999, "step": 8740 }, { "epoch": 1.731540941005972, "grad_norm": 2.15625, "learning_rate": 3.7611238280874467e-06, "loss": 0.9689, "step": 8741 }, { "epoch": 1.731740836061068, "grad_norm": 2.125, "learning_rate": 3.7601029143523767e-06, "loss": 0.9182, "step": 8742 }, { "epoch": 1.7319407311161639, "grad_norm": 2.21875, "learning_rate": 3.759082055693403e-06, "loss": 0.9651, "step": 8743 }, { "epoch": 1.73214062617126, "grad_norm": 2.15625, "learning_rate": 3.7580612521558737e-06, "loss": 0.9859, "step": 8744 }, { "epoch": 1.732340521226356, "grad_norm": 2.109375, "learning_rate": 3.7570405037851287e-06, "loss": 0.9697, "step": 8745 }, { "epoch": 1.7325404162814522, "grad_norm": 2.078125, "learning_rate": 3.756019810626511e-06, "loss": 0.9336, "step": 8746 }, { "epoch": 1.7327403113365483, "grad_norm": 2.25, "learning_rate": 3.754999172725362e-06, "loss": 1.0636, "step": 8747 }, { "epoch": 1.7329402063916444, "grad_norm": 2.046875, "learning_rate": 3.7539785901270164e-06, "loss": 0.9048, "step": 8748 }, { "epoch": 1.7331401014467405, "grad_norm": 2.09375, "learning_rate": 3.752958062876809e-06, "loss": 0.9167, "step": 8749 }, { "epoch": 1.7333399965018366, "grad_norm": 2.109375, "learning_rate": 3.7519375910200706e-06, "loss": 0.9092, "step": 8750 }, { "epoch": 1.7335398915569327, "grad_norm": 2.171875, "learning_rate": 3.7509171746021305e-06, "loss": 0.9155, "step": 8751 }, { "epoch": 1.7337397866120288, "grad_norm": 2.171875, "learning_rate": 3.749896813668318e-06, "loss": 1.0269, "step": 8752 }, { "epoch": 1.7339396816671249, "grad_norm": 2.234375, "learning_rate": 3.748876508263954e-06, "loss": 1.039, "step": 8753 }, { "epoch": 1.734139576722221, "grad_norm": 2.125, "learning_rate": 3.7478562584343615e-06, "loss": 0.9549, "step": 8754 }, { "epoch": 1.734339471777317, "grad_norm": 2.25, "learning_rate": 3.746836064224862e-06, "loss": 1.0041, "step": 8755 }, { "epoch": 1.734539366832413, "grad_norm": 2.28125, "learning_rate": 3.745815925680769e-06, "loss": 1.0523, "step": 8756 }, { "epoch": 1.734739261887509, "grad_norm": 2.21875, "learning_rate": 3.7447958428474006e-06, "loss": 1.0006, "step": 8757 }, { "epoch": 1.7349391569426051, "grad_norm": 2.4375, "learning_rate": 3.743775815770065e-06, "loss": 1.0765, "step": 8758 }, { "epoch": 1.7351390519977012, "grad_norm": 2.15625, "learning_rate": 3.7427558444940738e-06, "loss": 0.9442, "step": 8759 }, { "epoch": 1.7353389470527971, "grad_norm": 2.21875, "learning_rate": 3.741735929064735e-06, "loss": 0.9692, "step": 8760 }, { "epoch": 1.7355388421078932, "grad_norm": 2.09375, "learning_rate": 3.74071606952735e-06, "loss": 0.9094, "step": 8761 }, { "epoch": 1.7357387371629893, "grad_norm": 2.09375, "learning_rate": 3.739696265927223e-06, "loss": 0.9134, "step": 8762 }, { "epoch": 1.7359386322180854, "grad_norm": 2.125, "learning_rate": 3.7386765183096545e-06, "loss": 1.0662, "step": 8763 }, { "epoch": 1.7361385272731815, "grad_norm": 2.359375, "learning_rate": 3.737656826719939e-06, "loss": 0.9534, "step": 8764 }, { "epoch": 1.7363384223282776, "grad_norm": 2.1875, "learning_rate": 3.7366371912033737e-06, "loss": 0.9429, "step": 8765 }, { "epoch": 1.7365383173833737, "grad_norm": 2.125, "learning_rate": 3.735617611805249e-06, "loss": 0.9426, "step": 8766 }, { "epoch": 1.7367382124384698, "grad_norm": 2.15625, "learning_rate": 3.7345980885708545e-06, "loss": 0.9913, "step": 8767 }, { "epoch": 1.736938107493566, "grad_norm": 2.015625, "learning_rate": 3.733578621545478e-06, "loss": 0.8853, "step": 8768 }, { "epoch": 1.737138002548662, "grad_norm": 2.125, "learning_rate": 3.7325592107744034e-06, "loss": 0.9621, "step": 8769 }, { "epoch": 1.7373378976037581, "grad_norm": 2.0625, "learning_rate": 3.7315398563029137e-06, "loss": 0.9646, "step": 8770 }, { "epoch": 1.7375377926588542, "grad_norm": 2.109375, "learning_rate": 3.7305205581762895e-06, "loss": 0.9537, "step": 8771 }, { "epoch": 1.7377376877139503, "grad_norm": 2.203125, "learning_rate": 3.7295013164398042e-06, "loss": 0.9745, "step": 8772 }, { "epoch": 1.7379375827690462, "grad_norm": 2.171875, "learning_rate": 3.7284821311387356e-06, "loss": 0.901, "step": 8773 }, { "epoch": 1.7381374778241423, "grad_norm": 2.28125, "learning_rate": 3.7274630023183554e-06, "loss": 1.0101, "step": 8774 }, { "epoch": 1.7383373728792384, "grad_norm": 2.171875, "learning_rate": 3.726443930023934e-06, "loss": 1.0043, "step": 8775 }, { "epoch": 1.7385372679343345, "grad_norm": 2.21875, "learning_rate": 3.7254249143007356e-06, "loss": 1.0406, "step": 8776 }, { "epoch": 1.7387371629894306, "grad_norm": 2.171875, "learning_rate": 3.724405955194027e-06, "loss": 0.9588, "step": 8777 }, { "epoch": 1.7389370580445265, "grad_norm": 2.109375, "learning_rate": 3.7233870527490683e-06, "loss": 0.8205, "step": 8778 }, { "epoch": 1.7391369530996226, "grad_norm": 2.078125, "learning_rate": 3.7223682070111212e-06, "loss": 0.9412, "step": 8779 }, { "epoch": 1.7393368481547187, "grad_norm": 2.140625, "learning_rate": 3.7213494180254417e-06, "loss": 1.0123, "step": 8780 }, { "epoch": 1.7395367432098148, "grad_norm": 2.265625, "learning_rate": 3.7203306858372845e-06, "loss": 0.9726, "step": 8781 }, { "epoch": 1.7397366382649109, "grad_norm": 2.265625, "learning_rate": 3.719312010491901e-06, "loss": 0.9801, "step": 8782 }, { "epoch": 1.739936533320007, "grad_norm": 2.03125, "learning_rate": 3.7182933920345426e-06, "loss": 0.9375, "step": 8783 }, { "epoch": 1.740136428375103, "grad_norm": 2.125, "learning_rate": 3.7172748305104537e-06, "loss": 0.9314, "step": 8784 }, { "epoch": 1.7403363234301992, "grad_norm": 2.140625, "learning_rate": 3.71625632596488e-06, "loss": 0.9438, "step": 8785 }, { "epoch": 1.7405362184852953, "grad_norm": 2.171875, "learning_rate": 3.7152378784430643e-06, "loss": 0.9976, "step": 8786 }, { "epoch": 1.7407361135403914, "grad_norm": 2.21875, "learning_rate": 3.7142194879902434e-06, "loss": 0.9725, "step": 8787 }, { "epoch": 1.7409360085954875, "grad_norm": 2.0, "learning_rate": 3.7132011546516568e-06, "loss": 0.8509, "step": 8788 }, { "epoch": 1.7411359036505836, "grad_norm": 2.3125, "learning_rate": 3.7121828784725357e-06, "loss": 0.9757, "step": 8789 }, { "epoch": 1.7413357987056797, "grad_norm": 2.296875, "learning_rate": 3.711164659498114e-06, "loss": 0.9933, "step": 8790 }, { "epoch": 1.7415356937607755, "grad_norm": 2.03125, "learning_rate": 3.710146497773622e-06, "loss": 0.9986, "step": 8791 }, { "epoch": 1.7417355888158716, "grad_norm": 2.109375, "learning_rate": 3.7091283933442835e-06, "loss": 0.9548, "step": 8792 }, { "epoch": 1.7419354838709677, "grad_norm": 2.109375, "learning_rate": 3.7081103462553234e-06, "loss": 0.9078, "step": 8793 }, { "epoch": 1.7421353789260638, "grad_norm": 2.078125, "learning_rate": 3.7070923565519655e-06, "loss": 0.8792, "step": 8794 }, { "epoch": 1.7423352739811597, "grad_norm": 2.21875, "learning_rate": 3.706074424279426e-06, "loss": 1.042, "step": 8795 }, { "epoch": 1.7425351690362558, "grad_norm": 2.109375, "learning_rate": 3.7050565494829233e-06, "loss": 0.9285, "step": 8796 }, { "epoch": 1.742735064091352, "grad_norm": 2.265625, "learning_rate": 3.704038732207669e-06, "loss": 0.9947, "step": 8797 }, { "epoch": 1.742934959146448, "grad_norm": 2.171875, "learning_rate": 3.7030209724988764e-06, "loss": 0.9517, "step": 8798 }, { "epoch": 1.7431348542015441, "grad_norm": 2.15625, "learning_rate": 3.7020032704017537e-06, "loss": 1.017, "step": 8799 }, { "epoch": 1.7433347492566402, "grad_norm": 2.15625, "learning_rate": 3.7009856259615074e-06, "loss": 0.9797, "step": 8800 }, { "epoch": 1.7435346443117363, "grad_norm": 2.140625, "learning_rate": 3.6999680392233397e-06, "loss": 0.9665, "step": 8801 }, { "epoch": 1.7437345393668324, "grad_norm": 2.171875, "learning_rate": 3.698950510232454e-06, "loss": 0.9752, "step": 8802 }, { "epoch": 1.7439344344219285, "grad_norm": 2.109375, "learning_rate": 3.6979330390340472e-06, "loss": 0.8478, "step": 8803 }, { "epoch": 1.7441343294770246, "grad_norm": 2.125, "learning_rate": 3.696915625673317e-06, "loss": 0.899, "step": 8804 }, { "epoch": 1.7443342245321207, "grad_norm": 2.078125, "learning_rate": 3.6958982701954536e-06, "loss": 0.8745, "step": 8805 }, { "epoch": 1.7445341195872168, "grad_norm": 2.125, "learning_rate": 3.6948809726456525e-06, "loss": 0.9022, "step": 8806 }, { "epoch": 1.744734014642313, "grad_norm": 2.140625, "learning_rate": 3.6938637330690967e-06, "loss": 0.9966, "step": 8807 }, { "epoch": 1.744933909697409, "grad_norm": 2.09375, "learning_rate": 3.6928465515109756e-06, "loss": 0.9684, "step": 8808 }, { "epoch": 1.745133804752505, "grad_norm": 2.21875, "learning_rate": 3.69182942801647e-06, "loss": 0.9814, "step": 8809 }, { "epoch": 1.745333699807601, "grad_norm": 2.1875, "learning_rate": 3.6908123626307618e-06, "loss": 1.0867, "step": 8810 }, { "epoch": 1.745533594862697, "grad_norm": 2.125, "learning_rate": 3.6897953553990296e-06, "loss": 0.9418, "step": 8811 }, { "epoch": 1.7457334899177932, "grad_norm": 2.1875, "learning_rate": 3.6887784063664467e-06, "loss": 0.9365, "step": 8812 }, { "epoch": 1.745933384972889, "grad_norm": 2.0625, "learning_rate": 3.6877615155781863e-06, "loss": 0.9061, "step": 8813 }, { "epoch": 1.7461332800279852, "grad_norm": 2.28125, "learning_rate": 3.6867446830794205e-06, "loss": 0.9976, "step": 8814 }, { "epoch": 1.7463331750830813, "grad_norm": 2.21875, "learning_rate": 3.6857279089153154e-06, "loss": 1.0071, "step": 8815 }, { "epoch": 1.7465330701381774, "grad_norm": 2.15625, "learning_rate": 3.6847111931310363e-06, "loss": 1.0236, "step": 8816 }, { "epoch": 1.7467329651932735, "grad_norm": 2.109375, "learning_rate": 3.683694535771745e-06, "loss": 0.9984, "step": 8817 }, { "epoch": 1.7469328602483696, "grad_norm": 2.03125, "learning_rate": 3.6826779368826015e-06, "loss": 0.8575, "step": 8818 }, { "epoch": 1.7471327553034657, "grad_norm": 2.09375, "learning_rate": 3.681661396508765e-06, "loss": 0.9144, "step": 8819 }, { "epoch": 1.7473326503585618, "grad_norm": 2.203125, "learning_rate": 3.680644914695387e-06, "loss": 0.9785, "step": 8820 }, { "epoch": 1.7475325454136579, "grad_norm": 2.171875, "learning_rate": 3.679628491487621e-06, "loss": 0.9525, "step": 8821 }, { "epoch": 1.747732440468754, "grad_norm": 2.125, "learning_rate": 3.6786121269306173e-06, "loss": 1.0481, "step": 8822 }, { "epoch": 1.74793233552385, "grad_norm": 2.15625, "learning_rate": 3.677595821069521e-06, "loss": 0.9572, "step": 8823 }, { "epoch": 1.7481322305789462, "grad_norm": 2.09375, "learning_rate": 3.676579573949477e-06, "loss": 0.9119, "step": 8824 }, { "epoch": 1.7483321256340423, "grad_norm": 2.203125, "learning_rate": 3.6755633856156283e-06, "loss": 1.0921, "step": 8825 }, { "epoch": 1.7485320206891382, "grad_norm": 2.21875, "learning_rate": 3.6745472561131107e-06, "loss": 0.9407, "step": 8826 }, { "epoch": 1.7487319157442343, "grad_norm": 2.09375, "learning_rate": 3.673531185487064e-06, "loss": 1.0492, "step": 8827 }, { "epoch": 1.7489318107993304, "grad_norm": 2.09375, "learning_rate": 3.6725151737826193e-06, "loss": 0.914, "step": 8828 }, { "epoch": 1.7491317058544265, "grad_norm": 2.21875, "learning_rate": 3.6714992210449084e-06, "loss": 1.0461, "step": 8829 }, { "epoch": 1.7493316009095226, "grad_norm": 2.171875, "learning_rate": 3.670483327319062e-06, "loss": 0.965, "step": 8830 }, { "epoch": 1.7495314959646184, "grad_norm": 2.328125, "learning_rate": 3.669467492650203e-06, "loss": 1.0768, "step": 8831 }, { "epoch": 1.7497313910197145, "grad_norm": 2.1875, "learning_rate": 3.6684517170834554e-06, "loss": 0.9636, "step": 8832 }, { "epoch": 1.7499312860748106, "grad_norm": 2.203125, "learning_rate": 3.667436000663942e-06, "loss": 0.9212, "step": 8833 }, { "epoch": 1.7501311811299067, "grad_norm": 2.1875, "learning_rate": 3.666420343436777e-06, "loss": 0.9206, "step": 8834 }, { "epoch": 1.7503310761850028, "grad_norm": 2.234375, "learning_rate": 3.66540474544708e-06, "loss": 0.8463, "step": 8835 }, { "epoch": 1.750530971240099, "grad_norm": 2.15625, "learning_rate": 3.6643892067399604e-06, "loss": 0.9841, "step": 8836 }, { "epoch": 1.750730866295195, "grad_norm": 2.171875, "learning_rate": 3.663373727360532e-06, "loss": 0.9349, "step": 8837 }, { "epoch": 1.7509307613502911, "grad_norm": 2.15625, "learning_rate": 3.662358307353897e-06, "loss": 0.9983, "step": 8838 }, { "epoch": 1.7511306564053872, "grad_norm": 2.125, "learning_rate": 3.6613429467651644e-06, "loss": 0.8888, "step": 8839 }, { "epoch": 1.7513305514604833, "grad_norm": 2.03125, "learning_rate": 3.6603276456394342e-06, "loss": 0.8516, "step": 8840 }, { "epoch": 1.7515304465155794, "grad_norm": 2.109375, "learning_rate": 3.659312404021807e-06, "loss": 0.9168, "step": 8841 }, { "epoch": 1.7517303415706755, "grad_norm": 2.21875, "learning_rate": 3.6582972219573808e-06, "loss": 1.1102, "step": 8842 }, { "epoch": 1.7519302366257716, "grad_norm": 2.296875, "learning_rate": 3.6572820994912474e-06, "loss": 0.922, "step": 8843 }, { "epoch": 1.7521301316808675, "grad_norm": 2.046875, "learning_rate": 3.6562670366684995e-06, "loss": 0.9361, "step": 8844 }, { "epoch": 1.7523300267359636, "grad_norm": 2.171875, "learning_rate": 3.655252033534228e-06, "loss": 1.0295, "step": 8845 }, { "epoch": 1.7525299217910597, "grad_norm": 2.203125, "learning_rate": 3.6542370901335163e-06, "loss": 0.9455, "step": 8846 }, { "epoch": 1.7527298168461558, "grad_norm": 2.15625, "learning_rate": 3.65322220651145e-06, "loss": 0.9995, "step": 8847 }, { "epoch": 1.7529297119012517, "grad_norm": 2.28125, "learning_rate": 3.652207382713109e-06, "loss": 1.013, "step": 8848 }, { "epoch": 1.7531296069563478, "grad_norm": 2.140625, "learning_rate": 3.651192618783571e-06, "loss": 0.9757, "step": 8849 }, { "epoch": 1.7533295020114439, "grad_norm": 2.15625, "learning_rate": 3.650177914767915e-06, "loss": 0.9842, "step": 8850 }, { "epoch": 1.75352939706654, "grad_norm": 2.234375, "learning_rate": 3.64916327071121e-06, "loss": 0.9903, "step": 8851 }, { "epoch": 1.753729292121636, "grad_norm": 2.234375, "learning_rate": 3.6481486866585292e-06, "loss": 1.0488, "step": 8852 }, { "epoch": 1.7539291871767322, "grad_norm": 2.21875, "learning_rate": 3.6471341626549397e-06, "loss": 0.9775, "step": 8853 }, { "epoch": 1.7541290822318283, "grad_norm": 2.078125, "learning_rate": 3.646119698745505e-06, "loss": 0.943, "step": 8854 }, { "epoch": 1.7543289772869244, "grad_norm": 2.28125, "learning_rate": 3.6451052949752902e-06, "loss": 1.0549, "step": 8855 }, { "epoch": 1.7545288723420205, "grad_norm": 2.265625, "learning_rate": 3.6440909513893517e-06, "loss": 0.9492, "step": 8856 }, { "epoch": 1.7547287673971166, "grad_norm": 2.09375, "learning_rate": 3.643076668032749e-06, "loss": 0.96, "step": 8857 }, { "epoch": 1.7549286624522127, "grad_norm": 2.125, "learning_rate": 3.642062444950537e-06, "loss": 0.9833, "step": 8858 }, { "epoch": 1.7551285575073088, "grad_norm": 2.171875, "learning_rate": 3.6410482821877647e-06, "loss": 0.9437, "step": 8859 }, { "epoch": 1.7553284525624049, "grad_norm": 2.09375, "learning_rate": 3.640034179789483e-06, "loss": 0.9675, "step": 8860 }, { "epoch": 1.7555283476175008, "grad_norm": 2.15625, "learning_rate": 3.6390201378007383e-06, "loss": 0.9612, "step": 8861 }, { "epoch": 1.7557282426725969, "grad_norm": 2.328125, "learning_rate": 3.638006156266573e-06, "loss": 0.94, "step": 8862 }, { "epoch": 1.755928137727693, "grad_norm": 2.078125, "learning_rate": 3.6369922352320285e-06, "loss": 0.969, "step": 8863 }, { "epoch": 1.756128032782789, "grad_norm": 2.0625, "learning_rate": 3.635978374742145e-06, "loss": 0.8889, "step": 8864 }, { "epoch": 1.7563279278378852, "grad_norm": 2.0625, "learning_rate": 3.634964574841955e-06, "loss": 0.8754, "step": 8865 }, { "epoch": 1.756527822892981, "grad_norm": 2.15625, "learning_rate": 3.6339508355764935e-06, "loss": 0.9049, "step": 8866 }, { "epoch": 1.7567277179480771, "grad_norm": 2.15625, "learning_rate": 3.632937156990789e-06, "loss": 0.8882, "step": 8867 }, { "epoch": 1.7569276130031732, "grad_norm": 2.15625, "learning_rate": 3.6319235391298724e-06, "loss": 0.9551, "step": 8868 }, { "epoch": 1.7571275080582693, "grad_norm": 2.171875, "learning_rate": 3.630909982038764e-06, "loss": 1.0042, "step": 8869 }, { "epoch": 1.7573274031133654, "grad_norm": 2.34375, "learning_rate": 3.6298964857624885e-06, "loss": 1.1014, "step": 8870 }, { "epoch": 1.7575272981684615, "grad_norm": 2.171875, "learning_rate": 3.628883050346065e-06, "loss": 0.8785, "step": 8871 }, { "epoch": 1.7577271932235576, "grad_norm": 2.1875, "learning_rate": 3.627869675834509e-06, "loss": 0.992, "step": 8872 }, { "epoch": 1.7579270882786537, "grad_norm": 2.140625, "learning_rate": 3.626856362272836e-06, "loss": 0.9159, "step": 8873 }, { "epoch": 1.7581269833337498, "grad_norm": 2.171875, "learning_rate": 3.6258431097060567e-06, "loss": 1.1005, "step": 8874 }, { "epoch": 1.758326878388846, "grad_norm": 2.1875, "learning_rate": 3.624829918179179e-06, "loss": 0.9464, "step": 8875 }, { "epoch": 1.758526773443942, "grad_norm": 2.171875, "learning_rate": 3.623816787737211e-06, "loss": 0.9043, "step": 8876 }, { "epoch": 1.7587266684990381, "grad_norm": 2.09375, "learning_rate": 3.622803718425153e-06, "loss": 0.9667, "step": 8877 }, { "epoch": 1.7589265635541342, "grad_norm": 2.203125, "learning_rate": 3.6217907102880075e-06, "loss": 0.9796, "step": 8878 }, { "epoch": 1.7591264586092301, "grad_norm": 2.25, "learning_rate": 3.620777763370771e-06, "loss": 0.9393, "step": 8879 }, { "epoch": 1.7593263536643262, "grad_norm": 2.296875, "learning_rate": 3.619764877718438e-06, "loss": 1.1024, "step": 8880 }, { "epoch": 1.7595262487194223, "grad_norm": 2.28125, "learning_rate": 3.6187520533760034e-06, "loss": 0.9499, "step": 8881 }, { "epoch": 1.7597261437745184, "grad_norm": 2.203125, "learning_rate": 3.6177392903884545e-06, "loss": 1.0295, "step": 8882 }, { "epoch": 1.7599260388296143, "grad_norm": 2.109375, "learning_rate": 3.616726588800778e-06, "loss": 0.9496, "step": 8883 }, { "epoch": 1.7601259338847104, "grad_norm": 2.015625, "learning_rate": 3.61571394865796e-06, "loss": 0.9193, "step": 8884 }, { "epoch": 1.7603258289398065, "grad_norm": 2.171875, "learning_rate": 3.6147013700049793e-06, "loss": 0.974, "step": 8885 }, { "epoch": 1.7605257239949026, "grad_norm": 2.015625, "learning_rate": 3.6136888528868177e-06, "loss": 0.9011, "step": 8886 }, { "epoch": 1.7607256190499987, "grad_norm": 2.09375, "learning_rate": 3.612676397348447e-06, "loss": 0.9091, "step": 8887 }, { "epoch": 1.7609255141050948, "grad_norm": 2.03125, "learning_rate": 3.6116640034348426e-06, "loss": 0.9124, "step": 8888 }, { "epoch": 1.761125409160191, "grad_norm": 2.046875, "learning_rate": 3.610651671190977e-06, "loss": 0.9292, "step": 8889 }, { "epoch": 1.761325304215287, "grad_norm": 2.0625, "learning_rate": 3.609639400661814e-06, "loss": 0.9587, "step": 8890 }, { "epoch": 1.761525199270383, "grad_norm": 2.203125, "learning_rate": 3.60862719189232e-06, "loss": 0.9473, "step": 8891 }, { "epoch": 1.7617250943254792, "grad_norm": 2.1875, "learning_rate": 3.6076150449274595e-06, "loss": 0.9856, "step": 8892 }, { "epoch": 1.7619249893805753, "grad_norm": 2.265625, "learning_rate": 3.6066029598121883e-06, "loss": 0.9389, "step": 8893 }, { "epoch": 1.7621248844356714, "grad_norm": 2.21875, "learning_rate": 3.605590936591466e-06, "loss": 0.9567, "step": 8894 }, { "epoch": 1.7623247794907675, "grad_norm": 2.140625, "learning_rate": 3.6045789753102443e-06, "loss": 0.9552, "step": 8895 }, { "epoch": 1.7625246745458634, "grad_norm": 2.25, "learning_rate": 3.6035670760134756e-06, "loss": 1.0126, "step": 8896 }, { "epoch": 1.7627245696009595, "grad_norm": 2.203125, "learning_rate": 3.6025552387461086e-06, "loss": 0.9932, "step": 8897 }, { "epoch": 1.7629244646560556, "grad_norm": 2.09375, "learning_rate": 3.6015434635530888e-06, "loss": 1.0051, "step": 8898 }, { "epoch": 1.7631243597111517, "grad_norm": 2.15625, "learning_rate": 3.6005317504793604e-06, "loss": 0.9816, "step": 8899 }, { "epoch": 1.7633242547662478, "grad_norm": 2.125, "learning_rate": 3.59952009956986e-06, "loss": 0.9812, "step": 8900 }, { "epoch": 1.7635241498213436, "grad_norm": 2.34375, "learning_rate": 3.5985085108695285e-06, "loss": 0.975, "step": 8901 }, { "epoch": 1.7637240448764397, "grad_norm": 2.234375, "learning_rate": 3.5974969844232974e-06, "loss": 0.9464, "step": 8902 }, { "epoch": 1.7639239399315358, "grad_norm": 2.1875, "learning_rate": 3.5964855202761016e-06, "loss": 0.939, "step": 8903 }, { "epoch": 1.764123834986632, "grad_norm": 2.078125, "learning_rate": 3.5954741184728698e-06, "loss": 0.8953, "step": 8904 }, { "epoch": 1.764323730041728, "grad_norm": 2.3125, "learning_rate": 3.5944627790585264e-06, "loss": 1.086, "step": 8905 }, { "epoch": 1.7645236250968241, "grad_norm": 2.046875, "learning_rate": 3.5934515020779974e-06, "loss": 0.931, "step": 8906 }, { "epoch": 1.7647235201519202, "grad_norm": 2.21875, "learning_rate": 3.592440287576201e-06, "loss": 0.9439, "step": 8907 }, { "epoch": 1.7649234152070163, "grad_norm": 2.140625, "learning_rate": 3.5914291355980573e-06, "loss": 0.9707, "step": 8908 }, { "epoch": 1.7651233102621124, "grad_norm": 2.203125, "learning_rate": 3.5904180461884818e-06, "loss": 0.9281, "step": 8909 }, { "epoch": 1.7653232053172085, "grad_norm": 2.109375, "learning_rate": 3.589407019392385e-06, "loss": 0.9643, "step": 8910 }, { "epoch": 1.7655231003723046, "grad_norm": 2.125, "learning_rate": 3.5883960552546783e-06, "loss": 0.9312, "step": 8911 }, { "epoch": 1.7657229954274007, "grad_norm": 2.078125, "learning_rate": 3.587385153820269e-06, "loss": 0.9281, "step": 8912 }, { "epoch": 1.7659228904824968, "grad_norm": 2.09375, "learning_rate": 3.586374315134058e-06, "loss": 1.0413, "step": 8913 }, { "epoch": 1.7661227855375927, "grad_norm": 2.203125, "learning_rate": 3.5853635392409504e-06, "loss": 0.9381, "step": 8914 }, { "epoch": 1.7663226805926888, "grad_norm": 2.078125, "learning_rate": 3.5843528261858434e-06, "loss": 0.9366, "step": 8915 }, { "epoch": 1.766522575647785, "grad_norm": 2.171875, "learning_rate": 3.5833421760136323e-06, "loss": 0.9485, "step": 8916 }, { "epoch": 1.766722470702881, "grad_norm": 2.21875, "learning_rate": 3.582331588769211e-06, "loss": 0.9681, "step": 8917 }, { "epoch": 1.766922365757977, "grad_norm": 2.109375, "learning_rate": 3.5813210644974685e-06, "loss": 0.8463, "step": 8918 }, { "epoch": 1.767122260813073, "grad_norm": 2.046875, "learning_rate": 3.5803106032432923e-06, "loss": 0.9746, "step": 8919 }, { "epoch": 1.767322155868169, "grad_norm": 2.125, "learning_rate": 3.5793002050515686e-06, "loss": 0.8872, "step": 8920 }, { "epoch": 1.7675220509232652, "grad_norm": 2.125, "learning_rate": 3.578289869967177e-06, "loss": 0.8397, "step": 8921 }, { "epoch": 1.7677219459783613, "grad_norm": 2.0625, "learning_rate": 3.5772795980349976e-06, "loss": 0.993, "step": 8922 }, { "epoch": 1.7679218410334574, "grad_norm": 2.0625, "learning_rate": 3.576269389299908e-06, "loss": 0.9071, "step": 8923 }, { "epoch": 1.7681217360885535, "grad_norm": 2.28125, "learning_rate": 3.5752592438067785e-06, "loss": 1.0297, "step": 8924 }, { "epoch": 1.7683216311436496, "grad_norm": 2.140625, "learning_rate": 3.5742491616004828e-06, "loss": 0.9557, "step": 8925 }, { "epoch": 1.7685215261987457, "grad_norm": 2.125, "learning_rate": 3.573239142725885e-06, "loss": 0.9459, "step": 8926 }, { "epoch": 1.7687214212538418, "grad_norm": 2.078125, "learning_rate": 3.572229187227853e-06, "loss": 1.0032, "step": 8927 }, { "epoch": 1.768921316308938, "grad_norm": 2.125, "learning_rate": 3.571219295151249e-06, "loss": 1.0218, "step": 8928 }, { "epoch": 1.769121211364034, "grad_norm": 2.203125, "learning_rate": 3.57020946654093e-06, "loss": 1.0085, "step": 8929 }, { "epoch": 1.76932110641913, "grad_norm": 2.296875, "learning_rate": 3.5691997014417558e-06, "loss": 1.0393, "step": 8930 }, { "epoch": 1.7695210014742262, "grad_norm": 2.171875, "learning_rate": 3.568189999898576e-06, "loss": 0.9354, "step": 8931 }, { "epoch": 1.769720896529322, "grad_norm": 2.1875, "learning_rate": 3.567180361956245e-06, "loss": 0.9892, "step": 8932 }, { "epoch": 1.7699207915844182, "grad_norm": 2.203125, "learning_rate": 3.5661707876596073e-06, "loss": 0.9455, "step": 8933 }, { "epoch": 1.7701206866395143, "grad_norm": 2.1875, "learning_rate": 3.565161277053511e-06, "loss": 0.9616, "step": 8934 }, { "epoch": 1.7703205816946104, "grad_norm": 2.078125, "learning_rate": 3.5641518301827983e-06, "loss": 0.9205, "step": 8935 }, { "epoch": 1.7705204767497063, "grad_norm": 2.1875, "learning_rate": 3.563142447092307e-06, "loss": 1.0233, "step": 8936 }, { "epoch": 1.7707203718048024, "grad_norm": 2.3125, "learning_rate": 3.5621331278268754e-06, "loss": 1.0387, "step": 8937 }, { "epoch": 1.7709202668598985, "grad_norm": 2.09375, "learning_rate": 3.5611238724313357e-06, "loss": 0.9357, "step": 8938 }, { "epoch": 1.7711201619149946, "grad_norm": 2.078125, "learning_rate": 3.5601146809505204e-06, "loss": 0.8826, "step": 8939 }, { "epoch": 1.7713200569700907, "grad_norm": 2.25, "learning_rate": 3.5591055534292573e-06, "loss": 1.0714, "step": 8940 }, { "epoch": 1.7715199520251868, "grad_norm": 2.140625, "learning_rate": 3.5580964899123715e-06, "loss": 0.9932, "step": 8941 }, { "epoch": 1.7717198470802829, "grad_norm": 2.078125, "learning_rate": 3.557087490444685e-06, "loss": 0.8739, "step": 8942 }, { "epoch": 1.771919742135379, "grad_norm": 2.140625, "learning_rate": 3.55607855507102e-06, "loss": 1.0024, "step": 8943 }, { "epoch": 1.772119637190475, "grad_norm": 2.109375, "learning_rate": 3.55506968383619e-06, "loss": 0.9236, "step": 8944 }, { "epoch": 1.7723195322455711, "grad_norm": 2.09375, "learning_rate": 3.5540608767850106e-06, "loss": 0.985, "step": 8945 }, { "epoch": 1.7725194273006672, "grad_norm": 2.28125, "learning_rate": 3.5530521339622923e-06, "loss": 0.985, "step": 8946 }, { "epoch": 1.7727193223557633, "grad_norm": 2.328125, "learning_rate": 3.5520434554128437e-06, "loss": 1.0439, "step": 8947 }, { "epoch": 1.7729192174108594, "grad_norm": 2.03125, "learning_rate": 3.5510348411814705e-06, "loss": 1.0106, "step": 8948 }, { "epoch": 1.7731191124659553, "grad_norm": 2.0625, "learning_rate": 3.5500262913129745e-06, "loss": 0.9443, "step": 8949 }, { "epoch": 1.7733190075210514, "grad_norm": 2.09375, "learning_rate": 3.5490178058521553e-06, "loss": 0.9458, "step": 8950 }, { "epoch": 1.7735189025761475, "grad_norm": 2.140625, "learning_rate": 3.548009384843811e-06, "loss": 0.9217, "step": 8951 }, { "epoch": 1.7737187976312436, "grad_norm": 2.21875, "learning_rate": 3.547001028332735e-06, "loss": 0.9978, "step": 8952 }, { "epoch": 1.7739186926863397, "grad_norm": 2.28125, "learning_rate": 3.545992736363717e-06, "loss": 1.0665, "step": 8953 }, { "epoch": 1.7741185877414356, "grad_norm": 2.265625, "learning_rate": 3.544984508981548e-06, "loss": 0.9647, "step": 8954 }, { "epoch": 1.7743184827965317, "grad_norm": 2.0625, "learning_rate": 3.5439763462310107e-06, "loss": 0.8975, "step": 8955 }, { "epoch": 1.7745183778516278, "grad_norm": 2.171875, "learning_rate": 3.5429682481568894e-06, "loss": 1.015, "step": 8956 }, { "epoch": 1.774718272906724, "grad_norm": 2.15625, "learning_rate": 3.5419602148039618e-06, "loss": 0.8829, "step": 8957 }, { "epoch": 1.77491816796182, "grad_norm": 2.21875, "learning_rate": 3.5409522462170054e-06, "loss": 0.9894, "step": 8958 }, { "epoch": 1.775118063016916, "grad_norm": 2.34375, "learning_rate": 3.539944342440796e-06, "loss": 1.0176, "step": 8959 }, { "epoch": 1.7753179580720122, "grad_norm": 2.40625, "learning_rate": 3.5389365035201016e-06, "loss": 0.9431, "step": 8960 }, { "epoch": 1.7755178531271083, "grad_norm": 2.25, "learning_rate": 3.537928729499694e-06, "loss": 0.9652, "step": 8961 }, { "epoch": 1.7757177481822044, "grad_norm": 2.140625, "learning_rate": 3.536921020424334e-06, "loss": 0.8999, "step": 8962 }, { "epoch": 1.7759176432373005, "grad_norm": 2.21875, "learning_rate": 3.5359133763387866e-06, "loss": 1.0306, "step": 8963 }, { "epoch": 1.7761175382923966, "grad_norm": 2.34375, "learning_rate": 3.53490579728781e-06, "loss": 1.0175, "step": 8964 }, { "epoch": 1.7763174333474927, "grad_norm": 2.171875, "learning_rate": 3.533898283316162e-06, "loss": 0.9064, "step": 8965 }, { "epoch": 1.7765173284025888, "grad_norm": 2.109375, "learning_rate": 3.5328908344685952e-06, "loss": 0.9434, "step": 8966 }, { "epoch": 1.7767172234576847, "grad_norm": 2.234375, "learning_rate": 3.5318834507898607e-06, "loss": 0.9599, "step": 8967 }, { "epoch": 1.7769171185127808, "grad_norm": 2.109375, "learning_rate": 3.5308761323247077e-06, "loss": 0.9761, "step": 8968 }, { "epoch": 1.7771170135678769, "grad_norm": 2.109375, "learning_rate": 3.5298688791178788e-06, "loss": 0.9672, "step": 8969 }, { "epoch": 1.777316908622973, "grad_norm": 2.171875, "learning_rate": 3.528861691214117e-06, "loss": 0.9342, "step": 8970 }, { "epoch": 1.7775168036780689, "grad_norm": 2.171875, "learning_rate": 3.5278545686581633e-06, "loss": 1.0538, "step": 8971 }, { "epoch": 1.777716698733165, "grad_norm": 2.078125, "learning_rate": 3.526847511494751e-06, "loss": 0.9025, "step": 8972 }, { "epoch": 1.777916593788261, "grad_norm": 2.0625, "learning_rate": 3.5258405197686154e-06, "loss": 0.9172, "step": 8973 }, { "epoch": 1.7781164888433572, "grad_norm": 2.109375, "learning_rate": 3.524833593524487e-06, "loss": 0.8952, "step": 8974 }, { "epoch": 1.7783163838984533, "grad_norm": 2.21875, "learning_rate": 3.523826732807092e-06, "loss": 0.9515, "step": 8975 }, { "epoch": 1.7785162789535494, "grad_norm": 2.109375, "learning_rate": 3.5228199376611564e-06, "loss": 0.923, "step": 8976 }, { "epoch": 1.7787161740086455, "grad_norm": 2.15625, "learning_rate": 3.521813208131401e-06, "loss": 0.9622, "step": 8977 }, { "epoch": 1.7789160690637416, "grad_norm": 2.359375, "learning_rate": 3.520806544262545e-06, "loss": 1.0126, "step": 8978 }, { "epoch": 1.7791159641188377, "grad_norm": 2.34375, "learning_rate": 3.519799946099305e-06, "loss": 0.8961, "step": 8979 }, { "epoch": 1.7793158591739338, "grad_norm": 2.125, "learning_rate": 3.518793413686392e-06, "loss": 1.0055, "step": 8980 }, { "epoch": 1.7795157542290299, "grad_norm": 2.34375, "learning_rate": 3.5177869470685175e-06, "loss": 0.9555, "step": 8981 }, { "epoch": 1.779715649284126, "grad_norm": 2.25, "learning_rate": 3.5167805462903903e-06, "loss": 0.9801, "step": 8982 }, { "epoch": 1.779915544339222, "grad_norm": 2.15625, "learning_rate": 3.5157742113967113e-06, "loss": 0.9423, "step": 8983 }, { "epoch": 1.780115439394318, "grad_norm": 2.109375, "learning_rate": 3.5147679424321848e-06, "loss": 0.9504, "step": 8984 }, { "epoch": 1.780315334449414, "grad_norm": 2.1875, "learning_rate": 3.5137617394415057e-06, "loss": 1.0039, "step": 8985 }, { "epoch": 1.7805152295045101, "grad_norm": 2.09375, "learning_rate": 3.512755602469372e-06, "loss": 1.0008, "step": 8986 }, { "epoch": 1.7807151245596062, "grad_norm": 2.109375, "learning_rate": 3.5117495315604766e-06, "loss": 0.9502, "step": 8987 }, { "epoch": 1.7809150196147023, "grad_norm": 2.125, "learning_rate": 3.5107435267595067e-06, "loss": 0.9923, "step": 8988 }, { "epoch": 1.7811149146697982, "grad_norm": 2.296875, "learning_rate": 3.5097375881111506e-06, "loss": 1.0216, "step": 8989 }, { "epoch": 1.7813148097248943, "grad_norm": 2.15625, "learning_rate": 3.508731715660093e-06, "loss": 0.9487, "step": 8990 }, { "epoch": 1.7815147047799904, "grad_norm": 2.21875, "learning_rate": 3.5077259094510118e-06, "loss": 1.0448, "step": 8991 }, { "epoch": 1.7817145998350865, "grad_norm": 2.1875, "learning_rate": 3.5067201695285884e-06, "loss": 0.9856, "step": 8992 }, { "epoch": 1.7819144948901826, "grad_norm": 2.296875, "learning_rate": 3.5057144959374934e-06, "loss": 1.033, "step": 8993 }, { "epoch": 1.7821143899452787, "grad_norm": 2.15625, "learning_rate": 3.5047088887224024e-06, "loss": 0.9622, "step": 8994 }, { "epoch": 1.7823142850003748, "grad_norm": 1.9765625, "learning_rate": 3.5037033479279813e-06, "loss": 0.8222, "step": 8995 }, { "epoch": 1.782514180055471, "grad_norm": 2.140625, "learning_rate": 3.502697873598899e-06, "loss": 0.9553, "step": 8996 }, { "epoch": 1.782714075110567, "grad_norm": 2.109375, "learning_rate": 3.5016924657798166e-06, "loss": 0.9852, "step": 8997 }, { "epoch": 1.782913970165663, "grad_norm": 2.078125, "learning_rate": 3.5006871245153947e-06, "loss": 0.9359, "step": 8998 }, { "epoch": 1.7831138652207592, "grad_norm": 2.171875, "learning_rate": 3.499681849850291e-06, "loss": 0.9947, "step": 8999 }, { "epoch": 1.7833137602758553, "grad_norm": 2.140625, "learning_rate": 3.4986766418291584e-06, "loss": 1.0271, "step": 9000 }, { "epoch": 1.7833137602758553, "eval_loss": 0.897596001625061, "eval_runtime": 593.4535, "eval_samples_per_second": 3.603, "eval_steps_per_second": 3.603, "step": 9000 }, { "epoch": 1.7835136553309514, "grad_norm": 2.25, "learning_rate": 3.4976715004966492e-06, "loss": 1.1224, "step": 9001 }, { "epoch": 1.7837135503860473, "grad_norm": 2.140625, "learning_rate": 3.4966664258974128e-06, "loss": 0.8661, "step": 9002 }, { "epoch": 1.7839134454411434, "grad_norm": 2.0625, "learning_rate": 3.4956614180760918e-06, "loss": 0.8782, "step": 9003 }, { "epoch": 1.7841133404962395, "grad_norm": 2.1875, "learning_rate": 3.4946564770773305e-06, "loss": 0.9052, "step": 9004 }, { "epoch": 1.7843132355513356, "grad_norm": 2.296875, "learning_rate": 3.493651602945768e-06, "loss": 0.9965, "step": 9005 }, { "epoch": 1.7845131306064315, "grad_norm": 2.125, "learning_rate": 3.49264679572604e-06, "loss": 0.9936, "step": 9006 }, { "epoch": 1.7847130256615276, "grad_norm": 2.15625, "learning_rate": 3.4916420554627806e-06, "loss": 0.9557, "step": 9007 }, { "epoch": 1.7849129207166237, "grad_norm": 2.21875, "learning_rate": 3.49063738220062e-06, "loss": 0.8943, "step": 9008 }, { "epoch": 1.7851128157717198, "grad_norm": 2.15625, "learning_rate": 3.4896327759841854e-06, "loss": 0.9469, "step": 9009 }, { "epoch": 1.7853127108268159, "grad_norm": 2.234375, "learning_rate": 3.4886282368581014e-06, "loss": 1.0564, "step": 9010 }, { "epoch": 1.785512605881912, "grad_norm": 2.1875, "learning_rate": 3.4876237648669896e-06, "loss": 0.9653, "step": 9011 }, { "epoch": 1.785712500937008, "grad_norm": 2.375, "learning_rate": 3.486619360055469e-06, "loss": 1.0446, "step": 9012 }, { "epoch": 1.7859123959921042, "grad_norm": 2.265625, "learning_rate": 3.4856150224681557e-06, "loss": 0.9555, "step": 9013 }, { "epoch": 1.7861122910472003, "grad_norm": 2.265625, "learning_rate": 3.48461075214966e-06, "loss": 0.9931, "step": 9014 }, { "epoch": 1.7863121861022964, "grad_norm": 2.296875, "learning_rate": 3.4836065491445935e-06, "loss": 1.013, "step": 9015 }, { "epoch": 1.7865120811573925, "grad_norm": 2.15625, "learning_rate": 3.482602413497562e-06, "loss": 0.9331, "step": 9016 }, { "epoch": 1.7867119762124886, "grad_norm": 2.171875, "learning_rate": 3.481598345253169e-06, "loss": 1.0055, "step": 9017 }, { "epoch": 1.7869118712675847, "grad_norm": 2.28125, "learning_rate": 3.480594344456016e-06, "loss": 1.0414, "step": 9018 }, { "epoch": 1.7871117663226805, "grad_norm": 2.34375, "learning_rate": 3.4795904111506988e-06, "loss": 1.0796, "step": 9019 }, { "epoch": 1.7873116613777766, "grad_norm": 2.1875, "learning_rate": 3.4785865453818135e-06, "loss": 0.9932, "step": 9020 }, { "epoch": 1.7875115564328727, "grad_norm": 2.125, "learning_rate": 3.477582747193953e-06, "loss": 1.0241, "step": 9021 }, { "epoch": 1.7877114514879688, "grad_norm": 2.3125, "learning_rate": 3.4765790166317036e-06, "loss": 0.9822, "step": 9022 }, { "epoch": 1.787911346543065, "grad_norm": 2.203125, "learning_rate": 3.475575353739651e-06, "loss": 0.9923, "step": 9023 }, { "epoch": 1.7881112415981608, "grad_norm": 2.125, "learning_rate": 3.4745717585623778e-06, "loss": 0.9738, "step": 9024 }, { "epoch": 1.788311136653257, "grad_norm": 2.1875, "learning_rate": 3.473568231144465e-06, "loss": 1.0057, "step": 9025 }, { "epoch": 1.788511031708353, "grad_norm": 2.0625, "learning_rate": 3.4725647715304876e-06, "loss": 0.9126, "step": 9026 }, { "epoch": 1.7887109267634491, "grad_norm": 2.125, "learning_rate": 3.4715613797650216e-06, "loss": 0.9076, "step": 9027 }, { "epoch": 1.7889108218185452, "grad_norm": 2.078125, "learning_rate": 3.4705580558926337e-06, "loss": 0.9161, "step": 9028 }, { "epoch": 1.7891107168736413, "grad_norm": 2.21875, "learning_rate": 3.4695547999578946e-06, "loss": 0.9221, "step": 9029 }, { "epoch": 1.7893106119287374, "grad_norm": 2.15625, "learning_rate": 3.468551612005368e-06, "loss": 0.9742, "step": 9030 }, { "epoch": 1.7895105069838335, "grad_norm": 2.265625, "learning_rate": 3.4675484920796143e-06, "loss": 1.0193, "step": 9031 }, { "epoch": 1.7897104020389296, "grad_norm": 2.09375, "learning_rate": 3.4665454402251937e-06, "loss": 0.9693, "step": 9032 }, { "epoch": 1.7899102970940257, "grad_norm": 2.1875, "learning_rate": 3.4655424564866615e-06, "loss": 0.9386, "step": 9033 }, { "epoch": 1.7901101921491218, "grad_norm": 2.1875, "learning_rate": 3.464539540908568e-06, "loss": 1.0275, "step": 9034 }, { "epoch": 1.790310087204218, "grad_norm": 2.09375, "learning_rate": 3.4635366935354654e-06, "loss": 0.9418, "step": 9035 }, { "epoch": 1.790509982259314, "grad_norm": 2.21875, "learning_rate": 3.462533914411898e-06, "loss": 0.9292, "step": 9036 }, { "epoch": 1.79070987731441, "grad_norm": 2.0625, "learning_rate": 3.4615312035824093e-06, "loss": 0.928, "step": 9037 }, { "epoch": 1.790909772369506, "grad_norm": 2.140625, "learning_rate": 3.4605285610915417e-06, "loss": 0.9658, "step": 9038 }, { "epoch": 1.791109667424602, "grad_norm": 2.0625, "learning_rate": 3.4595259869838306e-06, "loss": 0.9019, "step": 9039 }, { "epoch": 1.7913095624796982, "grad_norm": 2.109375, "learning_rate": 3.4585234813038095e-06, "loss": 1.0131, "step": 9040 }, { "epoch": 1.791509457534794, "grad_norm": 2.15625, "learning_rate": 3.457521044096012e-06, "loss": 0.9699, "step": 9041 }, { "epoch": 1.7917093525898902, "grad_norm": 2.125, "learning_rate": 3.4565186754049647e-06, "loss": 1.0297, "step": 9042 }, { "epoch": 1.7919092476449863, "grad_norm": 2.171875, "learning_rate": 3.455516375275192e-06, "loss": 0.9788, "step": 9043 }, { "epoch": 1.7921091427000824, "grad_norm": 2.234375, "learning_rate": 3.4545141437512186e-06, "loss": 0.9358, "step": 9044 }, { "epoch": 1.7923090377551785, "grad_norm": 2.328125, "learning_rate": 3.4535119808775608e-06, "loss": 1.0411, "step": 9045 }, { "epoch": 1.7925089328102746, "grad_norm": 2.25, "learning_rate": 3.4525098866987366e-06, "loss": 1.0401, "step": 9046 }, { "epoch": 1.7927088278653707, "grad_norm": 2.203125, "learning_rate": 3.451507861259257e-06, "loss": 1.0765, "step": 9047 }, { "epoch": 1.7929087229204668, "grad_norm": 2.0, "learning_rate": 3.4505059046036326e-06, "loss": 0.9126, "step": 9048 }, { "epoch": 1.7931086179755629, "grad_norm": 2.3125, "learning_rate": 3.4495040167763717e-06, "loss": 0.8841, "step": 9049 }, { "epoch": 1.793308513030659, "grad_norm": 2.203125, "learning_rate": 3.4485021978219755e-06, "loss": 0.9835, "step": 9050 }, { "epoch": 1.793508408085755, "grad_norm": 2.078125, "learning_rate": 3.447500447784946e-06, "loss": 1.0108, "step": 9051 }, { "epoch": 1.7937083031408512, "grad_norm": 2.234375, "learning_rate": 3.4464987667097818e-06, "loss": 1.069, "step": 9052 }, { "epoch": 1.7939081981959473, "grad_norm": 2.125, "learning_rate": 3.4454971546409777e-06, "loss": 0.9084, "step": 9053 }, { "epoch": 1.7941080932510434, "grad_norm": 2.140625, "learning_rate": 3.4444956116230216e-06, "loss": 0.9625, "step": 9054 }, { "epoch": 1.7943079883061392, "grad_norm": 2.1875, "learning_rate": 3.443494137700405e-06, "loss": 0.9626, "step": 9055 }, { "epoch": 1.7945078833612353, "grad_norm": 2.28125, "learning_rate": 3.4424927329176128e-06, "loss": 1.0615, "step": 9056 }, { "epoch": 1.7947077784163314, "grad_norm": 2.09375, "learning_rate": 3.441491397319126e-06, "loss": 0.9225, "step": 9057 }, { "epoch": 1.7949076734714275, "grad_norm": 2.03125, "learning_rate": 3.4404901309494264e-06, "loss": 0.9529, "step": 9058 }, { "epoch": 1.7951075685265234, "grad_norm": 2.25, "learning_rate": 3.4394889338529874e-06, "loss": 1.0643, "step": 9059 }, { "epoch": 1.7953074635816195, "grad_norm": 2.125, "learning_rate": 3.4384878060742833e-06, "loss": 0.9179, "step": 9060 }, { "epoch": 1.7955073586367156, "grad_norm": 2.171875, "learning_rate": 3.437486747657785e-06, "loss": 0.9156, "step": 9061 }, { "epoch": 1.7957072536918117, "grad_norm": 2.125, "learning_rate": 3.4364857586479576e-06, "loss": 0.9051, "step": 9062 }, { "epoch": 1.7959071487469078, "grad_norm": 2.109375, "learning_rate": 3.435484839089266e-06, "loss": 0.99, "step": 9063 }, { "epoch": 1.796107043802004, "grad_norm": 2.3125, "learning_rate": 3.4344839890261718e-06, "loss": 0.9345, "step": 9064 }, { "epoch": 1.7963069388571, "grad_norm": 2.0625, "learning_rate": 3.43348320850313e-06, "loss": 0.8536, "step": 9065 }, { "epoch": 1.7965068339121961, "grad_norm": 2.1875, "learning_rate": 3.4324824975645984e-06, "loss": 0.8766, "step": 9066 }, { "epoch": 1.7967067289672922, "grad_norm": 2.109375, "learning_rate": 3.4314818562550254e-06, "loss": 1.0165, "step": 9067 }, { "epoch": 1.7969066240223883, "grad_norm": 2.171875, "learning_rate": 3.430481284618861e-06, "loss": 0.8893, "step": 9068 }, { "epoch": 1.7971065190774844, "grad_norm": 2.171875, "learning_rate": 3.429480782700551e-06, "loss": 1.0252, "step": 9069 }, { "epoch": 1.7973064141325805, "grad_norm": 2.25, "learning_rate": 3.4284803505445363e-06, "loss": 1.0497, "step": 9070 }, { "epoch": 1.7975063091876766, "grad_norm": 2.140625, "learning_rate": 3.4274799881952564e-06, "loss": 0.9632, "step": 9071 }, { "epoch": 1.7977062042427725, "grad_norm": 2.140625, "learning_rate": 3.4264796956971486e-06, "loss": 0.979, "step": 9072 }, { "epoch": 1.7979060992978686, "grad_norm": 2.140625, "learning_rate": 3.425479473094644e-06, "loss": 1.0513, "step": 9073 }, { "epoch": 1.7981059943529647, "grad_norm": 2.03125, "learning_rate": 3.4244793204321727e-06, "loss": 0.9315, "step": 9074 }, { "epoch": 1.7983058894080608, "grad_norm": 2.140625, "learning_rate": 3.423479237754162e-06, "loss": 0.9543, "step": 9075 }, { "epoch": 1.798505784463157, "grad_norm": 2.234375, "learning_rate": 3.422479225105035e-06, "loss": 1.0306, "step": 9076 }, { "epoch": 1.7987056795182528, "grad_norm": 2.125, "learning_rate": 3.4214792825292133e-06, "loss": 0.9779, "step": 9077 }, { "epoch": 1.7989055745733489, "grad_norm": 2.171875, "learning_rate": 3.420479410071112e-06, "loss": 1.0356, "step": 9078 }, { "epoch": 1.799105469628445, "grad_norm": 2.09375, "learning_rate": 3.419479607775147e-06, "loss": 0.9675, "step": 9079 }, { "epoch": 1.799305364683541, "grad_norm": 2.109375, "learning_rate": 3.4184798756857294e-06, "loss": 0.9125, "step": 9080 }, { "epoch": 1.7995052597386372, "grad_norm": 2.203125, "learning_rate": 3.4174802138472663e-06, "loss": 1.0142, "step": 9081 }, { "epoch": 1.7997051547937333, "grad_norm": 2.125, "learning_rate": 3.416480622304163e-06, "loss": 0.9753, "step": 9082 }, { "epoch": 1.7999050498488294, "grad_norm": 2.296875, "learning_rate": 3.4154811011008224e-06, "loss": 1.0182, "step": 9083 }, { "epoch": 1.8001049449039255, "grad_norm": 2.171875, "learning_rate": 3.414481650281643e-06, "loss": 0.9982, "step": 9084 }, { "epoch": 1.8003048399590216, "grad_norm": 2.515625, "learning_rate": 3.4134822698910176e-06, "loss": 0.9966, "step": 9085 }, { "epoch": 1.8005047350141177, "grad_norm": 2.21875, "learning_rate": 3.4124829599733404e-06, "loss": 0.9943, "step": 9086 }, { "epoch": 1.8007046300692138, "grad_norm": 2.078125, "learning_rate": 3.411483720573e-06, "loss": 0.9798, "step": 9087 }, { "epoch": 1.8009045251243099, "grad_norm": 2.078125, "learning_rate": 3.4104845517343837e-06, "loss": 0.8583, "step": 9088 }, { "epoch": 1.801104420179406, "grad_norm": 2.171875, "learning_rate": 3.4094854535018744e-06, "loss": 1.0153, "step": 9089 }, { "epoch": 1.8013043152345019, "grad_norm": 2.109375, "learning_rate": 3.40848642591985e-06, "loss": 0.9333, "step": 9090 }, { "epoch": 1.801504210289598, "grad_norm": 2.1875, "learning_rate": 3.4074874690326897e-06, "loss": 1.0667, "step": 9091 }, { "epoch": 1.801704105344694, "grad_norm": 2.078125, "learning_rate": 3.406488582884766e-06, "loss": 0.87, "step": 9092 }, { "epoch": 1.8019040003997902, "grad_norm": 2.25, "learning_rate": 3.4054897675204485e-06, "loss": 1.0195, "step": 9093 }, { "epoch": 1.802103895454886, "grad_norm": 2.109375, "learning_rate": 3.404491022984105e-06, "loss": 0.9611, "step": 9094 }, { "epoch": 1.8023037905099821, "grad_norm": 2.09375, "learning_rate": 3.403492349320101e-06, "loss": 1.0533, "step": 9095 }, { "epoch": 1.8025036855650782, "grad_norm": 2.25, "learning_rate": 3.402493746572796e-06, "loss": 1.0336, "step": 9096 }, { "epoch": 1.8027035806201743, "grad_norm": 2.171875, "learning_rate": 3.401495214786548e-06, "loss": 0.9208, "step": 9097 }, { "epoch": 1.8029034756752704, "grad_norm": 2.1875, "learning_rate": 3.4004967540057117e-06, "loss": 0.965, "step": 9098 }, { "epoch": 1.8031033707303665, "grad_norm": 2.0625, "learning_rate": 3.3994983642746384e-06, "loss": 0.9449, "step": 9099 }, { "epoch": 1.8033032657854626, "grad_norm": 2.21875, "learning_rate": 3.3985000456376775e-06, "loss": 0.9817, "step": 9100 }, { "epoch": 1.8035031608405587, "grad_norm": 2.15625, "learning_rate": 3.3975017981391735e-06, "loss": 0.9605, "step": 9101 }, { "epoch": 1.8037030558956548, "grad_norm": 2.078125, "learning_rate": 3.396503621823467e-06, "loss": 0.9228, "step": 9102 }, { "epoch": 1.803902950950751, "grad_norm": 2.25, "learning_rate": 3.3955055167349006e-06, "loss": 1.0007, "step": 9103 }, { "epoch": 1.804102846005847, "grad_norm": 2.25, "learning_rate": 3.394507482917806e-06, "loss": 0.9854, "step": 9104 }, { "epoch": 1.8043027410609431, "grad_norm": 2.125, "learning_rate": 3.393509520416519e-06, "loss": 1.02, "step": 9105 }, { "epoch": 1.8045026361160392, "grad_norm": 2.21875, "learning_rate": 3.392511629275367e-06, "loss": 0.8621, "step": 9106 }, { "epoch": 1.804702531171135, "grad_norm": 2.234375, "learning_rate": 3.3915138095386758e-06, "loss": 0.9195, "step": 9107 }, { "epoch": 1.8049024262262312, "grad_norm": 2.234375, "learning_rate": 3.390516061250771e-06, "loss": 0.9501, "step": 9108 }, { "epoch": 1.8051023212813273, "grad_norm": 2.25, "learning_rate": 3.38951838445597e-06, "loss": 0.9412, "step": 9109 }, { "epoch": 1.8053022163364234, "grad_norm": 2.109375, "learning_rate": 3.3885207791985898e-06, "loss": 1.0443, "step": 9110 }, { "epoch": 1.8055021113915195, "grad_norm": 2.15625, "learning_rate": 3.3875232455229455e-06, "loss": 0.9549, "step": 9111 }, { "epoch": 1.8057020064466154, "grad_norm": 2.328125, "learning_rate": 3.3865257834733454e-06, "loss": 0.9883, "step": 9112 }, { "epoch": 1.8059019015017115, "grad_norm": 2.0625, "learning_rate": 3.385528393094098e-06, "loss": 0.9083, "step": 9113 }, { "epoch": 1.8061017965568076, "grad_norm": 2.078125, "learning_rate": 3.3845310744295078e-06, "loss": 0.9786, "step": 9114 }, { "epoch": 1.8063016916119037, "grad_norm": 2.15625, "learning_rate": 3.383533827523876e-06, "loss": 0.9428, "step": 9115 }, { "epoch": 1.8065015866669998, "grad_norm": 2.171875, "learning_rate": 3.3825366524214965e-06, "loss": 0.9264, "step": 9116 }, { "epoch": 1.8067014817220959, "grad_norm": 2.125, "learning_rate": 3.3815395491666676e-06, "loss": 0.9085, "step": 9117 }, { "epoch": 1.806901376777192, "grad_norm": 2.203125, "learning_rate": 3.3805425178036776e-06, "loss": 1.0277, "step": 9118 }, { "epoch": 1.807101271832288, "grad_norm": 2.078125, "learning_rate": 3.379545558376816e-06, "loss": 0.9798, "step": 9119 }, { "epoch": 1.8073011668873842, "grad_norm": 2.1875, "learning_rate": 3.378548670930369e-06, "loss": 0.9551, "step": 9120 }, { "epoch": 1.8075010619424803, "grad_norm": 2.453125, "learning_rate": 3.3775518555086158e-06, "loss": 1.0912, "step": 9121 }, { "epoch": 1.8077009569975764, "grad_norm": 2.046875, "learning_rate": 3.376555112155836e-06, "loss": 0.9048, "step": 9122 }, { "epoch": 1.8079008520526725, "grad_norm": 2.125, "learning_rate": 3.3755584409163058e-06, "loss": 0.937, "step": 9123 }, { "epoch": 1.8081007471077686, "grad_norm": 2.21875, "learning_rate": 3.3745618418342942e-06, "loss": 0.9743, "step": 9124 }, { "epoch": 1.8083006421628645, "grad_norm": 2.03125, "learning_rate": 3.3735653149540737e-06, "loss": 0.9448, "step": 9125 }, { "epoch": 1.8085005372179606, "grad_norm": 2.109375, "learning_rate": 3.372568860319907e-06, "loss": 1.0084, "step": 9126 }, { "epoch": 1.8087004322730567, "grad_norm": 2.125, "learning_rate": 3.3715724779760586e-06, "loss": 0.9859, "step": 9127 }, { "epoch": 1.8089003273281528, "grad_norm": 2.203125, "learning_rate": 3.3705761679667865e-06, "loss": 0.9299, "step": 9128 }, { "epoch": 1.8091002223832486, "grad_norm": 2.109375, "learning_rate": 3.3695799303363463e-06, "loss": 0.9784, "step": 9129 }, { "epoch": 1.8093001174383447, "grad_norm": 2.3125, "learning_rate": 3.3685837651289922e-06, "loss": 1.0141, "step": 9130 }, { "epoch": 1.8095000124934408, "grad_norm": 2.234375, "learning_rate": 3.3675876723889735e-06, "loss": 1.0483, "step": 9131 }, { "epoch": 1.809699907548537, "grad_norm": 2.171875, "learning_rate": 3.3665916521605346e-06, "loss": 0.8994, "step": 9132 }, { "epoch": 1.809899802603633, "grad_norm": 2.09375, "learning_rate": 3.3655957044879207e-06, "loss": 0.9049, "step": 9133 }, { "epoch": 1.8100996976587291, "grad_norm": 2.21875, "learning_rate": 3.364599829415372e-06, "loss": 0.9387, "step": 9134 }, { "epoch": 1.8102995927138252, "grad_norm": 2.1875, "learning_rate": 3.3636040269871227e-06, "loss": 0.9692, "step": 9135 }, { "epoch": 1.8104994877689213, "grad_norm": 2.125, "learning_rate": 3.3626082972474096e-06, "loss": 0.9207, "step": 9136 }, { "epoch": 1.8106993828240174, "grad_norm": 2.078125, "learning_rate": 3.3616126402404594e-06, "loss": 0.9487, "step": 9137 }, { "epoch": 1.8108992778791135, "grad_norm": 2.140625, "learning_rate": 3.360617056010501e-06, "loss": 0.9734, "step": 9138 }, { "epoch": 1.8110991729342096, "grad_norm": 2.125, "learning_rate": 3.3596215446017587e-06, "loss": 1.07, "step": 9139 }, { "epoch": 1.8112990679893057, "grad_norm": 2.1875, "learning_rate": 3.358626106058451e-06, "loss": 1.0255, "step": 9140 }, { "epoch": 1.8114989630444018, "grad_norm": 2.125, "learning_rate": 3.357630740424797e-06, "loss": 0.9221, "step": 9141 }, { "epoch": 1.8116988580994977, "grad_norm": 2.140625, "learning_rate": 3.356635447745011e-06, "loss": 0.9733, "step": 9142 }, { "epoch": 1.8118987531545938, "grad_norm": 2.421875, "learning_rate": 3.3556402280633017e-06, "loss": 1.0517, "step": 9143 }, { "epoch": 1.81209864820969, "grad_norm": 2.1875, "learning_rate": 3.3546450814238786e-06, "loss": 1.0527, "step": 9144 }, { "epoch": 1.812298543264786, "grad_norm": 2.1875, "learning_rate": 3.3536500078709445e-06, "loss": 0.9925, "step": 9145 }, { "epoch": 1.812498438319882, "grad_norm": 2.125, "learning_rate": 3.352655007448703e-06, "loss": 0.9813, "step": 9146 }, { "epoch": 1.812698333374978, "grad_norm": 2.171875, "learning_rate": 3.3516600802013487e-06, "loss": 1.0226, "step": 9147 }, { "epoch": 1.812898228430074, "grad_norm": 2.03125, "learning_rate": 3.350665226173078e-06, "loss": 0.9334, "step": 9148 }, { "epoch": 1.8130981234851702, "grad_norm": 2.203125, "learning_rate": 3.3496704454080807e-06, "loss": 0.9609, "step": 9149 }, { "epoch": 1.8132980185402663, "grad_norm": 2.078125, "learning_rate": 3.3486757379505465e-06, "loss": 0.8925, "step": 9150 }, { "epoch": 1.8134979135953624, "grad_norm": 2.1875, "learning_rate": 3.3476811038446603e-06, "loss": 1.0665, "step": 9151 }, { "epoch": 1.8136978086504585, "grad_norm": 2.375, "learning_rate": 3.3466865431346017e-06, "loss": 0.9094, "step": 9152 }, { "epoch": 1.8138977037055546, "grad_norm": 2.21875, "learning_rate": 3.345692055864551e-06, "loss": 0.9556, "step": 9153 }, { "epoch": 1.8140975987606507, "grad_norm": 2.15625, "learning_rate": 3.3446976420786835e-06, "loss": 0.9073, "step": 9154 }, { "epoch": 1.8142974938157468, "grad_norm": 2.265625, "learning_rate": 3.3437033018211682e-06, "loss": 0.9369, "step": 9155 }, { "epoch": 1.8144973888708429, "grad_norm": 2.1875, "learning_rate": 3.3427090351361767e-06, "loss": 0.9804, "step": 9156 }, { "epoch": 1.814697283925939, "grad_norm": 2.3125, "learning_rate": 3.3417148420678723e-06, "loss": 1.0827, "step": 9157 }, { "epoch": 1.814897178981035, "grad_norm": 2.15625, "learning_rate": 3.3407207226604164e-06, "loss": 0.8971, "step": 9158 }, { "epoch": 1.8150970740361312, "grad_norm": 2.140625, "learning_rate": 3.339726676957971e-06, "loss": 0.9962, "step": 9159 }, { "epoch": 1.815296969091227, "grad_norm": 2.1875, "learning_rate": 3.338732705004688e-06, "loss": 0.9177, "step": 9160 }, { "epoch": 1.8154968641463232, "grad_norm": 2.0625, "learning_rate": 3.3377388068447203e-06, "loss": 1.0041, "step": 9161 }, { "epoch": 1.8156967592014193, "grad_norm": 2.171875, "learning_rate": 3.3367449825222188e-06, "loss": 1.0346, "step": 9162 }, { "epoch": 1.8158966542565154, "grad_norm": 2.28125, "learning_rate": 3.3357512320813258e-06, "loss": 0.9842, "step": 9163 }, { "epoch": 1.8160965493116112, "grad_norm": 2.125, "learning_rate": 3.334757555566186e-06, "loss": 0.9713, "step": 9164 }, { "epoch": 1.8162964443667073, "grad_norm": 2.078125, "learning_rate": 3.333763953020939e-06, "loss": 0.9551, "step": 9165 }, { "epoch": 1.8164963394218034, "grad_norm": 2.15625, "learning_rate": 3.3327704244897176e-06, "loss": 0.9508, "step": 9166 }, { "epoch": 1.8166962344768995, "grad_norm": 2.1875, "learning_rate": 3.331776970016657e-06, "loss": 1.0225, "step": 9167 }, { "epoch": 1.8168961295319956, "grad_norm": 2.203125, "learning_rate": 3.330783589645884e-06, "loss": 0.9797, "step": 9168 }, { "epoch": 1.8170960245870917, "grad_norm": 2.125, "learning_rate": 3.329790283421526e-06, "loss": 0.9614, "step": 9169 }, { "epoch": 1.8172959196421878, "grad_norm": 2.203125, "learning_rate": 3.3287970513877067e-06, "loss": 1.0246, "step": 9170 }, { "epoch": 1.817495814697284, "grad_norm": 2.109375, "learning_rate": 3.3278038935885415e-06, "loss": 0.9702, "step": 9171 }, { "epoch": 1.81769570975238, "grad_norm": 2.1875, "learning_rate": 3.3268108100681494e-06, "loss": 0.9394, "step": 9172 }, { "epoch": 1.8178956048074761, "grad_norm": 2.171875, "learning_rate": 3.325817800870644e-06, "loss": 0.906, "step": 9173 }, { "epoch": 1.8180954998625722, "grad_norm": 2.25, "learning_rate": 3.3248248660401317e-06, "loss": 1.0423, "step": 9174 }, { "epoch": 1.8182953949176683, "grad_norm": 2.0625, "learning_rate": 3.3238320056207208e-06, "loss": 0.8253, "step": 9175 }, { "epoch": 1.8184952899727644, "grad_norm": 2.234375, "learning_rate": 3.322839219656513e-06, "loss": 1.006, "step": 9176 }, { "epoch": 1.8186951850278605, "grad_norm": 2.328125, "learning_rate": 3.321846508191609e-06, "loss": 1.0534, "step": 9177 }, { "epoch": 1.8188950800829564, "grad_norm": 2.09375, "learning_rate": 3.320853871270102e-06, "loss": 0.9099, "step": 9178 }, { "epoch": 1.8190949751380525, "grad_norm": 2.0625, "learning_rate": 3.3198613089360875e-06, "loss": 0.9262, "step": 9179 }, { "epoch": 1.8192948701931486, "grad_norm": 2.25, "learning_rate": 3.318868821233654e-06, "loss": 0.9757, "step": 9180 }, { "epoch": 1.8194947652482447, "grad_norm": 2.203125, "learning_rate": 3.317876408206887e-06, "loss": 1.0132, "step": 9181 }, { "epoch": 1.8196946603033406, "grad_norm": 2.125, "learning_rate": 3.3168840698998722e-06, "loss": 0.905, "step": 9182 }, { "epoch": 1.8198945553584367, "grad_norm": 2.203125, "learning_rate": 3.315891806356686e-06, "loss": 1.0229, "step": 9183 }, { "epoch": 1.8200944504135328, "grad_norm": 2.09375, "learning_rate": 3.3148996176214054e-06, "loss": 0.9095, "step": 9184 }, { "epoch": 1.820294345468629, "grad_norm": 2.109375, "learning_rate": 3.3139075037381053e-06, "loss": 0.9197, "step": 9185 }, { "epoch": 1.820494240523725, "grad_norm": 2.171875, "learning_rate": 3.312915464750852e-06, "loss": 0.9501, "step": 9186 }, { "epoch": 1.820694135578821, "grad_norm": 2.03125, "learning_rate": 3.3119235007037155e-06, "loss": 0.959, "step": 9187 }, { "epoch": 1.8208940306339172, "grad_norm": 2.15625, "learning_rate": 3.3109316116407554e-06, "loss": 0.963, "step": 9188 }, { "epoch": 1.8210939256890133, "grad_norm": 2.125, "learning_rate": 3.309939797606033e-06, "loss": 0.9995, "step": 9189 }, { "epoch": 1.8212938207441094, "grad_norm": 2.125, "learning_rate": 3.308948058643605e-06, "loss": 0.9072, "step": 9190 }, { "epoch": 1.8214937157992055, "grad_norm": 2.25, "learning_rate": 3.3079563947975225e-06, "loss": 0.98, "step": 9191 }, { "epoch": 1.8216936108543016, "grad_norm": 2.171875, "learning_rate": 3.3069648061118366e-06, "loss": 1.0131, "step": 9192 }, { "epoch": 1.8218935059093977, "grad_norm": 2.03125, "learning_rate": 3.3059732926305943e-06, "loss": 0.889, "step": 9193 }, { "epoch": 1.8220934009644938, "grad_norm": 2.1875, "learning_rate": 3.3049818543978363e-06, "loss": 0.9544, "step": 9194 }, { "epoch": 1.8222932960195897, "grad_norm": 2.109375, "learning_rate": 3.3039904914576036e-06, "loss": 0.8702, "step": 9195 }, { "epoch": 1.8224931910746858, "grad_norm": 2.21875, "learning_rate": 3.3029992038539318e-06, "loss": 0.9753, "step": 9196 }, { "epoch": 1.8226930861297819, "grad_norm": 2.125, "learning_rate": 3.302007991630854e-06, "loss": 0.9726, "step": 9197 }, { "epoch": 1.822892981184878, "grad_norm": 2.203125, "learning_rate": 3.3010168548324006e-06, "loss": 1.0036, "step": 9198 }, { "epoch": 1.8230928762399738, "grad_norm": 2.171875, "learning_rate": 3.3000257935025963e-06, "loss": 0.9817, "step": 9199 }, { "epoch": 1.82329277129507, "grad_norm": 2.109375, "learning_rate": 3.299034807685465e-06, "loss": 0.9199, "step": 9200 }, { "epoch": 1.823492666350166, "grad_norm": 2.28125, "learning_rate": 3.2980438974250262e-06, "loss": 1.0242, "step": 9201 }, { "epoch": 1.8236925614052621, "grad_norm": 2.171875, "learning_rate": 3.297053062765295e-06, "loss": 0.9565, "step": 9202 }, { "epoch": 1.8238924564603582, "grad_norm": 2.140625, "learning_rate": 3.2960623037502847e-06, "loss": 0.9995, "step": 9203 }, { "epoch": 1.8240923515154543, "grad_norm": 2.28125, "learning_rate": 3.2950716204240065e-06, "loss": 1.005, "step": 9204 }, { "epoch": 1.8242922465705504, "grad_norm": 2.234375, "learning_rate": 3.2940810128304634e-06, "loss": 0.9792, "step": 9205 }, { "epoch": 1.8244921416256465, "grad_norm": 2.171875, "learning_rate": 3.293090481013661e-06, "loss": 0.9746, "step": 9206 }, { "epoch": 1.8246920366807426, "grad_norm": 2.140625, "learning_rate": 3.2921000250175948e-06, "loss": 1.0629, "step": 9207 }, { "epoch": 1.8248919317358387, "grad_norm": 2.171875, "learning_rate": 3.2911096448862666e-06, "loss": 0.9234, "step": 9208 }, { "epoch": 1.8250918267909348, "grad_norm": 2.296875, "learning_rate": 3.290119340663663e-06, "loss": 1.0217, "step": 9209 }, { "epoch": 1.825291721846031, "grad_norm": 2.15625, "learning_rate": 3.2891291123937764e-06, "loss": 0.9754, "step": 9210 }, { "epoch": 1.825491616901127, "grad_norm": 2.234375, "learning_rate": 3.2881389601205906e-06, "loss": 1.0121, "step": 9211 }, { "epoch": 1.8256915119562231, "grad_norm": 2.0625, "learning_rate": 3.28714888388809e-06, "loss": 0.9866, "step": 9212 }, { "epoch": 1.825891407011319, "grad_norm": 2.09375, "learning_rate": 3.2861588837402534e-06, "loss": 0.97, "step": 9213 }, { "epoch": 1.8260913020664151, "grad_norm": 2.171875, "learning_rate": 3.2851689597210555e-06, "loss": 0.9061, "step": 9214 }, { "epoch": 1.8262911971215112, "grad_norm": 2.21875, "learning_rate": 3.2841791118744704e-06, "loss": 1.002, "step": 9215 }, { "epoch": 1.8264910921766073, "grad_norm": 2.15625, "learning_rate": 3.2831893402444648e-06, "loss": 0.9203, "step": 9216 }, { "epoch": 1.8266909872317032, "grad_norm": 2.3125, "learning_rate": 3.2821996448750054e-06, "loss": 1.0905, "step": 9217 }, { "epoch": 1.8268908822867993, "grad_norm": 2.203125, "learning_rate": 3.2812100258100556e-06, "loss": 1.0289, "step": 9218 }, { "epoch": 1.8270907773418954, "grad_norm": 2.328125, "learning_rate": 3.280220483093571e-06, "loss": 0.9328, "step": 9219 }, { "epoch": 1.8272906723969915, "grad_norm": 2.15625, "learning_rate": 3.2792310167695097e-06, "loss": 0.9384, "step": 9220 }, { "epoch": 1.8274905674520876, "grad_norm": 2.359375, "learning_rate": 3.278241626881823e-06, "loss": 0.9116, "step": 9221 }, { "epoch": 1.8276904625071837, "grad_norm": 2.25, "learning_rate": 3.2772523134744592e-06, "loss": 0.918, "step": 9222 }, { "epoch": 1.8278903575622798, "grad_norm": 2.1875, "learning_rate": 3.2762630765913626e-06, "loss": 0.9216, "step": 9223 }, { "epoch": 1.828090252617376, "grad_norm": 2.171875, "learning_rate": 3.275273916276478e-06, "loss": 1.0595, "step": 9224 }, { "epoch": 1.828290147672472, "grad_norm": 2.234375, "learning_rate": 3.274284832573741e-06, "loss": 1.0279, "step": 9225 }, { "epoch": 1.828490042727568, "grad_norm": 2.140625, "learning_rate": 3.273295825527088e-06, "loss": 0.9786, "step": 9226 }, { "epoch": 1.8286899377826642, "grad_norm": 2.15625, "learning_rate": 3.2723068951804486e-06, "loss": 1.0637, "step": 9227 }, { "epoch": 1.8288898328377603, "grad_norm": 2.15625, "learning_rate": 3.2713180415777536e-06, "loss": 1.0626, "step": 9228 }, { "epoch": 1.8290897278928564, "grad_norm": 2.125, "learning_rate": 3.270329264762926e-06, "loss": 0.9125, "step": 9229 }, { "epoch": 1.8292896229479523, "grad_norm": 2.203125, "learning_rate": 3.2693405647798878e-06, "loss": 1.0265, "step": 9230 }, { "epoch": 1.8294895180030484, "grad_norm": 2.140625, "learning_rate": 3.2683519416725564e-06, "loss": 0.9802, "step": 9231 }, { "epoch": 1.8296894130581445, "grad_norm": 2.1875, "learning_rate": 3.267363395484848e-06, "loss": 0.9942, "step": 9232 }, { "epoch": 1.8298893081132406, "grad_norm": 2.03125, "learning_rate": 3.2663749262606715e-06, "loss": 0.965, "step": 9233 }, { "epoch": 1.8300892031683367, "grad_norm": 2.296875, "learning_rate": 3.2653865340439366e-06, "loss": 0.9769, "step": 9234 }, { "epoch": 1.8302890982234326, "grad_norm": 2.0625, "learning_rate": 3.2643982188785457e-06, "loss": 0.9969, "step": 9235 }, { "epoch": 1.8304889932785287, "grad_norm": 2.171875, "learning_rate": 3.2634099808084004e-06, "loss": 1.0316, "step": 9236 }, { "epoch": 1.8306888883336248, "grad_norm": 2.125, "learning_rate": 3.2624218198773994e-06, "loss": 0.9977, "step": 9237 }, { "epoch": 1.8308887833887209, "grad_norm": 2.40625, "learning_rate": 3.2614337361294345e-06, "loss": 0.9797, "step": 9238 }, { "epoch": 1.831088678443817, "grad_norm": 2.125, "learning_rate": 3.260445729608399e-06, "loss": 0.95, "step": 9239 }, { "epoch": 1.831288573498913, "grad_norm": 2.28125, "learning_rate": 3.259457800358177e-06, "loss": 1.0814, "step": 9240 }, { "epoch": 1.8314884685540092, "grad_norm": 2.25, "learning_rate": 3.2584699484226547e-06, "loss": 0.9625, "step": 9241 }, { "epoch": 1.8316883636091053, "grad_norm": 2.21875, "learning_rate": 3.2574821738457096e-06, "loss": 0.94, "step": 9242 }, { "epoch": 1.8318882586642014, "grad_norm": 2.015625, "learning_rate": 3.25649447667122e-06, "loss": 0.911, "step": 9243 }, { "epoch": 1.8320881537192975, "grad_norm": 2.15625, "learning_rate": 3.2555068569430614e-06, "loss": 0.9125, "step": 9244 }, { "epoch": 1.8322880487743936, "grad_norm": 2.375, "learning_rate": 3.2545193147051015e-06, "loss": 1.056, "step": 9245 }, { "epoch": 1.8324879438294897, "grad_norm": 2.28125, "learning_rate": 3.253531850001207e-06, "loss": 1.0154, "step": 9246 }, { "epoch": 1.8326878388845858, "grad_norm": 2.15625, "learning_rate": 3.25254446287524e-06, "loss": 1.0136, "step": 9247 }, { "epoch": 1.8328877339396816, "grad_norm": 2.234375, "learning_rate": 3.251557153371062e-06, "loss": 0.9848, "step": 9248 }, { "epoch": 1.8330876289947777, "grad_norm": 2.125, "learning_rate": 3.250569921532529e-06, "loss": 0.9669, "step": 9249 }, { "epoch": 1.8332875240498738, "grad_norm": 2.15625, "learning_rate": 3.249582767403493e-06, "loss": 1.0233, "step": 9250 }, { "epoch": 1.83348741910497, "grad_norm": 2.265625, "learning_rate": 3.2485956910278033e-06, "loss": 1.081, "step": 9251 }, { "epoch": 1.8336873141600658, "grad_norm": 2.265625, "learning_rate": 3.2476086924493067e-06, "loss": 1.0206, "step": 9252 }, { "epoch": 1.833887209215162, "grad_norm": 2.078125, "learning_rate": 3.2466217717118442e-06, "loss": 0.9491, "step": 9253 }, { "epoch": 1.834087104270258, "grad_norm": 2.15625, "learning_rate": 3.2456349288592547e-06, "loss": 0.947, "step": 9254 }, { "epoch": 1.834286999325354, "grad_norm": 2.171875, "learning_rate": 3.2446481639353757e-06, "loss": 0.9895, "step": 9255 }, { "epoch": 1.8344868943804502, "grad_norm": 2.125, "learning_rate": 3.2436614769840367e-06, "loss": 0.9144, "step": 9256 }, { "epoch": 1.8346867894355463, "grad_norm": 2.21875, "learning_rate": 3.2426748680490684e-06, "loss": 1.0338, "step": 9257 }, { "epoch": 1.8348866844906424, "grad_norm": 2.203125, "learning_rate": 3.2416883371742937e-06, "loss": 0.9259, "step": 9258 }, { "epoch": 1.8350865795457385, "grad_norm": 2.0625, "learning_rate": 3.2407018844035348e-06, "loss": 0.8412, "step": 9259 }, { "epoch": 1.8352864746008346, "grad_norm": 2.21875, "learning_rate": 3.239715509780612e-06, "loss": 0.964, "step": 9260 }, { "epoch": 1.8354863696559307, "grad_norm": 2.328125, "learning_rate": 3.238729213349337e-06, "loss": 1.0716, "step": 9261 }, { "epoch": 1.8356862647110268, "grad_norm": 2.1875, "learning_rate": 3.2377429951535223e-06, "loss": 1.0144, "step": 9262 }, { "epoch": 1.835886159766123, "grad_norm": 2.109375, "learning_rate": 3.2367568552369765e-06, "loss": 0.9675, "step": 9263 }, { "epoch": 1.836086054821219, "grad_norm": 2.109375, "learning_rate": 3.2357707936435013e-06, "loss": 0.9698, "step": 9264 }, { "epoch": 1.8362859498763149, "grad_norm": 2.125, "learning_rate": 3.2347848104169012e-06, "loss": 0.9298, "step": 9265 }, { "epoch": 1.836485844931411, "grad_norm": 2.140625, "learning_rate": 3.233798905600969e-06, "loss": 0.8792, "step": 9266 }, { "epoch": 1.836685739986507, "grad_norm": 2.15625, "learning_rate": 3.232813079239502e-06, "loss": 0.9292, "step": 9267 }, { "epoch": 1.8368856350416032, "grad_norm": 2.171875, "learning_rate": 3.231827331376289e-06, "loss": 0.8959, "step": 9268 }, { "epoch": 1.8370855300966993, "grad_norm": 2.15625, "learning_rate": 3.2308416620551175e-06, "loss": 1.0066, "step": 9269 }, { "epoch": 1.8372854251517952, "grad_norm": 2.125, "learning_rate": 3.2298560713197712e-06, "loss": 1.0907, "step": 9270 }, { "epoch": 1.8374853202068913, "grad_norm": 2.171875, "learning_rate": 3.228870559214028e-06, "loss": 1.0026, "step": 9271 }, { "epoch": 1.8376852152619874, "grad_norm": 2.125, "learning_rate": 3.2278851257816655e-06, "loss": 0.9451, "step": 9272 }, { "epoch": 1.8378851103170835, "grad_norm": 2.15625, "learning_rate": 3.226899771066456e-06, "loss": 0.9575, "step": 9273 }, { "epoch": 1.8380850053721796, "grad_norm": 2.203125, "learning_rate": 3.2259144951121697e-06, "loss": 0.9289, "step": 9274 }, { "epoch": 1.8382849004272757, "grad_norm": 2.1875, "learning_rate": 3.224929297962572e-06, "loss": 0.9889, "step": 9275 }, { "epoch": 1.8384847954823718, "grad_norm": 2.1875, "learning_rate": 3.2239441796614256e-06, "loss": 1.069, "step": 9276 }, { "epoch": 1.8386846905374679, "grad_norm": 2.296875, "learning_rate": 3.2229591402524894e-06, "loss": 1.095, "step": 9277 }, { "epoch": 1.838884585592564, "grad_norm": 2.21875, "learning_rate": 3.2219741797795175e-06, "loss": 1.061, "step": 9278 }, { "epoch": 1.83908448064766, "grad_norm": 2.203125, "learning_rate": 3.220989298286262e-06, "loss": 0.9144, "step": 9279 }, { "epoch": 1.8392843757027562, "grad_norm": 2.203125, "learning_rate": 3.220004495816474e-06, "loss": 0.9368, "step": 9280 }, { "epoch": 1.8394842707578523, "grad_norm": 2.1875, "learning_rate": 3.2190197724138943e-06, "loss": 0.9001, "step": 9281 }, { "epoch": 1.8396841658129484, "grad_norm": 2.265625, "learning_rate": 3.2180351281222665e-06, "loss": 1.0191, "step": 9282 }, { "epoch": 1.8398840608680442, "grad_norm": 2.109375, "learning_rate": 3.217050562985329e-06, "loss": 0.9633, "step": 9283 }, { "epoch": 1.8400839559231403, "grad_norm": 2.140625, "learning_rate": 3.216066077046814e-06, "loss": 1.0508, "step": 9284 }, { "epoch": 1.8402838509782364, "grad_norm": 2.1875, "learning_rate": 3.215081670350454e-06, "loss": 1.0296, "step": 9285 }, { "epoch": 1.8404837460333325, "grad_norm": 2.25, "learning_rate": 3.2140973429399747e-06, "loss": 1.0299, "step": 9286 }, { "epoch": 1.8406836410884284, "grad_norm": 2.1875, "learning_rate": 3.213113094859101e-06, "loss": 0.9849, "step": 9287 }, { "epoch": 1.8408835361435245, "grad_norm": 2.171875, "learning_rate": 3.2121289261515535e-06, "loss": 0.9708, "step": 9288 }, { "epoch": 1.8410834311986206, "grad_norm": 2.140625, "learning_rate": 3.2111448368610476e-06, "loss": 0.9623, "step": 9289 }, { "epoch": 1.8412833262537167, "grad_norm": 2.34375, "learning_rate": 3.2101608270312963e-06, "loss": 1.0313, "step": 9290 }, { "epoch": 1.8414832213088128, "grad_norm": 2.1875, "learning_rate": 3.209176896706011e-06, "loss": 0.9675, "step": 9291 }, { "epoch": 1.841683116363909, "grad_norm": 2.125, "learning_rate": 3.2081930459288963e-06, "loss": 1.0305, "step": 9292 }, { "epoch": 1.841883011419005, "grad_norm": 2.0625, "learning_rate": 3.2072092747436546e-06, "loss": 0.9266, "step": 9293 }, { "epoch": 1.8420829064741011, "grad_norm": 2.15625, "learning_rate": 3.2062255831939863e-06, "loss": 0.9272, "step": 9294 }, { "epoch": 1.8422828015291972, "grad_norm": 2.3125, "learning_rate": 3.2052419713235854e-06, "loss": 0.9379, "step": 9295 }, { "epoch": 1.8424826965842933, "grad_norm": 2.21875, "learning_rate": 3.2042584391761457e-06, "loss": 1.0046, "step": 9296 }, { "epoch": 1.8426825916393894, "grad_norm": 2.25, "learning_rate": 3.203274986795353e-06, "loss": 0.9611, "step": 9297 }, { "epoch": 1.8428824866944855, "grad_norm": 2.1875, "learning_rate": 3.2022916142248933e-06, "loss": 1.0321, "step": 9298 }, { "epoch": 1.8430823817495816, "grad_norm": 2.15625, "learning_rate": 3.2013083215084495e-06, "loss": 0.9563, "step": 9299 }, { "epoch": 1.8432822768046777, "grad_norm": 2.25, "learning_rate": 3.2003251086896963e-06, "loss": 1.0356, "step": 9300 }, { "epoch": 1.8434821718597736, "grad_norm": 2.21875, "learning_rate": 3.199341975812312e-06, "loss": 1.0392, "step": 9301 }, { "epoch": 1.8436820669148697, "grad_norm": 2.203125, "learning_rate": 3.198358922919963e-06, "loss": 1.0086, "step": 9302 }, { "epoch": 1.8438819619699658, "grad_norm": 2.125, "learning_rate": 3.197375950056319e-06, "loss": 0.9148, "step": 9303 }, { "epoch": 1.8440818570250619, "grad_norm": 2.0625, "learning_rate": 3.1963930572650414e-06, "loss": 0.8526, "step": 9304 }, { "epoch": 1.8442817520801578, "grad_norm": 2.171875, "learning_rate": 3.1954102445897923e-06, "loss": 0.9934, "step": 9305 }, { "epoch": 1.8444816471352539, "grad_norm": 2.1875, "learning_rate": 3.194427512074228e-06, "loss": 0.9847, "step": 9306 }, { "epoch": 1.84468154219035, "grad_norm": 2.109375, "learning_rate": 3.1934448597619997e-06, "loss": 0.9596, "step": 9307 }, { "epoch": 1.844881437245446, "grad_norm": 2.3125, "learning_rate": 3.192462287696759e-06, "loss": 1.0517, "step": 9308 }, { "epoch": 1.8450813323005422, "grad_norm": 2.203125, "learning_rate": 3.1914797959221495e-06, "loss": 0.9667, "step": 9309 }, { "epoch": 1.8452812273556383, "grad_norm": 2.15625, "learning_rate": 3.1904973844818144e-06, "loss": 0.9874, "step": 9310 }, { "epoch": 1.8454811224107344, "grad_norm": 2.171875, "learning_rate": 3.189515053419393e-06, "loss": 0.9769, "step": 9311 }, { "epoch": 1.8456810174658305, "grad_norm": 2.25, "learning_rate": 3.188532802778518e-06, "loss": 1.0035, "step": 9312 }, { "epoch": 1.8458809125209266, "grad_norm": 2.046875, "learning_rate": 3.1875506326028226e-06, "loss": 0.9881, "step": 9313 }, { "epoch": 1.8460808075760227, "grad_norm": 2.296875, "learning_rate": 3.186568542935936e-06, "loss": 1.0568, "step": 9314 }, { "epoch": 1.8462807026311188, "grad_norm": 2.15625, "learning_rate": 3.1855865338214803e-06, "loss": 0.914, "step": 9315 }, { "epoch": 1.8464805976862149, "grad_norm": 2.125, "learning_rate": 3.1846046053030778e-06, "loss": 0.9923, "step": 9316 }, { "epoch": 1.846680492741311, "grad_norm": 2.203125, "learning_rate": 3.1836227574243434e-06, "loss": 0.9697, "step": 9317 }, { "epoch": 1.8468803877964068, "grad_norm": 2.203125, "learning_rate": 3.1826409902288925e-06, "loss": 1.0062, "step": 9318 }, { "epoch": 1.847080282851503, "grad_norm": 2.15625, "learning_rate": 3.1816593037603356e-06, "loss": 0.9864, "step": 9319 }, { "epoch": 1.847280177906599, "grad_norm": 2.171875, "learning_rate": 3.1806776980622773e-06, "loss": 0.9804, "step": 9320 }, { "epoch": 1.8474800729616951, "grad_norm": 2.140625, "learning_rate": 3.179696173178321e-06, "loss": 0.985, "step": 9321 }, { "epoch": 1.847679968016791, "grad_norm": 2.109375, "learning_rate": 3.1787147291520675e-06, "loss": 0.9131, "step": 9322 }, { "epoch": 1.8478798630718871, "grad_norm": 2.1875, "learning_rate": 3.1777333660271103e-06, "loss": 0.8775, "step": 9323 }, { "epoch": 1.8480797581269832, "grad_norm": 2.125, "learning_rate": 3.1767520838470433e-06, "loss": 0.9141, "step": 9324 }, { "epoch": 1.8482796531820793, "grad_norm": 2.078125, "learning_rate": 3.175770882655453e-06, "loss": 0.9623, "step": 9325 }, { "epoch": 1.8484795482371754, "grad_norm": 2.171875, "learning_rate": 3.174789762495925e-06, "loss": 0.9542, "step": 9326 }, { "epoch": 1.8486794432922715, "grad_norm": 2.25, "learning_rate": 3.1738087234120426e-06, "loss": 0.9142, "step": 9327 }, { "epoch": 1.8488793383473676, "grad_norm": 2.21875, "learning_rate": 3.1728277654473793e-06, "loss": 0.9389, "step": 9328 }, { "epoch": 1.8490792334024637, "grad_norm": 2.234375, "learning_rate": 3.171846888645513e-06, "loss": 0.9108, "step": 9329 }, { "epoch": 1.8492791284575598, "grad_norm": 2.140625, "learning_rate": 3.1708660930500124e-06, "loss": 0.942, "step": 9330 }, { "epoch": 1.849479023512656, "grad_norm": 2.09375, "learning_rate": 3.1698853787044447e-06, "loss": 0.9554, "step": 9331 }, { "epoch": 1.849678918567752, "grad_norm": 2.21875, "learning_rate": 3.1689047456523746e-06, "loss": 0.9705, "step": 9332 }, { "epoch": 1.8498788136228481, "grad_norm": 2.1875, "learning_rate": 3.1679241939373578e-06, "loss": 1.073, "step": 9333 }, { "epoch": 1.8500787086779442, "grad_norm": 2.140625, "learning_rate": 3.1669437236029536e-06, "loss": 0.9328, "step": 9334 }, { "epoch": 1.8502786037330403, "grad_norm": 2.09375, "learning_rate": 3.1659633346927133e-06, "loss": 0.983, "step": 9335 }, { "epoch": 1.8504784987881362, "grad_norm": 2.140625, "learning_rate": 3.1649830272501857e-06, "loss": 0.9917, "step": 9336 }, { "epoch": 1.8506783938432323, "grad_norm": 2.21875, "learning_rate": 3.1640028013189155e-06, "loss": 1.005, "step": 9337 }, { "epoch": 1.8508782888983284, "grad_norm": 2.328125, "learning_rate": 3.163022656942445e-06, "loss": 0.9806, "step": 9338 }, { "epoch": 1.8510781839534245, "grad_norm": 2.234375, "learning_rate": 3.162042594164313e-06, "loss": 0.9685, "step": 9339 }, { "epoch": 1.8512780790085204, "grad_norm": 2.21875, "learning_rate": 3.1610626130280507e-06, "loss": 0.9463, "step": 9340 }, { "epoch": 1.8514779740636165, "grad_norm": 2.0625, "learning_rate": 3.1600827135771914e-06, "loss": 0.9251, "step": 9341 }, { "epoch": 1.8516778691187126, "grad_norm": 2.25, "learning_rate": 3.1591028958552627e-06, "loss": 1.0171, "step": 9342 }, { "epoch": 1.8518777641738087, "grad_norm": 2.140625, "learning_rate": 3.158123159905785e-06, "loss": 0.8933, "step": 9343 }, { "epoch": 1.8520776592289048, "grad_norm": 2.34375, "learning_rate": 3.1571435057722806e-06, "loss": 0.8957, "step": 9344 }, { "epoch": 1.8522775542840009, "grad_norm": 2.234375, "learning_rate": 3.1561639334982653e-06, "loss": 0.9443, "step": 9345 }, { "epoch": 1.852477449339097, "grad_norm": 2.078125, "learning_rate": 3.1551844431272505e-06, "loss": 0.9494, "step": 9346 }, { "epoch": 1.852677344394193, "grad_norm": 2.078125, "learning_rate": 3.1542050347027465e-06, "loss": 0.922, "step": 9347 }, { "epoch": 1.8528772394492892, "grad_norm": 2.015625, "learning_rate": 3.1532257082682573e-06, "loss": 0.8955, "step": 9348 }, { "epoch": 1.8530771345043853, "grad_norm": 2.203125, "learning_rate": 3.152246463867284e-06, "loss": 1.1093, "step": 9349 }, { "epoch": 1.8532770295594814, "grad_norm": 2.078125, "learning_rate": 3.1512673015433272e-06, "loss": 0.9333, "step": 9350 }, { "epoch": 1.8534769246145775, "grad_norm": 2.171875, "learning_rate": 3.1502882213398776e-06, "loss": 0.9682, "step": 9351 }, { "epoch": 1.8536768196696736, "grad_norm": 2.140625, "learning_rate": 3.149309223300428e-06, "loss": 0.9936, "step": 9352 }, { "epoch": 1.8538767147247694, "grad_norm": 2.140625, "learning_rate": 3.1483303074684663e-06, "loss": 0.9197, "step": 9353 }, { "epoch": 1.8540766097798655, "grad_norm": 2.1875, "learning_rate": 3.1473514738874734e-06, "loss": 1.0184, "step": 9354 }, { "epoch": 1.8542765048349616, "grad_norm": 2.28125, "learning_rate": 3.146372722600931e-06, "loss": 0.9506, "step": 9355 }, { "epoch": 1.8544763998900577, "grad_norm": 2.203125, "learning_rate": 3.1453940536523135e-06, "loss": 0.9858, "step": 9356 }, { "epoch": 1.8546762949451538, "grad_norm": 2.0625, "learning_rate": 3.144415467085094e-06, "loss": 0.998, "step": 9357 }, { "epoch": 1.8548761900002497, "grad_norm": 2.109375, "learning_rate": 3.1434369629427425e-06, "loss": 0.898, "step": 9358 }, { "epoch": 1.8550760850553458, "grad_norm": 2.171875, "learning_rate": 3.1424585412687215e-06, "loss": 1.0378, "step": 9359 }, { "epoch": 1.855275980110442, "grad_norm": 2.1875, "learning_rate": 3.141480202106494e-06, "loss": 0.9777, "step": 9360 }, { "epoch": 1.855475875165538, "grad_norm": 2.109375, "learning_rate": 3.1405019454995178e-06, "loss": 0.9472, "step": 9361 }, { "epoch": 1.8556757702206341, "grad_norm": 2.203125, "learning_rate": 3.139523771491246e-06, "loss": 0.9036, "step": 9362 }, { "epoch": 1.8558756652757302, "grad_norm": 2.28125, "learning_rate": 3.138545680125132e-06, "loss": 1.049, "step": 9363 }, { "epoch": 1.8560755603308263, "grad_norm": 2.09375, "learning_rate": 3.1375676714446173e-06, "loss": 0.9575, "step": 9364 }, { "epoch": 1.8562754553859224, "grad_norm": 2.78125, "learning_rate": 3.1365897454931495e-06, "loss": 0.9965, "step": 9365 }, { "epoch": 1.8564753504410185, "grad_norm": 2.171875, "learning_rate": 3.1356119023141644e-06, "loss": 0.9977, "step": 9366 }, { "epoch": 1.8566752454961146, "grad_norm": 2.0625, "learning_rate": 3.1346341419511008e-06, "loss": 0.9958, "step": 9367 }, { "epoch": 1.8568751405512107, "grad_norm": 2.15625, "learning_rate": 3.1336564644473886e-06, "loss": 1.0637, "step": 9368 }, { "epoch": 1.8570750356063068, "grad_norm": 2.0625, "learning_rate": 3.1326788698464565e-06, "loss": 0.9981, "step": 9369 }, { "epoch": 1.857274930661403, "grad_norm": 2.21875, "learning_rate": 3.131701358191731e-06, "loss": 0.9778, "step": 9370 }, { "epoch": 1.8574748257164988, "grad_norm": 2.1875, "learning_rate": 3.1307239295266303e-06, "loss": 0.9657, "step": 9371 }, { "epoch": 1.857674720771595, "grad_norm": 2.078125, "learning_rate": 3.129746583894573e-06, "loss": 1.0535, "step": 9372 }, { "epoch": 1.857874615826691, "grad_norm": 2.203125, "learning_rate": 3.128769321338974e-06, "loss": 0.9256, "step": 9373 }, { "epoch": 1.858074510881787, "grad_norm": 2.140625, "learning_rate": 3.127792141903241e-06, "loss": 0.9787, "step": 9374 }, { "epoch": 1.858274405936883, "grad_norm": 2.0625, "learning_rate": 3.1268150456307817e-06, "loss": 0.9957, "step": 9375 }, { "epoch": 1.858474300991979, "grad_norm": 2.234375, "learning_rate": 3.1258380325649973e-06, "loss": 1.0009, "step": 9376 }, { "epoch": 1.8586741960470752, "grad_norm": 2.125, "learning_rate": 3.124861102749287e-06, "loss": 0.9454, "step": 9377 }, { "epoch": 1.8588740911021713, "grad_norm": 2.1875, "learning_rate": 3.123884256227047e-06, "loss": 0.8927, "step": 9378 }, { "epoch": 1.8590739861572674, "grad_norm": 2.125, "learning_rate": 3.1229074930416674e-06, "loss": 0.963, "step": 9379 }, { "epoch": 1.8592738812123635, "grad_norm": 2.109375, "learning_rate": 3.1219308132365365e-06, "loss": 0.969, "step": 9380 }, { "epoch": 1.8594737762674596, "grad_norm": 2.21875, "learning_rate": 3.1209542168550393e-06, "loss": 0.9602, "step": 9381 }, { "epoch": 1.8596736713225557, "grad_norm": 2.109375, "learning_rate": 3.119977703940554e-06, "loss": 1.013, "step": 9382 }, { "epoch": 1.8598735663776518, "grad_norm": 2.1875, "learning_rate": 3.1190012745364584e-06, "loss": 1.064, "step": 9383 }, { "epoch": 1.8600734614327479, "grad_norm": 2.09375, "learning_rate": 3.118024928686126e-06, "loss": 0.9555, "step": 9384 }, { "epoch": 1.860273356487844, "grad_norm": 2.109375, "learning_rate": 3.1170486664329246e-06, "loss": 1.0132, "step": 9385 }, { "epoch": 1.86047325154294, "grad_norm": 2.0625, "learning_rate": 3.1160724878202213e-06, "loss": 1.0332, "step": 9386 }, { "epoch": 1.8606731465980362, "grad_norm": 2.125, "learning_rate": 3.1150963928913756e-06, "loss": 1.0393, "step": 9387 }, { "epoch": 1.860873041653132, "grad_norm": 2.125, "learning_rate": 3.114120381689747e-06, "loss": 0.9029, "step": 9388 }, { "epoch": 1.8610729367082282, "grad_norm": 2.0625, "learning_rate": 3.1131444542586906e-06, "loss": 0.9409, "step": 9389 }, { "epoch": 1.8612728317633243, "grad_norm": 2.25, "learning_rate": 3.1121686106415553e-06, "loss": 0.9847, "step": 9390 }, { "epoch": 1.8614727268184204, "grad_norm": 2.125, "learning_rate": 3.1111928508816885e-06, "loss": 0.9834, "step": 9391 }, { "epoch": 1.8616726218735165, "grad_norm": 2.1875, "learning_rate": 3.110217175022434e-06, "loss": 0.9899, "step": 9392 }, { "epoch": 1.8618725169286123, "grad_norm": 2.140625, "learning_rate": 3.109241583107131e-06, "loss": 0.9833, "step": 9393 }, { "epoch": 1.8620724119837084, "grad_norm": 2.078125, "learning_rate": 3.108266075179116e-06, "loss": 0.9947, "step": 9394 }, { "epoch": 1.8622723070388045, "grad_norm": 2.15625, "learning_rate": 3.107290651281718e-06, "loss": 0.8924, "step": 9395 }, { "epoch": 1.8624722020939006, "grad_norm": 2.21875, "learning_rate": 3.1063153114582677e-06, "loss": 0.9985, "step": 9396 }, { "epoch": 1.8626720971489967, "grad_norm": 2.109375, "learning_rate": 3.105340055752089e-06, "loss": 0.9268, "step": 9397 }, { "epoch": 1.8628719922040928, "grad_norm": 2.234375, "learning_rate": 3.1043648842065032e-06, "loss": 1.05, "step": 9398 }, { "epoch": 1.863071887259189, "grad_norm": 2.203125, "learning_rate": 3.1033897968648258e-06, "loss": 0.9761, "step": 9399 }, { "epoch": 1.863271782314285, "grad_norm": 2.171875, "learning_rate": 3.102414793770371e-06, "loss": 0.9489, "step": 9400 }, { "epoch": 1.8634716773693811, "grad_norm": 2.140625, "learning_rate": 3.1014398749664494e-06, "loss": 0.9223, "step": 9401 }, { "epoch": 1.8636715724244772, "grad_norm": 2.078125, "learning_rate": 3.1004650404963643e-06, "loss": 0.9064, "step": 9402 }, { "epoch": 1.8638714674795733, "grad_norm": 2.140625, "learning_rate": 3.0994902904034196e-06, "loss": 0.9444, "step": 9403 }, { "epoch": 1.8640713625346694, "grad_norm": 2.046875, "learning_rate": 3.0985156247309145e-06, "loss": 0.8982, "step": 9404 }, { "epoch": 1.8642712575897655, "grad_norm": 2.125, "learning_rate": 3.0975410435221415e-06, "loss": 0.9376, "step": 9405 }, { "epoch": 1.8644711526448614, "grad_norm": 2.1875, "learning_rate": 3.0965665468203926e-06, "loss": 1.0606, "step": 9406 }, { "epoch": 1.8646710476999575, "grad_norm": 2.21875, "learning_rate": 3.0955921346689533e-06, "loss": 0.9864, "step": 9407 }, { "epoch": 1.8648709427550536, "grad_norm": 2.234375, "learning_rate": 3.094617807111109e-06, "loss": 0.9607, "step": 9408 }, { "epoch": 1.8650708378101497, "grad_norm": 2.109375, "learning_rate": 3.093643564190138e-06, "loss": 0.9449, "step": 9409 }, { "epoch": 1.8652707328652456, "grad_norm": 2.171875, "learning_rate": 3.0926694059493155e-06, "loss": 0.9976, "step": 9410 }, { "epoch": 1.8654706279203417, "grad_norm": 2.140625, "learning_rate": 3.0916953324319144e-06, "loss": 0.9687, "step": 9411 }, { "epoch": 1.8656705229754378, "grad_norm": 2.0625, "learning_rate": 3.0907213436812037e-06, "loss": 0.9989, "step": 9412 }, { "epoch": 1.8658704180305339, "grad_norm": 2.265625, "learning_rate": 3.0897474397404466e-06, "loss": 0.9618, "step": 9413 }, { "epoch": 1.86607031308563, "grad_norm": 2.171875, "learning_rate": 3.0887736206529053e-06, "loss": 1.0123, "step": 9414 }, { "epoch": 1.866270208140726, "grad_norm": 2.171875, "learning_rate": 3.0877998864618334e-06, "loss": 1.0452, "step": 9415 }, { "epoch": 1.8664701031958222, "grad_norm": 2.09375, "learning_rate": 3.0868262372104873e-06, "loss": 0.9167, "step": 9416 }, { "epoch": 1.8666699982509183, "grad_norm": 2.140625, "learning_rate": 3.085852672942116e-06, "loss": 1.0233, "step": 9417 }, { "epoch": 1.8668698933060144, "grad_norm": 2.28125, "learning_rate": 3.084879193699963e-06, "loss": 1.0442, "step": 9418 }, { "epoch": 1.8670697883611105, "grad_norm": 2.15625, "learning_rate": 3.083905799527273e-06, "loss": 1.0295, "step": 9419 }, { "epoch": 1.8672696834162066, "grad_norm": 2.25, "learning_rate": 3.082932490467283e-06, "loss": 0.9886, "step": 9420 }, { "epoch": 1.8674695784713027, "grad_norm": 2.125, "learning_rate": 3.0819592665632262e-06, "loss": 0.9874, "step": 9421 }, { "epoch": 1.8676694735263988, "grad_norm": 2.171875, "learning_rate": 3.080986127858334e-06, "loss": 0.991, "step": 9422 }, { "epoch": 1.8678693685814947, "grad_norm": 2.125, "learning_rate": 3.080013074395834e-06, "loss": 0.9268, "step": 9423 }, { "epoch": 1.8680692636365908, "grad_norm": 2.140625, "learning_rate": 3.0790401062189488e-06, "loss": 0.8571, "step": 9424 }, { "epoch": 1.8682691586916869, "grad_norm": 2.125, "learning_rate": 3.0780672233708954e-06, "loss": 1.0306, "step": 9425 }, { "epoch": 1.868469053746783, "grad_norm": 2.140625, "learning_rate": 3.0770944258948913e-06, "loss": 0.9527, "step": 9426 }, { "epoch": 1.868668948801879, "grad_norm": 2.046875, "learning_rate": 3.076121713834147e-06, "loss": 0.8627, "step": 9427 }, { "epoch": 1.868868843856975, "grad_norm": 2.109375, "learning_rate": 3.07514908723187e-06, "loss": 0.918, "step": 9428 }, { "epoch": 1.869068738912071, "grad_norm": 2.0625, "learning_rate": 3.074176546131266e-06, "loss": 0.9442, "step": 9429 }, { "epoch": 1.8692686339671671, "grad_norm": 2.109375, "learning_rate": 3.0732040905755333e-06, "loss": 1.0042, "step": 9430 }, { "epoch": 1.8694685290222632, "grad_norm": 2.15625, "learning_rate": 3.0722317206078692e-06, "loss": 1.0081, "step": 9431 }, { "epoch": 1.8696684240773593, "grad_norm": 2.21875, "learning_rate": 3.0712594362714677e-06, "loss": 0.9252, "step": 9432 }, { "epoch": 1.8698683191324554, "grad_norm": 2.25, "learning_rate": 3.0702872376095146e-06, "loss": 0.8877, "step": 9433 }, { "epoch": 1.8700682141875515, "grad_norm": 2.125, "learning_rate": 3.069315124665196e-06, "loss": 0.9872, "step": 9434 }, { "epoch": 1.8702681092426476, "grad_norm": 2.140625, "learning_rate": 3.068343097481694e-06, "loss": 0.9615, "step": 9435 }, { "epoch": 1.8704680042977437, "grad_norm": 2.203125, "learning_rate": 3.0673711561021857e-06, "loss": 1.0107, "step": 9436 }, { "epoch": 1.8706678993528398, "grad_norm": 2.0, "learning_rate": 3.0663993005698444e-06, "loss": 0.9041, "step": 9437 }, { "epoch": 1.870867794407936, "grad_norm": 2.25, "learning_rate": 3.0654275309278382e-06, "loss": 1.033, "step": 9438 }, { "epoch": 1.871067689463032, "grad_norm": 2.171875, "learning_rate": 3.0644558472193355e-06, "loss": 0.9913, "step": 9439 }, { "epoch": 1.8712675845181281, "grad_norm": 2.0625, "learning_rate": 3.0634842494874974e-06, "loss": 0.9967, "step": 9440 }, { "epoch": 1.871467479573224, "grad_norm": 2.140625, "learning_rate": 3.0625127377754814e-06, "loss": 0.9785, "step": 9441 }, { "epoch": 1.8716673746283201, "grad_norm": 2.125, "learning_rate": 3.0615413121264427e-06, "loss": 0.9842, "step": 9442 }, { "epoch": 1.8718672696834162, "grad_norm": 2.171875, "learning_rate": 3.060569972583533e-06, "loss": 0.9799, "step": 9443 }, { "epoch": 1.8720671647385123, "grad_norm": 2.234375, "learning_rate": 3.0595987191898968e-06, "loss": 0.9764, "step": 9444 }, { "epoch": 1.8722670597936082, "grad_norm": 2.3125, "learning_rate": 3.0586275519886792e-06, "loss": 1.0285, "step": 9445 }, { "epoch": 1.8724669548487043, "grad_norm": 2.0, "learning_rate": 3.057656471023018e-06, "loss": 0.9092, "step": 9446 }, { "epoch": 1.8726668499038004, "grad_norm": 2.15625, "learning_rate": 3.056685476336048e-06, "loss": 0.9515, "step": 9447 }, { "epoch": 1.8728667449588965, "grad_norm": 2.09375, "learning_rate": 3.0557145679709033e-06, "loss": 0.9823, "step": 9448 }, { "epoch": 1.8730666400139926, "grad_norm": 2.28125, "learning_rate": 3.0547437459707086e-06, "loss": 0.9461, "step": 9449 }, { "epoch": 1.8732665350690887, "grad_norm": 2.203125, "learning_rate": 3.053773010378589e-06, "loss": 0.8853, "step": 9450 }, { "epoch": 1.8734664301241848, "grad_norm": 2.265625, "learning_rate": 3.052802361237665e-06, "loss": 1.0347, "step": 9451 }, { "epoch": 1.873666325179281, "grad_norm": 2.09375, "learning_rate": 3.0518317985910516e-06, "loss": 0.9607, "step": 9452 }, { "epoch": 1.873866220234377, "grad_norm": 2.203125, "learning_rate": 3.050861322481863e-06, "loss": 0.9928, "step": 9453 }, { "epoch": 1.874066115289473, "grad_norm": 2.234375, "learning_rate": 3.0498909329532047e-06, "loss": 1.031, "step": 9454 }, { "epoch": 1.8742660103445692, "grad_norm": 2.1875, "learning_rate": 3.0489206300481846e-06, "loss": 0.9818, "step": 9455 }, { "epoch": 1.8744659053996653, "grad_norm": 2.203125, "learning_rate": 3.0479504138099e-06, "loss": 1.0457, "step": 9456 }, { "epoch": 1.8746658004547614, "grad_norm": 2.046875, "learning_rate": 3.046980284281451e-06, "loss": 0.8744, "step": 9457 }, { "epoch": 1.8748656955098575, "grad_norm": 2.265625, "learning_rate": 3.046010241505928e-06, "loss": 0.9854, "step": 9458 }, { "epoch": 1.8750655905649534, "grad_norm": 2.21875, "learning_rate": 3.0450402855264204e-06, "loss": 0.9403, "step": 9459 }, { "epoch": 1.8752654856200495, "grad_norm": 2.109375, "learning_rate": 3.0440704163860167e-06, "loss": 1.02, "step": 9460 }, { "epoch": 1.8754653806751456, "grad_norm": 2.0625, "learning_rate": 3.0431006341277945e-06, "loss": 0.9089, "step": 9461 }, { "epoch": 1.8756652757302417, "grad_norm": 2.109375, "learning_rate": 3.042130938794834e-06, "loss": 0.8881, "step": 9462 }, { "epoch": 1.8758651707853375, "grad_norm": 2.171875, "learning_rate": 3.041161330430208e-06, "loss": 0.8822, "step": 9463 }, { "epoch": 1.8760650658404336, "grad_norm": 2.09375, "learning_rate": 3.0401918090769868e-06, "loss": 0.9422, "step": 9464 }, { "epoch": 1.8762649608955297, "grad_norm": 2.140625, "learning_rate": 3.0392223747782358e-06, "loss": 0.9783, "step": 9465 }, { "epoch": 1.8764648559506258, "grad_norm": 2.140625, "learning_rate": 3.0382530275770172e-06, "loss": 0.9372, "step": 9466 }, { "epoch": 1.876664751005722, "grad_norm": 2.15625, "learning_rate": 3.037283767516389e-06, "loss": 0.9253, "step": 9467 }, { "epoch": 1.876864646060818, "grad_norm": 2.140625, "learning_rate": 3.036314594639408e-06, "loss": 1.0243, "step": 9468 }, { "epoch": 1.8770645411159141, "grad_norm": 2.125, "learning_rate": 3.0353455089891213e-06, "loss": 0.9791, "step": 9469 }, { "epoch": 1.8772644361710102, "grad_norm": 2.203125, "learning_rate": 3.0343765106085778e-06, "loss": 0.973, "step": 9470 }, { "epoch": 1.8774643312261063, "grad_norm": 2.21875, "learning_rate": 3.0334075995408206e-06, "loss": 1.0732, "step": 9471 }, { "epoch": 1.8776642262812024, "grad_norm": 2.09375, "learning_rate": 3.032438775828887e-06, "loss": 0.931, "step": 9472 }, { "epoch": 1.8778641213362985, "grad_norm": 2.09375, "learning_rate": 3.0314700395158125e-06, "loss": 1.0091, "step": 9473 }, { "epoch": 1.8780640163913946, "grad_norm": 2.125, "learning_rate": 3.0305013906446296e-06, "loss": 0.9834, "step": 9474 }, { "epoch": 1.8782639114464907, "grad_norm": 2.078125, "learning_rate": 3.0295328292583636e-06, "loss": 0.9206, "step": 9475 }, { "epoch": 1.8784638065015866, "grad_norm": 2.171875, "learning_rate": 3.0285643554000398e-06, "loss": 1.083, "step": 9476 }, { "epoch": 1.8786637015566827, "grad_norm": 2.046875, "learning_rate": 3.0275959691126763e-06, "loss": 0.9827, "step": 9477 }, { "epoch": 1.8788635966117788, "grad_norm": 2.140625, "learning_rate": 3.0266276704392884e-06, "loss": 0.97, "step": 9478 }, { "epoch": 1.879063491666875, "grad_norm": 2.328125, "learning_rate": 3.0256594594228906e-06, "loss": 1.0129, "step": 9479 }, { "epoch": 1.879263386721971, "grad_norm": 2.09375, "learning_rate": 3.024691336106487e-06, "loss": 0.9386, "step": 9480 }, { "epoch": 1.879463281777067, "grad_norm": 2.203125, "learning_rate": 3.0237233005330833e-06, "loss": 0.9983, "step": 9481 }, { "epoch": 1.879663176832163, "grad_norm": 2.15625, "learning_rate": 3.022755352745681e-06, "loss": 0.9233, "step": 9482 }, { "epoch": 1.879863071887259, "grad_norm": 2.125, "learning_rate": 3.021787492787273e-06, "loss": 1.0003, "step": 9483 }, { "epoch": 1.8800629669423552, "grad_norm": 2.265625, "learning_rate": 3.020819720700855e-06, "loss": 1.0818, "step": 9484 }, { "epoch": 1.8802628619974513, "grad_norm": 2.09375, "learning_rate": 3.019852036529412e-06, "loss": 0.9582, "step": 9485 }, { "epoch": 1.8804627570525474, "grad_norm": 2.109375, "learning_rate": 3.018884440315932e-06, "loss": 0.9328, "step": 9486 }, { "epoch": 1.8806626521076435, "grad_norm": 2.1875, "learning_rate": 3.0179169321033926e-06, "loss": 0.9897, "step": 9487 }, { "epoch": 1.8808625471627396, "grad_norm": 2.0625, "learning_rate": 3.016949511934771e-06, "loss": 0.9947, "step": 9488 }, { "epoch": 1.8810624422178357, "grad_norm": 2.125, "learning_rate": 3.015982179853041e-06, "loss": 0.9682, "step": 9489 }, { "epoch": 1.8812623372729318, "grad_norm": 2.203125, "learning_rate": 3.0150149359011694e-06, "loss": 0.962, "step": 9490 }, { "epoch": 1.881462232328028, "grad_norm": 2.234375, "learning_rate": 3.0140477801221235e-06, "loss": 1.0181, "step": 9491 }, { "epoch": 1.881662127383124, "grad_norm": 2.28125, "learning_rate": 3.0130807125588625e-06, "loss": 1.0336, "step": 9492 }, { "epoch": 1.88186202243822, "grad_norm": 2.125, "learning_rate": 3.0121137332543438e-06, "loss": 0.9699, "step": 9493 }, { "epoch": 1.882061917493316, "grad_norm": 2.328125, "learning_rate": 3.0111468422515215e-06, "loss": 1.0088, "step": 9494 }, { "epoch": 1.882261812548412, "grad_norm": 2.0625, "learning_rate": 3.0101800395933433e-06, "loss": 0.9987, "step": 9495 }, { "epoch": 1.8824617076035082, "grad_norm": 2.125, "learning_rate": 3.0092133253227563e-06, "loss": 0.9607, "step": 9496 }, { "epoch": 1.8826616026586043, "grad_norm": 2.21875, "learning_rate": 3.0082466994827e-06, "loss": 0.9607, "step": 9497 }, { "epoch": 1.8828614977137001, "grad_norm": 2.171875, "learning_rate": 3.007280162116112e-06, "loss": 0.9824, "step": 9498 }, { "epoch": 1.8830613927687962, "grad_norm": 2.109375, "learning_rate": 3.0063137132659277e-06, "loss": 1.0015, "step": 9499 }, { "epoch": 1.8832612878238923, "grad_norm": 2.140625, "learning_rate": 3.0053473529750743e-06, "loss": 0.907, "step": 9500 }, { "epoch": 1.8834611828789884, "grad_norm": 2.046875, "learning_rate": 3.0043810812864783e-06, "loss": 0.9253, "step": 9501 }, { "epoch": 1.8836610779340845, "grad_norm": 2.21875, "learning_rate": 3.0034148982430624e-06, "loss": 0.951, "step": 9502 }, { "epoch": 1.8838609729891806, "grad_norm": 2.1875, "learning_rate": 3.002448803887743e-06, "loss": 0.9961, "step": 9503 }, { "epoch": 1.8840608680442767, "grad_norm": 2.203125, "learning_rate": 3.001482798263435e-06, "loss": 1.0291, "step": 9504 }, { "epoch": 1.8842607630993728, "grad_norm": 2.09375, "learning_rate": 3.0005168814130463e-06, "loss": 0.9918, "step": 9505 }, { "epoch": 1.884460658154469, "grad_norm": 2.78125, "learning_rate": 2.9995510533794846e-06, "loss": 0.971, "step": 9506 }, { "epoch": 1.884660553209565, "grad_norm": 2.40625, "learning_rate": 2.9985853142056527e-06, "loss": 0.9807, "step": 9507 }, { "epoch": 1.8848604482646611, "grad_norm": 2.1875, "learning_rate": 2.997619663934446e-06, "loss": 0.9397, "step": 9508 }, { "epoch": 1.8850603433197572, "grad_norm": 2.3125, "learning_rate": 2.9966541026087602e-06, "loss": 0.964, "step": 9509 }, { "epoch": 1.8852602383748533, "grad_norm": 2.0625, "learning_rate": 2.995688630271486e-06, "loss": 1.0068, "step": 9510 }, { "epoch": 1.8854601334299492, "grad_norm": 2.171875, "learning_rate": 2.994723246965508e-06, "loss": 0.9264, "step": 9511 }, { "epoch": 1.8856600284850453, "grad_norm": 2.09375, "learning_rate": 2.9937579527337092e-06, "loss": 0.9491, "step": 9512 }, { "epoch": 1.8858599235401414, "grad_norm": 2.125, "learning_rate": 2.992792747618969e-06, "loss": 1.0221, "step": 9513 }, { "epoch": 1.8860598185952375, "grad_norm": 2.171875, "learning_rate": 2.9918276316641592e-06, "loss": 0.957, "step": 9514 }, { "epoch": 1.8862597136503336, "grad_norm": 2.171875, "learning_rate": 2.9908626049121523e-06, "loss": 0.936, "step": 9515 }, { "epoch": 1.8864596087054295, "grad_norm": 2.1875, "learning_rate": 2.989897667405813e-06, "loss": 0.9702, "step": 9516 }, { "epoch": 1.8866595037605256, "grad_norm": 2.109375, "learning_rate": 2.9889328191880075e-06, "loss": 0.9699, "step": 9517 }, { "epoch": 1.8868593988156217, "grad_norm": 2.09375, "learning_rate": 2.987968060301588e-06, "loss": 1.0664, "step": 9518 }, { "epoch": 1.8870592938707178, "grad_norm": 2.25, "learning_rate": 2.9870033907894146e-06, "loss": 1.07, "step": 9519 }, { "epoch": 1.887259188925814, "grad_norm": 2.125, "learning_rate": 2.986038810694334e-06, "loss": 0.9554, "step": 9520 }, { "epoch": 1.88745908398091, "grad_norm": 2.078125, "learning_rate": 2.985074320059195e-06, "loss": 0.9381, "step": 9521 }, { "epoch": 1.887658979036006, "grad_norm": 2.015625, "learning_rate": 2.9841099189268397e-06, "loss": 0.8263, "step": 9522 }, { "epoch": 1.8878588740911022, "grad_norm": 2.0, "learning_rate": 2.9831456073401056e-06, "loss": 0.8908, "step": 9523 }, { "epoch": 1.8880587691461983, "grad_norm": 2.15625, "learning_rate": 2.982181385341828e-06, "loss": 1.0653, "step": 9524 }, { "epoch": 1.8882586642012944, "grad_norm": 2.125, "learning_rate": 2.9812172529748395e-06, "loss": 0.9158, "step": 9525 }, { "epoch": 1.8884585592563905, "grad_norm": 2.203125, "learning_rate": 2.9802532102819637e-06, "loss": 1.1446, "step": 9526 }, { "epoch": 1.8886584543114866, "grad_norm": 2.15625, "learning_rate": 2.9792892573060257e-06, "loss": 0.9912, "step": 9527 }, { "epoch": 1.8888583493665827, "grad_norm": 2.125, "learning_rate": 2.9783253940898417e-06, "loss": 0.9292, "step": 9528 }, { "epoch": 1.8890582444216786, "grad_norm": 2.1875, "learning_rate": 2.977361620676228e-06, "loss": 0.9676, "step": 9529 }, { "epoch": 1.8892581394767747, "grad_norm": 2.25, "learning_rate": 2.976397937107996e-06, "loss": 1.0119, "step": 9530 }, { "epoch": 1.8894580345318708, "grad_norm": 2.140625, "learning_rate": 2.9754343434279504e-06, "loss": 1.0663, "step": 9531 }, { "epoch": 1.8896579295869669, "grad_norm": 2.09375, "learning_rate": 2.974470839678895e-06, "loss": 0.939, "step": 9532 }, { "epoch": 1.8898578246420628, "grad_norm": 2.5, "learning_rate": 2.97350742590363e-06, "loss": 0.9304, "step": 9533 }, { "epoch": 1.8900577196971589, "grad_norm": 2.140625, "learning_rate": 2.9725441021449477e-06, "loss": 0.9278, "step": 9534 }, { "epoch": 1.890257614752255, "grad_norm": 2.21875, "learning_rate": 2.9715808684456402e-06, "loss": 0.952, "step": 9535 }, { "epoch": 1.890457509807351, "grad_norm": 2.15625, "learning_rate": 2.9706177248484936e-06, "loss": 1.0302, "step": 9536 }, { "epoch": 1.8906574048624472, "grad_norm": 2.234375, "learning_rate": 2.9696546713962904e-06, "loss": 0.9503, "step": 9537 }, { "epoch": 1.8908572999175433, "grad_norm": 2.125, "learning_rate": 2.968691708131811e-06, "loss": 0.9209, "step": 9538 }, { "epoch": 1.8910571949726394, "grad_norm": 2.34375, "learning_rate": 2.9677288350978286e-06, "loss": 0.9386, "step": 9539 }, { "epoch": 1.8912570900277355, "grad_norm": 2.1875, "learning_rate": 2.9667660523371134e-06, "loss": 0.9958, "step": 9540 }, { "epoch": 1.8914569850828316, "grad_norm": 2.25, "learning_rate": 2.965803359892434e-06, "loss": 0.9637, "step": 9541 }, { "epoch": 1.8916568801379277, "grad_norm": 2.078125, "learning_rate": 2.9648407578065515e-06, "loss": 1.0209, "step": 9542 }, { "epoch": 1.8918567751930238, "grad_norm": 2.234375, "learning_rate": 2.9638782461222258e-06, "loss": 1.0401, "step": 9543 }, { "epoch": 1.8920566702481199, "grad_norm": 2.1875, "learning_rate": 2.9629158248822105e-06, "loss": 0.9794, "step": 9544 }, { "epoch": 1.892256565303216, "grad_norm": 2.09375, "learning_rate": 2.9619534941292562e-06, "loss": 0.9372, "step": 9545 }, { "epoch": 1.8924564603583118, "grad_norm": 2.1875, "learning_rate": 2.9609912539061114e-06, "loss": 0.9428, "step": 9546 }, { "epoch": 1.892656355413408, "grad_norm": 2.171875, "learning_rate": 2.960029104255516e-06, "loss": 0.9534, "step": 9547 }, { "epoch": 1.892856250468504, "grad_norm": 2.234375, "learning_rate": 2.959067045220212e-06, "loss": 1.0459, "step": 9548 }, { "epoch": 1.8930561455236001, "grad_norm": 2.140625, "learning_rate": 2.95810507684293e-06, "loss": 1.0112, "step": 9549 }, { "epoch": 1.8932560405786962, "grad_norm": 2.109375, "learning_rate": 2.9571431991664036e-06, "loss": 0.9578, "step": 9550 }, { "epoch": 1.893455935633792, "grad_norm": 2.140625, "learning_rate": 2.9561814122333564e-06, "loss": 1.0109, "step": 9551 }, { "epoch": 1.8936558306888882, "grad_norm": 2.109375, "learning_rate": 2.955219716086514e-06, "loss": 0.8693, "step": 9552 }, { "epoch": 1.8938557257439843, "grad_norm": 2.078125, "learning_rate": 2.954258110768593e-06, "loss": 0.9462, "step": 9553 }, { "epoch": 1.8940556207990804, "grad_norm": 2.109375, "learning_rate": 2.9532965963223076e-06, "loss": 1.0066, "step": 9554 }, { "epoch": 1.8942555158541765, "grad_norm": 2.21875, "learning_rate": 2.95233517279037e-06, "loss": 0.9776, "step": 9555 }, { "epoch": 1.8944554109092726, "grad_norm": 2.15625, "learning_rate": 2.951373840215484e-06, "loss": 1.0024, "step": 9556 }, { "epoch": 1.8946553059643687, "grad_norm": 2.078125, "learning_rate": 2.950412598640353e-06, "loss": 0.9587, "step": 9557 }, { "epoch": 1.8948552010194648, "grad_norm": 2.21875, "learning_rate": 2.9494514481076773e-06, "loss": 1.0363, "step": 9558 }, { "epoch": 1.895055096074561, "grad_norm": 2.25, "learning_rate": 2.948490388660148e-06, "loss": 1.0407, "step": 9559 }, { "epoch": 1.895254991129657, "grad_norm": 2.109375, "learning_rate": 2.9475294203404557e-06, "loss": 0.9162, "step": 9560 }, { "epoch": 1.895454886184753, "grad_norm": 2.046875, "learning_rate": 2.9465685431912895e-06, "loss": 0.9416, "step": 9561 }, { "epoch": 1.8956547812398492, "grad_norm": 2.25, "learning_rate": 2.945607757255328e-06, "loss": 1.0465, "step": 9562 }, { "epoch": 1.8958546762949453, "grad_norm": 2.09375, "learning_rate": 2.9446470625752497e-06, "loss": 0.964, "step": 9563 }, { "epoch": 1.8960545713500412, "grad_norm": 2.109375, "learning_rate": 2.9436864591937312e-06, "loss": 0.9395, "step": 9564 }, { "epoch": 1.8962544664051373, "grad_norm": 2.15625, "learning_rate": 2.9427259471534396e-06, "loss": 0.9681, "step": 9565 }, { "epoch": 1.8964543614602334, "grad_norm": 2.1875, "learning_rate": 2.9417655264970424e-06, "loss": 1.02, "step": 9566 }, { "epoch": 1.8966542565153295, "grad_norm": 2.09375, "learning_rate": 2.9408051972672e-06, "loss": 0.9638, "step": 9567 }, { "epoch": 1.8968541515704254, "grad_norm": 2.140625, "learning_rate": 2.9398449595065705e-06, "loss": 1.0314, "step": 9568 }, { "epoch": 1.8970540466255215, "grad_norm": 2.15625, "learning_rate": 2.9388848132578087e-06, "loss": 0.8992, "step": 9569 }, { "epoch": 1.8972539416806176, "grad_norm": 2.15625, "learning_rate": 2.937924758563563e-06, "loss": 0.9696, "step": 9570 }, { "epoch": 1.8974538367357137, "grad_norm": 2.171875, "learning_rate": 2.9369647954664783e-06, "loss": 0.9212, "step": 9571 }, { "epoch": 1.8976537317908098, "grad_norm": 2.265625, "learning_rate": 2.9360049240091988e-06, "loss": 0.9186, "step": 9572 }, { "epoch": 1.8978536268459059, "grad_norm": 2.421875, "learning_rate": 2.9350451442343585e-06, "loss": 0.9811, "step": 9573 }, { "epoch": 1.898053521901002, "grad_norm": 2.140625, "learning_rate": 2.9340854561845945e-06, "loss": 1.0047, "step": 9574 }, { "epoch": 1.898253416956098, "grad_norm": 2.21875, "learning_rate": 2.933125859902532e-06, "loss": 1.0303, "step": 9575 }, { "epoch": 1.8984533120111942, "grad_norm": 2.109375, "learning_rate": 2.9321663554307977e-06, "loss": 0.9834, "step": 9576 }, { "epoch": 1.8986532070662903, "grad_norm": 2.109375, "learning_rate": 2.931206942812015e-06, "loss": 0.9697, "step": 9577 }, { "epoch": 1.8988531021213864, "grad_norm": 2.25, "learning_rate": 2.9302476220887975e-06, "loss": 1.0374, "step": 9578 }, { "epoch": 1.8990529971764825, "grad_norm": 2.21875, "learning_rate": 2.929288393303762e-06, "loss": 0.9615, "step": 9579 }, { "epoch": 1.8992528922315786, "grad_norm": 2.078125, "learning_rate": 2.928329256499512e-06, "loss": 0.9689, "step": 9580 }, { "epoch": 1.8994527872866747, "grad_norm": 2.171875, "learning_rate": 2.9273702117186564e-06, "loss": 0.9642, "step": 9581 }, { "epoch": 1.8996526823417705, "grad_norm": 2.140625, "learning_rate": 2.926411259003794e-06, "loss": 1.0004, "step": 9582 }, { "epoch": 1.8998525773968666, "grad_norm": 2.234375, "learning_rate": 2.9254523983975224e-06, "loss": 0.9807, "step": 9583 }, { "epoch": 1.9000524724519627, "grad_norm": 2.171875, "learning_rate": 2.924493629942434e-06, "loss": 0.9657, "step": 9584 }, { "epoch": 1.9002523675070588, "grad_norm": 2.09375, "learning_rate": 2.923534953681116e-06, "loss": 1.0369, "step": 9585 }, { "epoch": 1.9004522625621547, "grad_norm": 2.09375, "learning_rate": 2.922576369656155e-06, "loss": 0.8696, "step": 9586 }, { "epoch": 1.9006521576172508, "grad_norm": 2.0625, "learning_rate": 2.9216178779101276e-06, "loss": 0.9599, "step": 9587 }, { "epoch": 1.900852052672347, "grad_norm": 2.09375, "learning_rate": 2.9206594784856133e-06, "loss": 0.9909, "step": 9588 }, { "epoch": 1.901051947727443, "grad_norm": 2.125, "learning_rate": 2.9197011714251833e-06, "loss": 1.0464, "step": 9589 }, { "epoch": 1.9012518427825391, "grad_norm": 2.0625, "learning_rate": 2.9187429567714044e-06, "loss": 0.9008, "step": 9590 }, { "epoch": 1.9014517378376352, "grad_norm": 2.125, "learning_rate": 2.9177848345668426e-06, "loss": 1.0138, "step": 9591 }, { "epoch": 1.9016516328927313, "grad_norm": 2.109375, "learning_rate": 2.9168268048540527e-06, "loss": 1.0418, "step": 9592 }, { "epoch": 1.9018515279478274, "grad_norm": 2.109375, "learning_rate": 2.9158688676755966e-06, "loss": 0.9981, "step": 9593 }, { "epoch": 1.9020514230029235, "grad_norm": 2.015625, "learning_rate": 2.914911023074023e-06, "loss": 0.8566, "step": 9594 }, { "epoch": 1.9022513180580196, "grad_norm": 2.171875, "learning_rate": 2.913953271091876e-06, "loss": 0.9912, "step": 9595 }, { "epoch": 1.9024512131131157, "grad_norm": 2.125, "learning_rate": 2.912995611771705e-06, "loss": 0.9226, "step": 9596 }, { "epoch": 1.9026511081682118, "grad_norm": 2.203125, "learning_rate": 2.9120380451560456e-06, "loss": 0.948, "step": 9597 }, { "epoch": 1.902851003223308, "grad_norm": 2.296875, "learning_rate": 2.911080571287433e-06, "loss": 0.9533, "step": 9598 }, { "epoch": 1.9030508982784038, "grad_norm": 2.296875, "learning_rate": 2.9101231902083963e-06, "loss": 1.0581, "step": 9599 }, { "epoch": 1.9032507933335, "grad_norm": 2.1875, "learning_rate": 2.909165901961467e-06, "loss": 1.0024, "step": 9600 }, { "epoch": 1.903450688388596, "grad_norm": 2.15625, "learning_rate": 2.908208706589164e-06, "loss": 0.9489, "step": 9601 }, { "epoch": 1.903650583443692, "grad_norm": 2.125, "learning_rate": 2.907251604134006e-06, "loss": 0.9358, "step": 9602 }, { "epoch": 1.9038504784987882, "grad_norm": 2.25, "learning_rate": 2.9062945946385092e-06, "loss": 1.04, "step": 9603 }, { "epoch": 1.904050373553884, "grad_norm": 2.0625, "learning_rate": 2.9053376781451836e-06, "loss": 0.9114, "step": 9604 }, { "epoch": 1.9042502686089802, "grad_norm": 2.109375, "learning_rate": 2.904380854696532e-06, "loss": 0.9902, "step": 9605 }, { "epoch": 1.9044501636640763, "grad_norm": 2.15625, "learning_rate": 2.9034241243350615e-06, "loss": 0.9319, "step": 9606 }, { "epoch": 1.9046500587191724, "grad_norm": 2.125, "learning_rate": 2.902467487103267e-06, "loss": 0.96, "step": 9607 }, { "epoch": 1.9048499537742685, "grad_norm": 2.171875, "learning_rate": 2.901510943043641e-06, "loss": 0.9625, "step": 9608 }, { "epoch": 1.9050498488293646, "grad_norm": 2.046875, "learning_rate": 2.9005544921986774e-06, "loss": 0.8741, "step": 9609 }, { "epoch": 1.9052497438844607, "grad_norm": 2.203125, "learning_rate": 2.8995981346108598e-06, "loss": 1.0453, "step": 9610 }, { "epoch": 1.9054496389395568, "grad_norm": 2.046875, "learning_rate": 2.8986418703226655e-06, "loss": 0.9462, "step": 9611 }, { "epoch": 1.9056495339946529, "grad_norm": 2.09375, "learning_rate": 2.8976856993765766e-06, "loss": 0.9167, "step": 9612 }, { "epoch": 1.905849429049749, "grad_norm": 2.265625, "learning_rate": 2.896729621815064e-06, "loss": 1.0164, "step": 9613 }, { "epoch": 1.906049324104845, "grad_norm": 2.15625, "learning_rate": 2.8957736376805963e-06, "loss": 0.9024, "step": 9614 }, { "epoch": 1.9062492191599412, "grad_norm": 2.046875, "learning_rate": 2.8948177470156404e-06, "loss": 0.9204, "step": 9615 }, { "epoch": 1.9064491142150373, "grad_norm": 2.046875, "learning_rate": 2.8938619498626542e-06, "loss": 0.9531, "step": 9616 }, { "epoch": 1.9066490092701331, "grad_norm": 2.296875, "learning_rate": 2.8929062462640946e-06, "loss": 1.0757, "step": 9617 }, { "epoch": 1.9068489043252292, "grad_norm": 2.140625, "learning_rate": 2.8919506362624156e-06, "loss": 0.9839, "step": 9618 }, { "epoch": 1.9070487993803253, "grad_norm": 2.046875, "learning_rate": 2.8909951199000645e-06, "loss": 0.8796, "step": 9619 }, { "epoch": 1.9072486944354214, "grad_norm": 2.203125, "learning_rate": 2.8900396972194834e-06, "loss": 1.0045, "step": 9620 }, { "epoch": 1.9074485894905173, "grad_norm": 2.234375, "learning_rate": 2.8890843682631147e-06, "loss": 0.9391, "step": 9621 }, { "epoch": 1.9076484845456134, "grad_norm": 2.21875, "learning_rate": 2.8881291330733933e-06, "loss": 0.9263, "step": 9622 }, { "epoch": 1.9078483796007095, "grad_norm": 2.15625, "learning_rate": 2.8871739916927487e-06, "loss": 1.0736, "step": 9623 }, { "epoch": 1.9080482746558056, "grad_norm": 2.0625, "learning_rate": 2.8862189441636113e-06, "loss": 0.8903, "step": 9624 }, { "epoch": 1.9082481697109017, "grad_norm": 2.109375, "learning_rate": 2.8852639905284026e-06, "loss": 0.9666, "step": 9625 }, { "epoch": 1.9084480647659978, "grad_norm": 2.203125, "learning_rate": 2.8843091308295395e-06, "loss": 0.9188, "step": 9626 }, { "epoch": 1.908647959821094, "grad_norm": 2.03125, "learning_rate": 2.883354365109441e-06, "loss": 0.917, "step": 9627 }, { "epoch": 1.90884785487619, "grad_norm": 2.0625, "learning_rate": 2.882399693410516e-06, "loss": 0.9333, "step": 9628 }, { "epoch": 1.9090477499312861, "grad_norm": 2.046875, "learning_rate": 2.8814451157751697e-06, "loss": 0.9324, "step": 9629 }, { "epoch": 1.9092476449863822, "grad_norm": 2.0625, "learning_rate": 2.880490632245803e-06, "loss": 0.9385, "step": 9630 }, { "epoch": 1.9094475400414783, "grad_norm": 2.21875, "learning_rate": 2.8795362428648186e-06, "loss": 0.8715, "step": 9631 }, { "epoch": 1.9096474350965744, "grad_norm": 2.21875, "learning_rate": 2.878581947674608e-06, "loss": 0.8694, "step": 9632 }, { "epoch": 1.9098473301516705, "grad_norm": 2.0625, "learning_rate": 2.8776277467175583e-06, "loss": 0.8945, "step": 9633 }, { "epoch": 1.9100472252067664, "grad_norm": 2.21875, "learning_rate": 2.8766736400360595e-06, "loss": 0.9825, "step": 9634 }, { "epoch": 1.9102471202618625, "grad_norm": 2.203125, "learning_rate": 2.875719627672491e-06, "loss": 1.0558, "step": 9635 }, { "epoch": 1.9104470153169586, "grad_norm": 2.15625, "learning_rate": 2.874765709669227e-06, "loss": 0.9971, "step": 9636 }, { "epoch": 1.9106469103720547, "grad_norm": 2.21875, "learning_rate": 2.8738118860686457e-06, "loss": 0.9931, "step": 9637 }, { "epoch": 1.9108468054271508, "grad_norm": 2.203125, "learning_rate": 2.8728581569131137e-06, "loss": 1.0429, "step": 9638 }, { "epoch": 1.9110467004822467, "grad_norm": 2.15625, "learning_rate": 2.8719045222449925e-06, "loss": 0.9666, "step": 9639 }, { "epoch": 1.9112465955373428, "grad_norm": 2.046875, "learning_rate": 2.8709509821066478e-06, "loss": 0.9532, "step": 9640 }, { "epoch": 1.9114464905924389, "grad_norm": 2.171875, "learning_rate": 2.869997536540435e-06, "loss": 0.9724, "step": 9641 }, { "epoch": 1.911646385647535, "grad_norm": 2.265625, "learning_rate": 2.8690441855887e-06, "loss": 0.9422, "step": 9642 }, { "epoch": 1.911846280702631, "grad_norm": 2.09375, "learning_rate": 2.8680909292937965e-06, "loss": 0.963, "step": 9643 }, { "epoch": 1.9120461757577272, "grad_norm": 2.28125, "learning_rate": 2.867137767698066e-06, "loss": 0.9424, "step": 9644 }, { "epoch": 1.9122460708128233, "grad_norm": 2.203125, "learning_rate": 2.8661847008438466e-06, "loss": 0.9263, "step": 9645 }, { "epoch": 1.9124459658679194, "grad_norm": 2.171875, "learning_rate": 2.8652317287734766e-06, "loss": 0.9459, "step": 9646 }, { "epoch": 1.9126458609230155, "grad_norm": 2.15625, "learning_rate": 2.8642788515292854e-06, "loss": 0.9803, "step": 9647 }, { "epoch": 1.9128457559781116, "grad_norm": 2.21875, "learning_rate": 2.8633260691535973e-06, "loss": 1.0479, "step": 9648 }, { "epoch": 1.9130456510332077, "grad_norm": 2.265625, "learning_rate": 2.862373381688739e-06, "loss": 0.8759, "step": 9649 }, { "epoch": 1.9132455460883038, "grad_norm": 2.140625, "learning_rate": 2.8614207891770275e-06, "loss": 0.9741, "step": 9650 }, { "epoch": 1.9134454411433999, "grad_norm": 2.359375, "learning_rate": 2.8604682916607728e-06, "loss": 0.9743, "step": 9651 }, { "epoch": 1.9136453361984957, "grad_norm": 2.09375, "learning_rate": 2.859515889182291e-06, "loss": 1.0026, "step": 9652 }, { "epoch": 1.9138452312535918, "grad_norm": 2.125, "learning_rate": 2.858563581783885e-06, "loss": 0.9185, "step": 9653 }, { "epoch": 1.914045126308688, "grad_norm": 2.125, "learning_rate": 2.8576113695078534e-06, "loss": 0.9673, "step": 9654 }, { "epoch": 1.914245021363784, "grad_norm": 2.15625, "learning_rate": 2.856659252396498e-06, "loss": 0.9226, "step": 9655 }, { "epoch": 1.91444491641888, "grad_norm": 2.15625, "learning_rate": 2.8557072304921094e-06, "loss": 1.0275, "step": 9656 }, { "epoch": 1.914644811473976, "grad_norm": 2.40625, "learning_rate": 2.8547553038369756e-06, "loss": 0.9081, "step": 9657 }, { "epoch": 1.9148447065290721, "grad_norm": 2.265625, "learning_rate": 2.853803472473383e-06, "loss": 0.988, "step": 9658 }, { "epoch": 1.9150446015841682, "grad_norm": 2.125, "learning_rate": 2.8528517364436116e-06, "loss": 0.9797, "step": 9659 }, { "epoch": 1.9152444966392643, "grad_norm": 2.1875, "learning_rate": 2.8519000957899368e-06, "loss": 0.9943, "step": 9660 }, { "epoch": 1.9154443916943604, "grad_norm": 2.21875, "learning_rate": 2.850948550554628e-06, "loss": 1.0108, "step": 9661 }, { "epoch": 1.9156442867494565, "grad_norm": 2.25, "learning_rate": 2.8499971007799576e-06, "loss": 0.9656, "step": 9662 }, { "epoch": 1.9158441818045526, "grad_norm": 1.984375, "learning_rate": 2.8490457465081853e-06, "loss": 0.8991, "step": 9663 }, { "epoch": 1.9160440768596487, "grad_norm": 2.328125, "learning_rate": 2.84809448778157e-06, "loss": 1.0282, "step": 9664 }, { "epoch": 1.9162439719147448, "grad_norm": 2.078125, "learning_rate": 2.8471433246423697e-06, "loss": 0.9533, "step": 9665 }, { "epoch": 1.916443866969841, "grad_norm": 2.203125, "learning_rate": 2.8461922571328327e-06, "loss": 0.9021, "step": 9666 }, { "epoch": 1.916643762024937, "grad_norm": 2.171875, "learning_rate": 2.8452412852952037e-06, "loss": 0.8995, "step": 9667 }, { "epoch": 1.9168436570800331, "grad_norm": 2.21875, "learning_rate": 2.844290409171729e-06, "loss": 0.9884, "step": 9668 }, { "epoch": 1.917043552135129, "grad_norm": 2.171875, "learning_rate": 2.8433396288046433e-06, "loss": 0.977, "step": 9669 }, { "epoch": 1.917243447190225, "grad_norm": 2.171875, "learning_rate": 2.8423889442361797e-06, "loss": 1.0235, "step": 9670 }, { "epoch": 1.9174433422453212, "grad_norm": 2.1875, "learning_rate": 2.8414383555085708e-06, "loss": 0.9775, "step": 9671 }, { "epoch": 1.9176432373004173, "grad_norm": 2.140625, "learning_rate": 2.8404878626640408e-06, "loss": 0.9528, "step": 9672 }, { "epoch": 1.9178431323555134, "grad_norm": 2.328125, "learning_rate": 2.839537465744806e-06, "loss": 1.037, "step": 9673 }, { "epoch": 1.9180430274106093, "grad_norm": 2.234375, "learning_rate": 2.8385871647930886e-06, "loss": 1.0233, "step": 9674 }, { "epoch": 1.9182429224657054, "grad_norm": 2.140625, "learning_rate": 2.837636959851098e-06, "loss": 1.0528, "step": 9675 }, { "epoch": 1.9184428175208015, "grad_norm": 2.1875, "learning_rate": 2.83668685096104e-06, "loss": 0.9207, "step": 9676 }, { "epoch": 1.9186427125758976, "grad_norm": 2.171875, "learning_rate": 2.8357368381651242e-06, "loss": 1.0023, "step": 9677 }, { "epoch": 1.9188426076309937, "grad_norm": 2.265625, "learning_rate": 2.8347869215055455e-06, "loss": 1.0113, "step": 9678 }, { "epoch": 1.9190425026860898, "grad_norm": 2.140625, "learning_rate": 2.8338371010244997e-06, "loss": 1.0195, "step": 9679 }, { "epoch": 1.9192423977411859, "grad_norm": 2.078125, "learning_rate": 2.83288737676418e-06, "loss": 0.9366, "step": 9680 }, { "epoch": 1.919442292796282, "grad_norm": 2.1875, "learning_rate": 2.831937748766772e-06, "loss": 0.9912, "step": 9681 }, { "epoch": 1.919642187851378, "grad_norm": 2.171875, "learning_rate": 2.830988217074455e-06, "loss": 0.9882, "step": 9682 }, { "epoch": 1.9198420829064742, "grad_norm": 2.21875, "learning_rate": 2.8300387817294122e-06, "loss": 1.0041, "step": 9683 }, { "epoch": 1.9200419779615703, "grad_norm": 2.0625, "learning_rate": 2.8290894427738148e-06, "loss": 0.9378, "step": 9684 }, { "epoch": 1.9202418730166664, "grad_norm": 2.15625, "learning_rate": 2.828140200249831e-06, "loss": 0.9609, "step": 9685 }, { "epoch": 1.9204417680717625, "grad_norm": 2.171875, "learning_rate": 2.827191054199629e-06, "loss": 0.9683, "step": 9686 }, { "epoch": 1.9206416631268584, "grad_norm": 2.15625, "learning_rate": 2.826242004665368e-06, "loss": 1.0936, "step": 9687 }, { "epoch": 1.9208415581819545, "grad_norm": 2.109375, "learning_rate": 2.825293051689204e-06, "loss": 0.9332, "step": 9688 }, { "epoch": 1.9210414532370506, "grad_norm": 2.140625, "learning_rate": 2.8243441953132918e-06, "loss": 1.0441, "step": 9689 }, { "epoch": 1.9212413482921467, "grad_norm": 2.0625, "learning_rate": 2.8233954355797775e-06, "loss": 0.9193, "step": 9690 }, { "epoch": 1.9214412433472425, "grad_norm": 2.109375, "learning_rate": 2.8224467725308064e-06, "loss": 0.9336, "step": 9691 }, { "epoch": 1.9216411384023386, "grad_norm": 2.109375, "learning_rate": 2.821498206208515e-06, "loss": 0.9434, "step": 9692 }, { "epoch": 1.9218410334574347, "grad_norm": 2.203125, "learning_rate": 2.8205497366550414e-06, "loss": 1.0168, "step": 9693 }, { "epoch": 1.9220409285125308, "grad_norm": 2.09375, "learning_rate": 2.8196013639125175e-06, "loss": 0.9444, "step": 9694 }, { "epoch": 1.922240823567627, "grad_norm": 2.296875, "learning_rate": 2.818653088023065e-06, "loss": 0.9618, "step": 9695 }, { "epoch": 1.922440718622723, "grad_norm": 2.09375, "learning_rate": 2.8177049090288115e-06, "loss": 0.9761, "step": 9696 }, { "epoch": 1.9226406136778191, "grad_norm": 2.046875, "learning_rate": 2.816756826971873e-06, "loss": 0.9491, "step": 9697 }, { "epoch": 1.9228405087329152, "grad_norm": 2.1875, "learning_rate": 2.8158088418943613e-06, "loss": 0.9522, "step": 9698 }, { "epoch": 1.9230404037880113, "grad_norm": 2.21875, "learning_rate": 2.814860953838389e-06, "loss": 0.9115, "step": 9699 }, { "epoch": 1.9232402988431074, "grad_norm": 2.15625, "learning_rate": 2.8139131628460605e-06, "loss": 0.883, "step": 9700 }, { "epoch": 1.9234401938982035, "grad_norm": 2.171875, "learning_rate": 2.8129654689594733e-06, "loss": 1.0504, "step": 9701 }, { "epoch": 1.9236400889532996, "grad_norm": 2.15625, "learning_rate": 2.8120178722207287e-06, "loss": 0.9033, "step": 9702 }, { "epoch": 1.9238399840083957, "grad_norm": 2.328125, "learning_rate": 2.811070372671918e-06, "loss": 1.075, "step": 9703 }, { "epoch": 1.9240398790634918, "grad_norm": 2.25, "learning_rate": 2.810122970355124e-06, "loss": 1.0194, "step": 9704 }, { "epoch": 1.9242397741185877, "grad_norm": 2.109375, "learning_rate": 2.809175665312436e-06, "loss": 0.9821, "step": 9705 }, { "epoch": 1.9244396691736838, "grad_norm": 2.203125, "learning_rate": 2.8082284575859302e-06, "loss": 0.986, "step": 9706 }, { "epoch": 1.92463956422878, "grad_norm": 2.140625, "learning_rate": 2.8072813472176807e-06, "loss": 1.0015, "step": 9707 }, { "epoch": 1.924839459283876, "grad_norm": 2.171875, "learning_rate": 2.8063343342497616e-06, "loss": 0.8788, "step": 9708 }, { "epoch": 1.9250393543389719, "grad_norm": 2.171875, "learning_rate": 2.805387418724237e-06, "loss": 0.9795, "step": 9709 }, { "epoch": 1.925239249394068, "grad_norm": 2.140625, "learning_rate": 2.804440600683167e-06, "loss": 0.9401, "step": 9710 }, { "epoch": 1.925439144449164, "grad_norm": 2.203125, "learning_rate": 2.803493880168613e-06, "loss": 0.9852, "step": 9711 }, { "epoch": 1.9256390395042602, "grad_norm": 2.15625, "learning_rate": 2.8025472572226266e-06, "loss": 0.9925, "step": 9712 }, { "epoch": 1.9258389345593563, "grad_norm": 2.03125, "learning_rate": 2.8016007318872532e-06, "loss": 0.946, "step": 9713 }, { "epoch": 1.9260388296144524, "grad_norm": 2.09375, "learning_rate": 2.800654304204543e-06, "loss": 0.9504, "step": 9714 }, { "epoch": 1.9262387246695485, "grad_norm": 2.171875, "learning_rate": 2.7997079742165346e-06, "loss": 0.9226, "step": 9715 }, { "epoch": 1.9264386197246446, "grad_norm": 2.171875, "learning_rate": 2.7987617419652603e-06, "loss": 0.9339, "step": 9716 }, { "epoch": 1.9266385147797407, "grad_norm": 2.171875, "learning_rate": 2.797815607492756e-06, "loss": 1.0389, "step": 9717 }, { "epoch": 1.9268384098348368, "grad_norm": 2.28125, "learning_rate": 2.7968695708410476e-06, "loss": 0.9964, "step": 9718 }, { "epoch": 1.9270383048899329, "grad_norm": 2.1875, "learning_rate": 2.7959236320521573e-06, "loss": 1.0257, "step": 9719 }, { "epoch": 1.927238199945029, "grad_norm": 2.203125, "learning_rate": 2.794977791168102e-06, "loss": 0.9098, "step": 9720 }, { "epoch": 1.927438095000125, "grad_norm": 2.046875, "learning_rate": 2.7940320482308995e-06, "loss": 0.857, "step": 9721 }, { "epoch": 1.927637990055221, "grad_norm": 2.109375, "learning_rate": 2.7930864032825582e-06, "loss": 1.0006, "step": 9722 }, { "epoch": 1.927837885110317, "grad_norm": 2.09375, "learning_rate": 2.792140856365081e-06, "loss": 0.9269, "step": 9723 }, { "epoch": 1.9280377801654132, "grad_norm": 2.1875, "learning_rate": 2.7911954075204734e-06, "loss": 0.9125, "step": 9724 }, { "epoch": 1.9282376752205093, "grad_norm": 2.234375, "learning_rate": 2.7902500567907297e-06, "loss": 0.8546, "step": 9725 }, { "epoch": 1.9284375702756054, "grad_norm": 2.140625, "learning_rate": 2.7893048042178405e-06, "loss": 1.0623, "step": 9726 }, { "epoch": 1.9286374653307012, "grad_norm": 2.234375, "learning_rate": 2.788359649843797e-06, "loss": 0.9814, "step": 9727 }, { "epoch": 1.9288373603857973, "grad_norm": 2.15625, "learning_rate": 2.787414593710583e-06, "loss": 1.0193, "step": 9728 }, { "epoch": 1.9290372554408934, "grad_norm": 2.234375, "learning_rate": 2.786469635860174e-06, "loss": 0.8954, "step": 9729 }, { "epoch": 1.9292371504959895, "grad_norm": 2.1875, "learning_rate": 2.7855247763345483e-06, "loss": 1.1665, "step": 9730 }, { "epoch": 1.9294370455510856, "grad_norm": 2.234375, "learning_rate": 2.7845800151756768e-06, "loss": 0.9786, "step": 9731 }, { "epoch": 1.9296369406061817, "grad_norm": 2.125, "learning_rate": 2.783635352425522e-06, "loss": 0.9566, "step": 9732 }, { "epoch": 1.9298368356612778, "grad_norm": 2.109375, "learning_rate": 2.78269078812605e-06, "loss": 0.8177, "step": 9733 }, { "epoch": 1.930036730716374, "grad_norm": 2.078125, "learning_rate": 2.781746322319219e-06, "loss": 1.009, "step": 9734 }, { "epoch": 1.93023662577147, "grad_norm": 2.046875, "learning_rate": 2.7808019550469745e-06, "loss": 0.9294, "step": 9735 }, { "epoch": 1.9304365208265661, "grad_norm": 2.234375, "learning_rate": 2.779857686351273e-06, "loss": 0.9833, "step": 9736 }, { "epoch": 1.9306364158816622, "grad_norm": 2.1875, "learning_rate": 2.7789135162740555e-06, "loss": 1.0077, "step": 9737 }, { "epoch": 1.9308363109367583, "grad_norm": 2.203125, "learning_rate": 2.7779694448572605e-06, "loss": 1.0207, "step": 9738 }, { "epoch": 1.9310362059918544, "grad_norm": 2.140625, "learning_rate": 2.777025472142827e-06, "loss": 0.9238, "step": 9739 }, { "epoch": 1.9312361010469503, "grad_norm": 2.125, "learning_rate": 2.7760815981726854e-06, "loss": 0.9788, "step": 9740 }, { "epoch": 1.9314359961020464, "grad_norm": 2.171875, "learning_rate": 2.7751378229887586e-06, "loss": 0.9759, "step": 9741 }, { "epoch": 1.9316358911571425, "grad_norm": 2.234375, "learning_rate": 2.774194146632975e-06, "loss": 0.9478, "step": 9742 }, { "epoch": 1.9318357862122386, "grad_norm": 2.125, "learning_rate": 2.773250569147249e-06, "loss": 1.08, "step": 9743 }, { "epoch": 1.9320356812673345, "grad_norm": 2.109375, "learning_rate": 2.772307090573494e-06, "loss": 0.9411, "step": 9744 }, { "epoch": 1.9322355763224306, "grad_norm": 2.25, "learning_rate": 2.7713637109536207e-06, "loss": 1.0289, "step": 9745 }, { "epoch": 1.9324354713775267, "grad_norm": 2.28125, "learning_rate": 2.7704204303295348e-06, "loss": 0.9375, "step": 9746 }, { "epoch": 1.9326353664326228, "grad_norm": 2.234375, "learning_rate": 2.769477248743132e-06, "loss": 1.0683, "step": 9747 }, { "epoch": 1.932835261487719, "grad_norm": 1.9453125, "learning_rate": 2.768534166236314e-06, "loss": 0.8, "step": 9748 }, { "epoch": 1.933035156542815, "grad_norm": 2.140625, "learning_rate": 2.7675911828509703e-06, "loss": 0.9387, "step": 9749 }, { "epoch": 1.933235051597911, "grad_norm": 2.078125, "learning_rate": 2.7666482986289876e-06, "loss": 0.9022, "step": 9750 }, { "epoch": 1.9334349466530072, "grad_norm": 2.21875, "learning_rate": 2.765705513612247e-06, "loss": 0.9885, "step": 9751 }, { "epoch": 1.9336348417081033, "grad_norm": 2.1875, "learning_rate": 2.7647628278426306e-06, "loss": 0.9144, "step": 9752 }, { "epoch": 1.9338347367631994, "grad_norm": 2.15625, "learning_rate": 2.7638202413620106e-06, "loss": 1.0567, "step": 9753 }, { "epoch": 1.9340346318182955, "grad_norm": 2.125, "learning_rate": 2.7628777542122553e-06, "loss": 0.941, "step": 9754 }, { "epoch": 1.9342345268733916, "grad_norm": 2.046875, "learning_rate": 2.7619353664352326e-06, "loss": 0.9615, "step": 9755 }, { "epoch": 1.9344344219284877, "grad_norm": 2.15625, "learning_rate": 2.760993078072802e-06, "loss": 0.9721, "step": 9756 }, { "epoch": 1.9346343169835836, "grad_norm": 2.234375, "learning_rate": 2.760050889166818e-06, "loss": 0.9166, "step": 9757 }, { "epoch": 1.9348342120386797, "grad_norm": 2.171875, "learning_rate": 2.7591087997591366e-06, "loss": 0.9897, "step": 9758 }, { "epoch": 1.9350341070937758, "grad_norm": 2.25, "learning_rate": 2.7581668098916024e-06, "loss": 1.0961, "step": 9759 }, { "epoch": 1.9352340021488719, "grad_norm": 2.4375, "learning_rate": 2.7572249196060575e-06, "loss": 0.9858, "step": 9760 }, { "epoch": 1.935433897203968, "grad_norm": 2.234375, "learning_rate": 2.756283128944344e-06, "loss": 1.0277, "step": 9761 }, { "epoch": 1.9356337922590638, "grad_norm": 2.203125, "learning_rate": 2.7553414379482936e-06, "loss": 1.0197, "step": 9762 }, { "epoch": 1.93583368731416, "grad_norm": 2.140625, "learning_rate": 2.7543998466597357e-06, "loss": 0.8993, "step": 9763 }, { "epoch": 1.936033582369256, "grad_norm": 2.34375, "learning_rate": 2.753458355120498e-06, "loss": 0.9828, "step": 9764 }, { "epoch": 1.9362334774243521, "grad_norm": 2.234375, "learning_rate": 2.7525169633724024e-06, "loss": 0.9341, "step": 9765 }, { "epoch": 1.9364333724794482, "grad_norm": 2.125, "learning_rate": 2.7515756714572593e-06, "loss": 1.0403, "step": 9766 }, { "epoch": 1.9366332675345443, "grad_norm": 2.125, "learning_rate": 2.750634479416887e-06, "loss": 0.9437, "step": 9767 }, { "epoch": 1.9368331625896404, "grad_norm": 2.203125, "learning_rate": 2.7496933872930907e-06, "loss": 0.9842, "step": 9768 }, { "epoch": 1.9370330576447365, "grad_norm": 2.25, "learning_rate": 2.7487523951276716e-06, "loss": 0.9598, "step": 9769 }, { "epoch": 1.9372329526998326, "grad_norm": 2.1875, "learning_rate": 2.747811502962433e-06, "loss": 1.0267, "step": 9770 }, { "epoch": 1.9374328477549287, "grad_norm": 2.109375, "learning_rate": 2.7468707108391667e-06, "loss": 0.9902, "step": 9771 }, { "epoch": 1.9376327428100248, "grad_norm": 2.234375, "learning_rate": 2.7459300187996614e-06, "loss": 1.0448, "step": 9772 }, { "epoch": 1.937832637865121, "grad_norm": 2.21875, "learning_rate": 2.7449894268857055e-06, "loss": 0.9244, "step": 9773 }, { "epoch": 1.938032532920217, "grad_norm": 2.234375, "learning_rate": 2.7440489351390782e-06, "loss": 1.0769, "step": 9774 }, { "epoch": 1.938232427975313, "grad_norm": 2.046875, "learning_rate": 2.743108543601554e-06, "loss": 0.9104, "step": 9775 }, { "epoch": 1.938432323030409, "grad_norm": 2.15625, "learning_rate": 2.7421682523149097e-06, "loss": 0.9787, "step": 9776 }, { "epoch": 1.9386322180855051, "grad_norm": 2.390625, "learning_rate": 2.741228061320911e-06, "loss": 0.9726, "step": 9777 }, { "epoch": 1.9388321131406012, "grad_norm": 2.234375, "learning_rate": 2.7402879706613176e-06, "loss": 0.9924, "step": 9778 }, { "epoch": 1.939032008195697, "grad_norm": 2.28125, "learning_rate": 2.7393479803778933e-06, "loss": 1.009, "step": 9779 }, { "epoch": 1.9392319032507932, "grad_norm": 2.203125, "learning_rate": 2.7384080905123912e-06, "loss": 0.9448, "step": 9780 }, { "epoch": 1.9394317983058893, "grad_norm": 2.125, "learning_rate": 2.7374683011065594e-06, "loss": 0.9685, "step": 9781 }, { "epoch": 1.9396316933609854, "grad_norm": 2.265625, "learning_rate": 2.736528612202142e-06, "loss": 1.0365, "step": 9782 }, { "epoch": 1.9398315884160815, "grad_norm": 2.3125, "learning_rate": 2.7355890238408845e-06, "loss": 1.0848, "step": 9783 }, { "epoch": 1.9400314834711776, "grad_norm": 2.140625, "learning_rate": 2.73464953606452e-06, "loss": 0.9731, "step": 9784 }, { "epoch": 1.9402313785262737, "grad_norm": 2.171875, "learning_rate": 2.7337101489147792e-06, "loss": 1.0464, "step": 9785 }, { "epoch": 1.9404312735813698, "grad_norm": 2.34375, "learning_rate": 2.7327708624333936e-06, "loss": 1.0343, "step": 9786 }, { "epoch": 1.940631168636466, "grad_norm": 2.046875, "learning_rate": 2.7318316766620845e-06, "loss": 0.9942, "step": 9787 }, { "epoch": 1.940831063691562, "grad_norm": 2.203125, "learning_rate": 2.7308925916425676e-06, "loss": 0.9997, "step": 9788 }, { "epoch": 1.941030958746658, "grad_norm": 2.171875, "learning_rate": 2.7299536074165624e-06, "loss": 0.9747, "step": 9789 }, { "epoch": 1.9412308538017542, "grad_norm": 2.109375, "learning_rate": 2.729014724025775e-06, "loss": 0.9891, "step": 9790 }, { "epoch": 1.9414307488568503, "grad_norm": 2.234375, "learning_rate": 2.7280759415119087e-06, "loss": 0.961, "step": 9791 }, { "epoch": 1.9416306439119462, "grad_norm": 2.234375, "learning_rate": 2.727137259916668e-06, "loss": 1.0758, "step": 9792 }, { "epoch": 1.9418305389670423, "grad_norm": 2.15625, "learning_rate": 2.7261986792817484e-06, "loss": 1.0119, "step": 9793 }, { "epoch": 1.9420304340221384, "grad_norm": 2.203125, "learning_rate": 2.725260199648838e-06, "loss": 0.9422, "step": 9794 }, { "epoch": 1.9422303290772345, "grad_norm": 2.234375, "learning_rate": 2.7243218210596288e-06, "loss": 0.9585, "step": 9795 }, { "epoch": 1.9424302241323306, "grad_norm": 2.140625, "learning_rate": 2.7233835435558033e-06, "loss": 0.9701, "step": 9796 }, { "epoch": 1.9426301191874265, "grad_norm": 2.265625, "learning_rate": 2.722445367179034e-06, "loss": 0.9562, "step": 9797 }, { "epoch": 1.9428300142425226, "grad_norm": 2.1875, "learning_rate": 2.7215072919709996e-06, "loss": 0.963, "step": 9798 }, { "epoch": 1.9430299092976187, "grad_norm": 2.1875, "learning_rate": 2.720569317973368e-06, "loss": 0.9333, "step": 9799 }, { "epoch": 1.9432298043527148, "grad_norm": 2.171875, "learning_rate": 2.719631445227802e-06, "loss": 0.9703, "step": 9800 }, { "epoch": 1.9434296994078109, "grad_norm": 2.25, "learning_rate": 2.718693673775966e-06, "loss": 1.0323, "step": 9801 }, { "epoch": 1.943629594462907, "grad_norm": 2.15625, "learning_rate": 2.7177560036595128e-06, "loss": 0.9448, "step": 9802 }, { "epoch": 1.943829489518003, "grad_norm": 2.171875, "learning_rate": 2.7168184349200926e-06, "loss": 0.9027, "step": 9803 }, { "epoch": 1.9440293845730992, "grad_norm": 2.375, "learning_rate": 2.7158809675993556e-06, "loss": 1.0262, "step": 9804 }, { "epoch": 1.9442292796281953, "grad_norm": 2.1875, "learning_rate": 2.714943601738942e-06, "loss": 0.9659, "step": 9805 }, { "epoch": 1.9444291746832914, "grad_norm": 2.1875, "learning_rate": 2.714006337380487e-06, "loss": 1.0365, "step": 9806 }, { "epoch": 1.9446290697383875, "grad_norm": 2.15625, "learning_rate": 2.713069174565629e-06, "loss": 1.0349, "step": 9807 }, { "epoch": 1.9448289647934836, "grad_norm": 2.25, "learning_rate": 2.712132113335994e-06, "loss": 1.0042, "step": 9808 }, { "epoch": 1.9450288598485796, "grad_norm": 2.15625, "learning_rate": 2.7111951537332058e-06, "loss": 0.8987, "step": 9809 }, { "epoch": 1.9452287549036755, "grad_norm": 2.078125, "learning_rate": 2.710258295798883e-06, "loss": 0.9283, "step": 9810 }, { "epoch": 1.9454286499587716, "grad_norm": 2.046875, "learning_rate": 2.709321539574644e-06, "loss": 0.8604, "step": 9811 }, { "epoch": 1.9456285450138677, "grad_norm": 2.1875, "learning_rate": 2.708384885102097e-06, "loss": 1.0184, "step": 9812 }, { "epoch": 1.9458284400689638, "grad_norm": 2.1875, "learning_rate": 2.7074483324228474e-06, "loss": 0.8505, "step": 9813 }, { "epoch": 1.9460283351240597, "grad_norm": 2.078125, "learning_rate": 2.7065118815785e-06, "loss": 1.018, "step": 9814 }, { "epoch": 1.9462282301791558, "grad_norm": 2.1875, "learning_rate": 2.705575532610649e-06, "loss": 1.0354, "step": 9815 }, { "epoch": 1.946428125234252, "grad_norm": 2.046875, "learning_rate": 2.704639285560886e-06, "loss": 0.9605, "step": 9816 }, { "epoch": 1.946628020289348, "grad_norm": 2.234375, "learning_rate": 2.7037031404708038e-06, "loss": 1.0619, "step": 9817 }, { "epoch": 1.946827915344444, "grad_norm": 2.15625, "learning_rate": 2.702767097381982e-06, "loss": 1.083, "step": 9818 }, { "epoch": 1.9470278103995402, "grad_norm": 2.125, "learning_rate": 2.7018311563359977e-06, "loss": 1.0053, "step": 9819 }, { "epoch": 1.9472277054546363, "grad_norm": 2.03125, "learning_rate": 2.700895317374431e-06, "loss": 0.9911, "step": 9820 }, { "epoch": 1.9474276005097324, "grad_norm": 2.25, "learning_rate": 2.699959580538849e-06, "loss": 0.9673, "step": 9821 }, { "epoch": 1.9476274955648285, "grad_norm": 2.171875, "learning_rate": 2.6990239458708145e-06, "loss": 1.0125, "step": 9822 }, { "epoch": 1.9478273906199246, "grad_norm": 2.15625, "learning_rate": 2.6980884134118925e-06, "loss": 1.0734, "step": 9823 }, { "epoch": 1.9480272856750207, "grad_norm": 2.125, "learning_rate": 2.697152983203637e-06, "loss": 0.89, "step": 9824 }, { "epoch": 1.9482271807301168, "grad_norm": 2.0625, "learning_rate": 2.696217655287598e-06, "loss": 0.8876, "step": 9825 }, { "epoch": 1.948427075785213, "grad_norm": 2.0, "learning_rate": 2.6952824297053272e-06, "loss": 0.911, "step": 9826 }, { "epoch": 1.948626970840309, "grad_norm": 2.046875, "learning_rate": 2.694347306498366e-06, "loss": 0.8906, "step": 9827 }, { "epoch": 1.9488268658954049, "grad_norm": 2.0625, "learning_rate": 2.6934122857082478e-06, "loss": 0.9755, "step": 9828 }, { "epoch": 1.949026760950501, "grad_norm": 2.28125, "learning_rate": 2.6924773673765114e-06, "loss": 0.9884, "step": 9829 }, { "epoch": 1.949226656005597, "grad_norm": 2.21875, "learning_rate": 2.6915425515446835e-06, "loss": 1.0249, "step": 9830 }, { "epoch": 1.9494265510606932, "grad_norm": 2.15625, "learning_rate": 2.6906078382542877e-06, "loss": 1.0337, "step": 9831 }, { "epoch": 1.949626446115789, "grad_norm": 2.296875, "learning_rate": 2.689673227546847e-06, "loss": 1.0812, "step": 9832 }, { "epoch": 1.9498263411708852, "grad_norm": 2.28125, "learning_rate": 2.6887387194638744e-06, "loss": 1.0124, "step": 9833 }, { "epoch": 1.9500262362259813, "grad_norm": 2.234375, "learning_rate": 2.687804314046879e-06, "loss": 0.9958, "step": 9834 }, { "epoch": 1.9502261312810774, "grad_norm": 2.140625, "learning_rate": 2.686870011337371e-06, "loss": 1.0071, "step": 9835 }, { "epoch": 1.9504260263361735, "grad_norm": 2.171875, "learning_rate": 2.6859358113768496e-06, "loss": 0.8691, "step": 9836 }, { "epoch": 1.9506259213912696, "grad_norm": 2.171875, "learning_rate": 2.6850017142068113e-06, "loss": 1.082, "step": 9837 }, { "epoch": 1.9508258164463657, "grad_norm": 2.15625, "learning_rate": 2.6840677198687515e-06, "loss": 1.072, "step": 9838 }, { "epoch": 1.9510257115014618, "grad_norm": 2.09375, "learning_rate": 2.683133828404155e-06, "loss": 0.8452, "step": 9839 }, { "epoch": 1.9512256065565579, "grad_norm": 2.1875, "learning_rate": 2.6822000398545078e-06, "loss": 1.0922, "step": 9840 }, { "epoch": 1.951425501611654, "grad_norm": 2.25, "learning_rate": 2.681266354261285e-06, "loss": 1.0021, "step": 9841 }, { "epoch": 1.95162539666675, "grad_norm": 2.140625, "learning_rate": 2.6803327716659644e-06, "loss": 0.9239, "step": 9842 }, { "epoch": 1.9518252917218462, "grad_norm": 2.265625, "learning_rate": 2.6793992921100153e-06, "loss": 0.9683, "step": 9843 }, { "epoch": 1.9520251867769423, "grad_norm": 2.078125, "learning_rate": 2.678465915634899e-06, "loss": 0.9021, "step": 9844 }, { "epoch": 1.9522250818320381, "grad_norm": 2.03125, "learning_rate": 2.6775326422820813e-06, "loss": 0.8585, "step": 9845 }, { "epoch": 1.9524249768871342, "grad_norm": 2.203125, "learning_rate": 2.676599472093015e-06, "loss": 0.924, "step": 9846 }, { "epoch": 1.9526248719422303, "grad_norm": 2.078125, "learning_rate": 2.675666405109151e-06, "loss": 0.8473, "step": 9847 }, { "epoch": 1.9528247669973264, "grad_norm": 2.25, "learning_rate": 2.6747334413719377e-06, "loss": 1.0141, "step": 9848 }, { "epoch": 1.9530246620524225, "grad_norm": 2.296875, "learning_rate": 2.6738005809228175e-06, "loss": 0.95, "step": 9849 }, { "epoch": 1.9532245571075184, "grad_norm": 2.109375, "learning_rate": 2.6728678238032245e-06, "loss": 0.9748, "step": 9850 }, { "epoch": 1.9534244521626145, "grad_norm": 2.1875, "learning_rate": 2.671935170054597e-06, "loss": 1.0148, "step": 9851 }, { "epoch": 1.9536243472177106, "grad_norm": 2.171875, "learning_rate": 2.6710026197183595e-06, "loss": 1.0376, "step": 9852 }, { "epoch": 1.9538242422728067, "grad_norm": 2.15625, "learning_rate": 2.670070172835936e-06, "loss": 1.0051, "step": 9853 }, { "epoch": 1.9540241373279028, "grad_norm": 2.125, "learning_rate": 2.669137829448748e-06, "loss": 0.9586, "step": 9854 }, { "epoch": 1.954224032382999, "grad_norm": 2.15625, "learning_rate": 2.6682055895982085e-06, "loss": 0.9388, "step": 9855 }, { "epoch": 1.954423927438095, "grad_norm": 2.21875, "learning_rate": 2.667273453325726e-06, "loss": 1.0055, "step": 9856 }, { "epoch": 1.954623822493191, "grad_norm": 2.109375, "learning_rate": 2.6663414206727116e-06, "loss": 0.9695, "step": 9857 }, { "epoch": 1.9548237175482872, "grad_norm": 2.203125, "learning_rate": 2.66540949168056e-06, "loss": 1.0922, "step": 9858 }, { "epoch": 1.9550236126033833, "grad_norm": 2.1875, "learning_rate": 2.6644776663906674e-06, "loss": 0.9678, "step": 9859 }, { "epoch": 1.9552235076584794, "grad_norm": 2.328125, "learning_rate": 2.663545944844429e-06, "loss": 1.1365, "step": 9860 }, { "epoch": 1.9554234027135755, "grad_norm": 2.21875, "learning_rate": 2.6626143270832313e-06, "loss": 0.9636, "step": 9861 }, { "epoch": 1.9556232977686716, "grad_norm": 2.140625, "learning_rate": 2.6616828131484528e-06, "loss": 0.9908, "step": 9862 }, { "epoch": 1.9558231928237675, "grad_norm": 2.125, "learning_rate": 2.6607514030814757e-06, "loss": 0.9, "step": 9863 }, { "epoch": 1.9560230878788636, "grad_norm": 2.171875, "learning_rate": 2.659820096923672e-06, "loss": 0.9088, "step": 9864 }, { "epoch": 1.9562229829339597, "grad_norm": 2.09375, "learning_rate": 2.658888894716407e-06, "loss": 0.9174, "step": 9865 }, { "epoch": 1.9564228779890558, "grad_norm": 2.203125, "learning_rate": 2.65795779650105e-06, "loss": 1.1063, "step": 9866 }, { "epoch": 1.9566227730441517, "grad_norm": 2.15625, "learning_rate": 2.657026802318957e-06, "loss": 0.9581, "step": 9867 }, { "epoch": 1.9568226680992478, "grad_norm": 2.171875, "learning_rate": 2.6560959122114815e-06, "loss": 1.0477, "step": 9868 }, { "epoch": 1.9570225631543439, "grad_norm": 2.0625, "learning_rate": 2.6551651262199773e-06, "loss": 0.9407, "step": 9869 }, { "epoch": 1.95722245820944, "grad_norm": 2.1875, "learning_rate": 2.6542344443857874e-06, "loss": 0.9956, "step": 9870 }, { "epoch": 1.957422353264536, "grad_norm": 2.09375, "learning_rate": 2.653303866750253e-06, "loss": 0.9238, "step": 9871 }, { "epoch": 1.9576222483196322, "grad_norm": 2.046875, "learning_rate": 2.652373393354709e-06, "loss": 0.932, "step": 9872 }, { "epoch": 1.9578221433747283, "grad_norm": 2.1875, "learning_rate": 2.651443024240489e-06, "loss": 1.0184, "step": 9873 }, { "epoch": 1.9580220384298244, "grad_norm": 2.1875, "learning_rate": 2.65051275944892e-06, "loss": 0.9246, "step": 9874 }, { "epoch": 1.9582219334849205, "grad_norm": 2.25, "learning_rate": 2.6495825990213208e-06, "loss": 0.9082, "step": 9875 }, { "epoch": 1.9584218285400166, "grad_norm": 2.125, "learning_rate": 2.6486525429990133e-06, "loss": 0.9802, "step": 9876 }, { "epoch": 1.9586217235951127, "grad_norm": 2.1875, "learning_rate": 2.647722591423309e-06, "loss": 0.9473, "step": 9877 }, { "epoch": 1.9588216186502088, "grad_norm": 2.171875, "learning_rate": 2.646792744335514e-06, "loss": 0.9576, "step": 9878 }, { "epoch": 1.9590215137053049, "grad_norm": 2.21875, "learning_rate": 2.645863001776936e-06, "loss": 0.9337, "step": 9879 }, { "epoch": 1.9592214087604007, "grad_norm": 2.109375, "learning_rate": 2.6449333637888717e-06, "loss": 0.9523, "step": 9880 }, { "epoch": 1.9594213038154968, "grad_norm": 2.125, "learning_rate": 2.644003830412614e-06, "loss": 0.95, "step": 9881 }, { "epoch": 1.959621198870593, "grad_norm": 2.078125, "learning_rate": 2.643074401689457e-06, "loss": 0.9245, "step": 9882 }, { "epoch": 1.959821093925689, "grad_norm": 2.15625, "learning_rate": 2.6421450776606827e-06, "loss": 0.9675, "step": 9883 }, { "epoch": 1.9600209889807851, "grad_norm": 2.203125, "learning_rate": 2.6412158583675707e-06, "loss": 1.0271, "step": 9884 }, { "epoch": 1.960220884035881, "grad_norm": 2.140625, "learning_rate": 2.6402867438514e-06, "loss": 0.9307, "step": 9885 }, { "epoch": 1.9604207790909771, "grad_norm": 2.171875, "learning_rate": 2.63935773415344e-06, "loss": 1.0636, "step": 9886 }, { "epoch": 1.9606206741460732, "grad_norm": 2.03125, "learning_rate": 2.6384288293149572e-06, "loss": 0.9815, "step": 9887 }, { "epoch": 1.9608205692011693, "grad_norm": 2.265625, "learning_rate": 2.6375000293772144e-06, "loss": 1.0368, "step": 9888 }, { "epoch": 1.9610204642562654, "grad_norm": 2.046875, "learning_rate": 2.636571334381467e-06, "loss": 0.9559, "step": 9889 }, { "epoch": 1.9612203593113615, "grad_norm": 2.0625, "learning_rate": 2.635642744368967e-06, "loss": 0.9255, "step": 9890 }, { "epoch": 1.9614202543664576, "grad_norm": 2.421875, "learning_rate": 2.634714259380966e-06, "loss": 0.9569, "step": 9891 }, { "epoch": 1.9616201494215537, "grad_norm": 2.15625, "learning_rate": 2.6337858794587046e-06, "loss": 0.8442, "step": 9892 }, { "epoch": 1.9618200444766498, "grad_norm": 2.21875, "learning_rate": 2.63285760464342e-06, "loss": 0.9336, "step": 9893 }, { "epoch": 1.962019939531746, "grad_norm": 2.203125, "learning_rate": 2.6319294349763495e-06, "loss": 0.9557, "step": 9894 }, { "epoch": 1.962219834586842, "grad_norm": 2.203125, "learning_rate": 2.6310013704987207e-06, "loss": 0.956, "step": 9895 }, { "epoch": 1.9624197296419381, "grad_norm": 2.078125, "learning_rate": 2.6300734112517562e-06, "loss": 0.9001, "step": 9896 }, { "epoch": 1.9626196246970342, "grad_norm": 2.1875, "learning_rate": 2.6291455572766794e-06, "loss": 0.8764, "step": 9897 }, { "epoch": 1.96281951975213, "grad_norm": 2.125, "learning_rate": 2.6282178086147036e-06, "loss": 0.9871, "step": 9898 }, { "epoch": 1.9630194148072262, "grad_norm": 2.25, "learning_rate": 2.6272901653070397e-06, "loss": 1.0325, "step": 9899 }, { "epoch": 1.9632193098623223, "grad_norm": 2.1875, "learning_rate": 2.626362627394892e-06, "loss": 0.9382, "step": 9900 }, { "epoch": 1.9634192049174184, "grad_norm": 2.21875, "learning_rate": 2.6254351949194634e-06, "loss": 1.0445, "step": 9901 }, { "epoch": 1.9636190999725143, "grad_norm": 2.28125, "learning_rate": 2.6245078679219503e-06, "loss": 0.9785, "step": 9902 }, { "epoch": 1.9638189950276104, "grad_norm": 2.09375, "learning_rate": 2.6235806464435425e-06, "loss": 1.0077, "step": 9903 }, { "epoch": 1.9640188900827065, "grad_norm": 2.171875, "learning_rate": 2.6226535305254303e-06, "loss": 0.9668, "step": 9904 }, { "epoch": 1.9642187851378026, "grad_norm": 2.171875, "learning_rate": 2.6217265202087944e-06, "loss": 0.9459, "step": 9905 }, { "epoch": 1.9644186801928987, "grad_norm": 2.65625, "learning_rate": 2.62079961553481e-06, "loss": 0.9683, "step": 9906 }, { "epoch": 1.9646185752479948, "grad_norm": 2.234375, "learning_rate": 2.619872816544655e-06, "loss": 0.909, "step": 9907 }, { "epoch": 1.9648184703030909, "grad_norm": 2.09375, "learning_rate": 2.6189461232794956e-06, "loss": 0.882, "step": 9908 }, { "epoch": 1.965018365358187, "grad_norm": 2.203125, "learning_rate": 2.6180195357804926e-06, "loss": 1.0062, "step": 9909 }, { "epoch": 1.965218260413283, "grad_norm": 2.125, "learning_rate": 2.6170930540888096e-06, "loss": 0.9332, "step": 9910 }, { "epoch": 1.9654181554683792, "grad_norm": 2.125, "learning_rate": 2.6161666782455986e-06, "loss": 0.9635, "step": 9911 }, { "epoch": 1.9656180505234753, "grad_norm": 2.046875, "learning_rate": 2.615240408292007e-06, "loss": 0.9407, "step": 9912 }, { "epoch": 1.9658179455785714, "grad_norm": 2.171875, "learning_rate": 2.614314244269184e-06, "loss": 1.0038, "step": 9913 }, { "epoch": 1.9660178406336675, "grad_norm": 2.046875, "learning_rate": 2.6133881862182676e-06, "loss": 0.9732, "step": 9914 }, { "epoch": 1.9662177356887633, "grad_norm": 2.25, "learning_rate": 2.612462234180391e-06, "loss": 1.0319, "step": 9915 }, { "epoch": 1.9664176307438594, "grad_norm": 2.09375, "learning_rate": 2.611536388196688e-06, "loss": 0.9009, "step": 9916 }, { "epoch": 1.9666175257989555, "grad_norm": 2.125, "learning_rate": 2.610610648308285e-06, "loss": 1.0454, "step": 9917 }, { "epoch": 1.9668174208540516, "grad_norm": 2.109375, "learning_rate": 2.6096850145563014e-06, "loss": 0.9378, "step": 9918 }, { "epoch": 1.9670173159091477, "grad_norm": 2.109375, "learning_rate": 2.608759486981853e-06, "loss": 0.9732, "step": 9919 }, { "epoch": 1.9672172109642436, "grad_norm": 2.25, "learning_rate": 2.6078340656260535e-06, "loss": 1.0111, "step": 9920 }, { "epoch": 1.9674171060193397, "grad_norm": 2.203125, "learning_rate": 2.606908750530008e-06, "loss": 1.0066, "step": 9921 }, { "epoch": 1.9676170010744358, "grad_norm": 2.390625, "learning_rate": 2.605983541734822e-06, "loss": 0.9409, "step": 9922 }, { "epoch": 1.967816896129532, "grad_norm": 2.3125, "learning_rate": 2.605058439281591e-06, "loss": 0.9418, "step": 9923 }, { "epoch": 1.968016791184628, "grad_norm": 2.0625, "learning_rate": 2.6041334432114064e-06, "loss": 0.9253, "step": 9924 }, { "epoch": 1.9682166862397241, "grad_norm": 2.0625, "learning_rate": 2.6032085535653605e-06, "loss": 0.9287, "step": 9925 }, { "epoch": 1.9684165812948202, "grad_norm": 2.15625, "learning_rate": 2.6022837703845346e-06, "loss": 0.9976, "step": 9926 }, { "epoch": 1.9686164763499163, "grad_norm": 2.1875, "learning_rate": 2.6013590937100054e-06, "loss": 0.9212, "step": 9927 }, { "epoch": 1.9688163714050124, "grad_norm": 2.0625, "learning_rate": 2.600434523582851e-06, "loss": 0.9482, "step": 9928 }, { "epoch": 1.9690162664601085, "grad_norm": 2.15625, "learning_rate": 2.5995100600441392e-06, "loss": 0.9942, "step": 9929 }, { "epoch": 1.9692161615152046, "grad_norm": 2.25, "learning_rate": 2.598585703134934e-06, "loss": 0.9624, "step": 9930 }, { "epoch": 1.9694160565703007, "grad_norm": 2.171875, "learning_rate": 2.597661452896293e-06, "loss": 0.9544, "step": 9931 }, { "epoch": 1.9696159516253968, "grad_norm": 2.203125, "learning_rate": 2.596737309369276e-06, "loss": 1.0619, "step": 9932 }, { "epoch": 1.9698158466804927, "grad_norm": 2.265625, "learning_rate": 2.5958132725949314e-06, "loss": 1.0108, "step": 9933 }, { "epoch": 1.9700157417355888, "grad_norm": 2.25, "learning_rate": 2.5948893426143018e-06, "loss": 1.0122, "step": 9934 }, { "epoch": 1.970215636790685, "grad_norm": 2.046875, "learning_rate": 2.5939655194684334e-06, "loss": 0.9055, "step": 9935 }, { "epoch": 1.970415531845781, "grad_norm": 2.125, "learning_rate": 2.593041803198359e-06, "loss": 1.0203, "step": 9936 }, { "epoch": 1.9706154269008769, "grad_norm": 2.203125, "learning_rate": 2.592118193845109e-06, "loss": 1.0048, "step": 9937 }, { "epoch": 1.970815321955973, "grad_norm": 2.234375, "learning_rate": 2.5911946914497133e-06, "loss": 1.0231, "step": 9938 }, { "epoch": 1.971015217011069, "grad_norm": 2.125, "learning_rate": 2.590271296053193e-06, "loss": 0.9282, "step": 9939 }, { "epoch": 1.9712151120661652, "grad_norm": 2.078125, "learning_rate": 2.5893480076965615e-06, "loss": 0.9355, "step": 9940 }, { "epoch": 1.9714150071212613, "grad_norm": 2.21875, "learning_rate": 2.588424826420836e-06, "loss": 1.033, "step": 9941 }, { "epoch": 1.9716149021763574, "grad_norm": 2.25, "learning_rate": 2.5875017522670227e-06, "loss": 0.9113, "step": 9942 }, { "epoch": 1.9718147972314535, "grad_norm": 2.171875, "learning_rate": 2.5865787852761217e-06, "loss": 1.0371, "step": 9943 }, { "epoch": 1.9720146922865496, "grad_norm": 2.125, "learning_rate": 2.585655925489135e-06, "loss": 0.9781, "step": 9944 }, { "epoch": 1.9722145873416457, "grad_norm": 2.1875, "learning_rate": 2.584733172947055e-06, "loss": 0.9384, "step": 9945 }, { "epoch": 1.9724144823967418, "grad_norm": 2.1875, "learning_rate": 2.5838105276908667e-06, "loss": 1.029, "step": 9946 }, { "epoch": 1.9726143774518379, "grad_norm": 2.234375, "learning_rate": 2.5828879897615587e-06, "loss": 1.0109, "step": 9947 }, { "epoch": 1.972814272506934, "grad_norm": 2.234375, "learning_rate": 2.581965559200108e-06, "loss": 0.9886, "step": 9948 }, { "epoch": 1.97301416756203, "grad_norm": 2.1875, "learning_rate": 2.58104323604749e-06, "loss": 0.9924, "step": 9949 }, { "epoch": 1.9732140626171262, "grad_norm": 2.09375, "learning_rate": 2.5801210203446718e-06, "loss": 0.9618, "step": 9950 }, { "epoch": 1.973413957672222, "grad_norm": 2.09375, "learning_rate": 2.57919891213262e-06, "loss": 0.9703, "step": 9951 }, { "epoch": 1.9736138527273182, "grad_norm": 2.1875, "learning_rate": 2.578276911452292e-06, "loss": 0.9618, "step": 9952 }, { "epoch": 1.9738137477824143, "grad_norm": 2.171875, "learning_rate": 2.5773550183446465e-06, "loss": 0.9932, "step": 9953 }, { "epoch": 1.9740136428375104, "grad_norm": 2.171875, "learning_rate": 2.5764332328506327e-06, "loss": 0.9966, "step": 9954 }, { "epoch": 1.9742135378926062, "grad_norm": 2.0625, "learning_rate": 2.5755115550111942e-06, "loss": 0.986, "step": 9955 }, { "epoch": 1.9744134329477023, "grad_norm": 2.125, "learning_rate": 2.574589984867275e-06, "loss": 0.9848, "step": 9956 }, { "epoch": 1.9746133280027984, "grad_norm": 2.15625, "learning_rate": 2.5736685224598097e-06, "loss": 1.038, "step": 9957 }, { "epoch": 1.9748132230578945, "grad_norm": 2.203125, "learning_rate": 2.5727471678297277e-06, "loss": 0.9692, "step": 9958 }, { "epoch": 1.9750131181129906, "grad_norm": 2.125, "learning_rate": 2.5718259210179588e-06, "loss": 0.9844, "step": 9959 }, { "epoch": 1.9752130131680867, "grad_norm": 2.1875, "learning_rate": 2.5709047820654236e-06, "loss": 0.9823, "step": 9960 }, { "epoch": 1.9754129082231828, "grad_norm": 2.078125, "learning_rate": 2.569983751013039e-06, "loss": 0.9745, "step": 9961 }, { "epoch": 1.975612803278279, "grad_norm": 2.46875, "learning_rate": 2.5690628279017136e-06, "loss": 0.921, "step": 9962 }, { "epoch": 1.975812698333375, "grad_norm": 2.09375, "learning_rate": 2.56814201277236e-06, "loss": 0.9791, "step": 9963 }, { "epoch": 1.9760125933884711, "grad_norm": 1.9921875, "learning_rate": 2.567221305665879e-06, "loss": 0.8148, "step": 9964 }, { "epoch": 1.9762124884435672, "grad_norm": 2.28125, "learning_rate": 2.566300706623165e-06, "loss": 0.9585, "step": 9965 }, { "epoch": 1.9764123834986633, "grad_norm": 2.265625, "learning_rate": 2.5653802156851158e-06, "loss": 0.9921, "step": 9966 }, { "epoch": 1.9766122785537594, "grad_norm": 2.109375, "learning_rate": 2.5644598328926183e-06, "loss": 0.8986, "step": 9967 }, { "epoch": 1.9768121736088553, "grad_norm": 2.1875, "learning_rate": 2.563539558286552e-06, "loss": 0.9104, "step": 9968 }, { "epoch": 1.9770120686639514, "grad_norm": 2.15625, "learning_rate": 2.5626193919078008e-06, "loss": 0.9292, "step": 9969 }, { "epoch": 1.9772119637190475, "grad_norm": 2.234375, "learning_rate": 2.561699333797236e-06, "loss": 0.9468, "step": 9970 }, { "epoch": 1.9774118587741436, "grad_norm": 2.15625, "learning_rate": 2.560779383995724e-06, "loss": 1.0181, "step": 9971 }, { "epoch": 1.9776117538292397, "grad_norm": 2.0625, "learning_rate": 2.559859542544133e-06, "loss": 0.9923, "step": 9972 }, { "epoch": 1.9778116488843356, "grad_norm": 2.171875, "learning_rate": 2.5589398094833205e-06, "loss": 0.9749, "step": 9973 }, { "epoch": 1.9780115439394317, "grad_norm": 2.1875, "learning_rate": 2.55802018485414e-06, "loss": 0.9581, "step": 9974 }, { "epoch": 1.9782114389945278, "grad_norm": 2.203125, "learning_rate": 2.557100668697443e-06, "loss": 0.9778, "step": 9975 }, { "epoch": 1.9784113340496239, "grad_norm": 2.09375, "learning_rate": 2.5561812610540736e-06, "loss": 0.9616, "step": 9976 }, { "epoch": 1.97861122910472, "grad_norm": 2.140625, "learning_rate": 2.555261961964872e-06, "loss": 0.9669, "step": 9977 }, { "epoch": 1.978811124159816, "grad_norm": 2.0, "learning_rate": 2.5543427714706705e-06, "loss": 0.8204, "step": 9978 }, { "epoch": 1.9790110192149122, "grad_norm": 2.21875, "learning_rate": 2.5534236896123043e-06, "loss": 1.0263, "step": 9979 }, { "epoch": 1.9792109142700083, "grad_norm": 2.171875, "learning_rate": 2.552504716430596e-06, "loss": 0.9726, "step": 9980 }, { "epoch": 1.9794108093251044, "grad_norm": 2.21875, "learning_rate": 2.551585851966367e-06, "loss": 0.9656, "step": 9981 }, { "epoch": 1.9796107043802005, "grad_norm": 2.171875, "learning_rate": 2.550667096260433e-06, "loss": 0.9558, "step": 9982 }, { "epoch": 1.9798105994352966, "grad_norm": 2.15625, "learning_rate": 2.549748449353603e-06, "loss": 0.9942, "step": 9983 }, { "epoch": 1.9800104944903927, "grad_norm": 2.203125, "learning_rate": 2.548829911286687e-06, "loss": 0.9188, "step": 9984 }, { "epoch": 1.9802103895454888, "grad_norm": 2.234375, "learning_rate": 2.5479114821004845e-06, "loss": 1.0964, "step": 9985 }, { "epoch": 1.9804102846005847, "grad_norm": 2.296875, "learning_rate": 2.5469931618357907e-06, "loss": 1.0178, "step": 9986 }, { "epoch": 1.9806101796556808, "grad_norm": 2.140625, "learning_rate": 2.5460749505334004e-06, "loss": 0.9592, "step": 9987 }, { "epoch": 1.9808100747107769, "grad_norm": 2.046875, "learning_rate": 2.5451568482340983e-06, "loss": 0.9825, "step": 9988 }, { "epoch": 1.981009969765873, "grad_norm": 2.25, "learning_rate": 2.5442388549786668e-06, "loss": 0.9168, "step": 9989 }, { "epoch": 1.9812098648209688, "grad_norm": 1.9765625, "learning_rate": 2.543320970807882e-06, "loss": 0.8362, "step": 9990 }, { "epoch": 1.981409759876065, "grad_norm": 2.265625, "learning_rate": 2.5424031957625184e-06, "loss": 1.0724, "step": 9991 }, { "epoch": 1.981609654931161, "grad_norm": 2.1875, "learning_rate": 2.5414855298833423e-06, "loss": 0.9505, "step": 9992 }, { "epoch": 1.9818095499862571, "grad_norm": 2.140625, "learning_rate": 2.540567973211115e-06, "loss": 1.0578, "step": 9993 }, { "epoch": 1.9820094450413532, "grad_norm": 2.0625, "learning_rate": 2.539650525786597e-06, "loss": 0.9175, "step": 9994 }, { "epoch": 1.9822093400964493, "grad_norm": 2.078125, "learning_rate": 2.5387331876505405e-06, "loss": 0.952, "step": 9995 }, { "epoch": 1.9824092351515454, "grad_norm": 2.078125, "learning_rate": 2.5378159588436907e-06, "loss": 1.0021, "step": 9996 }, { "epoch": 1.9826091302066415, "grad_norm": 2.21875, "learning_rate": 2.536898839406795e-06, "loss": 0.9979, "step": 9997 }, { "epoch": 1.9828090252617376, "grad_norm": 2.15625, "learning_rate": 2.5359818293805893e-06, "loss": 0.9755, "step": 9998 }, { "epoch": 1.9830089203168337, "grad_norm": 2.15625, "learning_rate": 2.5350649288058065e-06, "loss": 0.9423, "step": 9999 }, { "epoch": 1.9832088153719298, "grad_norm": 2.203125, "learning_rate": 2.534148137723178e-06, "loss": 0.9666, "step": 10000 }, { "epoch": 1.983408710427026, "grad_norm": 2.046875, "learning_rate": 2.5332314561734257e-06, "loss": 0.823, "step": 10001 }, { "epoch": 1.983608605482122, "grad_norm": 2.125, "learning_rate": 2.532314884197267e-06, "loss": 1.0064, "step": 10002 }, { "epoch": 1.983808500537218, "grad_norm": 2.171875, "learning_rate": 2.5313984218354185e-06, "loss": 1.0481, "step": 10003 }, { "epoch": 1.984008395592314, "grad_norm": 2.078125, "learning_rate": 2.530482069128589e-06, "loss": 0.9209, "step": 10004 } ], "logging_steps": 1, "max_steps": 15006, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 5002, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.952236882409082e+19, "train_batch_size": 1, "trial_name": null, "trial_params": null }