diff --git "a/checkpoint-1268/trainer_state.json" "b/checkpoint-1268/trainer_state.json" new file mode 100644--- /dev/null +++ "b/checkpoint-1268/trainer_state.json" @@ -0,0 +1,7629 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.9983004507500186, + "eval_steps": 500, + "global_step": 1268, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 3.921568627450981e-07, + "loss": 2.8563, + "step": 1 + }, + { + "epoch": 0.0, + "learning_rate": 7.843137254901962e-07, + "loss": 3.0208, + "step": 2 + }, + { + "epoch": 0.01, + "learning_rate": 1.1764705882352942e-06, + "loss": 4.0651, + "step": 3 + }, + { + "epoch": 0.01, + "learning_rate": 1.5686274509803923e-06, + "loss": 3.3154, + "step": 4 + }, + { + "epoch": 0.01, + "learning_rate": 1.96078431372549e-06, + "loss": 3.9875, + "step": 5 + }, + { + "epoch": 0.01, + "learning_rate": 2.3529411764705885e-06, + "loss": 9.4198, + "step": 6 + }, + { + "epoch": 0.02, + "learning_rate": 2.7450980392156867e-06, + "loss": 3.4956, + "step": 7 + }, + { + "epoch": 0.02, + "learning_rate": 3.1372549019607846e-06, + "loss": 3.646, + "step": 8 + }, + { + "epoch": 0.02, + "learning_rate": 3.529411764705883e-06, + "loss": 2.9569, + "step": 9 + }, + { + "epoch": 0.02, + "learning_rate": 3.92156862745098e-06, + "loss": 3.2712, + "step": 10 + }, + { + "epoch": 0.03, + "learning_rate": 4.313725490196079e-06, + "loss": 4.4552, + "step": 11 + }, + { + "epoch": 0.03, + "learning_rate": 4.705882352941177e-06, + "loss": 3.2141, + "step": 12 + }, + { + "epoch": 0.03, + "learning_rate": 5.098039215686274e-06, + "loss": 2.8645, + "step": 13 + }, + { + "epoch": 0.03, + "learning_rate": 5.4901960784313735e-06, + "loss": 2.9299, + "step": 14 + }, + { + "epoch": 0.04, + "learning_rate": 5.882352941176471e-06, + "loss": 3.2333, + "step": 15 + }, + { + "epoch": 0.04, + "learning_rate": 6.274509803921569e-06, + "loss": 3.3361, + "step": 16 + }, + { + "epoch": 0.04, + "learning_rate": 6.666666666666667e-06, + "loss": 2.8326, + "step": 17 + }, + { + "epoch": 0.04, + "learning_rate": 7.058823529411766e-06, + "loss": 5.9936, + "step": 18 + }, + { + "epoch": 0.04, + "learning_rate": 7.450980392156863e-06, + "loss": 2.7551, + "step": 19 + }, + { + "epoch": 0.05, + "learning_rate": 7.84313725490196e-06, + "loss": 2.3296, + "step": 20 + }, + { + "epoch": 0.05, + "learning_rate": 8.23529411764706e-06, + "loss": 2.1901, + "step": 21 + }, + { + "epoch": 0.05, + "learning_rate": 8.627450980392157e-06, + "loss": 1.7673, + "step": 22 + }, + { + "epoch": 0.05, + "learning_rate": 9.019607843137256e-06, + "loss": 1.6721, + "step": 23 + }, + { + "epoch": 0.06, + "learning_rate": 9.411764705882354e-06, + "loss": 1.7151, + "step": 24 + }, + { + "epoch": 0.06, + "learning_rate": 9.803921568627451e-06, + "loss": 1.7576, + "step": 25 + }, + { + "epoch": 0.06, + "learning_rate": 1.0196078431372549e-05, + "loss": 1.4461, + "step": 26 + }, + { + "epoch": 0.06, + "learning_rate": 1.0588235294117648e-05, + "loss": 3.8357, + "step": 27 + }, + { + "epoch": 0.07, + "learning_rate": 1.0980392156862747e-05, + "loss": 1.5292, + "step": 28 + }, + { + "epoch": 0.07, + "learning_rate": 1.1372549019607844e-05, + "loss": 1.484, + "step": 29 + }, + { + "epoch": 0.07, + "learning_rate": 1.1764705882352942e-05, + "loss": 1.2995, + "step": 30 + }, + { + "epoch": 0.07, + "learning_rate": 1.215686274509804e-05, + "loss": 1.1459, + "step": 31 + }, + { + "epoch": 0.08, + "learning_rate": 1.2549019607843138e-05, + "loss": 1.1017, + "step": 32 + }, + { + "epoch": 0.08, + "learning_rate": 1.2941176470588238e-05, + "loss": 1.2681, + "step": 33 + }, + { + "epoch": 0.08, + "learning_rate": 1.3333333333333333e-05, + "loss": 1.7749, + "step": 34 + }, + { + "epoch": 0.08, + "learning_rate": 1.3725490196078432e-05, + "loss": 1.4039, + "step": 35 + }, + { + "epoch": 0.09, + "learning_rate": 1.4117647058823532e-05, + "loss": 1.304, + "step": 36 + }, + { + "epoch": 0.09, + "learning_rate": 1.4509803921568629e-05, + "loss": 1.1641, + "step": 37 + }, + { + "epoch": 0.09, + "learning_rate": 1.4901960784313726e-05, + "loss": 1.0715, + "step": 38 + }, + { + "epoch": 0.09, + "learning_rate": 1.5294117647058822e-05, + "loss": 1.0567, + "step": 39 + }, + { + "epoch": 0.09, + "learning_rate": 1.568627450980392e-05, + "loss": 1.3111, + "step": 40 + }, + { + "epoch": 0.1, + "learning_rate": 1.607843137254902e-05, + "loss": 1.4909, + "step": 41 + }, + { + "epoch": 0.1, + "learning_rate": 1.647058823529412e-05, + "loss": 0.961, + "step": 42 + }, + { + "epoch": 0.1, + "learning_rate": 1.686274509803922e-05, + "loss": 1.1519, + "step": 43 + }, + { + "epoch": 0.1, + "learning_rate": 1.7254901960784314e-05, + "loss": 0.9403, + "step": 44 + }, + { + "epoch": 0.11, + "learning_rate": 1.7647058823529414e-05, + "loss": 1.1603, + "step": 45 + }, + { + "epoch": 0.11, + "learning_rate": 1.8039215686274513e-05, + "loss": 0.9909, + "step": 46 + }, + { + "epoch": 0.11, + "learning_rate": 1.843137254901961e-05, + "loss": 1.0872, + "step": 47 + }, + { + "epoch": 0.11, + "learning_rate": 1.8823529411764708e-05, + "loss": 1.0935, + "step": 48 + }, + { + "epoch": 0.12, + "learning_rate": 1.9215686274509807e-05, + "loss": 1.2776, + "step": 49 + }, + { + "epoch": 0.12, + "learning_rate": 1.9607843137254903e-05, + "loss": 1.0812, + "step": 50 + }, + { + "epoch": 0.12, + "learning_rate": 2e-05, + "loss": 1.0436, + "step": 51 + }, + { + "epoch": 0.12, + "learning_rate": 1.9987782529016497e-05, + "loss": 1.275, + "step": 52 + }, + { + "epoch": 0.13, + "learning_rate": 1.997556505803299e-05, + "loss": 1.1309, + "step": 53 + }, + { + "epoch": 0.13, + "learning_rate": 1.9963347587049484e-05, + "loss": 1.0838, + "step": 54 + }, + { + "epoch": 0.13, + "learning_rate": 1.9951130116065975e-05, + "loss": 1.0861, + "step": 55 + }, + { + "epoch": 0.13, + "learning_rate": 1.993891264508247e-05, + "loss": 0.9737, + "step": 56 + }, + { + "epoch": 0.13, + "learning_rate": 1.9926695174098962e-05, + "loss": 0.9726, + "step": 57 + }, + { + "epoch": 0.14, + "learning_rate": 1.9914477703115457e-05, + "loss": 1.1044, + "step": 58 + }, + { + "epoch": 0.14, + "learning_rate": 1.9902260232131952e-05, + "loss": 1.2198, + "step": 59 + }, + { + "epoch": 0.14, + "learning_rate": 1.9890042761148444e-05, + "loss": 1.2454, + "step": 60 + }, + { + "epoch": 0.14, + "learning_rate": 1.987782529016494e-05, + "loss": 1.0945, + "step": 61 + }, + { + "epoch": 0.15, + "learning_rate": 1.986560781918143e-05, + "loss": 1.2243, + "step": 62 + }, + { + "epoch": 0.15, + "learning_rate": 1.9853390348197926e-05, + "loss": 1.0895, + "step": 63 + }, + { + "epoch": 0.15, + "learning_rate": 1.9841172877214418e-05, + "loss": 1.3153, + "step": 64 + }, + { + "epoch": 0.15, + "learning_rate": 1.9828955406230913e-05, + "loss": 1.3891, + "step": 65 + }, + { + "epoch": 0.16, + "learning_rate": 1.9816737935247404e-05, + "loss": 1.1219, + "step": 66 + }, + { + "epoch": 0.16, + "learning_rate": 1.98045204642639e-05, + "loss": 1.0493, + "step": 67 + }, + { + "epoch": 0.16, + "learning_rate": 1.979230299328039e-05, + "loss": 1.0645, + "step": 68 + }, + { + "epoch": 0.16, + "learning_rate": 1.9780085522296886e-05, + "loss": 1.3425, + "step": 69 + }, + { + "epoch": 0.17, + "learning_rate": 1.9767868051313378e-05, + "loss": 1.8974, + "step": 70 + }, + { + "epoch": 0.17, + "learning_rate": 1.9755650580329873e-05, + "loss": 1.0191, + "step": 71 + }, + { + "epoch": 0.17, + "learning_rate": 1.9743433109346365e-05, + "loss": 1.1784, + "step": 72 + }, + { + "epoch": 0.17, + "learning_rate": 1.973121563836286e-05, + "loss": 1.0353, + "step": 73 + }, + { + "epoch": 0.17, + "learning_rate": 1.9718998167379355e-05, + "loss": 1.2755, + "step": 74 + }, + { + "epoch": 0.18, + "learning_rate": 1.970678069639585e-05, + "loss": 0.9851, + "step": 75 + }, + { + "epoch": 0.18, + "learning_rate": 1.9694563225412342e-05, + "loss": 1.2396, + "step": 76 + }, + { + "epoch": 0.18, + "learning_rate": 1.9682345754428837e-05, + "loss": 1.2519, + "step": 77 + }, + { + "epoch": 0.18, + "learning_rate": 1.967012828344533e-05, + "loss": 0.9313, + "step": 78 + }, + { + "epoch": 0.19, + "learning_rate": 1.9657910812461824e-05, + "loss": 1.1745, + "step": 79 + }, + { + "epoch": 0.19, + "learning_rate": 1.9645693341478315e-05, + "loss": 1.0521, + "step": 80 + }, + { + "epoch": 0.19, + "learning_rate": 1.963347587049481e-05, + "loss": 0.9781, + "step": 81 + }, + { + "epoch": 0.19, + "learning_rate": 1.9621258399511302e-05, + "loss": 1.0292, + "step": 82 + }, + { + "epoch": 0.2, + "learning_rate": 1.9609040928527797e-05, + "loss": 0.851, + "step": 83 + }, + { + "epoch": 0.2, + "learning_rate": 1.959682345754429e-05, + "loss": 1.1486, + "step": 84 + }, + { + "epoch": 0.2, + "learning_rate": 1.9584605986560784e-05, + "loss": 1.0732, + "step": 85 + }, + { + "epoch": 0.2, + "learning_rate": 1.9572388515577276e-05, + "loss": 1.0466, + "step": 86 + }, + { + "epoch": 0.21, + "learning_rate": 1.956017104459377e-05, + "loss": 0.9275, + "step": 87 + }, + { + "epoch": 0.21, + "learning_rate": 1.9547953573610263e-05, + "loss": 1.7509, + "step": 88 + }, + { + "epoch": 0.21, + "learning_rate": 1.9535736102626758e-05, + "loss": 1.3038, + "step": 89 + }, + { + "epoch": 0.21, + "learning_rate": 1.9523518631643253e-05, + "loss": 0.8664, + "step": 90 + }, + { + "epoch": 0.22, + "learning_rate": 1.9511301160659744e-05, + "loss": 1.1555, + "step": 91 + }, + { + "epoch": 0.22, + "learning_rate": 1.949908368967624e-05, + "loss": 1.1517, + "step": 92 + }, + { + "epoch": 0.22, + "learning_rate": 1.948686621869273e-05, + "loss": 1.0223, + "step": 93 + }, + { + "epoch": 0.22, + "learning_rate": 1.9474648747709226e-05, + "loss": 1.0649, + "step": 94 + }, + { + "epoch": 0.22, + "learning_rate": 1.9462431276725718e-05, + "loss": 1.0147, + "step": 95 + }, + { + "epoch": 0.23, + "learning_rate": 1.9450213805742213e-05, + "loss": 1.3624, + "step": 96 + }, + { + "epoch": 0.23, + "learning_rate": 1.9437996334758705e-05, + "loss": 1.1294, + "step": 97 + }, + { + "epoch": 0.23, + "learning_rate": 1.94257788637752e-05, + "loss": 1.1903, + "step": 98 + }, + { + "epoch": 0.23, + "learning_rate": 1.941356139279169e-05, + "loss": 0.8732, + "step": 99 + }, + { + "epoch": 0.24, + "learning_rate": 1.9401343921808187e-05, + "loss": 1.1793, + "step": 100 + }, + { + "epoch": 0.24, + "learning_rate": 1.938912645082468e-05, + "loss": 0.9008, + "step": 101 + }, + { + "epoch": 0.24, + "learning_rate": 1.9376908979841174e-05, + "loss": 1.2374, + "step": 102 + }, + { + "epoch": 0.24, + "learning_rate": 1.936469150885767e-05, + "loss": 0.9163, + "step": 103 + }, + { + "epoch": 0.25, + "learning_rate": 1.9352474037874164e-05, + "loss": 0.9702, + "step": 104 + }, + { + "epoch": 0.25, + "learning_rate": 1.9340256566890655e-05, + "loss": 1.0813, + "step": 105 + }, + { + "epoch": 0.25, + "learning_rate": 1.932803909590715e-05, + "loss": 1.1164, + "step": 106 + }, + { + "epoch": 0.25, + "learning_rate": 1.9315821624923642e-05, + "loss": 0.9403, + "step": 107 + }, + { + "epoch": 0.26, + "learning_rate": 1.9303604153940137e-05, + "loss": 1.074, + "step": 108 + }, + { + "epoch": 0.26, + "learning_rate": 1.929138668295663e-05, + "loss": 1.1077, + "step": 109 + }, + { + "epoch": 0.26, + "learning_rate": 1.9279169211973124e-05, + "loss": 1.0153, + "step": 110 + }, + { + "epoch": 0.26, + "learning_rate": 1.9266951740989616e-05, + "loss": 1.0281, + "step": 111 + }, + { + "epoch": 0.26, + "learning_rate": 1.925473427000611e-05, + "loss": 0.9075, + "step": 112 + }, + { + "epoch": 0.27, + "learning_rate": 1.9242516799022603e-05, + "loss": 1.2191, + "step": 113 + }, + { + "epoch": 0.27, + "learning_rate": 1.9230299328039098e-05, + "loss": 1.0649, + "step": 114 + }, + { + "epoch": 0.27, + "learning_rate": 1.921808185705559e-05, + "loss": 0.9901, + "step": 115 + }, + { + "epoch": 0.27, + "learning_rate": 1.9205864386072085e-05, + "loss": 1.0329, + "step": 116 + }, + { + "epoch": 0.28, + "learning_rate": 1.9193646915088576e-05, + "loss": 1.4406, + "step": 117 + }, + { + "epoch": 0.28, + "learning_rate": 1.918142944410507e-05, + "loss": 1.138, + "step": 118 + }, + { + "epoch": 0.28, + "learning_rate": 1.9169211973121567e-05, + "loss": 1.1755, + "step": 119 + }, + { + "epoch": 0.28, + "learning_rate": 1.9156994502138058e-05, + "loss": 0.9568, + "step": 120 + }, + { + "epoch": 0.29, + "learning_rate": 1.9144777031154553e-05, + "loss": 1.0204, + "step": 121 + }, + { + "epoch": 0.29, + "learning_rate": 1.913255956017105e-05, + "loss": 0.9391, + "step": 122 + }, + { + "epoch": 0.29, + "learning_rate": 1.912034208918754e-05, + "loss": 1.109, + "step": 123 + }, + { + "epoch": 0.29, + "learning_rate": 1.9108124618204035e-05, + "loss": 1.2183, + "step": 124 + }, + { + "epoch": 0.3, + "learning_rate": 1.9095907147220527e-05, + "loss": 0.9377, + "step": 125 + }, + { + "epoch": 0.3, + "learning_rate": 1.9083689676237022e-05, + "loss": 1.0865, + "step": 126 + }, + { + "epoch": 0.3, + "learning_rate": 1.9071472205253514e-05, + "loss": 1.2078, + "step": 127 + }, + { + "epoch": 0.3, + "learning_rate": 1.905925473427001e-05, + "loss": 1.0149, + "step": 128 + }, + { + "epoch": 0.31, + "learning_rate": 1.90470372632865e-05, + "loss": 1.1191, + "step": 129 + }, + { + "epoch": 0.31, + "learning_rate": 1.9034819792302996e-05, + "loss": 1.0933, + "step": 130 + }, + { + "epoch": 0.31, + "learning_rate": 1.9022602321319487e-05, + "loss": 0.9312, + "step": 131 + }, + { + "epoch": 0.31, + "learning_rate": 1.9010384850335982e-05, + "loss": 0.794, + "step": 132 + }, + { + "epoch": 0.31, + "learning_rate": 1.8998167379352474e-05, + "loss": 0.8745, + "step": 133 + }, + { + "epoch": 0.32, + "learning_rate": 1.898594990836897e-05, + "loss": 0.9092, + "step": 134 + }, + { + "epoch": 0.32, + "learning_rate": 1.8973732437385464e-05, + "loss": 0.7504, + "step": 135 + }, + { + "epoch": 0.32, + "learning_rate": 1.8961514966401956e-05, + "loss": 1.2165, + "step": 136 + }, + { + "epoch": 0.32, + "learning_rate": 1.894929749541845e-05, + "loss": 0.9727, + "step": 137 + }, + { + "epoch": 0.33, + "learning_rate": 1.8937080024434943e-05, + "loss": 1.0236, + "step": 138 + }, + { + "epoch": 0.33, + "learning_rate": 1.8924862553451438e-05, + "loss": 0.9628, + "step": 139 + }, + { + "epoch": 0.33, + "learning_rate": 1.891264508246793e-05, + "loss": 0.8762, + "step": 140 + }, + { + "epoch": 0.33, + "learning_rate": 1.8900427611484425e-05, + "loss": 1.0712, + "step": 141 + }, + { + "epoch": 0.34, + "learning_rate": 1.8888210140500916e-05, + "loss": 0.7999, + "step": 142 + }, + { + "epoch": 0.34, + "learning_rate": 1.887599266951741e-05, + "loss": 0.9812, + "step": 143 + }, + { + "epoch": 0.34, + "learning_rate": 1.8863775198533903e-05, + "loss": 1.8394, + "step": 144 + }, + { + "epoch": 0.34, + "learning_rate": 1.88515577275504e-05, + "loss": 0.9033, + "step": 145 + }, + { + "epoch": 0.35, + "learning_rate": 1.883934025656689e-05, + "loss": 0.9813, + "step": 146 + }, + { + "epoch": 0.35, + "learning_rate": 1.8827122785583385e-05, + "loss": 0.7722, + "step": 147 + }, + { + "epoch": 0.35, + "learning_rate": 1.8814905314599877e-05, + "loss": 0.9991, + "step": 148 + }, + { + "epoch": 0.35, + "learning_rate": 1.8802687843616375e-05, + "loss": 0.9531, + "step": 149 + }, + { + "epoch": 0.35, + "learning_rate": 1.8790470372632867e-05, + "loss": 0.9034, + "step": 150 + }, + { + "epoch": 0.36, + "learning_rate": 1.8778252901649362e-05, + "loss": 0.8741, + "step": 151 + }, + { + "epoch": 0.36, + "learning_rate": 1.8766035430665854e-05, + "loss": 1.1228, + "step": 152 + }, + { + "epoch": 0.36, + "learning_rate": 1.875381795968235e-05, + "loss": 0.7849, + "step": 153 + }, + { + "epoch": 0.36, + "learning_rate": 1.874160048869884e-05, + "loss": 0.9642, + "step": 154 + }, + { + "epoch": 0.37, + "learning_rate": 1.8729383017715336e-05, + "loss": 1.0823, + "step": 155 + }, + { + "epoch": 0.37, + "learning_rate": 1.8717165546731827e-05, + "loss": 1.2156, + "step": 156 + }, + { + "epoch": 0.37, + "learning_rate": 1.8704948075748323e-05, + "loss": 1.4682, + "step": 157 + }, + { + "epoch": 0.37, + "learning_rate": 1.8692730604764814e-05, + "loss": 1.2043, + "step": 158 + }, + { + "epoch": 0.38, + "learning_rate": 1.868051313378131e-05, + "loss": 0.9802, + "step": 159 + }, + { + "epoch": 0.38, + "learning_rate": 1.86682956627978e-05, + "loss": 1.1539, + "step": 160 + }, + { + "epoch": 0.38, + "learning_rate": 1.8656078191814296e-05, + "loss": 1.1245, + "step": 161 + }, + { + "epoch": 0.38, + "learning_rate": 1.8643860720830788e-05, + "loss": 1.099, + "step": 162 + }, + { + "epoch": 0.39, + "learning_rate": 1.8631643249847283e-05, + "loss": 1.0272, + "step": 163 + }, + { + "epoch": 0.39, + "learning_rate": 1.8619425778863778e-05, + "loss": 1.0072, + "step": 164 + }, + { + "epoch": 0.39, + "learning_rate": 1.860720830788027e-05, + "loss": 1.0215, + "step": 165 + }, + { + "epoch": 0.39, + "learning_rate": 1.8594990836896765e-05, + "loss": 1.0221, + "step": 166 + }, + { + "epoch": 0.39, + "learning_rate": 1.8582773365913257e-05, + "loss": 1.0754, + "step": 167 + }, + { + "epoch": 0.4, + "learning_rate": 1.857055589492975e-05, + "loss": 0.9724, + "step": 168 + }, + { + "epoch": 0.4, + "learning_rate": 1.8558338423946243e-05, + "loss": 1.031, + "step": 169 + }, + { + "epoch": 0.4, + "learning_rate": 1.854612095296274e-05, + "loss": 1.157, + "step": 170 + }, + { + "epoch": 0.4, + "learning_rate": 1.853390348197923e-05, + "loss": 0.9819, + "step": 171 + }, + { + "epoch": 0.41, + "learning_rate": 1.8521686010995725e-05, + "loss": 1.1156, + "step": 172 + }, + { + "epoch": 0.41, + "learning_rate": 1.850946854001222e-05, + "loss": 1.0699, + "step": 173 + }, + { + "epoch": 0.41, + "learning_rate": 1.8497251069028712e-05, + "loss": 1.1092, + "step": 174 + }, + { + "epoch": 0.41, + "learning_rate": 1.8485033598045207e-05, + "loss": 1.0092, + "step": 175 + }, + { + "epoch": 0.42, + "learning_rate": 1.84728161270617e-05, + "loss": 1.0403, + "step": 176 + }, + { + "epoch": 0.42, + "learning_rate": 1.8460598656078194e-05, + "loss": 0.8809, + "step": 177 + }, + { + "epoch": 0.42, + "learning_rate": 1.8448381185094686e-05, + "loss": 1.0139, + "step": 178 + }, + { + "epoch": 0.42, + "learning_rate": 1.843616371411118e-05, + "loss": 1.0701, + "step": 179 + }, + { + "epoch": 0.43, + "learning_rate": 1.8423946243127676e-05, + "loss": 0.9199, + "step": 180 + }, + { + "epoch": 0.43, + "learning_rate": 1.8411728772144168e-05, + "loss": 0.8542, + "step": 181 + }, + { + "epoch": 0.43, + "learning_rate": 1.8399511301160663e-05, + "loss": 1.0369, + "step": 182 + }, + { + "epoch": 0.43, + "learning_rate": 1.8387293830177154e-05, + "loss": 0.9847, + "step": 183 + }, + { + "epoch": 0.44, + "learning_rate": 1.837507635919365e-05, + "loss": 0.8046, + "step": 184 + }, + { + "epoch": 0.44, + "learning_rate": 1.836285888821014e-05, + "loss": 1.2821, + "step": 185 + }, + { + "epoch": 0.44, + "learning_rate": 1.8350641417226636e-05, + "loss": 1.0127, + "step": 186 + }, + { + "epoch": 0.44, + "learning_rate": 1.8338423946243128e-05, + "loss": 0.8535, + "step": 187 + }, + { + "epoch": 0.44, + "learning_rate": 1.8326206475259623e-05, + "loss": 0.984, + "step": 188 + }, + { + "epoch": 0.45, + "learning_rate": 1.8313989004276115e-05, + "loss": 0.771, + "step": 189 + }, + { + "epoch": 0.45, + "learning_rate": 1.830177153329261e-05, + "loss": 2.1482, + "step": 190 + }, + { + "epoch": 0.45, + "learning_rate": 1.82895540623091e-05, + "loss": 0.7799, + "step": 191 + }, + { + "epoch": 0.45, + "learning_rate": 1.8277336591325597e-05, + "loss": 1.1474, + "step": 192 + }, + { + "epoch": 0.46, + "learning_rate": 1.826511912034209e-05, + "loss": 1.0967, + "step": 193 + }, + { + "epoch": 0.46, + "learning_rate": 1.8252901649358587e-05, + "loss": 0.8824, + "step": 194 + }, + { + "epoch": 0.46, + "learning_rate": 1.824068417837508e-05, + "loss": 0.9202, + "step": 195 + }, + { + "epoch": 0.46, + "learning_rate": 1.8228466707391574e-05, + "loss": 0.8055, + "step": 196 + }, + { + "epoch": 0.47, + "learning_rate": 1.8216249236408065e-05, + "loss": 0.7309, + "step": 197 + }, + { + "epoch": 0.47, + "learning_rate": 1.820403176542456e-05, + "loss": 1.1121, + "step": 198 + }, + { + "epoch": 0.47, + "learning_rate": 1.8191814294441052e-05, + "loss": 1.5694, + "step": 199 + }, + { + "epoch": 0.47, + "learning_rate": 1.8179596823457547e-05, + "loss": 1.2677, + "step": 200 + }, + { + "epoch": 0.48, + "learning_rate": 1.816737935247404e-05, + "loss": 1.1242, + "step": 201 + }, + { + "epoch": 0.48, + "learning_rate": 1.8155161881490534e-05, + "loss": 0.8419, + "step": 202 + }, + { + "epoch": 0.48, + "learning_rate": 1.8142944410507026e-05, + "loss": 1.1399, + "step": 203 + }, + { + "epoch": 0.48, + "learning_rate": 1.813072693952352e-05, + "loss": 0.7964, + "step": 204 + }, + { + "epoch": 0.48, + "learning_rate": 1.8118509468540013e-05, + "loss": 0.9321, + "step": 205 + }, + { + "epoch": 0.49, + "learning_rate": 1.8106291997556508e-05, + "loss": 0.8461, + "step": 206 + }, + { + "epoch": 0.49, + "learning_rate": 1.8094074526573e-05, + "loss": 1.0457, + "step": 207 + }, + { + "epoch": 0.49, + "learning_rate": 1.8081857055589494e-05, + "loss": 1.085, + "step": 208 + }, + { + "epoch": 0.49, + "learning_rate": 1.806963958460599e-05, + "loss": 0.9105, + "step": 209 + }, + { + "epoch": 0.5, + "learning_rate": 1.805742211362248e-05, + "loss": 0.9143, + "step": 210 + }, + { + "epoch": 0.5, + "learning_rate": 1.8045204642638976e-05, + "loss": 1.0978, + "step": 211 + }, + { + "epoch": 0.5, + "learning_rate": 1.8032987171655468e-05, + "loss": 0.9884, + "step": 212 + }, + { + "epoch": 0.5, + "learning_rate": 1.8020769700671963e-05, + "loss": 1.0179, + "step": 213 + }, + { + "epoch": 0.51, + "learning_rate": 1.8008552229688455e-05, + "loss": 1.2135, + "step": 214 + }, + { + "epoch": 0.51, + "learning_rate": 1.799633475870495e-05, + "loss": 0.9051, + "step": 215 + }, + { + "epoch": 0.51, + "learning_rate": 1.7984117287721442e-05, + "loss": 0.8954, + "step": 216 + }, + { + "epoch": 0.51, + "learning_rate": 1.7971899816737937e-05, + "loss": 1.1429, + "step": 217 + }, + { + "epoch": 0.52, + "learning_rate": 1.795968234575443e-05, + "loss": 1.0803, + "step": 218 + }, + { + "epoch": 0.52, + "learning_rate": 1.7947464874770924e-05, + "loss": 1.0558, + "step": 219 + }, + { + "epoch": 0.52, + "learning_rate": 1.7935247403787415e-05, + "loss": 1.0904, + "step": 220 + }, + { + "epoch": 0.52, + "learning_rate": 1.792302993280391e-05, + "loss": 1.0909, + "step": 221 + }, + { + "epoch": 0.52, + "learning_rate": 1.7910812461820402e-05, + "loss": 1.1515, + "step": 222 + }, + { + "epoch": 0.53, + "learning_rate": 1.7898594990836897e-05, + "loss": 1.0393, + "step": 223 + }, + { + "epoch": 0.53, + "learning_rate": 1.7886377519853392e-05, + "loss": 1.0072, + "step": 224 + }, + { + "epoch": 0.53, + "learning_rate": 1.7874160048869887e-05, + "loss": 0.8387, + "step": 225 + }, + { + "epoch": 0.53, + "learning_rate": 1.786194257788638e-05, + "loss": 0.8908, + "step": 226 + }, + { + "epoch": 0.54, + "learning_rate": 1.7849725106902874e-05, + "loss": 0.875, + "step": 227 + }, + { + "epoch": 0.54, + "learning_rate": 1.7837507635919366e-05, + "loss": 1.0477, + "step": 228 + }, + { + "epoch": 0.54, + "learning_rate": 1.782529016493586e-05, + "loss": 1.0117, + "step": 229 + }, + { + "epoch": 0.54, + "learning_rate": 1.7813072693952353e-05, + "loss": 1.1684, + "step": 230 + }, + { + "epoch": 0.55, + "learning_rate": 1.7800855222968848e-05, + "loss": 1.1475, + "step": 231 + }, + { + "epoch": 0.55, + "learning_rate": 1.778863775198534e-05, + "loss": 1.2069, + "step": 232 + }, + { + "epoch": 0.55, + "learning_rate": 1.7776420281001835e-05, + "loss": 1.1107, + "step": 233 + }, + { + "epoch": 0.55, + "learning_rate": 1.7764202810018326e-05, + "loss": 0.9738, + "step": 234 + }, + { + "epoch": 0.56, + "learning_rate": 1.775198533903482e-05, + "loss": 0.8838, + "step": 235 + }, + { + "epoch": 0.56, + "learning_rate": 1.7739767868051313e-05, + "loss": 1.0103, + "step": 236 + }, + { + "epoch": 0.56, + "learning_rate": 1.7727550397067808e-05, + "loss": 0.9279, + "step": 237 + }, + { + "epoch": 0.56, + "learning_rate": 1.77153329260843e-05, + "loss": 0.9682, + "step": 238 + }, + { + "epoch": 0.57, + "learning_rate": 1.7703115455100795e-05, + "loss": 1.2049, + "step": 239 + }, + { + "epoch": 0.57, + "learning_rate": 1.769089798411729e-05, + "loss": 1.0387, + "step": 240 + }, + { + "epoch": 0.57, + "learning_rate": 1.7678680513133785e-05, + "loss": 0.9754, + "step": 241 + }, + { + "epoch": 0.57, + "learning_rate": 1.7666463042150277e-05, + "loss": 1.0202, + "step": 242 + }, + { + "epoch": 0.57, + "learning_rate": 1.7654245571166772e-05, + "loss": 1.0224, + "step": 243 + }, + { + "epoch": 0.58, + "learning_rate": 1.7642028100183264e-05, + "loss": 0.8577, + "step": 244 + }, + { + "epoch": 0.58, + "learning_rate": 1.762981062919976e-05, + "loss": 0.9112, + "step": 245 + }, + { + "epoch": 0.58, + "learning_rate": 1.761759315821625e-05, + "loss": 0.9395, + "step": 246 + }, + { + "epoch": 0.58, + "learning_rate": 1.7605375687232746e-05, + "loss": 1.1198, + "step": 247 + }, + { + "epoch": 0.59, + "learning_rate": 1.7593158216249237e-05, + "loss": 1.4934, + "step": 248 + }, + { + "epoch": 0.59, + "learning_rate": 1.7580940745265732e-05, + "loss": 1.0408, + "step": 249 + }, + { + "epoch": 0.59, + "learning_rate": 1.7568723274282224e-05, + "loss": 0.9475, + "step": 250 + }, + { + "epoch": 0.59, + "learning_rate": 1.755650580329872e-05, + "loss": 1.1368, + "step": 251 + }, + { + "epoch": 0.6, + "learning_rate": 1.754428833231521e-05, + "loss": 0.9984, + "step": 252 + }, + { + "epoch": 0.6, + "learning_rate": 1.7532070861331706e-05, + "loss": 1.1552, + "step": 253 + }, + { + "epoch": 0.6, + "learning_rate": 1.75198533903482e-05, + "loss": 1.0785, + "step": 254 + }, + { + "epoch": 0.6, + "learning_rate": 1.7507635919364693e-05, + "loss": 0.8089, + "step": 255 + }, + { + "epoch": 0.61, + "learning_rate": 1.7495418448381188e-05, + "loss": 1.0439, + "step": 256 + }, + { + "epoch": 0.61, + "learning_rate": 1.748320097739768e-05, + "loss": 0.7603, + "step": 257 + }, + { + "epoch": 0.61, + "learning_rate": 1.7470983506414175e-05, + "loss": 0.8235, + "step": 258 + }, + { + "epoch": 0.61, + "learning_rate": 1.7458766035430666e-05, + "loss": 1.7352, + "step": 259 + }, + { + "epoch": 0.61, + "learning_rate": 1.744654856444716e-05, + "loss": 0.9731, + "step": 260 + }, + { + "epoch": 0.62, + "learning_rate": 1.7434331093463653e-05, + "loss": 0.8848, + "step": 261 + }, + { + "epoch": 0.62, + "learning_rate": 1.742211362248015e-05, + "loss": 0.8874, + "step": 262 + }, + { + "epoch": 0.62, + "learning_rate": 1.740989615149664e-05, + "loss": 1.0125, + "step": 263 + }, + { + "epoch": 0.62, + "learning_rate": 1.7397678680513135e-05, + "loss": 1.0552, + "step": 264 + }, + { + "epoch": 0.63, + "learning_rate": 1.7385461209529627e-05, + "loss": 0.9488, + "step": 265 + }, + { + "epoch": 0.63, + "learning_rate": 1.7373243738546122e-05, + "loss": 0.794, + "step": 266 + }, + { + "epoch": 0.63, + "learning_rate": 1.7361026267562614e-05, + "loss": 1.0277, + "step": 267 + }, + { + "epoch": 0.63, + "learning_rate": 1.734880879657911e-05, + "loss": 0.835, + "step": 268 + }, + { + "epoch": 0.64, + "learning_rate": 1.7336591325595604e-05, + "loss": 1.2321, + "step": 269 + }, + { + "epoch": 0.64, + "learning_rate": 1.73243738546121e-05, + "loss": 0.9804, + "step": 270 + }, + { + "epoch": 0.64, + "learning_rate": 1.731215638362859e-05, + "loss": 1.1228, + "step": 271 + }, + { + "epoch": 0.64, + "learning_rate": 1.7299938912645086e-05, + "loss": 1.1392, + "step": 272 + }, + { + "epoch": 0.65, + "learning_rate": 1.7287721441661577e-05, + "loss": 0.8711, + "step": 273 + }, + { + "epoch": 0.65, + "learning_rate": 1.7275503970678073e-05, + "loss": 0.9201, + "step": 274 + }, + { + "epoch": 0.65, + "learning_rate": 1.7263286499694564e-05, + "loss": 1.008, + "step": 275 + }, + { + "epoch": 0.65, + "learning_rate": 1.725106902871106e-05, + "loss": 1.0344, + "step": 276 + }, + { + "epoch": 0.65, + "learning_rate": 1.723885155772755e-05, + "loss": 0.9427, + "step": 277 + }, + { + "epoch": 0.66, + "learning_rate": 1.7226634086744046e-05, + "loss": 0.8963, + "step": 278 + }, + { + "epoch": 0.66, + "learning_rate": 1.7214416615760538e-05, + "loss": 1.3487, + "step": 279 + }, + { + "epoch": 0.66, + "learning_rate": 1.7202199144777033e-05, + "loss": 1.0002, + "step": 280 + }, + { + "epoch": 0.66, + "learning_rate": 1.7189981673793525e-05, + "loss": 0.961, + "step": 281 + }, + { + "epoch": 0.67, + "learning_rate": 1.717776420281002e-05, + "loss": 0.9015, + "step": 282 + }, + { + "epoch": 0.67, + "learning_rate": 1.716554673182651e-05, + "loss": 0.9172, + "step": 283 + }, + { + "epoch": 0.67, + "learning_rate": 1.7153329260843007e-05, + "loss": 0.9849, + "step": 284 + }, + { + "epoch": 0.67, + "learning_rate": 1.71411117898595e-05, + "loss": 0.9166, + "step": 285 + }, + { + "epoch": 0.68, + "learning_rate": 1.7128894318875993e-05, + "loss": 1.0685, + "step": 286 + }, + { + "epoch": 0.68, + "learning_rate": 1.711667684789249e-05, + "loss": 1.0196, + "step": 287 + }, + { + "epoch": 0.68, + "learning_rate": 1.710445937690898e-05, + "loss": 0.8409, + "step": 288 + }, + { + "epoch": 0.68, + "learning_rate": 1.7092241905925475e-05, + "loss": 0.9327, + "step": 289 + }, + { + "epoch": 0.69, + "learning_rate": 1.7080024434941967e-05, + "loss": 0.8675, + "step": 290 + }, + { + "epoch": 0.69, + "learning_rate": 1.7067806963958462e-05, + "loss": 1.1109, + "step": 291 + }, + { + "epoch": 0.69, + "learning_rate": 1.7055589492974954e-05, + "loss": 0.8427, + "step": 292 + }, + { + "epoch": 0.69, + "learning_rate": 1.704337202199145e-05, + "loss": 1.1322, + "step": 293 + }, + { + "epoch": 0.7, + "learning_rate": 1.7031154551007944e-05, + "loss": 0.9094, + "step": 294 + }, + { + "epoch": 0.7, + "learning_rate": 1.7018937080024436e-05, + "loss": 1.1168, + "step": 295 + }, + { + "epoch": 0.7, + "learning_rate": 1.700671960904093e-05, + "loss": 0.9982, + "step": 296 + }, + { + "epoch": 0.7, + "learning_rate": 1.6994502138057422e-05, + "loss": 0.7812, + "step": 297 + }, + { + "epoch": 0.7, + "learning_rate": 1.6982284667073918e-05, + "loss": 0.8858, + "step": 298 + }, + { + "epoch": 0.71, + "learning_rate": 1.6970067196090413e-05, + "loss": 0.9693, + "step": 299 + }, + { + "epoch": 0.71, + "learning_rate": 1.6957849725106904e-05, + "loss": 0.9097, + "step": 300 + }, + { + "epoch": 0.71, + "learning_rate": 1.69456322541234e-05, + "loss": 0.9178, + "step": 301 + }, + { + "epoch": 0.71, + "learning_rate": 1.693341478313989e-05, + "loss": 1.0581, + "step": 302 + }, + { + "epoch": 0.72, + "learning_rate": 1.6921197312156386e-05, + "loss": 1.1521, + "step": 303 + }, + { + "epoch": 0.72, + "learning_rate": 1.6908979841172878e-05, + "loss": 1.065, + "step": 304 + }, + { + "epoch": 0.72, + "learning_rate": 1.6896762370189373e-05, + "loss": 0.8954, + "step": 305 + }, + { + "epoch": 0.72, + "learning_rate": 1.6884544899205865e-05, + "loss": 0.7037, + "step": 306 + }, + { + "epoch": 0.73, + "learning_rate": 1.687232742822236e-05, + "loss": 0.9347, + "step": 307 + }, + { + "epoch": 0.73, + "learning_rate": 1.686010995723885e-05, + "loss": 0.9681, + "step": 308 + }, + { + "epoch": 0.73, + "learning_rate": 1.6847892486255347e-05, + "loss": 0.8554, + "step": 309 + }, + { + "epoch": 0.73, + "learning_rate": 1.683567501527184e-05, + "loss": 0.8793, + "step": 310 + }, + { + "epoch": 0.74, + "learning_rate": 1.6823457544288334e-05, + "loss": 0.8881, + "step": 311 + }, + { + "epoch": 0.74, + "learning_rate": 1.6811240073304825e-05, + "loss": 0.9284, + "step": 312 + }, + { + "epoch": 0.74, + "learning_rate": 1.679902260232132e-05, + "loss": 0.9632, + "step": 313 + }, + { + "epoch": 0.74, + "learning_rate": 1.6786805131337815e-05, + "loss": 1.0815, + "step": 314 + }, + { + "epoch": 0.74, + "learning_rate": 1.677458766035431e-05, + "loss": 0.9314, + "step": 315 + }, + { + "epoch": 0.75, + "learning_rate": 1.6762370189370802e-05, + "loss": 0.7965, + "step": 316 + }, + { + "epoch": 0.75, + "learning_rate": 1.6750152718387297e-05, + "loss": 0.8365, + "step": 317 + }, + { + "epoch": 0.75, + "learning_rate": 1.673793524740379e-05, + "loss": 1.2019, + "step": 318 + }, + { + "epoch": 0.75, + "learning_rate": 1.6725717776420284e-05, + "loss": 0.9525, + "step": 319 + }, + { + "epoch": 0.76, + "learning_rate": 1.6713500305436776e-05, + "loss": 0.7911, + "step": 320 + }, + { + "epoch": 0.76, + "learning_rate": 1.670128283445327e-05, + "loss": 0.9614, + "step": 321 + }, + { + "epoch": 0.76, + "learning_rate": 1.6689065363469763e-05, + "loss": 1.0212, + "step": 322 + }, + { + "epoch": 0.76, + "learning_rate": 1.6676847892486258e-05, + "loss": 1.128, + "step": 323 + }, + { + "epoch": 0.77, + "learning_rate": 1.666463042150275e-05, + "loss": 0.9668, + "step": 324 + }, + { + "epoch": 0.77, + "learning_rate": 1.6652412950519245e-05, + "loss": 0.9095, + "step": 325 + }, + { + "epoch": 0.77, + "learning_rate": 1.6640195479535736e-05, + "loss": 0.8985, + "step": 326 + }, + { + "epoch": 0.77, + "learning_rate": 1.662797800855223e-05, + "loss": 0.8876, + "step": 327 + }, + { + "epoch": 0.78, + "learning_rate": 1.6615760537568723e-05, + "loss": 0.9104, + "step": 328 + }, + { + "epoch": 0.78, + "learning_rate": 1.6603543066585218e-05, + "loss": 0.9719, + "step": 329 + }, + { + "epoch": 0.78, + "learning_rate": 1.6591325595601713e-05, + "loss": 1.2485, + "step": 330 + }, + { + "epoch": 0.78, + "learning_rate": 1.6579108124618205e-05, + "loss": 0.8247, + "step": 331 + }, + { + "epoch": 0.79, + "learning_rate": 1.65668906536347e-05, + "loss": 1.1209, + "step": 332 + }, + { + "epoch": 0.79, + "learning_rate": 1.6554673182651192e-05, + "loss": 1.0325, + "step": 333 + }, + { + "epoch": 0.79, + "learning_rate": 1.6542455711667687e-05, + "loss": 1.0139, + "step": 334 + }, + { + "epoch": 0.79, + "learning_rate": 1.653023824068418e-05, + "loss": 1.0233, + "step": 335 + }, + { + "epoch": 0.79, + "learning_rate": 1.6518020769700674e-05, + "loss": 0.8545, + "step": 336 + }, + { + "epoch": 0.8, + "learning_rate": 1.6505803298717165e-05, + "loss": 1.2186, + "step": 337 + }, + { + "epoch": 0.8, + "learning_rate": 1.649358582773366e-05, + "loss": 0.9952, + "step": 338 + }, + { + "epoch": 0.8, + "learning_rate": 1.6481368356750152e-05, + "loss": 1.2843, + "step": 339 + }, + { + "epoch": 0.8, + "learning_rate": 1.6469150885766647e-05, + "loss": 1.2914, + "step": 340 + }, + { + "epoch": 0.81, + "learning_rate": 1.645693341478314e-05, + "loss": 0.7621, + "step": 341 + }, + { + "epoch": 0.81, + "learning_rate": 1.6444715943799634e-05, + "loss": 1.0924, + "step": 342 + }, + { + "epoch": 0.81, + "learning_rate": 1.6432498472816126e-05, + "loss": 1.1418, + "step": 343 + }, + { + "epoch": 0.81, + "learning_rate": 1.6420281001832624e-05, + "loss": 0.9116, + "step": 344 + }, + { + "epoch": 0.82, + "learning_rate": 1.6408063530849116e-05, + "loss": 1.7392, + "step": 345 + }, + { + "epoch": 0.82, + "learning_rate": 1.639584605986561e-05, + "loss": 0.7273, + "step": 346 + }, + { + "epoch": 0.82, + "learning_rate": 1.6383628588882103e-05, + "loss": 0.9469, + "step": 347 + }, + { + "epoch": 0.82, + "learning_rate": 1.6371411117898598e-05, + "loss": 0.7986, + "step": 348 + }, + { + "epoch": 0.83, + "learning_rate": 1.635919364691509e-05, + "loss": 0.8359, + "step": 349 + }, + { + "epoch": 0.83, + "learning_rate": 1.6346976175931585e-05, + "loss": 0.7852, + "step": 350 + }, + { + "epoch": 0.83, + "learning_rate": 1.6334758704948076e-05, + "loss": 1.1206, + "step": 351 + }, + { + "epoch": 0.83, + "learning_rate": 1.632254123396457e-05, + "loss": 0.9291, + "step": 352 + }, + { + "epoch": 0.83, + "learning_rate": 1.6310323762981063e-05, + "loss": 1.0176, + "step": 353 + }, + { + "epoch": 0.84, + "learning_rate": 1.6298106291997558e-05, + "loss": 1.018, + "step": 354 + }, + { + "epoch": 0.84, + "learning_rate": 1.628588882101405e-05, + "loss": 0.7873, + "step": 355 + }, + { + "epoch": 0.84, + "learning_rate": 1.6273671350030545e-05, + "loss": 0.9801, + "step": 356 + }, + { + "epoch": 0.84, + "learning_rate": 1.6261453879047037e-05, + "loss": 0.752, + "step": 357 + }, + { + "epoch": 0.85, + "learning_rate": 1.6249236408063532e-05, + "loss": 0.8991, + "step": 358 + }, + { + "epoch": 0.85, + "learning_rate": 1.6237018937080027e-05, + "loss": 0.8771, + "step": 359 + }, + { + "epoch": 0.85, + "learning_rate": 1.622480146609652e-05, + "loss": 0.8734, + "step": 360 + }, + { + "epoch": 0.85, + "learning_rate": 1.6212583995113014e-05, + "loss": 0.9302, + "step": 361 + }, + { + "epoch": 0.86, + "learning_rate": 1.620036652412951e-05, + "loss": 0.9362, + "step": 362 + }, + { + "epoch": 0.86, + "learning_rate": 1.6188149053146e-05, + "loss": 0.9994, + "step": 363 + }, + { + "epoch": 0.86, + "learning_rate": 1.6175931582162496e-05, + "loss": 0.7499, + "step": 364 + }, + { + "epoch": 0.86, + "learning_rate": 1.6163714111178987e-05, + "loss": 1.7184, + "step": 365 + }, + { + "epoch": 0.87, + "learning_rate": 1.6151496640195482e-05, + "loss": 1.0205, + "step": 366 + }, + { + "epoch": 0.87, + "learning_rate": 1.6139279169211974e-05, + "loss": 0.8763, + "step": 367 + }, + { + "epoch": 0.87, + "learning_rate": 1.612706169822847e-05, + "loss": 1.0264, + "step": 368 + }, + { + "epoch": 0.87, + "learning_rate": 1.611484422724496e-05, + "loss": 0.9088, + "step": 369 + }, + { + "epoch": 0.87, + "learning_rate": 1.6102626756261456e-05, + "loss": 0.7762, + "step": 370 + }, + { + "epoch": 0.88, + "learning_rate": 1.6090409285277948e-05, + "loss": 0.876, + "step": 371 + }, + { + "epoch": 0.88, + "learning_rate": 1.6078191814294443e-05, + "loss": 1.0591, + "step": 372 + }, + { + "epoch": 0.88, + "learning_rate": 1.6065974343310935e-05, + "loss": 0.8602, + "step": 373 + }, + { + "epoch": 0.88, + "learning_rate": 1.605375687232743e-05, + "loss": 0.8489, + "step": 374 + }, + { + "epoch": 0.89, + "learning_rate": 1.6041539401343925e-05, + "loss": 0.9205, + "step": 375 + }, + { + "epoch": 0.89, + "learning_rate": 1.6029321930360416e-05, + "loss": 0.926, + "step": 376 + }, + { + "epoch": 0.89, + "learning_rate": 1.601710445937691e-05, + "loss": 0.8983, + "step": 377 + }, + { + "epoch": 0.89, + "learning_rate": 1.6004886988393403e-05, + "loss": 1.0779, + "step": 378 + }, + { + "epoch": 0.9, + "learning_rate": 1.59926695174099e-05, + "loss": 0.9446, + "step": 379 + }, + { + "epoch": 0.9, + "learning_rate": 1.598045204642639e-05, + "loss": 1.1564, + "step": 380 + }, + { + "epoch": 0.9, + "learning_rate": 1.5968234575442885e-05, + "loss": 1.0101, + "step": 381 + }, + { + "epoch": 0.9, + "learning_rate": 1.5956017104459377e-05, + "loss": 1.0597, + "step": 382 + }, + { + "epoch": 0.91, + "learning_rate": 1.5943799633475872e-05, + "loss": 0.9408, + "step": 383 + }, + { + "epoch": 0.91, + "learning_rate": 1.5931582162492364e-05, + "loss": 1.1833, + "step": 384 + }, + { + "epoch": 0.91, + "learning_rate": 1.591936469150886e-05, + "loss": 1.0106, + "step": 385 + }, + { + "epoch": 0.91, + "learning_rate": 1.590714722052535e-05, + "loss": 1.6698, + "step": 386 + }, + { + "epoch": 0.92, + "learning_rate": 1.5894929749541846e-05, + "loss": 1.0123, + "step": 387 + }, + { + "epoch": 0.92, + "learning_rate": 1.5882712278558337e-05, + "loss": 0.9237, + "step": 388 + }, + { + "epoch": 0.92, + "learning_rate": 1.5870494807574836e-05, + "loss": 0.9182, + "step": 389 + }, + { + "epoch": 0.92, + "learning_rate": 1.5858277336591327e-05, + "loss": 0.9832, + "step": 390 + }, + { + "epoch": 0.92, + "learning_rate": 1.5846059865607823e-05, + "loss": 0.7884, + "step": 391 + }, + { + "epoch": 0.93, + "learning_rate": 1.5833842394624314e-05, + "loss": 1.0028, + "step": 392 + }, + { + "epoch": 0.93, + "learning_rate": 1.582162492364081e-05, + "loss": 1.0835, + "step": 393 + }, + { + "epoch": 0.93, + "learning_rate": 1.58094074526573e-05, + "loss": 0.9394, + "step": 394 + }, + { + "epoch": 0.93, + "learning_rate": 1.5797189981673796e-05, + "loss": 1.25, + "step": 395 + }, + { + "epoch": 0.94, + "learning_rate": 1.5784972510690288e-05, + "loss": 1.0046, + "step": 396 + }, + { + "epoch": 0.94, + "learning_rate": 1.5772755039706783e-05, + "loss": 0.6862, + "step": 397 + }, + { + "epoch": 0.94, + "learning_rate": 1.5760537568723275e-05, + "loss": 0.9672, + "step": 398 + }, + { + "epoch": 0.94, + "learning_rate": 1.574832009773977e-05, + "loss": 0.847, + "step": 399 + }, + { + "epoch": 0.95, + "learning_rate": 1.573610262675626e-05, + "loss": 1.1316, + "step": 400 + }, + { + "epoch": 0.95, + "learning_rate": 1.5723885155772757e-05, + "loss": 1.1577, + "step": 401 + }, + { + "epoch": 0.95, + "learning_rate": 1.5711667684789248e-05, + "loss": 0.7909, + "step": 402 + }, + { + "epoch": 0.95, + "learning_rate": 1.5699450213805743e-05, + "loss": 0.9476, + "step": 403 + }, + { + "epoch": 0.96, + "learning_rate": 1.568723274282224e-05, + "loss": 0.8651, + "step": 404 + }, + { + "epoch": 0.96, + "learning_rate": 1.567501527183873e-05, + "loss": 0.8349, + "step": 405 + }, + { + "epoch": 0.96, + "learning_rate": 1.5662797800855225e-05, + "loss": 1.0023, + "step": 406 + }, + { + "epoch": 0.96, + "learning_rate": 1.5650580329871717e-05, + "loss": 0.9267, + "step": 407 + }, + { + "epoch": 0.96, + "learning_rate": 1.5638362858888212e-05, + "loss": 0.7568, + "step": 408 + }, + { + "epoch": 0.97, + "learning_rate": 1.5626145387904704e-05, + "loss": 0.9849, + "step": 409 + }, + { + "epoch": 0.97, + "learning_rate": 1.56139279169212e-05, + "loss": 0.8452, + "step": 410 + }, + { + "epoch": 0.97, + "learning_rate": 1.560171044593769e-05, + "loss": 0.9507, + "step": 411 + }, + { + "epoch": 0.97, + "learning_rate": 1.5589492974954186e-05, + "loss": 0.9459, + "step": 412 + }, + { + "epoch": 0.98, + "learning_rate": 1.557727550397068e-05, + "loss": 0.9053, + "step": 413 + }, + { + "epoch": 0.98, + "learning_rate": 1.5565058032987173e-05, + "loss": 1.0029, + "step": 414 + }, + { + "epoch": 0.98, + "learning_rate": 1.5552840562003668e-05, + "loss": 0.7987, + "step": 415 + }, + { + "epoch": 0.98, + "learning_rate": 1.554062309102016e-05, + "loss": 0.7423, + "step": 416 + }, + { + "epoch": 0.99, + "learning_rate": 1.5528405620036654e-05, + "loss": 1.0905, + "step": 417 + }, + { + "epoch": 0.99, + "learning_rate": 1.5516188149053146e-05, + "loss": 0.829, + "step": 418 + }, + { + "epoch": 0.99, + "learning_rate": 1.550397067806964e-05, + "loss": 0.698, + "step": 419 + }, + { + "epoch": 0.99, + "learning_rate": 1.5491753207086136e-05, + "loss": 0.8893, + "step": 420 + }, + { + "epoch": 1.0, + "learning_rate": 1.5479535736102628e-05, + "loss": 0.992, + "step": 421 + }, + { + "epoch": 1.0, + "learning_rate": 1.5467318265119123e-05, + "loss": 1.0658, + "step": 422 + }, + { + "epoch": 1.0, + "learning_rate": 1.5455100794135615e-05, + "loss": 1.2668, + "step": 423 + }, + { + "epoch": 1.0, + "learning_rate": 1.544288332315211e-05, + "loss": 0.8961, + "step": 424 + }, + { + "epoch": 1.0, + "learning_rate": 1.54306658521686e-05, + "loss": 0.8881, + "step": 425 + }, + { + "epoch": 1.01, + "learning_rate": 1.5418448381185097e-05, + "loss": 0.8983, + "step": 426 + }, + { + "epoch": 1.01, + "learning_rate": 1.540623091020159e-05, + "loss": 0.7649, + "step": 427 + }, + { + "epoch": 1.01, + "learning_rate": 1.5394013439218084e-05, + "loss": 0.7264, + "step": 428 + }, + { + "epoch": 1.01, + "learning_rate": 1.5381795968234575e-05, + "loss": 0.7437, + "step": 429 + }, + { + "epoch": 1.02, + "learning_rate": 1.536957849725107e-05, + "loss": 1.01, + "step": 430 + }, + { + "epoch": 1.02, + "learning_rate": 1.5357361026267562e-05, + "loss": 0.9565, + "step": 431 + }, + { + "epoch": 1.02, + "learning_rate": 1.5345143555284057e-05, + "loss": 0.7581, + "step": 432 + }, + { + "epoch": 1.02, + "learning_rate": 1.533292608430055e-05, + "loss": 1.0394, + "step": 433 + }, + { + "epoch": 1.03, + "learning_rate": 1.5320708613317047e-05, + "loss": 1.0098, + "step": 434 + }, + { + "epoch": 1.03, + "learning_rate": 1.530849114233354e-05, + "loss": 1.007, + "step": 435 + }, + { + "epoch": 1.03, + "learning_rate": 1.5296273671350034e-05, + "loss": 1.0359, + "step": 436 + }, + { + "epoch": 1.03, + "learning_rate": 1.5284056200366526e-05, + "loss": 1.0258, + "step": 437 + }, + { + "epoch": 1.04, + "learning_rate": 1.527183872938302e-05, + "loss": 1.0074, + "step": 438 + }, + { + "epoch": 1.04, + "learning_rate": 1.5259621258399513e-05, + "loss": 0.8713, + "step": 439 + }, + { + "epoch": 1.04, + "learning_rate": 1.5247403787416006e-05, + "loss": 0.858, + "step": 440 + }, + { + "epoch": 1.04, + "learning_rate": 1.52351863164325e-05, + "loss": 0.9483, + "step": 441 + }, + { + "epoch": 1.05, + "learning_rate": 1.5222968845448993e-05, + "loss": 0.8927, + "step": 442 + }, + { + "epoch": 1.05, + "learning_rate": 1.5210751374465486e-05, + "loss": 1.05, + "step": 443 + }, + { + "epoch": 1.05, + "learning_rate": 1.519853390348198e-05, + "loss": 0.9705, + "step": 444 + }, + { + "epoch": 1.05, + "learning_rate": 1.5186316432498473e-05, + "loss": 0.6877, + "step": 445 + }, + { + "epoch": 1.05, + "learning_rate": 1.5174098961514966e-05, + "loss": 1.1924, + "step": 446 + }, + { + "epoch": 1.06, + "learning_rate": 1.516188149053146e-05, + "loss": 1.0023, + "step": 447 + }, + { + "epoch": 1.06, + "learning_rate": 1.5149664019547953e-05, + "loss": 0.8875, + "step": 448 + }, + { + "epoch": 1.06, + "learning_rate": 1.513744654856445e-05, + "loss": 0.5401, + "step": 449 + }, + { + "epoch": 1.06, + "learning_rate": 1.5125229077580943e-05, + "loss": 1.6992, + "step": 450 + }, + { + "epoch": 1.07, + "learning_rate": 1.5113011606597437e-05, + "loss": 0.9052, + "step": 451 + }, + { + "epoch": 1.07, + "learning_rate": 1.510079413561393e-05, + "loss": 1.1827, + "step": 452 + }, + { + "epoch": 1.07, + "learning_rate": 1.5088576664630424e-05, + "loss": 0.9645, + "step": 453 + }, + { + "epoch": 1.07, + "learning_rate": 1.5076359193646917e-05, + "loss": 0.8014, + "step": 454 + }, + { + "epoch": 1.08, + "learning_rate": 1.506414172266341e-05, + "loss": 0.9795, + "step": 455 + }, + { + "epoch": 1.08, + "learning_rate": 1.5051924251679904e-05, + "loss": 1.0001, + "step": 456 + }, + { + "epoch": 1.08, + "learning_rate": 1.5039706780696397e-05, + "loss": 0.9216, + "step": 457 + }, + { + "epoch": 1.08, + "learning_rate": 1.502748930971289e-05, + "loss": 0.7942, + "step": 458 + }, + { + "epoch": 1.09, + "learning_rate": 1.5015271838729384e-05, + "loss": 0.6439, + "step": 459 + }, + { + "epoch": 1.09, + "learning_rate": 1.5003054367745877e-05, + "loss": 0.8977, + "step": 460 + }, + { + "epoch": 1.09, + "learning_rate": 1.4990836896762371e-05, + "loss": 0.6901, + "step": 461 + }, + { + "epoch": 1.09, + "learning_rate": 1.4978619425778864e-05, + "loss": 1.0185, + "step": 462 + }, + { + "epoch": 1.09, + "learning_rate": 1.4966401954795358e-05, + "loss": 0.6792, + "step": 463 + }, + { + "epoch": 1.1, + "learning_rate": 1.4954184483811851e-05, + "loss": 0.8694, + "step": 464 + }, + { + "epoch": 1.1, + "learning_rate": 1.4941967012828346e-05, + "loss": 0.859, + "step": 465 + }, + { + "epoch": 1.1, + "learning_rate": 1.492974954184484e-05, + "loss": 0.8606, + "step": 466 + }, + { + "epoch": 1.1, + "learning_rate": 1.4917532070861333e-05, + "loss": 1.0097, + "step": 467 + }, + { + "epoch": 1.11, + "learning_rate": 1.4905314599877826e-05, + "loss": 0.7981, + "step": 468 + }, + { + "epoch": 1.11, + "learning_rate": 1.489309712889432e-05, + "loss": 0.9595, + "step": 469 + }, + { + "epoch": 1.11, + "learning_rate": 1.4880879657910813e-05, + "loss": 0.9864, + "step": 470 + }, + { + "epoch": 1.11, + "learning_rate": 1.4868662186927307e-05, + "loss": 0.7855, + "step": 471 + }, + { + "epoch": 1.12, + "learning_rate": 1.48564447159438e-05, + "loss": 1.098, + "step": 472 + }, + { + "epoch": 1.12, + "learning_rate": 1.4844227244960293e-05, + "loss": 0.9994, + "step": 473 + }, + { + "epoch": 1.12, + "learning_rate": 1.4832009773976788e-05, + "loss": 0.6699, + "step": 474 + }, + { + "epoch": 1.12, + "learning_rate": 1.4819792302993282e-05, + "loss": 0.7458, + "step": 475 + }, + { + "epoch": 1.13, + "learning_rate": 1.4807574832009775e-05, + "loss": 0.6966, + "step": 476 + }, + { + "epoch": 1.13, + "learning_rate": 1.4795357361026269e-05, + "loss": 0.7422, + "step": 477 + }, + { + "epoch": 1.13, + "learning_rate": 1.4783139890042762e-05, + "loss": 0.8659, + "step": 478 + }, + { + "epoch": 1.13, + "learning_rate": 1.4770922419059255e-05, + "loss": 1.1142, + "step": 479 + }, + { + "epoch": 1.14, + "learning_rate": 1.475870494807575e-05, + "loss": 0.9528, + "step": 480 + }, + { + "epoch": 1.14, + "learning_rate": 1.4746487477092244e-05, + "loss": 0.9859, + "step": 481 + }, + { + "epoch": 1.14, + "learning_rate": 1.4734270006108737e-05, + "loss": 0.8913, + "step": 482 + }, + { + "epoch": 1.14, + "learning_rate": 1.472205253512523e-05, + "loss": 0.7271, + "step": 483 + }, + { + "epoch": 1.14, + "learning_rate": 1.4709835064141724e-05, + "loss": 0.883, + "step": 484 + }, + { + "epoch": 1.15, + "learning_rate": 1.4697617593158218e-05, + "loss": 0.9758, + "step": 485 + }, + { + "epoch": 1.15, + "learning_rate": 1.4685400122174711e-05, + "loss": 0.9754, + "step": 486 + }, + { + "epoch": 1.15, + "learning_rate": 1.4673182651191204e-05, + "loss": 0.8249, + "step": 487 + }, + { + "epoch": 1.15, + "learning_rate": 1.4660965180207698e-05, + "loss": 0.665, + "step": 488 + }, + { + "epoch": 1.16, + "learning_rate": 1.4648747709224191e-05, + "loss": 0.8274, + "step": 489 + }, + { + "epoch": 1.16, + "learning_rate": 1.4636530238240685e-05, + "loss": 0.9918, + "step": 490 + }, + { + "epoch": 1.16, + "learning_rate": 1.4624312767257178e-05, + "loss": 0.9642, + "step": 491 + }, + { + "epoch": 1.16, + "learning_rate": 1.4612095296273671e-05, + "loss": 0.9325, + "step": 492 + }, + { + "epoch": 1.17, + "learning_rate": 1.4599877825290165e-05, + "loss": 0.9546, + "step": 493 + }, + { + "epoch": 1.17, + "learning_rate": 1.4587660354306658e-05, + "loss": 0.7223, + "step": 494 + }, + { + "epoch": 1.17, + "learning_rate": 1.4575442883323155e-05, + "loss": 0.7956, + "step": 495 + }, + { + "epoch": 1.17, + "learning_rate": 1.4563225412339648e-05, + "loss": 0.8982, + "step": 496 + }, + { + "epoch": 1.18, + "learning_rate": 1.4551007941356142e-05, + "loss": 1.0453, + "step": 497 + }, + { + "epoch": 1.18, + "learning_rate": 1.4538790470372635e-05, + "loss": 0.7084, + "step": 498 + }, + { + "epoch": 1.18, + "learning_rate": 1.4526572999389129e-05, + "loss": 0.7952, + "step": 499 + }, + { + "epoch": 1.18, + "learning_rate": 1.4514355528405622e-05, + "loss": 0.7469, + "step": 500 + }, + { + "epoch": 1.18, + "learning_rate": 1.4502138057422115e-05, + "loss": 1.045, + "step": 501 + }, + { + "epoch": 1.19, + "learning_rate": 1.4489920586438609e-05, + "loss": 0.9179, + "step": 502 + }, + { + "epoch": 1.19, + "learning_rate": 1.4477703115455102e-05, + "loss": 0.8948, + "step": 503 + }, + { + "epoch": 1.19, + "learning_rate": 1.4465485644471596e-05, + "loss": 0.9697, + "step": 504 + }, + { + "epoch": 1.19, + "learning_rate": 1.4453268173488089e-05, + "loss": 0.8727, + "step": 505 + }, + { + "epoch": 1.2, + "learning_rate": 1.4441050702504582e-05, + "loss": 0.8557, + "step": 506 + }, + { + "epoch": 1.2, + "learning_rate": 1.4428833231521076e-05, + "loss": 0.7547, + "step": 507 + }, + { + "epoch": 1.2, + "learning_rate": 1.441661576053757e-05, + "loss": 0.7066, + "step": 508 + }, + { + "epoch": 1.2, + "learning_rate": 1.4404398289554063e-05, + "loss": 0.8879, + "step": 509 + }, + { + "epoch": 1.21, + "learning_rate": 1.4392180818570558e-05, + "loss": 0.9328, + "step": 510 + }, + { + "epoch": 1.21, + "learning_rate": 1.4379963347587051e-05, + "loss": 0.697, + "step": 511 + }, + { + "epoch": 1.21, + "learning_rate": 1.4367745876603545e-05, + "loss": 1.0393, + "step": 512 + }, + { + "epoch": 1.21, + "learning_rate": 1.4355528405620038e-05, + "loss": 0.9015, + "step": 513 + }, + { + "epoch": 1.22, + "learning_rate": 1.4343310934636531e-05, + "loss": 1.9402, + "step": 514 + }, + { + "epoch": 1.22, + "learning_rate": 1.4331093463653025e-05, + "loss": 0.9255, + "step": 515 + }, + { + "epoch": 1.22, + "learning_rate": 1.4318875992669518e-05, + "loss": 0.9368, + "step": 516 + }, + { + "epoch": 1.22, + "learning_rate": 1.4306658521686012e-05, + "loss": 1.0178, + "step": 517 + }, + { + "epoch": 1.22, + "learning_rate": 1.4294441050702505e-05, + "loss": 1.0455, + "step": 518 + }, + { + "epoch": 1.23, + "learning_rate": 1.4282223579718998e-05, + "loss": 0.8832, + "step": 519 + }, + { + "epoch": 1.23, + "learning_rate": 1.4270006108735492e-05, + "loss": 0.9344, + "step": 520 + }, + { + "epoch": 1.23, + "learning_rate": 1.4257788637751985e-05, + "loss": 0.8651, + "step": 521 + }, + { + "epoch": 1.23, + "learning_rate": 1.4245571166768479e-05, + "loss": 0.679, + "step": 522 + }, + { + "epoch": 1.24, + "learning_rate": 1.4233353695784972e-05, + "loss": 1.012, + "step": 523 + }, + { + "epoch": 1.24, + "learning_rate": 1.4221136224801465e-05, + "loss": 0.9372, + "step": 524 + }, + { + "epoch": 1.24, + "learning_rate": 1.4208918753817962e-05, + "loss": 0.8254, + "step": 525 + }, + { + "epoch": 1.24, + "learning_rate": 1.4196701282834456e-05, + "loss": 0.8761, + "step": 526 + }, + { + "epoch": 1.25, + "learning_rate": 1.4184483811850949e-05, + "loss": 1.3707, + "step": 527 + }, + { + "epoch": 1.25, + "learning_rate": 1.4172266340867442e-05, + "loss": 1.0044, + "step": 528 + }, + { + "epoch": 1.25, + "learning_rate": 1.4160048869883936e-05, + "loss": 0.9218, + "step": 529 + }, + { + "epoch": 1.25, + "learning_rate": 1.4147831398900429e-05, + "loss": 0.9495, + "step": 530 + }, + { + "epoch": 1.26, + "learning_rate": 1.4135613927916923e-05, + "loss": 0.8675, + "step": 531 + }, + { + "epoch": 1.26, + "learning_rate": 1.4123396456933416e-05, + "loss": 0.9436, + "step": 532 + }, + { + "epoch": 1.26, + "learning_rate": 1.411117898594991e-05, + "loss": 0.8323, + "step": 533 + }, + { + "epoch": 1.26, + "learning_rate": 1.4098961514966403e-05, + "loss": 0.7682, + "step": 534 + }, + { + "epoch": 1.27, + "learning_rate": 1.4086744043982896e-05, + "loss": 0.8088, + "step": 535 + }, + { + "epoch": 1.27, + "learning_rate": 1.407452657299939e-05, + "loss": 0.7954, + "step": 536 + }, + { + "epoch": 1.27, + "learning_rate": 1.4062309102015883e-05, + "loss": 0.8871, + "step": 537 + }, + { + "epoch": 1.27, + "learning_rate": 1.4050091631032376e-05, + "loss": 0.8467, + "step": 538 + }, + { + "epoch": 1.27, + "learning_rate": 1.403787416004887e-05, + "loss": 0.9207, + "step": 539 + }, + { + "epoch": 1.28, + "learning_rate": 1.4025656689065365e-05, + "loss": 0.8185, + "step": 540 + }, + { + "epoch": 1.28, + "learning_rate": 1.4013439218081858e-05, + "loss": 0.7643, + "step": 541 + }, + { + "epoch": 1.28, + "learning_rate": 1.4001221747098352e-05, + "loss": 0.9263, + "step": 542 + }, + { + "epoch": 1.28, + "learning_rate": 1.3989004276114847e-05, + "loss": 0.9368, + "step": 543 + }, + { + "epoch": 1.29, + "learning_rate": 1.397678680513134e-05, + "loss": 1.3515, + "step": 544 + }, + { + "epoch": 1.29, + "learning_rate": 1.3964569334147834e-05, + "loss": 0.6968, + "step": 545 + }, + { + "epoch": 1.29, + "learning_rate": 1.3952351863164327e-05, + "loss": 0.8393, + "step": 546 + }, + { + "epoch": 1.29, + "learning_rate": 1.394013439218082e-05, + "loss": 0.8813, + "step": 547 + }, + { + "epoch": 1.3, + "learning_rate": 1.3927916921197314e-05, + "loss": 0.9748, + "step": 548 + }, + { + "epoch": 1.3, + "learning_rate": 1.3915699450213807e-05, + "loss": 0.9541, + "step": 549 + }, + { + "epoch": 1.3, + "learning_rate": 1.39034819792303e-05, + "loss": 0.849, + "step": 550 + }, + { + "epoch": 1.3, + "learning_rate": 1.3891264508246794e-05, + "loss": 0.8708, + "step": 551 + }, + { + "epoch": 1.31, + "learning_rate": 1.3879047037263287e-05, + "loss": 0.8054, + "step": 552 + }, + { + "epoch": 1.31, + "learning_rate": 1.386682956627978e-05, + "loss": 0.7952, + "step": 553 + }, + { + "epoch": 1.31, + "learning_rate": 1.3854612095296274e-05, + "loss": 0.8001, + "step": 554 + }, + { + "epoch": 1.31, + "learning_rate": 1.384239462431277e-05, + "loss": 0.9775, + "step": 555 + }, + { + "epoch": 1.31, + "learning_rate": 1.3830177153329263e-05, + "loss": 0.9602, + "step": 556 + }, + { + "epoch": 1.32, + "learning_rate": 1.3817959682345756e-05, + "loss": 0.8274, + "step": 557 + }, + { + "epoch": 1.32, + "learning_rate": 1.380574221136225e-05, + "loss": 0.7411, + "step": 558 + }, + { + "epoch": 1.32, + "learning_rate": 1.3793524740378743e-05, + "loss": 0.9579, + "step": 559 + }, + { + "epoch": 1.32, + "learning_rate": 1.3781307269395236e-05, + "loss": 0.9866, + "step": 560 + }, + { + "epoch": 1.33, + "learning_rate": 1.376908979841173e-05, + "loss": 0.8351, + "step": 561 + }, + { + "epoch": 1.33, + "learning_rate": 1.3756872327428223e-05, + "loss": 0.8235, + "step": 562 + }, + { + "epoch": 1.33, + "learning_rate": 1.3744654856444716e-05, + "loss": 0.9059, + "step": 563 + }, + { + "epoch": 1.33, + "learning_rate": 1.373243738546121e-05, + "loss": 0.7818, + "step": 564 + }, + { + "epoch": 1.34, + "learning_rate": 1.3720219914477703e-05, + "loss": 0.9199, + "step": 565 + }, + { + "epoch": 1.34, + "learning_rate": 1.3708002443494197e-05, + "loss": 0.9934, + "step": 566 + }, + { + "epoch": 1.34, + "learning_rate": 1.369578497251069e-05, + "loss": 0.7839, + "step": 567 + }, + { + "epoch": 1.34, + "learning_rate": 1.3683567501527183e-05, + "loss": 0.8362, + "step": 568 + }, + { + "epoch": 1.35, + "learning_rate": 1.3671350030543677e-05, + "loss": 0.7191, + "step": 569 + }, + { + "epoch": 1.35, + "learning_rate": 1.3659132559560174e-05, + "loss": 0.8777, + "step": 570 + }, + { + "epoch": 1.35, + "learning_rate": 1.3646915088576667e-05, + "loss": 0.8456, + "step": 571 + }, + { + "epoch": 1.35, + "learning_rate": 1.363469761759316e-05, + "loss": 1.1737, + "step": 572 + }, + { + "epoch": 1.35, + "learning_rate": 1.3622480146609654e-05, + "loss": 0.8631, + "step": 573 + }, + { + "epoch": 1.36, + "learning_rate": 1.3610262675626147e-05, + "loss": 0.7261, + "step": 574 + }, + { + "epoch": 1.36, + "learning_rate": 1.359804520464264e-05, + "loss": 0.8019, + "step": 575 + }, + { + "epoch": 1.36, + "learning_rate": 1.3585827733659134e-05, + "loss": 1.0148, + "step": 576 + }, + { + "epoch": 1.36, + "learning_rate": 1.3573610262675627e-05, + "loss": 0.8092, + "step": 577 + }, + { + "epoch": 1.37, + "learning_rate": 1.3561392791692121e-05, + "loss": 0.8525, + "step": 578 + }, + { + "epoch": 1.37, + "learning_rate": 1.3549175320708614e-05, + "loss": 0.9003, + "step": 579 + }, + { + "epoch": 1.37, + "learning_rate": 1.3536957849725108e-05, + "loss": 0.9122, + "step": 580 + }, + { + "epoch": 1.37, + "learning_rate": 1.3524740378741601e-05, + "loss": 0.6651, + "step": 581 + }, + { + "epoch": 1.38, + "learning_rate": 1.3512522907758094e-05, + "loss": 0.9718, + "step": 582 + }, + { + "epoch": 1.38, + "learning_rate": 1.3500305436774588e-05, + "loss": 0.9697, + "step": 583 + }, + { + "epoch": 1.38, + "learning_rate": 1.3488087965791081e-05, + "loss": 0.8414, + "step": 584 + }, + { + "epoch": 1.38, + "learning_rate": 1.3475870494807576e-05, + "loss": 0.9784, + "step": 585 + }, + { + "epoch": 1.39, + "learning_rate": 1.346365302382407e-05, + "loss": 0.8626, + "step": 586 + }, + { + "epoch": 1.39, + "learning_rate": 1.3451435552840563e-05, + "loss": 0.7948, + "step": 587 + }, + { + "epoch": 1.39, + "learning_rate": 1.3439218081857057e-05, + "loss": 0.9505, + "step": 588 + }, + { + "epoch": 1.39, + "learning_rate": 1.342700061087355e-05, + "loss": 0.7771, + "step": 589 + }, + { + "epoch": 1.4, + "learning_rate": 1.3414783139890043e-05, + "loss": 1.0326, + "step": 590 + }, + { + "epoch": 1.4, + "learning_rate": 1.3402565668906537e-05, + "loss": 0.7854, + "step": 591 + }, + { + "epoch": 1.4, + "learning_rate": 1.339034819792303e-05, + "loss": 0.8514, + "step": 592 + }, + { + "epoch": 1.4, + "learning_rate": 1.3378130726939524e-05, + "loss": 1.008, + "step": 593 + }, + { + "epoch": 1.4, + "learning_rate": 1.3365913255956019e-05, + "loss": 1.05, + "step": 594 + }, + { + "epoch": 1.41, + "learning_rate": 1.3353695784972512e-05, + "loss": 0.8007, + "step": 595 + }, + { + "epoch": 1.41, + "learning_rate": 1.3341478313989005e-05, + "loss": 0.8751, + "step": 596 + }, + { + "epoch": 1.41, + "learning_rate": 1.3329260843005499e-05, + "loss": 0.9423, + "step": 597 + }, + { + "epoch": 1.41, + "learning_rate": 1.3317043372021992e-05, + "loss": 0.7228, + "step": 598 + }, + { + "epoch": 1.42, + "learning_rate": 1.3304825901038486e-05, + "loss": 1.3136, + "step": 599 + }, + { + "epoch": 1.42, + "learning_rate": 1.329260843005498e-05, + "loss": 0.7924, + "step": 600 + }, + { + "epoch": 1.42, + "learning_rate": 1.3280390959071474e-05, + "loss": 0.9465, + "step": 601 + }, + { + "epoch": 1.42, + "learning_rate": 1.3268173488087968e-05, + "loss": 1.057, + "step": 602 + }, + { + "epoch": 1.43, + "learning_rate": 1.3255956017104461e-05, + "loss": 1.4896, + "step": 603 + }, + { + "epoch": 1.43, + "learning_rate": 1.3243738546120954e-05, + "loss": 1.4226, + "step": 604 + }, + { + "epoch": 1.43, + "learning_rate": 1.3231521075137448e-05, + "loss": 0.7483, + "step": 605 + }, + { + "epoch": 1.43, + "learning_rate": 1.3219303604153941e-05, + "loss": 0.7319, + "step": 606 + }, + { + "epoch": 1.44, + "learning_rate": 1.3207086133170435e-05, + "loss": 0.9093, + "step": 607 + }, + { + "epoch": 1.44, + "learning_rate": 1.3194868662186928e-05, + "loss": 0.6625, + "step": 608 + }, + { + "epoch": 1.44, + "learning_rate": 1.3182651191203421e-05, + "loss": 0.8354, + "step": 609 + }, + { + "epoch": 1.44, + "learning_rate": 1.3170433720219915e-05, + "loss": 0.7068, + "step": 610 + }, + { + "epoch": 1.44, + "learning_rate": 1.3158216249236408e-05, + "loss": 0.9007, + "step": 611 + }, + { + "epoch": 1.45, + "learning_rate": 1.3145998778252902e-05, + "loss": 0.7585, + "step": 612 + }, + { + "epoch": 1.45, + "learning_rate": 1.3133781307269395e-05, + "loss": 0.9246, + "step": 613 + }, + { + "epoch": 1.45, + "learning_rate": 1.3121563836285888e-05, + "loss": 1.0258, + "step": 614 + }, + { + "epoch": 1.45, + "learning_rate": 1.3109346365302385e-05, + "loss": 0.8766, + "step": 615 + }, + { + "epoch": 1.46, + "learning_rate": 1.3097128894318879e-05, + "loss": 0.7533, + "step": 616 + }, + { + "epoch": 1.46, + "learning_rate": 1.3084911423335372e-05, + "loss": 0.9276, + "step": 617 + }, + { + "epoch": 1.46, + "learning_rate": 1.3072693952351865e-05, + "loss": 0.8266, + "step": 618 + }, + { + "epoch": 1.46, + "learning_rate": 1.3060476481368359e-05, + "loss": 0.8333, + "step": 619 + }, + { + "epoch": 1.47, + "learning_rate": 1.3048259010384852e-05, + "loss": 0.9279, + "step": 620 + }, + { + "epoch": 1.47, + "learning_rate": 1.3036041539401346e-05, + "loss": 0.8234, + "step": 621 + }, + { + "epoch": 1.47, + "learning_rate": 1.3023824068417839e-05, + "loss": 0.9139, + "step": 622 + }, + { + "epoch": 1.47, + "learning_rate": 1.3011606597434332e-05, + "loss": 1.0677, + "step": 623 + }, + { + "epoch": 1.48, + "learning_rate": 1.2999389126450826e-05, + "loss": 0.8633, + "step": 624 + }, + { + "epoch": 1.48, + "learning_rate": 1.298717165546732e-05, + "loss": 0.973, + "step": 625 + }, + { + "epoch": 1.48, + "learning_rate": 1.2974954184483813e-05, + "loss": 0.7714, + "step": 626 + }, + { + "epoch": 1.48, + "learning_rate": 1.2962736713500306e-05, + "loss": 0.8985, + "step": 627 + }, + { + "epoch": 1.48, + "learning_rate": 1.29505192425168e-05, + "loss": 0.7919, + "step": 628 + }, + { + "epoch": 1.49, + "learning_rate": 1.2938301771533293e-05, + "loss": 0.7971, + "step": 629 + }, + { + "epoch": 1.49, + "learning_rate": 1.2926084300549788e-05, + "loss": 0.7678, + "step": 630 + }, + { + "epoch": 1.49, + "learning_rate": 1.2913866829566281e-05, + "loss": 1.0978, + "step": 631 + }, + { + "epoch": 1.49, + "learning_rate": 1.2901649358582775e-05, + "loss": 1.046, + "step": 632 + }, + { + "epoch": 1.5, + "learning_rate": 1.2889431887599268e-05, + "loss": 0.7762, + "step": 633 + }, + { + "epoch": 1.5, + "learning_rate": 1.2877214416615762e-05, + "loss": 0.9706, + "step": 634 + }, + { + "epoch": 1.5, + "learning_rate": 1.2864996945632255e-05, + "loss": 0.7892, + "step": 635 + }, + { + "epoch": 1.5, + "learning_rate": 1.2852779474648748e-05, + "loss": 0.8744, + "step": 636 + }, + { + "epoch": 1.51, + "learning_rate": 1.2840562003665242e-05, + "loss": 0.8939, + "step": 637 + }, + { + "epoch": 1.51, + "learning_rate": 1.2828344532681735e-05, + "loss": 0.9054, + "step": 638 + }, + { + "epoch": 1.51, + "learning_rate": 1.2816127061698229e-05, + "loss": 0.8555, + "step": 639 + }, + { + "epoch": 1.51, + "learning_rate": 1.2803909590714722e-05, + "loss": 1.0563, + "step": 640 + }, + { + "epoch": 1.52, + "learning_rate": 1.2791692119731215e-05, + "loss": 1.0961, + "step": 641 + }, + { + "epoch": 1.52, + "learning_rate": 1.2779474648747709e-05, + "loss": 1.0154, + "step": 642 + }, + { + "epoch": 1.52, + "learning_rate": 1.2767257177764202e-05, + "loss": 0.8368, + "step": 643 + }, + { + "epoch": 1.52, + "learning_rate": 1.2755039706780696e-05, + "loss": 0.8919, + "step": 644 + }, + { + "epoch": 1.53, + "learning_rate": 1.2742822235797192e-05, + "loss": 0.7917, + "step": 645 + }, + { + "epoch": 1.53, + "learning_rate": 1.2730604764813686e-05, + "loss": 0.7751, + "step": 646 + }, + { + "epoch": 1.53, + "learning_rate": 1.2718387293830179e-05, + "loss": 0.8739, + "step": 647 + }, + { + "epoch": 1.53, + "learning_rate": 1.2706169822846673e-05, + "loss": 0.8074, + "step": 648 + }, + { + "epoch": 1.53, + "learning_rate": 1.2693952351863166e-05, + "loss": 0.8114, + "step": 649 + }, + { + "epoch": 1.54, + "learning_rate": 1.268173488087966e-05, + "loss": 0.7754, + "step": 650 + }, + { + "epoch": 1.54, + "learning_rate": 1.2669517409896153e-05, + "loss": 0.9155, + "step": 651 + }, + { + "epoch": 1.54, + "learning_rate": 1.2657299938912646e-05, + "loss": 0.7397, + "step": 652 + }, + { + "epoch": 1.54, + "learning_rate": 1.264508246792914e-05, + "loss": 0.8083, + "step": 653 + }, + { + "epoch": 1.55, + "learning_rate": 1.2632864996945633e-05, + "loss": 0.7667, + "step": 654 + }, + { + "epoch": 1.55, + "learning_rate": 1.2620647525962126e-05, + "loss": 0.7873, + "step": 655 + }, + { + "epoch": 1.55, + "learning_rate": 1.260843005497862e-05, + "loss": 0.7984, + "step": 656 + }, + { + "epoch": 1.55, + "learning_rate": 1.2596212583995113e-05, + "loss": 0.9473, + "step": 657 + }, + { + "epoch": 1.56, + "learning_rate": 1.2583995113011607e-05, + "loss": 1.0814, + "step": 658 + }, + { + "epoch": 1.56, + "learning_rate": 1.25717776420281e-05, + "loss": 0.8885, + "step": 659 + }, + { + "epoch": 1.56, + "learning_rate": 1.2559560171044595e-05, + "loss": 0.9122, + "step": 660 + }, + { + "epoch": 1.56, + "learning_rate": 1.2547342700061088e-05, + "loss": 0.908, + "step": 661 + }, + { + "epoch": 1.57, + "learning_rate": 1.2535125229077582e-05, + "loss": 0.8649, + "step": 662 + }, + { + "epoch": 1.57, + "learning_rate": 1.2522907758094075e-05, + "loss": 1.025, + "step": 663 + }, + { + "epoch": 1.57, + "learning_rate": 1.251069028711057e-05, + "loss": 0.861, + "step": 664 + }, + { + "epoch": 1.57, + "learning_rate": 1.2498472816127064e-05, + "loss": 0.9673, + "step": 665 + }, + { + "epoch": 1.57, + "learning_rate": 1.2486255345143557e-05, + "loss": 0.8577, + "step": 666 + }, + { + "epoch": 1.58, + "learning_rate": 1.247403787416005e-05, + "loss": 0.861, + "step": 667 + }, + { + "epoch": 1.58, + "learning_rate": 1.2461820403176544e-05, + "loss": 0.928, + "step": 668 + }, + { + "epoch": 1.58, + "learning_rate": 1.2449602932193037e-05, + "loss": 0.6446, + "step": 669 + }, + { + "epoch": 1.58, + "learning_rate": 1.243738546120953e-05, + "loss": 0.7685, + "step": 670 + }, + { + "epoch": 1.59, + "learning_rate": 1.2425167990226024e-05, + "loss": 0.765, + "step": 671 + }, + { + "epoch": 1.59, + "learning_rate": 1.2412950519242518e-05, + "loss": 0.7625, + "step": 672 + }, + { + "epoch": 1.59, + "learning_rate": 1.2400733048259011e-05, + "loss": 1.0833, + "step": 673 + }, + { + "epoch": 1.59, + "learning_rate": 1.2388515577275504e-05, + "loss": 0.9833, + "step": 674 + }, + { + "epoch": 1.6, + "learning_rate": 1.2376298106292e-05, + "loss": 0.8189, + "step": 675 + }, + { + "epoch": 1.6, + "learning_rate": 1.2364080635308493e-05, + "loss": 0.8686, + "step": 676 + }, + { + "epoch": 1.6, + "learning_rate": 1.2351863164324986e-05, + "loss": 1.1008, + "step": 677 + }, + { + "epoch": 1.6, + "learning_rate": 1.233964569334148e-05, + "loss": 0.8749, + "step": 678 + }, + { + "epoch": 1.61, + "learning_rate": 1.2327428222357973e-05, + "loss": 0.9367, + "step": 679 + }, + { + "epoch": 1.61, + "learning_rate": 1.2315210751374466e-05, + "loss": 0.8399, + "step": 680 + }, + { + "epoch": 1.61, + "learning_rate": 1.230299328039096e-05, + "loss": 0.8422, + "step": 681 + }, + { + "epoch": 1.61, + "learning_rate": 1.2290775809407453e-05, + "loss": 0.9767, + "step": 682 + }, + { + "epoch": 1.62, + "learning_rate": 1.2278558338423947e-05, + "loss": 0.9665, + "step": 683 + }, + { + "epoch": 1.62, + "learning_rate": 1.226634086744044e-05, + "loss": 0.6738, + "step": 684 + }, + { + "epoch": 1.62, + "learning_rate": 1.2254123396456933e-05, + "loss": 0.822, + "step": 685 + }, + { + "epoch": 1.62, + "learning_rate": 1.2241905925473427e-05, + "loss": 0.8574, + "step": 686 + }, + { + "epoch": 1.62, + "learning_rate": 1.222968845448992e-05, + "loss": 0.9515, + "step": 687 + }, + { + "epoch": 1.63, + "learning_rate": 1.2217470983506414e-05, + "loss": 0.7583, + "step": 688 + }, + { + "epoch": 1.63, + "learning_rate": 1.2205253512522907e-05, + "loss": 0.7396, + "step": 689 + }, + { + "epoch": 1.63, + "learning_rate": 1.2193036041539404e-05, + "loss": 1.0343, + "step": 690 + }, + { + "epoch": 1.63, + "learning_rate": 1.2180818570555897e-05, + "loss": 0.8694, + "step": 691 + }, + { + "epoch": 1.64, + "learning_rate": 1.216860109957239e-05, + "loss": 1.0403, + "step": 692 + }, + { + "epoch": 1.64, + "learning_rate": 1.2156383628588884e-05, + "loss": 0.657, + "step": 693 + }, + { + "epoch": 1.64, + "learning_rate": 1.2144166157605377e-05, + "loss": 0.8841, + "step": 694 + }, + { + "epoch": 1.64, + "learning_rate": 1.2131948686621871e-05, + "loss": 0.9936, + "step": 695 + }, + { + "epoch": 1.65, + "learning_rate": 1.2119731215638364e-05, + "loss": 0.9932, + "step": 696 + }, + { + "epoch": 1.65, + "learning_rate": 1.2107513744654858e-05, + "loss": 0.7702, + "step": 697 + }, + { + "epoch": 1.65, + "learning_rate": 1.2095296273671351e-05, + "loss": 1.8591, + "step": 698 + }, + { + "epoch": 1.65, + "learning_rate": 1.2083078802687844e-05, + "loss": 1.1554, + "step": 699 + }, + { + "epoch": 1.66, + "learning_rate": 1.2070861331704338e-05, + "loss": 0.7643, + "step": 700 + }, + { + "epoch": 1.66, + "learning_rate": 1.2058643860720831e-05, + "loss": 1.0038, + "step": 701 + }, + { + "epoch": 1.66, + "learning_rate": 1.2046426389737325e-05, + "loss": 0.9304, + "step": 702 + }, + { + "epoch": 1.66, + "learning_rate": 1.2034208918753818e-05, + "loss": 0.9719, + "step": 703 + }, + { + "epoch": 1.66, + "learning_rate": 1.2021991447770312e-05, + "loss": 0.7793, + "step": 704 + }, + { + "epoch": 1.67, + "learning_rate": 1.2009773976786807e-05, + "loss": 1.3826, + "step": 705 + }, + { + "epoch": 1.67, + "learning_rate": 1.19975565058033e-05, + "loss": 0.9339, + "step": 706 + }, + { + "epoch": 1.67, + "learning_rate": 1.1985339034819793e-05, + "loss": 0.7452, + "step": 707 + }, + { + "epoch": 1.67, + "learning_rate": 1.1973121563836287e-05, + "loss": 0.9882, + "step": 708 + }, + { + "epoch": 1.68, + "learning_rate": 1.196090409285278e-05, + "loss": 0.9004, + "step": 709 + }, + { + "epoch": 1.68, + "learning_rate": 1.1948686621869274e-05, + "loss": 1.3377, + "step": 710 + }, + { + "epoch": 1.68, + "learning_rate": 1.1936469150885767e-05, + "loss": 0.9779, + "step": 711 + }, + { + "epoch": 1.68, + "learning_rate": 1.192425167990226e-05, + "loss": 0.9391, + "step": 712 + }, + { + "epoch": 1.69, + "learning_rate": 1.1912034208918754e-05, + "loss": 1.4261, + "step": 713 + }, + { + "epoch": 1.69, + "learning_rate": 1.1899816737935247e-05, + "loss": 0.8363, + "step": 714 + }, + { + "epoch": 1.69, + "learning_rate": 1.1887599266951742e-05, + "loss": 0.8793, + "step": 715 + }, + { + "epoch": 1.69, + "learning_rate": 1.1875381795968236e-05, + "loss": 0.8424, + "step": 716 + }, + { + "epoch": 1.7, + "learning_rate": 1.1863164324984729e-05, + "loss": 0.7472, + "step": 717 + }, + { + "epoch": 1.7, + "learning_rate": 1.1850946854001223e-05, + "loss": 0.9502, + "step": 718 + }, + { + "epoch": 1.7, + "learning_rate": 1.1838729383017716e-05, + "loss": 1.3596, + "step": 719 + }, + { + "epoch": 1.7, + "learning_rate": 1.1826511912034211e-05, + "loss": 0.7848, + "step": 720 + }, + { + "epoch": 1.7, + "learning_rate": 1.1814294441050704e-05, + "loss": 0.8574, + "step": 721 + }, + { + "epoch": 1.71, + "learning_rate": 1.1802076970067198e-05, + "loss": 1.5207, + "step": 722 + }, + { + "epoch": 1.71, + "learning_rate": 1.1789859499083691e-05, + "loss": 0.9117, + "step": 723 + }, + { + "epoch": 1.71, + "learning_rate": 1.1777642028100185e-05, + "loss": 0.8671, + "step": 724 + }, + { + "epoch": 1.71, + "learning_rate": 1.1765424557116678e-05, + "loss": 0.8095, + "step": 725 + }, + { + "epoch": 1.72, + "learning_rate": 1.1753207086133171e-05, + "loss": 0.8219, + "step": 726 + }, + { + "epoch": 1.72, + "learning_rate": 1.1740989615149665e-05, + "loss": 1.3958, + "step": 727 + }, + { + "epoch": 1.72, + "learning_rate": 1.1728772144166158e-05, + "loss": 0.7198, + "step": 728 + }, + { + "epoch": 1.72, + "learning_rate": 1.1716554673182652e-05, + "loss": 0.8341, + "step": 729 + }, + { + "epoch": 1.73, + "learning_rate": 1.1704337202199145e-05, + "loss": 1.0228, + "step": 730 + }, + { + "epoch": 1.73, + "learning_rate": 1.1692119731215638e-05, + "loss": 0.8357, + "step": 731 + }, + { + "epoch": 1.73, + "learning_rate": 1.1679902260232132e-05, + "loss": 0.9477, + "step": 732 + }, + { + "epoch": 1.73, + "learning_rate": 1.1667684789248625e-05, + "loss": 0.6738, + "step": 733 + }, + { + "epoch": 1.74, + "learning_rate": 1.1655467318265119e-05, + "loss": 1.0358, + "step": 734 + }, + { + "epoch": 1.74, + "learning_rate": 1.1643249847281615e-05, + "loss": 1.1283, + "step": 735 + }, + { + "epoch": 1.74, + "learning_rate": 1.1631032376298109e-05, + "loss": 0.9415, + "step": 736 + }, + { + "epoch": 1.74, + "learning_rate": 1.1618814905314602e-05, + "loss": 0.8025, + "step": 737 + }, + { + "epoch": 1.75, + "learning_rate": 1.1606597434331096e-05, + "loss": 0.9122, + "step": 738 + }, + { + "epoch": 1.75, + "learning_rate": 1.1594379963347589e-05, + "loss": 0.8259, + "step": 739 + }, + { + "epoch": 1.75, + "learning_rate": 1.1582162492364082e-05, + "loss": 0.8689, + "step": 740 + }, + { + "epoch": 1.75, + "learning_rate": 1.1569945021380576e-05, + "loss": 0.8513, + "step": 741 + }, + { + "epoch": 1.75, + "learning_rate": 1.155772755039707e-05, + "loss": 0.9923, + "step": 742 + }, + { + "epoch": 1.76, + "learning_rate": 1.1545510079413563e-05, + "loss": 0.9475, + "step": 743 + }, + { + "epoch": 1.76, + "learning_rate": 1.1533292608430056e-05, + "loss": 0.8622, + "step": 744 + }, + { + "epoch": 1.76, + "learning_rate": 1.152107513744655e-05, + "loss": 0.8522, + "step": 745 + }, + { + "epoch": 1.76, + "learning_rate": 1.1508857666463043e-05, + "loss": 0.9629, + "step": 746 + }, + { + "epoch": 1.77, + "learning_rate": 1.1496640195479536e-05, + "loss": 1.0155, + "step": 747 + }, + { + "epoch": 1.77, + "learning_rate": 1.148442272449603e-05, + "loss": 1.0732, + "step": 748 + }, + { + "epoch": 1.77, + "learning_rate": 1.1472205253512523e-05, + "loss": 0.8798, + "step": 749 + }, + { + "epoch": 1.77, + "learning_rate": 1.1459987782529018e-05, + "loss": 0.9328, + "step": 750 + }, + { + "epoch": 1.78, + "learning_rate": 1.1447770311545512e-05, + "loss": 1.0042, + "step": 751 + }, + { + "epoch": 1.78, + "learning_rate": 1.1435552840562005e-05, + "loss": 0.7392, + "step": 752 + }, + { + "epoch": 1.78, + "learning_rate": 1.1423335369578498e-05, + "loss": 0.7546, + "step": 753 + }, + { + "epoch": 1.78, + "learning_rate": 1.1411117898594992e-05, + "loss": 0.8252, + "step": 754 + }, + { + "epoch": 1.79, + "learning_rate": 1.1398900427611485e-05, + "loss": 0.8449, + "step": 755 + }, + { + "epoch": 1.79, + "learning_rate": 1.1386682956627979e-05, + "loss": 0.9371, + "step": 756 + }, + { + "epoch": 1.79, + "learning_rate": 1.1374465485644472e-05, + "loss": 0.8685, + "step": 757 + }, + { + "epoch": 1.79, + "learning_rate": 1.1362248014660965e-05, + "loss": 0.8455, + "step": 758 + }, + { + "epoch": 1.79, + "learning_rate": 1.1350030543677459e-05, + "loss": 0.8107, + "step": 759 + }, + { + "epoch": 1.8, + "learning_rate": 1.1337813072693952e-05, + "loss": 0.7724, + "step": 760 + }, + { + "epoch": 1.8, + "learning_rate": 1.1325595601710446e-05, + "loss": 0.9819, + "step": 761 + }, + { + "epoch": 1.8, + "learning_rate": 1.1313378130726939e-05, + "loss": 0.8345, + "step": 762 + }, + { + "epoch": 1.8, + "learning_rate": 1.1301160659743432e-05, + "loss": 0.7377, + "step": 763 + }, + { + "epoch": 1.81, + "learning_rate": 1.1288943188759926e-05, + "loss": 0.8958, + "step": 764 + }, + { + "epoch": 1.81, + "learning_rate": 1.1276725717776423e-05, + "loss": 0.7704, + "step": 765 + }, + { + "epoch": 1.81, + "learning_rate": 1.1264508246792916e-05, + "loss": 1.0557, + "step": 766 + }, + { + "epoch": 1.81, + "learning_rate": 1.125229077580941e-05, + "loss": 0.5956, + "step": 767 + }, + { + "epoch": 1.82, + "learning_rate": 1.1240073304825903e-05, + "loss": 0.92, + "step": 768 + }, + { + "epoch": 1.82, + "learning_rate": 1.1227855833842396e-05, + "loss": 1.0114, + "step": 769 + }, + { + "epoch": 1.82, + "learning_rate": 1.121563836285889e-05, + "loss": 0.9246, + "step": 770 + }, + { + "epoch": 1.82, + "learning_rate": 1.1203420891875383e-05, + "loss": 0.8278, + "step": 771 + }, + { + "epoch": 1.83, + "learning_rate": 1.1191203420891876e-05, + "loss": 0.9015, + "step": 772 + }, + { + "epoch": 1.83, + "learning_rate": 1.117898594990837e-05, + "loss": 0.9138, + "step": 773 + }, + { + "epoch": 1.83, + "learning_rate": 1.1166768478924863e-05, + "loss": 0.7906, + "step": 774 + }, + { + "epoch": 1.83, + "learning_rate": 1.1154551007941357e-05, + "loss": 0.8134, + "step": 775 + }, + { + "epoch": 1.83, + "learning_rate": 1.114233353695785e-05, + "loss": 1.4783, + "step": 776 + }, + { + "epoch": 1.84, + "learning_rate": 1.1130116065974343e-05, + "loss": 1.1252, + "step": 777 + }, + { + "epoch": 1.84, + "learning_rate": 1.1117898594990837e-05, + "loss": 0.9041, + "step": 778 + }, + { + "epoch": 1.84, + "learning_rate": 1.110568112400733e-05, + "loss": 0.785, + "step": 779 + }, + { + "epoch": 1.84, + "learning_rate": 1.1093463653023825e-05, + "loss": 0.8366, + "step": 780 + }, + { + "epoch": 1.85, + "learning_rate": 1.1081246182040319e-05, + "loss": 0.9032, + "step": 781 + }, + { + "epoch": 1.85, + "learning_rate": 1.1069028711056812e-05, + "loss": 0.92, + "step": 782 + }, + { + "epoch": 1.85, + "learning_rate": 1.1056811240073305e-05, + "loss": 0.8575, + "step": 783 + }, + { + "epoch": 1.85, + "learning_rate": 1.10445937690898e-05, + "loss": 0.726, + "step": 784 + }, + { + "epoch": 1.86, + "learning_rate": 1.1032376298106294e-05, + "loss": 0.9062, + "step": 785 + }, + { + "epoch": 1.86, + "learning_rate": 1.1020158827122787e-05, + "loss": 0.708, + "step": 786 + }, + { + "epoch": 1.86, + "learning_rate": 1.100794135613928e-05, + "loss": 0.7986, + "step": 787 + }, + { + "epoch": 1.86, + "learning_rate": 1.0995723885155774e-05, + "loss": 1.0306, + "step": 788 + }, + { + "epoch": 1.87, + "learning_rate": 1.0983506414172268e-05, + "loss": 0.7421, + "step": 789 + }, + { + "epoch": 1.87, + "learning_rate": 1.0971288943188761e-05, + "loss": 0.9103, + "step": 790 + }, + { + "epoch": 1.87, + "learning_rate": 1.0959071472205254e-05, + "loss": 1.7107, + "step": 791 + }, + { + "epoch": 1.87, + "learning_rate": 1.0946854001221748e-05, + "loss": 0.887, + "step": 792 + }, + { + "epoch": 1.88, + "learning_rate": 1.0934636530238241e-05, + "loss": 0.8573, + "step": 793 + }, + { + "epoch": 1.88, + "learning_rate": 1.0922419059254735e-05, + "loss": 0.7963, + "step": 794 + }, + { + "epoch": 1.88, + "learning_rate": 1.091020158827123e-05, + "loss": 0.9545, + "step": 795 + }, + { + "epoch": 1.88, + "learning_rate": 1.0897984117287723e-05, + "loss": 0.7702, + "step": 796 + }, + { + "epoch": 1.88, + "learning_rate": 1.0885766646304216e-05, + "loss": 1.0585, + "step": 797 + }, + { + "epoch": 1.89, + "learning_rate": 1.087354917532071e-05, + "loss": 0.8233, + "step": 798 + }, + { + "epoch": 1.89, + "learning_rate": 1.0861331704337203e-05, + "loss": 0.7882, + "step": 799 + }, + { + "epoch": 1.89, + "learning_rate": 1.0849114233353697e-05, + "loss": 0.7589, + "step": 800 + }, + { + "epoch": 1.89, + "learning_rate": 1.083689676237019e-05, + "loss": 0.9228, + "step": 801 + }, + { + "epoch": 1.9, + "learning_rate": 1.0824679291386684e-05, + "loss": 0.9933, + "step": 802 + }, + { + "epoch": 1.9, + "learning_rate": 1.0812461820403177e-05, + "loss": 0.8297, + "step": 803 + }, + { + "epoch": 1.9, + "learning_rate": 1.080024434941967e-05, + "loss": 0.7904, + "step": 804 + }, + { + "epoch": 1.9, + "learning_rate": 1.0788026878436164e-05, + "loss": 1.239, + "step": 805 + }, + { + "epoch": 1.91, + "learning_rate": 1.0775809407452657e-05, + "loss": 0.7186, + "step": 806 + }, + { + "epoch": 1.91, + "learning_rate": 1.076359193646915e-05, + "loss": 1.0742, + "step": 807 + }, + { + "epoch": 1.91, + "learning_rate": 1.0751374465485644e-05, + "loss": 0.6076, + "step": 808 + }, + { + "epoch": 1.91, + "learning_rate": 1.0739156994502137e-05, + "loss": 0.7574, + "step": 809 + }, + { + "epoch": 1.92, + "learning_rate": 1.0726939523518634e-05, + "loss": 0.7058, + "step": 810 + }, + { + "epoch": 1.92, + "learning_rate": 1.0714722052535128e-05, + "loss": 0.7694, + "step": 811 + }, + { + "epoch": 1.92, + "learning_rate": 1.0702504581551621e-05, + "loss": 0.6717, + "step": 812 + }, + { + "epoch": 1.92, + "learning_rate": 1.0690287110568114e-05, + "loss": 1.0446, + "step": 813 + }, + { + "epoch": 1.92, + "learning_rate": 1.0678069639584608e-05, + "loss": 0.8143, + "step": 814 + }, + { + "epoch": 1.93, + "learning_rate": 1.0665852168601101e-05, + "loss": 0.8326, + "step": 815 + }, + { + "epoch": 1.93, + "learning_rate": 1.0653634697617595e-05, + "loss": 1.0398, + "step": 816 + }, + { + "epoch": 1.93, + "learning_rate": 1.0641417226634088e-05, + "loss": 0.6769, + "step": 817 + }, + { + "epoch": 1.93, + "learning_rate": 1.0629199755650581e-05, + "loss": 0.8909, + "step": 818 + }, + { + "epoch": 1.94, + "learning_rate": 1.0616982284667075e-05, + "loss": 0.7196, + "step": 819 + }, + { + "epoch": 1.94, + "learning_rate": 1.0604764813683568e-05, + "loss": 0.6081, + "step": 820 + }, + { + "epoch": 1.94, + "learning_rate": 1.0592547342700062e-05, + "loss": 0.8479, + "step": 821 + }, + { + "epoch": 1.94, + "learning_rate": 1.0580329871716555e-05, + "loss": 0.6626, + "step": 822 + }, + { + "epoch": 1.95, + "learning_rate": 1.0568112400733048e-05, + "loss": 0.7727, + "step": 823 + }, + { + "epoch": 1.95, + "learning_rate": 1.0555894929749542e-05, + "loss": 0.835, + "step": 824 + }, + { + "epoch": 1.95, + "learning_rate": 1.0543677458766037e-05, + "loss": 1.5359, + "step": 825 + }, + { + "epoch": 1.95, + "learning_rate": 1.053145998778253e-05, + "loss": 0.8521, + "step": 826 + }, + { + "epoch": 1.96, + "learning_rate": 1.0519242516799024e-05, + "loss": 0.7732, + "step": 827 + }, + { + "epoch": 1.96, + "learning_rate": 1.0507025045815517e-05, + "loss": 0.749, + "step": 828 + }, + { + "epoch": 1.96, + "learning_rate": 1.049480757483201e-05, + "loss": 0.9086, + "step": 829 + }, + { + "epoch": 1.96, + "learning_rate": 1.0482590103848504e-05, + "loss": 0.8064, + "step": 830 + }, + { + "epoch": 1.96, + "learning_rate": 1.0470372632864997e-05, + "loss": 0.8561, + "step": 831 + }, + { + "epoch": 1.97, + "learning_rate": 1.045815516188149e-05, + "loss": 1.0735, + "step": 832 + }, + { + "epoch": 1.97, + "learning_rate": 1.0445937690897984e-05, + "loss": 0.7255, + "step": 833 + }, + { + "epoch": 1.97, + "learning_rate": 1.0433720219914477e-05, + "loss": 1.054, + "step": 834 + }, + { + "epoch": 1.97, + "learning_rate": 1.0421502748930973e-05, + "loss": 0.9361, + "step": 835 + }, + { + "epoch": 1.98, + "learning_rate": 1.0409285277947466e-05, + "loss": 0.8865, + "step": 836 + }, + { + "epoch": 1.98, + "learning_rate": 1.039706780696396e-05, + "loss": 0.916, + "step": 837 + }, + { + "epoch": 1.98, + "learning_rate": 1.0384850335980453e-05, + "loss": 0.8838, + "step": 838 + }, + { + "epoch": 1.98, + "learning_rate": 1.0372632864996946e-05, + "loss": 0.6332, + "step": 839 + }, + { + "epoch": 1.99, + "learning_rate": 1.0360415394013441e-05, + "loss": 0.7289, + "step": 840 + }, + { + "epoch": 1.99, + "learning_rate": 1.0348197923029935e-05, + "loss": 1.024, + "step": 841 + }, + { + "epoch": 1.99, + "learning_rate": 1.0335980452046428e-05, + "loss": 0.8003, + "step": 842 + }, + { + "epoch": 1.99, + "learning_rate": 1.0323762981062921e-05, + "loss": 0.8745, + "step": 843 + }, + { + "epoch": 2.0, + "learning_rate": 1.0311545510079415e-05, + "loss": 0.7543, + "step": 844 + }, + { + "epoch": 2.0, + "learning_rate": 1.0299328039095908e-05, + "loss": 0.9404, + "step": 845 + }, + { + "epoch": 2.0, + "learning_rate": 1.0287110568112402e-05, + "loss": 0.7756, + "step": 846 + }, + { + "epoch": 2.0, + "learning_rate": 1.0274893097128895e-05, + "loss": 0.7677, + "step": 847 + }, + { + "epoch": 2.01, + "learning_rate": 1.0262675626145388e-05, + "loss": 0.8376, + "step": 848 + }, + { + "epoch": 2.01, + "learning_rate": 1.0250458155161882e-05, + "loss": 0.9447, + "step": 849 + }, + { + "epoch": 2.01, + "learning_rate": 1.0238240684178375e-05, + "loss": 0.6205, + "step": 850 + }, + { + "epoch": 2.01, + "learning_rate": 1.0226023213194869e-05, + "loss": 0.8075, + "step": 851 + }, + { + "epoch": 2.01, + "learning_rate": 1.0213805742211362e-05, + "loss": 0.8011, + "step": 852 + }, + { + "epoch": 2.02, + "learning_rate": 1.0201588271227855e-05, + "loss": 0.7242, + "step": 853 + }, + { + "epoch": 2.02, + "learning_rate": 1.0189370800244349e-05, + "loss": 0.7785, + "step": 854 + }, + { + "epoch": 2.02, + "learning_rate": 1.0177153329260846e-05, + "loss": 0.8389, + "step": 855 + }, + { + "epoch": 2.02, + "learning_rate": 1.0164935858277339e-05, + "loss": 0.9529, + "step": 856 + }, + { + "epoch": 2.03, + "learning_rate": 1.0152718387293832e-05, + "loss": 0.7059, + "step": 857 + }, + { + "epoch": 2.03, + "learning_rate": 1.0140500916310326e-05, + "loss": 1.4115, + "step": 858 + }, + { + "epoch": 2.03, + "learning_rate": 1.012828344532682e-05, + "loss": 0.8459, + "step": 859 + }, + { + "epoch": 2.03, + "learning_rate": 1.0116065974343313e-05, + "loss": 1.2616, + "step": 860 + }, + { + "epoch": 2.04, + "learning_rate": 1.0103848503359806e-05, + "loss": 0.8513, + "step": 861 + }, + { + "epoch": 2.04, + "learning_rate": 1.00916310323763e-05, + "loss": 0.7815, + "step": 862 + }, + { + "epoch": 2.04, + "learning_rate": 1.0079413561392793e-05, + "loss": 0.7323, + "step": 863 + }, + { + "epoch": 2.04, + "learning_rate": 1.0067196090409286e-05, + "loss": 0.789, + "step": 864 + }, + { + "epoch": 2.05, + "learning_rate": 1.005497861942578e-05, + "loss": 1.5127, + "step": 865 + }, + { + "epoch": 2.05, + "learning_rate": 1.0042761148442273e-05, + "loss": 1.5117, + "step": 866 + }, + { + "epoch": 2.05, + "learning_rate": 1.0030543677458766e-05, + "loss": 0.6835, + "step": 867 + }, + { + "epoch": 2.05, + "learning_rate": 1.001832620647526e-05, + "loss": 0.7692, + "step": 868 + }, + { + "epoch": 2.05, + "learning_rate": 1.0006108735491753e-05, + "loss": 0.6304, + "step": 869 + }, + { + "epoch": 2.06, + "learning_rate": 9.993891264508248e-06, + "loss": 0.6418, + "step": 870 + }, + { + "epoch": 2.06, + "learning_rate": 9.981673793524742e-06, + "loss": 0.8208, + "step": 871 + }, + { + "epoch": 2.06, + "learning_rate": 9.969456322541235e-06, + "loss": 0.9958, + "step": 872 + }, + { + "epoch": 2.06, + "learning_rate": 9.957238851557729e-06, + "loss": 0.9278, + "step": 873 + }, + { + "epoch": 2.07, + "learning_rate": 9.945021380574222e-06, + "loss": 1.0567, + "step": 874 + }, + { + "epoch": 2.07, + "learning_rate": 9.932803909590715e-06, + "loss": 0.8011, + "step": 875 + }, + { + "epoch": 2.07, + "learning_rate": 9.920586438607209e-06, + "loss": 0.8224, + "step": 876 + }, + { + "epoch": 2.07, + "learning_rate": 9.908368967623702e-06, + "loss": 0.8474, + "step": 877 + }, + { + "epoch": 2.08, + "learning_rate": 9.896151496640196e-06, + "loss": 0.7681, + "step": 878 + }, + { + "epoch": 2.08, + "learning_rate": 9.883934025656689e-06, + "loss": 0.7248, + "step": 879 + }, + { + "epoch": 2.08, + "learning_rate": 9.871716554673182e-06, + "loss": 0.7187, + "step": 880 + }, + { + "epoch": 2.08, + "learning_rate": 9.859499083689677e-06, + "loss": 0.7447, + "step": 881 + }, + { + "epoch": 2.09, + "learning_rate": 9.847281612706171e-06, + "loss": 1.015, + "step": 882 + }, + { + "epoch": 2.09, + "learning_rate": 9.835064141722664e-06, + "loss": 0.6698, + "step": 883 + }, + { + "epoch": 2.09, + "learning_rate": 9.822846670739158e-06, + "loss": 0.7435, + "step": 884 + }, + { + "epoch": 2.09, + "learning_rate": 9.810629199755651e-06, + "loss": 0.7527, + "step": 885 + }, + { + "epoch": 2.1, + "learning_rate": 9.798411728772144e-06, + "loss": 0.5812, + "step": 886 + }, + { + "epoch": 2.1, + "learning_rate": 9.786194257788638e-06, + "loss": 1.1017, + "step": 887 + }, + { + "epoch": 2.1, + "learning_rate": 9.773976786805131e-06, + "loss": 0.8461, + "step": 888 + }, + { + "epoch": 2.1, + "learning_rate": 9.761759315821626e-06, + "loss": 0.6109, + "step": 889 + }, + { + "epoch": 2.1, + "learning_rate": 9.74954184483812e-06, + "loss": 0.8444, + "step": 890 + }, + { + "epoch": 2.11, + "learning_rate": 9.737324373854613e-06, + "loss": 0.9308, + "step": 891 + }, + { + "epoch": 2.11, + "learning_rate": 9.725106902871107e-06, + "loss": 0.7788, + "step": 892 + }, + { + "epoch": 2.11, + "learning_rate": 9.7128894318876e-06, + "loss": 0.7444, + "step": 893 + }, + { + "epoch": 2.11, + "learning_rate": 9.700671960904093e-06, + "loss": 0.7446, + "step": 894 + }, + { + "epoch": 2.12, + "learning_rate": 9.688454489920587e-06, + "loss": 0.7429, + "step": 895 + }, + { + "epoch": 2.12, + "learning_rate": 9.676237018937082e-06, + "loss": 0.6309, + "step": 896 + }, + { + "epoch": 2.12, + "learning_rate": 9.664019547953575e-06, + "loss": 0.7665, + "step": 897 + }, + { + "epoch": 2.12, + "learning_rate": 9.651802076970069e-06, + "loss": 0.6313, + "step": 898 + }, + { + "epoch": 2.13, + "learning_rate": 9.639584605986562e-06, + "loss": 0.8622, + "step": 899 + }, + { + "epoch": 2.13, + "learning_rate": 9.627367135003055e-06, + "loss": 0.6971, + "step": 900 + }, + { + "epoch": 2.13, + "learning_rate": 9.615149664019549e-06, + "loss": 0.8956, + "step": 901 + }, + { + "epoch": 2.13, + "learning_rate": 9.602932193036042e-06, + "loss": 0.9075, + "step": 902 + }, + { + "epoch": 2.14, + "learning_rate": 9.590714722052536e-06, + "loss": 0.7028, + "step": 903 + }, + { + "epoch": 2.14, + "learning_rate": 9.578497251069029e-06, + "loss": 0.5738, + "step": 904 + }, + { + "epoch": 2.14, + "learning_rate": 9.566279780085524e-06, + "loss": 0.7779, + "step": 905 + }, + { + "epoch": 2.14, + "learning_rate": 9.554062309102018e-06, + "loss": 0.5873, + "step": 906 + }, + { + "epoch": 2.14, + "learning_rate": 9.541844838118511e-06, + "loss": 1.0006, + "step": 907 + }, + { + "epoch": 2.15, + "learning_rate": 9.529627367135004e-06, + "loss": 1.0752, + "step": 908 + }, + { + "epoch": 2.15, + "learning_rate": 9.517409896151498e-06, + "loss": 0.8232, + "step": 909 + }, + { + "epoch": 2.15, + "learning_rate": 9.505192425167991e-06, + "loss": 0.7904, + "step": 910 + }, + { + "epoch": 2.15, + "learning_rate": 9.492974954184485e-06, + "loss": 1.0862, + "step": 911 + }, + { + "epoch": 2.16, + "learning_rate": 9.480757483200978e-06, + "loss": 0.7623, + "step": 912 + }, + { + "epoch": 2.16, + "learning_rate": 9.468540012217471e-06, + "loss": 0.6963, + "step": 913 + }, + { + "epoch": 2.16, + "learning_rate": 9.456322541233965e-06, + "loss": 0.96, + "step": 914 + }, + { + "epoch": 2.16, + "learning_rate": 9.444105070250458e-06, + "loss": 0.5665, + "step": 915 + }, + { + "epoch": 2.17, + "learning_rate": 9.431887599266952e-06, + "loss": 0.8081, + "step": 916 + }, + { + "epoch": 2.17, + "learning_rate": 9.419670128283445e-06, + "loss": 0.7145, + "step": 917 + }, + { + "epoch": 2.17, + "learning_rate": 9.407452657299938e-06, + "loss": 0.8547, + "step": 918 + }, + { + "epoch": 2.17, + "learning_rate": 9.395235186316434e-06, + "loss": 0.9507, + "step": 919 + }, + { + "epoch": 2.18, + "learning_rate": 9.383017715332927e-06, + "loss": 0.9185, + "step": 920 + }, + { + "epoch": 2.18, + "learning_rate": 9.37080024434942e-06, + "loss": 0.6154, + "step": 921 + }, + { + "epoch": 2.18, + "learning_rate": 9.358582773365914e-06, + "loss": 0.5515, + "step": 922 + }, + { + "epoch": 2.18, + "learning_rate": 9.346365302382407e-06, + "loss": 0.6624, + "step": 923 + }, + { + "epoch": 2.18, + "learning_rate": 9.3341478313989e-06, + "loss": 0.5877, + "step": 924 + }, + { + "epoch": 2.19, + "learning_rate": 9.321930360415394e-06, + "loss": 0.7074, + "step": 925 + }, + { + "epoch": 2.19, + "learning_rate": 9.309712889431889e-06, + "loss": 0.9121, + "step": 926 + }, + { + "epoch": 2.19, + "learning_rate": 9.297495418448382e-06, + "loss": 0.8488, + "step": 927 + }, + { + "epoch": 2.19, + "learning_rate": 9.285277947464876e-06, + "loss": 0.7679, + "step": 928 + }, + { + "epoch": 2.2, + "learning_rate": 9.27306047648137e-06, + "loss": 0.8228, + "step": 929 + }, + { + "epoch": 2.2, + "learning_rate": 9.260843005497863e-06, + "loss": 0.875, + "step": 930 + }, + { + "epoch": 2.2, + "learning_rate": 9.248625534514356e-06, + "loss": 0.6674, + "step": 931 + }, + { + "epoch": 2.2, + "learning_rate": 9.23640806353085e-06, + "loss": 0.6769, + "step": 932 + }, + { + "epoch": 2.21, + "learning_rate": 9.224190592547343e-06, + "loss": 0.9607, + "step": 933 + }, + { + "epoch": 2.21, + "learning_rate": 9.211973121563838e-06, + "loss": 0.8768, + "step": 934 + }, + { + "epoch": 2.21, + "learning_rate": 9.199755650580331e-06, + "loss": 1.0233, + "step": 935 + }, + { + "epoch": 2.21, + "learning_rate": 9.187538179596825e-06, + "loss": 0.928, + "step": 936 + }, + { + "epoch": 2.22, + "learning_rate": 9.175320708613318e-06, + "loss": 0.9588, + "step": 937 + }, + { + "epoch": 2.22, + "learning_rate": 9.163103237629812e-06, + "loss": 0.7672, + "step": 938 + }, + { + "epoch": 2.22, + "learning_rate": 9.150885766646305e-06, + "loss": 0.6834, + "step": 939 + }, + { + "epoch": 2.22, + "learning_rate": 9.138668295662798e-06, + "loss": 0.9839, + "step": 940 + }, + { + "epoch": 2.23, + "learning_rate": 9.126450824679293e-06, + "loss": 0.8132, + "step": 941 + }, + { + "epoch": 2.23, + "learning_rate": 9.114233353695787e-06, + "loss": 0.6736, + "step": 942 + }, + { + "epoch": 2.23, + "learning_rate": 9.10201588271228e-06, + "loss": 0.7579, + "step": 943 + }, + { + "epoch": 2.23, + "learning_rate": 9.089798411728774e-06, + "loss": 0.8888, + "step": 944 + }, + { + "epoch": 2.23, + "learning_rate": 9.077580940745267e-06, + "loss": 0.6649, + "step": 945 + }, + { + "epoch": 2.24, + "learning_rate": 9.06536346976176e-06, + "loss": 0.6923, + "step": 946 + }, + { + "epoch": 2.24, + "learning_rate": 9.053145998778254e-06, + "loss": 0.787, + "step": 947 + }, + { + "epoch": 2.24, + "learning_rate": 9.040928527794747e-06, + "loss": 1.0507, + "step": 948 + }, + { + "epoch": 2.24, + "learning_rate": 9.02871105681124e-06, + "loss": 0.7555, + "step": 949 + }, + { + "epoch": 2.25, + "learning_rate": 9.016493585827734e-06, + "loss": 0.6978, + "step": 950 + }, + { + "epoch": 2.25, + "learning_rate": 9.004276114844227e-06, + "loss": 0.9231, + "step": 951 + }, + { + "epoch": 2.25, + "learning_rate": 8.992058643860721e-06, + "loss": 0.9051, + "step": 952 + }, + { + "epoch": 2.25, + "learning_rate": 8.979841172877214e-06, + "loss": 0.7771, + "step": 953 + }, + { + "epoch": 2.26, + "learning_rate": 8.967623701893708e-06, + "loss": 0.7094, + "step": 954 + }, + { + "epoch": 2.26, + "learning_rate": 8.955406230910201e-06, + "loss": 0.7884, + "step": 955 + }, + { + "epoch": 2.26, + "learning_rate": 8.943188759926696e-06, + "loss": 0.6829, + "step": 956 + }, + { + "epoch": 2.26, + "learning_rate": 8.93097128894319e-06, + "loss": 0.8024, + "step": 957 + }, + { + "epoch": 2.27, + "learning_rate": 8.918753817959683e-06, + "loss": 0.6655, + "step": 958 + }, + { + "epoch": 2.27, + "learning_rate": 8.906536346976176e-06, + "loss": 0.9103, + "step": 959 + }, + { + "epoch": 2.27, + "learning_rate": 8.89431887599267e-06, + "loss": 0.7607, + "step": 960 + }, + { + "epoch": 2.27, + "learning_rate": 8.882101405009163e-06, + "loss": 0.6788, + "step": 961 + }, + { + "epoch": 2.27, + "learning_rate": 8.869883934025657e-06, + "loss": 0.5413, + "step": 962 + }, + { + "epoch": 2.28, + "learning_rate": 8.85766646304215e-06, + "loss": 0.76, + "step": 963 + }, + { + "epoch": 2.28, + "learning_rate": 8.845448992058645e-06, + "loss": 0.7689, + "step": 964 + }, + { + "epoch": 2.28, + "learning_rate": 8.833231521075138e-06, + "loss": 0.8106, + "step": 965 + }, + { + "epoch": 2.28, + "learning_rate": 8.821014050091632e-06, + "loss": 0.8543, + "step": 966 + }, + { + "epoch": 2.29, + "learning_rate": 8.808796579108125e-06, + "loss": 0.7431, + "step": 967 + }, + { + "epoch": 2.29, + "learning_rate": 8.796579108124619e-06, + "loss": 0.812, + "step": 968 + }, + { + "epoch": 2.29, + "learning_rate": 8.784361637141112e-06, + "loss": 0.8223, + "step": 969 + }, + { + "epoch": 2.29, + "learning_rate": 8.772144166157605e-06, + "loss": 0.7146, + "step": 970 + }, + { + "epoch": 2.3, + "learning_rate": 8.7599266951741e-06, + "loss": 0.8914, + "step": 971 + }, + { + "epoch": 2.3, + "learning_rate": 8.747709224190594e-06, + "loss": 0.797, + "step": 972 + }, + { + "epoch": 2.3, + "learning_rate": 8.735491753207087e-06, + "loss": 0.6721, + "step": 973 + }, + { + "epoch": 2.3, + "learning_rate": 8.72327428222358e-06, + "loss": 0.9, + "step": 974 + }, + { + "epoch": 2.31, + "learning_rate": 8.711056811240074e-06, + "loss": 0.67, + "step": 975 + }, + { + "epoch": 2.31, + "learning_rate": 8.698839340256568e-06, + "loss": 0.7884, + "step": 976 + }, + { + "epoch": 2.31, + "learning_rate": 8.686621869273061e-06, + "loss": 0.7194, + "step": 977 + }, + { + "epoch": 2.31, + "learning_rate": 8.674404398289554e-06, + "loss": 1.1244, + "step": 978 + }, + { + "epoch": 2.31, + "learning_rate": 8.66218692730605e-06, + "loss": 0.8656, + "step": 979 + }, + { + "epoch": 2.32, + "learning_rate": 8.649969456322543e-06, + "loss": 0.7089, + "step": 980 + }, + { + "epoch": 2.32, + "learning_rate": 8.637751985339036e-06, + "loss": 0.9729, + "step": 981 + }, + { + "epoch": 2.32, + "learning_rate": 8.62553451435553e-06, + "loss": 0.9135, + "step": 982 + }, + { + "epoch": 2.32, + "learning_rate": 8.613317043372023e-06, + "loss": 0.8561, + "step": 983 + }, + { + "epoch": 2.33, + "learning_rate": 8.601099572388516e-06, + "loss": 1.6011, + "step": 984 + }, + { + "epoch": 2.33, + "learning_rate": 8.58888210140501e-06, + "loss": 0.7282, + "step": 985 + }, + { + "epoch": 2.33, + "learning_rate": 8.576664630421503e-06, + "loss": 0.7882, + "step": 986 + }, + { + "epoch": 2.33, + "learning_rate": 8.564447159437997e-06, + "loss": 0.8559, + "step": 987 + }, + { + "epoch": 2.34, + "learning_rate": 8.55222968845449e-06, + "loss": 0.62, + "step": 988 + }, + { + "epoch": 2.34, + "learning_rate": 8.540012217470983e-06, + "loss": 0.6588, + "step": 989 + }, + { + "epoch": 2.34, + "learning_rate": 8.527794746487477e-06, + "loss": 0.7994, + "step": 990 + }, + { + "epoch": 2.34, + "learning_rate": 8.515577275503972e-06, + "loss": 0.9582, + "step": 991 + }, + { + "epoch": 2.35, + "learning_rate": 8.503359804520465e-06, + "loss": 0.9456, + "step": 992 + }, + { + "epoch": 2.35, + "learning_rate": 8.491142333536959e-06, + "loss": 0.7995, + "step": 993 + }, + { + "epoch": 2.35, + "learning_rate": 8.478924862553452e-06, + "loss": 0.8144, + "step": 994 + }, + { + "epoch": 2.35, + "learning_rate": 8.466707391569946e-06, + "loss": 0.851, + "step": 995 + }, + { + "epoch": 2.36, + "learning_rate": 8.454489920586439e-06, + "loss": 1.0865, + "step": 996 + }, + { + "epoch": 2.36, + "learning_rate": 8.442272449602932e-06, + "loss": 0.8716, + "step": 997 + }, + { + "epoch": 2.36, + "learning_rate": 8.430054978619426e-06, + "loss": 1.687, + "step": 998 + }, + { + "epoch": 2.36, + "learning_rate": 8.41783750763592e-06, + "loss": 0.6964, + "step": 999 + }, + { + "epoch": 2.36, + "learning_rate": 8.405620036652413e-06, + "loss": 1.0146, + "step": 1000 + }, + { + "epoch": 2.37, + "learning_rate": 8.393402565668908e-06, + "loss": 0.6478, + "step": 1001 + }, + { + "epoch": 2.37, + "learning_rate": 8.381185094685401e-06, + "loss": 0.5895, + "step": 1002 + }, + { + "epoch": 2.37, + "learning_rate": 8.368967623701895e-06, + "loss": 0.8438, + "step": 1003 + }, + { + "epoch": 2.37, + "learning_rate": 8.356750152718388e-06, + "loss": 0.7302, + "step": 1004 + }, + { + "epoch": 2.38, + "learning_rate": 8.344532681734881e-06, + "loss": 0.9506, + "step": 1005 + }, + { + "epoch": 2.38, + "learning_rate": 8.332315210751375e-06, + "loss": 1.1109, + "step": 1006 + }, + { + "epoch": 2.38, + "learning_rate": 8.320097739767868e-06, + "loss": 0.74, + "step": 1007 + }, + { + "epoch": 2.38, + "learning_rate": 8.307880268784362e-06, + "loss": 0.8107, + "step": 1008 + }, + { + "epoch": 2.39, + "learning_rate": 8.295662797800857e-06, + "loss": 0.7339, + "step": 1009 + }, + { + "epoch": 2.39, + "learning_rate": 8.28344532681735e-06, + "loss": 0.6967, + "step": 1010 + }, + { + "epoch": 2.39, + "learning_rate": 8.271227855833843e-06, + "loss": 0.6767, + "step": 1011 + }, + { + "epoch": 2.39, + "learning_rate": 8.259010384850337e-06, + "loss": 0.7822, + "step": 1012 + }, + { + "epoch": 2.4, + "learning_rate": 8.24679291386683e-06, + "loss": 0.7417, + "step": 1013 + }, + { + "epoch": 2.4, + "learning_rate": 8.234575442883324e-06, + "loss": 0.6725, + "step": 1014 + }, + { + "epoch": 2.4, + "learning_rate": 8.222357971899817e-06, + "loss": 0.7013, + "step": 1015 + }, + { + "epoch": 2.4, + "learning_rate": 8.210140500916312e-06, + "loss": 0.8832, + "step": 1016 + }, + { + "epoch": 2.4, + "learning_rate": 8.197923029932806e-06, + "loss": 0.9696, + "step": 1017 + }, + { + "epoch": 2.41, + "learning_rate": 8.185705558949299e-06, + "loss": 1.0827, + "step": 1018 + }, + { + "epoch": 2.41, + "learning_rate": 8.173488087965792e-06, + "loss": 0.78, + "step": 1019 + }, + { + "epoch": 2.41, + "learning_rate": 8.161270616982286e-06, + "loss": 0.9643, + "step": 1020 + }, + { + "epoch": 2.41, + "learning_rate": 8.149053145998779e-06, + "loss": 0.7627, + "step": 1021 + }, + { + "epoch": 2.42, + "learning_rate": 8.136835675015273e-06, + "loss": 0.6634, + "step": 1022 + }, + { + "epoch": 2.42, + "learning_rate": 8.124618204031766e-06, + "loss": 0.8787, + "step": 1023 + }, + { + "epoch": 2.42, + "learning_rate": 8.11240073304826e-06, + "loss": 0.9677, + "step": 1024 + }, + { + "epoch": 2.42, + "learning_rate": 8.100183262064754e-06, + "loss": 0.8346, + "step": 1025 + }, + { + "epoch": 2.43, + "learning_rate": 8.087965791081248e-06, + "loss": 0.824, + "step": 1026 + }, + { + "epoch": 2.43, + "learning_rate": 8.075748320097741e-06, + "loss": 0.7037, + "step": 1027 + }, + { + "epoch": 2.43, + "learning_rate": 8.063530849114235e-06, + "loss": 0.7988, + "step": 1028 + }, + { + "epoch": 2.43, + "learning_rate": 8.051313378130728e-06, + "loss": 0.7762, + "step": 1029 + }, + { + "epoch": 2.44, + "learning_rate": 8.039095907147221e-06, + "loss": 0.9513, + "step": 1030 + }, + { + "epoch": 2.44, + "learning_rate": 8.026878436163715e-06, + "loss": 0.6713, + "step": 1031 + }, + { + "epoch": 2.44, + "learning_rate": 8.014660965180208e-06, + "loss": 0.6979, + "step": 1032 + }, + { + "epoch": 2.44, + "learning_rate": 8.002443494196702e-06, + "loss": 1.0106, + "step": 1033 + }, + { + "epoch": 2.44, + "learning_rate": 7.990226023213195e-06, + "loss": 0.756, + "step": 1034 + }, + { + "epoch": 2.45, + "learning_rate": 7.978008552229688e-06, + "loss": 0.9929, + "step": 1035 + }, + { + "epoch": 2.45, + "learning_rate": 7.965791081246182e-06, + "loss": 0.8362, + "step": 1036 + }, + { + "epoch": 2.45, + "learning_rate": 7.953573610262675e-06, + "loss": 0.9016, + "step": 1037 + }, + { + "epoch": 2.45, + "learning_rate": 7.941356139279169e-06, + "loss": 0.7216, + "step": 1038 + }, + { + "epoch": 2.46, + "learning_rate": 7.929138668295664e-06, + "loss": 0.8508, + "step": 1039 + }, + { + "epoch": 2.46, + "learning_rate": 7.916921197312157e-06, + "loss": 0.5999, + "step": 1040 + }, + { + "epoch": 2.46, + "learning_rate": 7.90470372632865e-06, + "loss": 0.64, + "step": 1041 + }, + { + "epoch": 2.46, + "learning_rate": 7.892486255345144e-06, + "loss": 1.0262, + "step": 1042 + }, + { + "epoch": 2.47, + "learning_rate": 7.880268784361637e-06, + "loss": 0.7194, + "step": 1043 + }, + { + "epoch": 2.47, + "learning_rate": 7.86805131337813e-06, + "loss": 0.8673, + "step": 1044 + }, + { + "epoch": 2.47, + "learning_rate": 7.855833842394624e-06, + "loss": 0.9482, + "step": 1045 + }, + { + "epoch": 2.47, + "learning_rate": 7.84361637141112e-06, + "loss": 0.9424, + "step": 1046 + }, + { + "epoch": 2.48, + "learning_rate": 7.831398900427613e-06, + "loss": 0.8499, + "step": 1047 + }, + { + "epoch": 2.48, + "learning_rate": 7.819181429444106e-06, + "loss": 0.7288, + "step": 1048 + }, + { + "epoch": 2.48, + "learning_rate": 7.8069639584606e-06, + "loss": 0.6901, + "step": 1049 + }, + { + "epoch": 2.48, + "learning_rate": 7.794746487477093e-06, + "loss": 0.6685, + "step": 1050 + }, + { + "epoch": 2.49, + "learning_rate": 7.782529016493586e-06, + "loss": 1.138, + "step": 1051 + }, + { + "epoch": 2.49, + "learning_rate": 7.77031154551008e-06, + "loss": 0.8116, + "step": 1052 + }, + { + "epoch": 2.49, + "learning_rate": 7.758094074526573e-06, + "loss": 0.8918, + "step": 1053 + }, + { + "epoch": 2.49, + "learning_rate": 7.745876603543068e-06, + "loss": 0.8731, + "step": 1054 + }, + { + "epoch": 2.49, + "learning_rate": 7.733659132559562e-06, + "loss": 0.7784, + "step": 1055 + }, + { + "epoch": 2.5, + "learning_rate": 7.721441661576055e-06, + "loss": 0.6612, + "step": 1056 + }, + { + "epoch": 2.5, + "learning_rate": 7.709224190592548e-06, + "loss": 0.8031, + "step": 1057 + }, + { + "epoch": 2.5, + "learning_rate": 7.697006719609042e-06, + "loss": 0.6725, + "step": 1058 + }, + { + "epoch": 2.5, + "learning_rate": 7.684789248625535e-06, + "loss": 0.8463, + "step": 1059 + }, + { + "epoch": 2.51, + "learning_rate": 7.672571777642029e-06, + "loss": 0.77, + "step": 1060 + }, + { + "epoch": 2.51, + "learning_rate": 7.660354306658524e-06, + "loss": 0.711, + "step": 1061 + }, + { + "epoch": 2.51, + "learning_rate": 7.648136835675017e-06, + "loss": 1.0855, + "step": 1062 + }, + { + "epoch": 2.51, + "learning_rate": 7.63591936469151e-06, + "loss": 0.85, + "step": 1063 + }, + { + "epoch": 2.52, + "learning_rate": 7.623701893708003e-06, + "loss": 0.6199, + "step": 1064 + }, + { + "epoch": 2.52, + "learning_rate": 7.611484422724496e-06, + "loss": 0.6982, + "step": 1065 + }, + { + "epoch": 2.52, + "learning_rate": 7.59926695174099e-06, + "loss": 0.9263, + "step": 1066 + }, + { + "epoch": 2.52, + "learning_rate": 7.587049480757483e-06, + "loss": 0.713, + "step": 1067 + }, + { + "epoch": 2.53, + "learning_rate": 7.574832009773977e-06, + "loss": 1.5352, + "step": 1068 + }, + { + "epoch": 2.53, + "learning_rate": 7.562614538790472e-06, + "loss": 0.6505, + "step": 1069 + }, + { + "epoch": 2.53, + "learning_rate": 7.550397067806965e-06, + "loss": 0.9456, + "step": 1070 + }, + { + "epoch": 2.53, + "learning_rate": 7.5381795968234585e-06, + "loss": 0.6478, + "step": 1071 + }, + { + "epoch": 2.53, + "learning_rate": 7.525962125839952e-06, + "loss": 0.5866, + "step": 1072 + }, + { + "epoch": 2.54, + "learning_rate": 7.513744654856445e-06, + "loss": 0.6796, + "step": 1073 + }, + { + "epoch": 2.54, + "learning_rate": 7.501527183872939e-06, + "loss": 0.8093, + "step": 1074 + }, + { + "epoch": 2.54, + "learning_rate": 7.489309712889432e-06, + "loss": 0.8077, + "step": 1075 + }, + { + "epoch": 2.54, + "learning_rate": 7.4770922419059255e-06, + "loss": 0.7468, + "step": 1076 + }, + { + "epoch": 2.55, + "learning_rate": 7.46487477092242e-06, + "loss": 0.9059, + "step": 1077 + }, + { + "epoch": 2.55, + "learning_rate": 7.452657299938913e-06, + "loss": 1.0124, + "step": 1078 + }, + { + "epoch": 2.55, + "learning_rate": 7.440439828955407e-06, + "loss": 0.8665, + "step": 1079 + }, + { + "epoch": 2.55, + "learning_rate": 7.4282223579719e-06, + "loss": 0.7768, + "step": 1080 + }, + { + "epoch": 2.56, + "learning_rate": 7.416004886988394e-06, + "loss": 0.83, + "step": 1081 + }, + { + "epoch": 2.56, + "learning_rate": 7.403787416004888e-06, + "loss": 0.8671, + "step": 1082 + }, + { + "epoch": 2.56, + "learning_rate": 7.391569945021381e-06, + "loss": 0.9012, + "step": 1083 + }, + { + "epoch": 2.56, + "learning_rate": 7.379352474037875e-06, + "loss": 0.9677, + "step": 1084 + }, + { + "epoch": 2.57, + "learning_rate": 7.367135003054369e-06, + "loss": 0.9414, + "step": 1085 + }, + { + "epoch": 2.57, + "learning_rate": 7.354917532070862e-06, + "loss": 1.2184, + "step": 1086 + }, + { + "epoch": 2.57, + "learning_rate": 7.3427000610873555e-06, + "loss": 0.9328, + "step": 1087 + }, + { + "epoch": 2.57, + "learning_rate": 7.330482590103849e-06, + "loss": 1.4059, + "step": 1088 + }, + { + "epoch": 2.58, + "learning_rate": 7.318265119120342e-06, + "loss": 0.6708, + "step": 1089 + }, + { + "epoch": 2.58, + "learning_rate": 7.306047648136836e-06, + "loss": 0.894, + "step": 1090 + }, + { + "epoch": 2.58, + "learning_rate": 7.293830177153329e-06, + "loss": 0.9659, + "step": 1091 + }, + { + "epoch": 2.58, + "learning_rate": 7.281612706169824e-06, + "loss": 1.1613, + "step": 1092 + }, + { + "epoch": 2.58, + "learning_rate": 7.269395235186318e-06, + "loss": 0.7924, + "step": 1093 + }, + { + "epoch": 2.59, + "learning_rate": 7.257177764202811e-06, + "loss": 0.7787, + "step": 1094 + }, + { + "epoch": 2.59, + "learning_rate": 7.244960293219304e-06, + "loss": 0.7545, + "step": 1095 + }, + { + "epoch": 2.59, + "learning_rate": 7.232742822235798e-06, + "loss": 0.783, + "step": 1096 + }, + { + "epoch": 2.59, + "learning_rate": 7.220525351252291e-06, + "loss": 0.7565, + "step": 1097 + }, + { + "epoch": 2.6, + "learning_rate": 7.208307880268785e-06, + "loss": 0.8512, + "step": 1098 + }, + { + "epoch": 2.6, + "learning_rate": 7.196090409285279e-06, + "loss": 0.9638, + "step": 1099 + }, + { + "epoch": 2.6, + "learning_rate": 7.183872938301772e-06, + "loss": 0.8976, + "step": 1100 + }, + { + "epoch": 2.6, + "learning_rate": 7.171655467318266e-06, + "loss": 0.9743, + "step": 1101 + }, + { + "epoch": 2.61, + "learning_rate": 7.159437996334759e-06, + "loss": 0.8234, + "step": 1102 + }, + { + "epoch": 2.61, + "learning_rate": 7.1472205253512525e-06, + "loss": 0.9169, + "step": 1103 + }, + { + "epoch": 2.61, + "learning_rate": 7.135003054367746e-06, + "loss": 0.9243, + "step": 1104 + }, + { + "epoch": 2.61, + "learning_rate": 7.122785583384239e-06, + "loss": 0.8588, + "step": 1105 + }, + { + "epoch": 2.62, + "learning_rate": 7.110568112400733e-06, + "loss": 1.0318, + "step": 1106 + }, + { + "epoch": 2.62, + "learning_rate": 7.098350641417228e-06, + "loss": 0.7434, + "step": 1107 + }, + { + "epoch": 2.62, + "learning_rate": 7.086133170433721e-06, + "loss": 0.6608, + "step": 1108 + }, + { + "epoch": 2.62, + "learning_rate": 7.0739156994502146e-06, + "loss": 1.1173, + "step": 1109 + }, + { + "epoch": 2.62, + "learning_rate": 7.061698228466708e-06, + "loss": 0.7802, + "step": 1110 + }, + { + "epoch": 2.63, + "learning_rate": 7.049480757483201e-06, + "loss": 0.7761, + "step": 1111 + }, + { + "epoch": 2.63, + "learning_rate": 7.037263286499695e-06, + "loss": 0.8065, + "step": 1112 + }, + { + "epoch": 2.63, + "learning_rate": 7.025045815516188e-06, + "loss": 0.631, + "step": 1113 + }, + { + "epoch": 2.63, + "learning_rate": 7.012828344532682e-06, + "loss": 0.654, + "step": 1114 + }, + { + "epoch": 2.64, + "learning_rate": 7.000610873549176e-06, + "loss": 0.7289, + "step": 1115 + }, + { + "epoch": 2.64, + "learning_rate": 6.98839340256567e-06, + "loss": 0.7963, + "step": 1116 + }, + { + "epoch": 2.64, + "learning_rate": 6.9761759315821635e-06, + "loss": 0.7764, + "step": 1117 + }, + { + "epoch": 2.64, + "learning_rate": 6.963958460598657e-06, + "loss": 0.7577, + "step": 1118 + }, + { + "epoch": 2.65, + "learning_rate": 6.95174098961515e-06, + "loss": 0.9212, + "step": 1119 + }, + { + "epoch": 2.65, + "learning_rate": 6.939523518631644e-06, + "loss": 0.5796, + "step": 1120 + }, + { + "epoch": 2.65, + "learning_rate": 6.927306047648137e-06, + "loss": 0.7637, + "step": 1121 + }, + { + "epoch": 2.65, + "learning_rate": 6.915088576664631e-06, + "loss": 0.8283, + "step": 1122 + }, + { + "epoch": 2.66, + "learning_rate": 6.902871105681125e-06, + "loss": 0.8477, + "step": 1123 + }, + { + "epoch": 2.66, + "learning_rate": 6.890653634697618e-06, + "loss": 0.7342, + "step": 1124 + }, + { + "epoch": 2.66, + "learning_rate": 6.8784361637141115e-06, + "loss": 0.9373, + "step": 1125 + }, + { + "epoch": 2.66, + "learning_rate": 6.866218692730605e-06, + "loss": 0.7735, + "step": 1126 + }, + { + "epoch": 2.66, + "learning_rate": 6.854001221747098e-06, + "loss": 0.691, + "step": 1127 + }, + { + "epoch": 2.67, + "learning_rate": 6.841783750763592e-06, + "loss": 0.6959, + "step": 1128 + }, + { + "epoch": 2.67, + "learning_rate": 6.829566279780087e-06, + "loss": 0.8995, + "step": 1129 + }, + { + "epoch": 2.67, + "learning_rate": 6.81734880879658e-06, + "loss": 0.8236, + "step": 1130 + }, + { + "epoch": 2.67, + "learning_rate": 6.805131337813074e-06, + "loss": 1.0626, + "step": 1131 + }, + { + "epoch": 2.68, + "learning_rate": 6.792913866829567e-06, + "loss": 0.8178, + "step": 1132 + }, + { + "epoch": 2.68, + "learning_rate": 6.7806963958460604e-06, + "loss": 0.6962, + "step": 1133 + }, + { + "epoch": 2.68, + "learning_rate": 6.768478924862554e-06, + "loss": 0.7438, + "step": 1134 + }, + { + "epoch": 2.68, + "learning_rate": 6.756261453879047e-06, + "loss": 0.6812, + "step": 1135 + }, + { + "epoch": 2.69, + "learning_rate": 6.744043982895541e-06, + "loss": 0.7409, + "step": 1136 + }, + { + "epoch": 2.69, + "learning_rate": 6.731826511912035e-06, + "loss": 0.7953, + "step": 1137 + }, + { + "epoch": 2.69, + "learning_rate": 6.719609040928528e-06, + "loss": 0.8024, + "step": 1138 + }, + { + "epoch": 2.69, + "learning_rate": 6.707391569945022e-06, + "loss": 0.887, + "step": 1139 + }, + { + "epoch": 2.7, + "learning_rate": 6.695174098961515e-06, + "loss": 0.6614, + "step": 1140 + }, + { + "epoch": 2.7, + "learning_rate": 6.682956627978009e-06, + "loss": 0.8199, + "step": 1141 + }, + { + "epoch": 2.7, + "learning_rate": 6.670739156994503e-06, + "loss": 0.6941, + "step": 1142 + }, + { + "epoch": 2.7, + "learning_rate": 6.658521686010996e-06, + "loss": 0.7043, + "step": 1143 + }, + { + "epoch": 2.71, + "learning_rate": 6.64630421502749e-06, + "loss": 0.8566, + "step": 1144 + }, + { + "epoch": 2.71, + "learning_rate": 6.634086744043984e-06, + "loss": 0.8391, + "step": 1145 + }, + { + "epoch": 2.71, + "learning_rate": 6.621869273060477e-06, + "loss": 0.8533, + "step": 1146 + }, + { + "epoch": 2.71, + "learning_rate": 6.609651802076971e-06, + "loss": 1.2669, + "step": 1147 + }, + { + "epoch": 2.71, + "learning_rate": 6.597434331093464e-06, + "loss": 0.7494, + "step": 1148 + }, + { + "epoch": 2.72, + "learning_rate": 6.585216860109957e-06, + "loss": 0.8497, + "step": 1149 + }, + { + "epoch": 2.72, + "learning_rate": 6.572999389126451e-06, + "loss": 0.8414, + "step": 1150 + }, + { + "epoch": 2.72, + "learning_rate": 6.560781918142944e-06, + "loss": 0.7205, + "step": 1151 + }, + { + "epoch": 2.72, + "learning_rate": 6.548564447159439e-06, + "loss": 0.9299, + "step": 1152 + }, + { + "epoch": 2.73, + "learning_rate": 6.536346976175933e-06, + "loss": 0.83, + "step": 1153 + }, + { + "epoch": 2.73, + "learning_rate": 6.524129505192426e-06, + "loss": 0.7246, + "step": 1154 + }, + { + "epoch": 2.73, + "learning_rate": 6.5119120342089195e-06, + "loss": 0.8512, + "step": 1155 + }, + { + "epoch": 2.73, + "learning_rate": 6.499694563225413e-06, + "loss": 0.8825, + "step": 1156 + }, + { + "epoch": 2.74, + "learning_rate": 6.487477092241906e-06, + "loss": 0.7143, + "step": 1157 + }, + { + "epoch": 2.74, + "learning_rate": 6.4752596212584e-06, + "loss": 1.5558, + "step": 1158 + }, + { + "epoch": 2.74, + "learning_rate": 6.463042150274894e-06, + "loss": 0.9596, + "step": 1159 + }, + { + "epoch": 2.74, + "learning_rate": 6.450824679291387e-06, + "loss": 0.9285, + "step": 1160 + }, + { + "epoch": 2.75, + "learning_rate": 6.438607208307881e-06, + "loss": 0.8805, + "step": 1161 + }, + { + "epoch": 2.75, + "learning_rate": 6.426389737324374e-06, + "loss": 0.9199, + "step": 1162 + }, + { + "epoch": 2.75, + "learning_rate": 6.4141722663408676e-06, + "loss": 0.743, + "step": 1163 + }, + { + "epoch": 2.75, + "learning_rate": 6.401954795357361e-06, + "loss": 0.7258, + "step": 1164 + }, + { + "epoch": 2.75, + "learning_rate": 6.389737324373854e-06, + "loss": 0.718, + "step": 1165 + }, + { + "epoch": 2.76, + "learning_rate": 6.377519853390348e-06, + "loss": 0.7674, + "step": 1166 + }, + { + "epoch": 2.76, + "learning_rate": 6.365302382406843e-06, + "loss": 0.7796, + "step": 1167 + }, + { + "epoch": 2.76, + "learning_rate": 6.353084911423336e-06, + "loss": 0.787, + "step": 1168 + }, + { + "epoch": 2.76, + "learning_rate": 6.34086744043983e-06, + "loss": 0.9145, + "step": 1169 + }, + { + "epoch": 2.77, + "learning_rate": 6.328649969456323e-06, + "loss": 0.7143, + "step": 1170 + }, + { + "epoch": 2.77, + "learning_rate": 6.3164324984728165e-06, + "loss": 0.657, + "step": 1171 + }, + { + "epoch": 2.77, + "learning_rate": 6.30421502748931e-06, + "loss": 1.8791, + "step": 1172 + }, + { + "epoch": 2.77, + "learning_rate": 6.291997556505803e-06, + "loss": 0.8549, + "step": 1173 + }, + { + "epoch": 2.78, + "learning_rate": 6.2797800855222975e-06, + "loss": 0.7444, + "step": 1174 + }, + { + "epoch": 2.78, + "learning_rate": 6.267562614538791e-06, + "loss": 0.6989, + "step": 1175 + }, + { + "epoch": 2.78, + "learning_rate": 6.255345143555285e-06, + "loss": 0.8571, + "step": 1176 + }, + { + "epoch": 2.78, + "learning_rate": 6.243127672571779e-06, + "loss": 0.9245, + "step": 1177 + }, + { + "epoch": 2.79, + "learning_rate": 6.230910201588272e-06, + "loss": 0.8139, + "step": 1178 + }, + { + "epoch": 2.79, + "learning_rate": 6.218692730604765e-06, + "loss": 0.956, + "step": 1179 + }, + { + "epoch": 2.79, + "learning_rate": 6.206475259621259e-06, + "loss": 0.9109, + "step": 1180 + }, + { + "epoch": 2.79, + "learning_rate": 6.194257788637752e-06, + "loss": 0.872, + "step": 1181 + }, + { + "epoch": 2.79, + "learning_rate": 6.1820403176542464e-06, + "loss": 0.8997, + "step": 1182 + }, + { + "epoch": 2.8, + "learning_rate": 6.16982284667074e-06, + "loss": 0.6255, + "step": 1183 + }, + { + "epoch": 2.8, + "learning_rate": 6.157605375687233e-06, + "loss": 0.858, + "step": 1184 + }, + { + "epoch": 2.8, + "learning_rate": 6.145387904703727e-06, + "loss": 0.5873, + "step": 1185 + }, + { + "epoch": 2.8, + "learning_rate": 6.13317043372022e-06, + "loss": 1.1229, + "step": 1186 + }, + { + "epoch": 2.81, + "learning_rate": 6.1209529627367134e-06, + "loss": 0.7146, + "step": 1187 + }, + { + "epoch": 2.81, + "learning_rate": 6.108735491753207e-06, + "loss": 0.7202, + "step": 1188 + }, + { + "epoch": 2.81, + "learning_rate": 6.096518020769702e-06, + "loss": 0.6488, + "step": 1189 + }, + { + "epoch": 2.81, + "learning_rate": 6.084300549786195e-06, + "loss": 0.7713, + "step": 1190 + }, + { + "epoch": 2.82, + "learning_rate": 6.072083078802689e-06, + "loss": 0.7512, + "step": 1191 + }, + { + "epoch": 2.82, + "learning_rate": 6.059865607819182e-06, + "loss": 0.9244, + "step": 1192 + }, + { + "epoch": 2.82, + "learning_rate": 6.0476481368356755e-06, + "loss": 0.86, + "step": 1193 + }, + { + "epoch": 2.82, + "learning_rate": 6.035430665852169e-06, + "loss": 0.9469, + "step": 1194 + }, + { + "epoch": 2.83, + "learning_rate": 6.023213194868662e-06, + "loss": 0.7904, + "step": 1195 + }, + { + "epoch": 2.83, + "learning_rate": 6.010995723885156e-06, + "loss": 0.7472, + "step": 1196 + }, + { + "epoch": 2.83, + "learning_rate": 5.99877825290165e-06, + "loss": 0.9277, + "step": 1197 + }, + { + "epoch": 2.83, + "learning_rate": 5.986560781918143e-06, + "loss": 0.6782, + "step": 1198 + }, + { + "epoch": 2.84, + "learning_rate": 5.974343310934637e-06, + "loss": 0.8162, + "step": 1199 + }, + { + "epoch": 2.84, + "learning_rate": 5.96212583995113e-06, + "loss": 0.7205, + "step": 1200 + }, + { + "epoch": 2.84, + "learning_rate": 5.949908368967624e-06, + "loss": 0.7009, + "step": 1201 + }, + { + "epoch": 2.84, + "learning_rate": 5.937690897984118e-06, + "loss": 0.8698, + "step": 1202 + }, + { + "epoch": 2.84, + "learning_rate": 5.925473427000611e-06, + "loss": 0.7732, + "step": 1203 + }, + { + "epoch": 2.85, + "learning_rate": 5.9132559560171055e-06, + "loss": 0.9915, + "step": 1204 + }, + { + "epoch": 2.85, + "learning_rate": 5.901038485033599e-06, + "loss": 0.8665, + "step": 1205 + }, + { + "epoch": 2.85, + "learning_rate": 5.888821014050092e-06, + "loss": 0.8814, + "step": 1206 + }, + { + "epoch": 2.85, + "learning_rate": 5.876603543066586e-06, + "loss": 0.7158, + "step": 1207 + }, + { + "epoch": 2.86, + "learning_rate": 5.864386072083079e-06, + "loss": 0.7831, + "step": 1208 + }, + { + "epoch": 2.86, + "learning_rate": 5.8521686010995725e-06, + "loss": 0.77, + "step": 1209 + }, + { + "epoch": 2.86, + "learning_rate": 5.839951130116066e-06, + "loss": 0.5111, + "step": 1210 + }, + { + "epoch": 2.86, + "learning_rate": 5.827733659132559e-06, + "loss": 0.7871, + "step": 1211 + }, + { + "epoch": 2.87, + "learning_rate": 5.815516188149054e-06, + "loss": 0.6702, + "step": 1212 + }, + { + "epoch": 2.87, + "learning_rate": 5.803298717165548e-06, + "loss": 1.0326, + "step": 1213 + }, + { + "epoch": 2.87, + "learning_rate": 5.791081246182041e-06, + "loss": 0.7943, + "step": 1214 + }, + { + "epoch": 2.87, + "learning_rate": 5.778863775198535e-06, + "loss": 0.8289, + "step": 1215 + }, + { + "epoch": 2.88, + "learning_rate": 5.766646304215028e-06, + "loss": 0.7806, + "step": 1216 + }, + { + "epoch": 2.88, + "learning_rate": 5.754428833231521e-06, + "loss": 0.9023, + "step": 1217 + }, + { + "epoch": 2.88, + "learning_rate": 5.742211362248015e-06, + "loss": 0.8285, + "step": 1218 + }, + { + "epoch": 2.88, + "learning_rate": 5.729993891264509e-06, + "loss": 0.9523, + "step": 1219 + }, + { + "epoch": 2.88, + "learning_rate": 5.7177764202810025e-06, + "loss": 0.6429, + "step": 1220 + }, + { + "epoch": 2.89, + "learning_rate": 5.705558949297496e-06, + "loss": 0.9859, + "step": 1221 + }, + { + "epoch": 2.89, + "learning_rate": 5.693341478313989e-06, + "loss": 0.651, + "step": 1222 + }, + { + "epoch": 2.89, + "learning_rate": 5.681124007330483e-06, + "loss": 0.7305, + "step": 1223 + }, + { + "epoch": 2.89, + "learning_rate": 5.668906536346976e-06, + "loss": 0.8412, + "step": 1224 + }, + { + "epoch": 2.9, + "learning_rate": 5.6566890653634695e-06, + "loss": 0.6617, + "step": 1225 + }, + { + "epoch": 2.9, + "learning_rate": 5.644471594379963e-06, + "loss": 0.7505, + "step": 1226 + }, + { + "epoch": 2.9, + "learning_rate": 5.632254123396458e-06, + "loss": 0.7157, + "step": 1227 + }, + { + "epoch": 2.9, + "learning_rate": 5.620036652412951e-06, + "loss": 0.921, + "step": 1228 + }, + { + "epoch": 2.91, + "learning_rate": 5.607819181429445e-06, + "loss": 0.8239, + "step": 1229 + }, + { + "epoch": 2.91, + "learning_rate": 5.595601710445938e-06, + "loss": 0.8042, + "step": 1230 + }, + { + "epoch": 2.91, + "learning_rate": 5.583384239462432e-06, + "loss": 0.7872, + "step": 1231 + }, + { + "epoch": 2.91, + "learning_rate": 5.571166768478925e-06, + "loss": 0.681, + "step": 1232 + }, + { + "epoch": 2.92, + "learning_rate": 5.558949297495418e-06, + "loss": 1.2147, + "step": 1233 + }, + { + "epoch": 2.92, + "learning_rate": 5.546731826511913e-06, + "loss": 0.912, + "step": 1234 + }, + { + "epoch": 2.92, + "learning_rate": 5.534514355528406e-06, + "loss": 0.7043, + "step": 1235 + }, + { + "epoch": 2.92, + "learning_rate": 5.5222968845449e-06, + "loss": 0.7897, + "step": 1236 + }, + { + "epoch": 2.92, + "learning_rate": 5.510079413561394e-06, + "loss": 0.8611, + "step": 1237 + }, + { + "epoch": 2.93, + "learning_rate": 5.497861942577887e-06, + "loss": 0.8145, + "step": 1238 + }, + { + "epoch": 2.93, + "learning_rate": 5.4856444715943805e-06, + "loss": 0.8213, + "step": 1239 + }, + { + "epoch": 2.93, + "learning_rate": 5.473427000610874e-06, + "loss": 0.7965, + "step": 1240 + }, + { + "epoch": 2.93, + "learning_rate": 5.461209529627367e-06, + "loss": 0.9655, + "step": 1241 + }, + { + "epoch": 2.94, + "learning_rate": 5.4489920586438615e-06, + "loss": 0.9367, + "step": 1242 + }, + { + "epoch": 2.94, + "learning_rate": 5.436774587660355e-06, + "loss": 0.8931, + "step": 1243 + }, + { + "epoch": 2.94, + "learning_rate": 5.424557116676848e-06, + "loss": 0.9453, + "step": 1244 + }, + { + "epoch": 2.94, + "learning_rate": 5.412339645693342e-06, + "loss": 1.0025, + "step": 1245 + }, + { + "epoch": 2.95, + "learning_rate": 5.400122174709835e-06, + "loss": 0.8127, + "step": 1246 + }, + { + "epoch": 2.95, + "learning_rate": 5.3879047037263286e-06, + "loss": 1.0161, + "step": 1247 + }, + { + "epoch": 2.95, + "learning_rate": 5.375687232742822e-06, + "loss": 0.8223, + "step": 1248 + }, + { + "epoch": 2.95, + "learning_rate": 5.363469761759317e-06, + "loss": 0.6127, + "step": 1249 + }, + { + "epoch": 2.96, + "learning_rate": 5.3512522907758105e-06, + "loss": 0.5526, + "step": 1250 + }, + { + "epoch": 2.96, + "learning_rate": 5.339034819792304e-06, + "loss": 0.7488, + "step": 1251 + }, + { + "epoch": 2.96, + "learning_rate": 5.326817348808797e-06, + "loss": 0.7872, + "step": 1252 + }, + { + "epoch": 2.96, + "learning_rate": 5.314599877825291e-06, + "loss": 0.8655, + "step": 1253 + }, + { + "epoch": 2.97, + "learning_rate": 5.302382406841784e-06, + "loss": 0.878, + "step": 1254 + }, + { + "epoch": 2.97, + "learning_rate": 5.2901649358582775e-06, + "loss": 0.9551, + "step": 1255 + }, + { + "epoch": 2.97, + "learning_rate": 5.277947464874771e-06, + "loss": 0.7852, + "step": 1256 + }, + { + "epoch": 2.97, + "learning_rate": 5.265729993891265e-06, + "loss": 0.5514, + "step": 1257 + }, + { + "epoch": 2.97, + "learning_rate": 5.2535125229077585e-06, + "loss": 0.8453, + "step": 1258 + }, + { + "epoch": 2.98, + "learning_rate": 5.241295051924252e-06, + "loss": 0.837, + "step": 1259 + }, + { + "epoch": 2.98, + "learning_rate": 5.229077580940745e-06, + "loss": 0.8556, + "step": 1260 + }, + { + "epoch": 2.98, + "learning_rate": 5.216860109957239e-06, + "loss": 0.927, + "step": 1261 + }, + { + "epoch": 2.98, + "learning_rate": 5.204642638973733e-06, + "loss": 0.7956, + "step": 1262 + }, + { + "epoch": 2.99, + "learning_rate": 5.192425167990226e-06, + "loss": 0.7326, + "step": 1263 + }, + { + "epoch": 2.99, + "learning_rate": 5.180207697006721e-06, + "loss": 0.7901, + "step": 1264 + }, + { + "epoch": 2.99, + "learning_rate": 5.167990226023214e-06, + "loss": 0.9212, + "step": 1265 + }, + { + "epoch": 2.99, + "learning_rate": 5.155772755039707e-06, + "loss": 1.024, + "step": 1266 + }, + { + "epoch": 3.0, + "learning_rate": 5.143555284056201e-06, + "loss": 0.4996, + "step": 1267 + }, + { + "epoch": 3.0, + "learning_rate": 5.131337813072694e-06, + "loss": 0.7015, + "step": 1268 + } + ], + "logging_steps": 1.0, + "max_steps": 1688, + "num_input_tokens_seen": 0, + "num_train_epochs": 4, + "save_steps": 500, + "total_flos": 6.092207610386186e+17, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +}