diff --git "a/finetuned_ckpt/trainer_state.json" "b/finetuned_ckpt/trainer_state.json" new file mode 100644--- /dev/null +++ "b/finetuned_ckpt/trainer_state.json" @@ -0,0 +1,98818 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 4.999240929102778, + "eval_steps": 500, + "global_step": 16465, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 1.818181818181818e-06, + "loss": 1.0437, + "step": 1 + }, + { + "epoch": 0.0, + "learning_rate": 3.636363636363636e-06, + "loss": 1.3714, + "step": 2 + }, + { + "epoch": 0.0, + "learning_rate": 5.454545454545454e-06, + "loss": 1.3415, + "step": 3 + }, + { + "epoch": 0.0, + "learning_rate": 7.272727272727272e-06, + "loss": 1.0657, + "step": 4 + }, + { + "epoch": 0.0, + "learning_rate": 9.090909090909091e-06, + "loss": 1.0078, + "step": 5 + }, + { + "epoch": 0.0, + "learning_rate": 1.0909090909090907e-05, + "loss": 1.0692, + "step": 6 + }, + { + "epoch": 0.0, + "learning_rate": 1.2727272727272727e-05, + "loss": 1.137, + "step": 7 + }, + { + "epoch": 0.0, + "learning_rate": 1.4545454545454545e-05, + "loss": 1.1838, + "step": 8 + }, + { + "epoch": 0.0, + "learning_rate": 1.6363636363636363e-05, + "loss": 0.9552, + "step": 9 + }, + { + "epoch": 0.0, + "learning_rate": 1.8181818181818182e-05, + "loss": 1.2488, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 1.9999999999999998e-05, + "loss": 1.1938, + "step": 11 + }, + { + "epoch": 0.0, + "learning_rate": 2.1818181818181814e-05, + "loss": 0.9991, + "step": 12 + }, + { + "epoch": 0.0, + "learning_rate": 2.3636363636363634e-05, + "loss": 0.8313, + "step": 13 + }, + { + "epoch": 0.0, + "learning_rate": 2.5454545454545454e-05, + "loss": 1.1667, + "step": 14 + }, + { + "epoch": 0.0, + "learning_rate": 2.727272727272727e-05, + "loss": 1.1672, + "step": 15 + }, + { + "epoch": 0.0, + "learning_rate": 2.909090909090909e-05, + "loss": 1.0052, + "step": 16 + }, + { + "epoch": 0.01, + "learning_rate": 3.090909090909091e-05, + "loss": 0.9519, + "step": 17 + }, + { + "epoch": 0.01, + "learning_rate": 3.2727272727272725e-05, + "loss": 1.2473, + "step": 18 + }, + { + "epoch": 0.01, + "learning_rate": 3.454545454545454e-05, + "loss": 1.0741, + "step": 19 + }, + { + "epoch": 0.01, + "learning_rate": 3.6363636363636364e-05, + "loss": 0.9493, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 3.8181818181818174e-05, + "loss": 1.0745, + "step": 21 + }, + { + "epoch": 0.01, + "learning_rate": 3.9999999999999996e-05, + "loss": 1.0815, + "step": 22 + }, + { + "epoch": 0.01, + "learning_rate": 4.181818181818182e-05, + "loss": 0.9671, + "step": 23 + }, + { + "epoch": 0.01, + "learning_rate": 4.363636363636363e-05, + "loss": 1.0986, + "step": 24 + }, + { + "epoch": 0.01, + "learning_rate": 4.545454545454545e-05, + "loss": 0.7583, + "step": 25 + }, + { + "epoch": 0.01, + "learning_rate": 4.727272727272727e-05, + "loss": 0.8695, + "step": 26 + }, + { + "epoch": 0.01, + "learning_rate": 4.9090909090909084e-05, + "loss": 0.8064, + "step": 27 + }, + { + "epoch": 0.01, + "learning_rate": 5.090909090909091e-05, + "loss": 1.005, + "step": 28 + }, + { + "epoch": 0.01, + "learning_rate": 5.272727272727272e-05, + "loss": 0.7028, + "step": 29 + }, + { + "epoch": 0.01, + "learning_rate": 5.454545454545454e-05, + "loss": 0.6968, + "step": 30 + }, + { + "epoch": 0.01, + "learning_rate": 5.6363636363636356e-05, + "loss": 0.7165, + "step": 31 + }, + { + "epoch": 0.01, + "learning_rate": 5.818181818181818e-05, + "loss": 0.6678, + "step": 32 + }, + { + "epoch": 0.01, + "learning_rate": 5.9999999999999995e-05, + "loss": 0.9222, + "step": 33 + }, + { + "epoch": 0.01, + "learning_rate": 6.181818181818182e-05, + "loss": 0.7907, + "step": 34 + }, + { + "epoch": 0.01, + "learning_rate": 6.363636363636363e-05, + "loss": 0.7851, + "step": 35 + }, + { + "epoch": 0.01, + "learning_rate": 6.545454545454545e-05, + "loss": 0.7579, + "step": 36 + }, + { + "epoch": 0.01, + "learning_rate": 6.727272727272727e-05, + "loss": 0.6105, + "step": 37 + }, + { + "epoch": 0.01, + "learning_rate": 6.909090909090908e-05, + "loss": 0.64, + "step": 38 + }, + { + "epoch": 0.01, + "learning_rate": 7.09090909090909e-05, + "loss": 0.5102, + "step": 39 + }, + { + "epoch": 0.01, + "learning_rate": 7.272727272727273e-05, + "loss": 0.8755, + "step": 40 + }, + { + "epoch": 0.01, + "learning_rate": 7.454545454545454e-05, + "loss": 0.8683, + "step": 41 + }, + { + "epoch": 0.01, + "learning_rate": 7.636363636363635e-05, + "loss": 0.4924, + "step": 42 + }, + { + "epoch": 0.01, + "learning_rate": 7.818181818181818e-05, + "loss": 0.3446, + "step": 43 + }, + { + "epoch": 0.01, + "learning_rate": 7.999999999999999e-05, + "loss": 0.4318, + "step": 44 + }, + { + "epoch": 0.01, + "learning_rate": 8.18181818181818e-05, + "loss": 0.2453, + "step": 45 + }, + { + "epoch": 0.01, + "learning_rate": 8.363636363636364e-05, + "loss": 0.3709, + "step": 46 + }, + { + "epoch": 0.01, + "learning_rate": 8.545454545454545e-05, + "loss": 0.4465, + "step": 47 + }, + { + "epoch": 0.01, + "learning_rate": 8.727272727272726e-05, + "loss": 0.4609, + "step": 48 + }, + { + "epoch": 0.01, + "learning_rate": 8.90909090909091e-05, + "loss": 0.5246, + "step": 49 + }, + { + "epoch": 0.02, + "learning_rate": 9.09090909090909e-05, + "loss": 0.3516, + "step": 50 + }, + { + "epoch": 0.02, + "learning_rate": 9.272727272727271e-05, + "loss": 0.5925, + "step": 51 + }, + { + "epoch": 0.02, + "learning_rate": 9.454545454545454e-05, + "loss": 0.3211, + "step": 52 + }, + { + "epoch": 0.02, + "learning_rate": 9.636363636363636e-05, + "loss": 0.3742, + "step": 53 + }, + { + "epoch": 0.02, + "learning_rate": 9.818181818181817e-05, + "loss": 0.6121, + "step": 54 + }, + { + "epoch": 0.02, + "learning_rate": 9.999999999999999e-05, + "loss": 0.3881, + "step": 55 + }, + { + "epoch": 0.02, + "learning_rate": 0.00010181818181818181, + "loss": 0.3437, + "step": 56 + }, + { + "epoch": 0.02, + "learning_rate": 0.00010363636363636362, + "loss": 0.5659, + "step": 57 + }, + { + "epoch": 0.02, + "learning_rate": 0.00010545454545454545, + "loss": 0.3096, + "step": 58 + }, + { + "epoch": 0.02, + "learning_rate": 0.00010727272727272727, + "loss": 0.5505, + "step": 59 + }, + { + "epoch": 0.02, + "learning_rate": 0.00010909090909090908, + "loss": 0.5087, + "step": 60 + }, + { + "epoch": 0.02, + "learning_rate": 0.0001109090909090909, + "loss": 0.7122, + "step": 61 + }, + { + "epoch": 0.02, + "learning_rate": 0.00011272727272727271, + "loss": 0.2805, + "step": 62 + }, + { + "epoch": 0.02, + "learning_rate": 0.00011454545454545453, + "loss": 0.6002, + "step": 63 + }, + { + "epoch": 0.02, + "learning_rate": 0.00011636363636363636, + "loss": 0.6795, + "step": 64 + }, + { + "epoch": 0.02, + "learning_rate": 0.00011818181818181817, + "loss": 0.395, + "step": 65 + }, + { + "epoch": 0.02, + "learning_rate": 0.00011999999999999999, + "loss": 0.6266, + "step": 66 + }, + { + "epoch": 0.02, + "learning_rate": 0.00012181818181818181, + "loss": 0.4259, + "step": 67 + }, + { + "epoch": 0.02, + "learning_rate": 0.00012363636363636364, + "loss": 0.5388, + "step": 68 + }, + { + "epoch": 0.02, + "learning_rate": 0.00012545454545454543, + "loss": 0.3304, + "step": 69 + }, + { + "epoch": 0.02, + "learning_rate": 0.00012727272727272725, + "loss": 0.3686, + "step": 70 + }, + { + "epoch": 0.02, + "learning_rate": 0.00012909090909090908, + "loss": 0.2292, + "step": 71 + }, + { + "epoch": 0.02, + "learning_rate": 0.0001309090909090909, + "loss": 0.4417, + "step": 72 + }, + { + "epoch": 0.02, + "learning_rate": 0.00013272727272727272, + "loss": 0.4536, + "step": 73 + }, + { + "epoch": 0.02, + "learning_rate": 0.00013454545454545455, + "loss": 0.1587, + "step": 74 + }, + { + "epoch": 0.02, + "learning_rate": 0.00013636363636363634, + "loss": 0.5313, + "step": 75 + }, + { + "epoch": 0.02, + "learning_rate": 0.00013818181818181816, + "loss": 0.2263, + "step": 76 + }, + { + "epoch": 0.02, + "learning_rate": 0.00014, + "loss": 0.3606, + "step": 77 + }, + { + "epoch": 0.02, + "learning_rate": 0.0001418181818181818, + "loss": 0.1051, + "step": 78 + }, + { + "epoch": 0.02, + "learning_rate": 0.0001436363636363636, + "loss": 0.4122, + "step": 79 + }, + { + "epoch": 0.02, + "learning_rate": 0.00014545454545454546, + "loss": 0.2199, + "step": 80 + }, + { + "epoch": 0.02, + "learning_rate": 0.00014727272727272725, + "loss": 0.3692, + "step": 81 + }, + { + "epoch": 0.02, + "learning_rate": 0.00014909090909090908, + "loss": 0.4072, + "step": 82 + }, + { + "epoch": 0.03, + "learning_rate": 0.0001509090909090909, + "loss": 0.4437, + "step": 83 + }, + { + "epoch": 0.03, + "learning_rate": 0.0001527272727272727, + "loss": 0.2313, + "step": 84 + }, + { + "epoch": 0.03, + "learning_rate": 0.00015454545454545452, + "loss": 0.5631, + "step": 85 + }, + { + "epoch": 0.03, + "learning_rate": 0.00015636363636363637, + "loss": 0.3966, + "step": 86 + }, + { + "epoch": 0.03, + "learning_rate": 0.00015818181818181816, + "loss": 0.3592, + "step": 87 + }, + { + "epoch": 0.03, + "learning_rate": 0.00015999999999999999, + "loss": 0.6545, + "step": 88 + }, + { + "epoch": 0.03, + "learning_rate": 0.0001618181818181818, + "loss": 0.6373, + "step": 89 + }, + { + "epoch": 0.03, + "learning_rate": 0.0001636363636363636, + "loss": 0.5364, + "step": 90 + }, + { + "epoch": 0.03, + "learning_rate": 0.00016545454545454543, + "loss": 0.5372, + "step": 91 + }, + { + "epoch": 0.03, + "learning_rate": 0.00016727272727272728, + "loss": 0.4225, + "step": 92 + }, + { + "epoch": 0.03, + "learning_rate": 0.00016909090909090907, + "loss": 0.3679, + "step": 93 + }, + { + "epoch": 0.03, + "learning_rate": 0.0001709090909090909, + "loss": 0.4635, + "step": 94 + }, + { + "epoch": 0.03, + "learning_rate": 0.00017272727272727272, + "loss": 0.3707, + "step": 95 + }, + { + "epoch": 0.03, + "learning_rate": 0.00017454545454545452, + "loss": 0.3252, + "step": 96 + }, + { + "epoch": 0.03, + "learning_rate": 0.00017636363636363634, + "loss": 0.5124, + "step": 97 + }, + { + "epoch": 0.03, + "learning_rate": 0.0001781818181818182, + "loss": 0.4837, + "step": 98 + }, + { + "epoch": 0.03, + "learning_rate": 0.00017999999999999998, + "loss": 0.6666, + "step": 99 + }, + { + "epoch": 0.03, + "learning_rate": 0.0001818181818181818, + "loss": 0.274, + "step": 100 + }, + { + "epoch": 0.03, + "learning_rate": 0.0001836363636363636, + "loss": 0.4441, + "step": 101 + }, + { + "epoch": 0.03, + "learning_rate": 0.00018545454545454543, + "loss": 0.539, + "step": 102 + }, + { + "epoch": 0.03, + "learning_rate": 0.00018727272727272728, + "loss": 0.3286, + "step": 103 + }, + { + "epoch": 0.03, + "learning_rate": 0.00018909090909090907, + "loss": 0.5618, + "step": 104 + }, + { + "epoch": 0.03, + "learning_rate": 0.0001909090909090909, + "loss": 0.3391, + "step": 105 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019272727272727272, + "loss": 0.5338, + "step": 106 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019454545454545451, + "loss": 0.474, + "step": 107 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019636363636363634, + "loss": 0.1881, + "step": 108 + }, + { + "epoch": 0.03, + "learning_rate": 0.0001981818181818182, + "loss": 0.3942, + "step": 109 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019999999999999998, + "loss": 0.4205, + "step": 110 + }, + { + "epoch": 0.03, + "learning_rate": 0.0002018181818181818, + "loss": 0.4837, + "step": 111 + }, + { + "epoch": 0.03, + "learning_rate": 0.00020363636363636363, + "loss": 0.5874, + "step": 112 + }, + { + "epoch": 0.03, + "learning_rate": 0.00020545454545454542, + "loss": 0.3381, + "step": 113 + }, + { + "epoch": 0.03, + "learning_rate": 0.00020727272727272725, + "loss": 0.5398, + "step": 114 + }, + { + "epoch": 0.03, + "learning_rate": 0.0002090909090909091, + "loss": 0.5314, + "step": 115 + }, + { + "epoch": 0.04, + "learning_rate": 0.0002109090909090909, + "loss": 0.5965, + "step": 116 + }, + { + "epoch": 0.04, + "learning_rate": 0.00021272727272727272, + "loss": 0.4296, + "step": 117 + }, + { + "epoch": 0.04, + "learning_rate": 0.00021454545454545454, + "loss": 0.4317, + "step": 118 + }, + { + "epoch": 0.04, + "learning_rate": 0.00021636363636363633, + "loss": 0.4354, + "step": 119 + }, + { + "epoch": 0.04, + "learning_rate": 0.00021818181818181816, + "loss": 0.4298, + "step": 120 + }, + { + "epoch": 0.04, + "learning_rate": 0.00021999999999999995, + "loss": 0.4279, + "step": 121 + }, + { + "epoch": 0.04, + "learning_rate": 0.0002218181818181818, + "loss": 0.3701, + "step": 122 + }, + { + "epoch": 0.04, + "learning_rate": 0.00022363636363636363, + "loss": 0.4132, + "step": 123 + }, + { + "epoch": 0.04, + "learning_rate": 0.00022545454545454542, + "loss": 0.6167, + "step": 124 + }, + { + "epoch": 0.04, + "learning_rate": 0.00022727272727272725, + "loss": 0.5235, + "step": 125 + }, + { + "epoch": 0.04, + "learning_rate": 0.00022909090909090907, + "loss": 0.4865, + "step": 126 + }, + { + "epoch": 0.04, + "learning_rate": 0.00023090909090909086, + "loss": 0.4657, + "step": 127 + }, + { + "epoch": 0.04, + "learning_rate": 0.00023272727272727271, + "loss": 0.6469, + "step": 128 + }, + { + "epoch": 0.04, + "learning_rate": 0.00023454545454545454, + "loss": 0.1927, + "step": 129 + }, + { + "epoch": 0.04, + "learning_rate": 0.00023636363636363633, + "loss": 0.3842, + "step": 130 + }, + { + "epoch": 0.04, + "learning_rate": 0.00023818181818181816, + "loss": 0.4845, + "step": 131 + }, + { + "epoch": 0.04, + "learning_rate": 0.00023999999999999998, + "loss": 0.3187, + "step": 132 + }, + { + "epoch": 0.04, + "learning_rate": 0.0002418181818181818, + "loss": 0.3742, + "step": 133 + }, + { + "epoch": 0.04, + "learning_rate": 0.00024363636363636362, + "loss": 0.5076, + "step": 134 + }, + { + "epoch": 0.04, + "learning_rate": 0.00024545454545454545, + "loss": 0.5817, + "step": 135 + }, + { + "epoch": 0.04, + "learning_rate": 0.00024727272727272727, + "loss": 0.5747, + "step": 136 + }, + { + "epoch": 0.04, + "learning_rate": 0.0002490909090909091, + "loss": 0.4138, + "step": 137 + }, + { + "epoch": 0.04, + "learning_rate": 0.00025090909090909086, + "loss": 0.2683, + "step": 138 + }, + { + "epoch": 0.04, + "learning_rate": 0.0002527272727272727, + "loss": 0.4361, + "step": 139 + }, + { + "epoch": 0.04, + "learning_rate": 0.0002545454545454545, + "loss": 0.2895, + "step": 140 + }, + { + "epoch": 0.04, + "learning_rate": 0.00025636363636363633, + "loss": 0.623, + "step": 141 + }, + { + "epoch": 0.04, + "learning_rate": 0.00025818181818181815, + "loss": 0.3903, + "step": 142 + }, + { + "epoch": 0.04, + "learning_rate": 0.00026, + "loss": 0.2136, + "step": 143 + }, + { + "epoch": 0.04, + "learning_rate": 0.0002618181818181818, + "loss": 0.2805, + "step": 144 + }, + { + "epoch": 0.04, + "learning_rate": 0.0002636363636363636, + "loss": 0.5483, + "step": 145 + }, + { + "epoch": 0.04, + "learning_rate": 0.00026545454545454545, + "loss": 0.2948, + "step": 146 + }, + { + "epoch": 0.04, + "learning_rate": 0.0002672727272727272, + "loss": 0.2427, + "step": 147 + }, + { + "epoch": 0.04, + "learning_rate": 0.0002690909090909091, + "loss": 0.4136, + "step": 148 + }, + { + "epoch": 0.05, + "learning_rate": 0.0002709090909090909, + "loss": 0.3174, + "step": 149 + }, + { + "epoch": 0.05, + "learning_rate": 0.0002727272727272727, + "loss": 0.4177, + "step": 150 + }, + { + "epoch": 0.05, + "learning_rate": 0.0002745454545454545, + "loss": 0.5528, + "step": 151 + }, + { + "epoch": 0.05, + "learning_rate": 0.00027636363636363633, + "loss": 0.5296, + "step": 152 + }, + { + "epoch": 0.05, + "learning_rate": 0.00027818181818181815, + "loss": 0.3643, + "step": 153 + }, + { + "epoch": 0.05, + "learning_rate": 0.00028, + "loss": 0.3344, + "step": 154 + }, + { + "epoch": 0.05, + "learning_rate": 0.0002818181818181818, + "loss": 0.3172, + "step": 155 + }, + { + "epoch": 0.05, + "learning_rate": 0.0002836363636363636, + "loss": 0.5335, + "step": 156 + }, + { + "epoch": 0.05, + "learning_rate": 0.00028545454545454544, + "loss": 0.2319, + "step": 157 + }, + { + "epoch": 0.05, + "learning_rate": 0.0002872727272727272, + "loss": 0.4038, + "step": 158 + }, + { + "epoch": 0.05, + "learning_rate": 0.00028909090909090904, + "loss": 0.3582, + "step": 159 + }, + { + "epoch": 0.05, + "learning_rate": 0.0002909090909090909, + "loss": 0.417, + "step": 160 + }, + { + "epoch": 0.05, + "learning_rate": 0.0002927272727272727, + "loss": 0.1654, + "step": 161 + }, + { + "epoch": 0.05, + "learning_rate": 0.0002945454545454545, + "loss": 0.3233, + "step": 162 + }, + { + "epoch": 0.05, + "learning_rate": 0.00029636363636363633, + "loss": 0.4184, + "step": 163 + }, + { + "epoch": 0.05, + "learning_rate": 0.00029818181818181815, + "loss": 0.509, + "step": 164 + }, + { + "epoch": 0.05, + "learning_rate": 0.0003, + "loss": 0.2922, + "step": 165 + }, + { + "epoch": 0.05, + "learning_rate": 0.0002999999972139699, + "loss": 0.2725, + "step": 166 + }, + { + "epoch": 0.05, + "learning_rate": 0.0002999999888558798, + "loss": 0.6556, + "step": 167 + }, + { + "epoch": 0.05, + "learning_rate": 0.00029999997492573, + "loss": 0.3606, + "step": 168 + }, + { + "epoch": 0.05, + "learning_rate": 0.000299999955423521, + "loss": 0.2122, + "step": 169 + }, + { + "epoch": 0.05, + "learning_rate": 0.00029999993034925354, + "loss": 0.6728, + "step": 170 + }, + { + "epoch": 0.05, + "learning_rate": 0.00029999989970292854, + "loss": 0.4107, + "step": 171 + }, + { + "epoch": 0.05, + "learning_rate": 0.0002999998634845471, + "loss": 0.3136, + "step": 172 + }, + { + "epoch": 0.05, + "learning_rate": 0.00029999982169411066, + "loss": 0.2579, + "step": 173 + }, + { + "epoch": 0.05, + "learning_rate": 0.00029999977433162065, + "loss": 0.3546, + "step": 174 + }, + { + "epoch": 0.05, + "learning_rate": 0.0002999997213970789, + "loss": 0.4242, + "step": 175 + }, + { + "epoch": 0.05, + "learning_rate": 0.0002999996628904874, + "loss": 0.4885, + "step": 176 + }, + { + "epoch": 0.05, + "learning_rate": 0.0002999995988118483, + "loss": 0.3881, + "step": 177 + }, + { + "epoch": 0.05, + "learning_rate": 0.000299999529161164, + "loss": 0.2773, + "step": 178 + }, + { + "epoch": 0.05, + "learning_rate": 0.000299999453938437, + "loss": 0.5948, + "step": 179 + }, + { + "epoch": 0.05, + "learning_rate": 0.00029999937314367016, + "loss": 0.3865, + "step": 180 + }, + { + "epoch": 0.05, + "learning_rate": 0.0002999992867768665, + "loss": 0.5225, + "step": 181 + }, + { + "epoch": 0.06, + "learning_rate": 0.0002999991948380292, + "loss": 0.2902, + "step": 182 + }, + { + "epoch": 0.06, + "learning_rate": 0.0002999990973271617, + "loss": 0.2447, + "step": 183 + }, + { + "epoch": 0.06, + "learning_rate": 0.0002999989942442676, + "loss": 0.4913, + "step": 184 + }, + { + "epoch": 0.06, + "learning_rate": 0.00029999888558935065, + "loss": 0.2725, + "step": 185 + }, + { + "epoch": 0.06, + "learning_rate": 0.00029999877136241513, + "loss": 0.2087, + "step": 186 + }, + { + "epoch": 0.06, + "learning_rate": 0.00029999865156346496, + "loss": 0.3319, + "step": 187 + }, + { + "epoch": 0.06, + "learning_rate": 0.0002999985261925049, + "loss": 0.4781, + "step": 188 + }, + { + "epoch": 0.06, + "learning_rate": 0.0002999983952495393, + "loss": 0.5818, + "step": 189 + }, + { + "epoch": 0.06, + "learning_rate": 0.0002999982587345733, + "loss": 0.4772, + "step": 190 + }, + { + "epoch": 0.06, + "learning_rate": 0.0002999981166476118, + "loss": 0.3338, + "step": 191 + }, + { + "epoch": 0.06, + "learning_rate": 0.0002999979689886601, + "loss": 0.1476, + "step": 192 + }, + { + "epoch": 0.06, + "learning_rate": 0.00029999781575772383, + "loss": 0.3143, + "step": 193 + }, + { + "epoch": 0.06, + "learning_rate": 0.0002999976569548085, + "loss": 0.5108, + "step": 194 + }, + { + "epoch": 0.06, + "learning_rate": 0.0002999974925799201, + "loss": 0.4619, + "step": 195 + }, + { + "epoch": 0.06, + "learning_rate": 0.00029999732263306467, + "loss": 0.3785, + "step": 196 + }, + { + "epoch": 0.06, + "learning_rate": 0.0002999971471142486, + "loss": 0.6238, + "step": 197 + }, + { + "epoch": 0.06, + "learning_rate": 0.00029999696602347835, + "loss": 0.3606, + "step": 198 + }, + { + "epoch": 0.06, + "learning_rate": 0.00029999677936076073, + "loss": 0.2799, + "step": 199 + }, + { + "epoch": 0.06, + "learning_rate": 0.0002999965871261026, + "loss": 0.3467, + "step": 200 + }, + { + "epoch": 0.06, + "learning_rate": 0.0002999963893195111, + "loss": 0.4611, + "step": 201 + }, + { + "epoch": 0.06, + "learning_rate": 0.00029999618594099363, + "loss": 0.2833, + "step": 202 + }, + { + "epoch": 0.06, + "learning_rate": 0.0002999959769905577, + "loss": 0.3678, + "step": 203 + }, + { + "epoch": 0.06, + "learning_rate": 0.000299995762468211, + "loss": 0.354, + "step": 204 + }, + { + "epoch": 0.06, + "learning_rate": 0.00029999554237396166, + "loss": 0.3824, + "step": 205 + }, + { + "epoch": 0.06, + "learning_rate": 0.0002999953167078178, + "loss": 0.3294, + "step": 206 + }, + { + "epoch": 0.06, + "learning_rate": 0.00029999508546978777, + "loss": 0.2361, + "step": 207 + }, + { + "epoch": 0.06, + "learning_rate": 0.00029999484865988015, + "loss": 0.2253, + "step": 208 + }, + { + "epoch": 0.06, + "learning_rate": 0.00029999460627810377, + "loss": 0.5142, + "step": 209 + }, + { + "epoch": 0.06, + "learning_rate": 0.00029999435832446763, + "loss": 0.3787, + "step": 210 + }, + { + "epoch": 0.06, + "learning_rate": 0.0002999941047989809, + "loss": 0.5363, + "step": 211 + }, + { + "epoch": 0.06, + "learning_rate": 0.00029999384570165305, + "loss": 0.2896, + "step": 212 + }, + { + "epoch": 0.06, + "learning_rate": 0.00029999358103249374, + "loss": 0.3938, + "step": 213 + }, + { + "epoch": 0.06, + "learning_rate": 0.00029999331079151267, + "loss": 0.6212, + "step": 214 + }, + { + "epoch": 0.07, + "learning_rate": 0.00029999303497872, + "loss": 0.3271, + "step": 215 + }, + { + "epoch": 0.07, + "learning_rate": 0.0002999927535941259, + "loss": 0.3866, + "step": 216 + }, + { + "epoch": 0.07, + "learning_rate": 0.00029999246663774083, + "loss": 0.4239, + "step": 217 + }, + { + "epoch": 0.07, + "learning_rate": 0.00029999217410957556, + "loss": 0.2669, + "step": 218 + }, + { + "epoch": 0.07, + "learning_rate": 0.0002999918760096408, + "loss": 0.4804, + "step": 219 + }, + { + "epoch": 0.07, + "learning_rate": 0.0002999915723379477, + "loss": 0.3723, + "step": 220 + }, + { + "epoch": 0.07, + "learning_rate": 0.00029999126309450755, + "loss": 0.4723, + "step": 221 + }, + { + "epoch": 0.07, + "learning_rate": 0.00029999094827933183, + "loss": 0.2585, + "step": 222 + }, + { + "epoch": 0.07, + "learning_rate": 0.0002999906278924322, + "loss": 0.4599, + "step": 223 + }, + { + "epoch": 0.07, + "learning_rate": 0.0002999903019338206, + "loss": 0.1762, + "step": 224 + }, + { + "epoch": 0.07, + "learning_rate": 0.00029998997040350914, + "loss": 0.6406, + "step": 225 + }, + { + "epoch": 0.07, + "learning_rate": 0.0002999896333015101, + "loss": 0.3653, + "step": 226 + }, + { + "epoch": 0.07, + "learning_rate": 0.00029998929062783605, + "loss": 0.4757, + "step": 227 + }, + { + "epoch": 0.07, + "learning_rate": 0.0002999889423824997, + "loss": 0.3712, + "step": 228 + }, + { + "epoch": 0.07, + "learning_rate": 0.00029998858856551393, + "loss": 0.4202, + "step": 229 + }, + { + "epoch": 0.07, + "learning_rate": 0.000299988229176892, + "loss": 0.3619, + "step": 230 + }, + { + "epoch": 0.07, + "learning_rate": 0.0002999878642166471, + "loss": 0.4158, + "step": 231 + }, + { + "epoch": 0.07, + "learning_rate": 0.00029998749368479296, + "loss": 0.2779, + "step": 232 + }, + { + "epoch": 0.07, + "learning_rate": 0.0002999871175813432, + "loss": 0.6044, + "step": 233 + }, + { + "epoch": 0.07, + "learning_rate": 0.0002999867359063119, + "loss": 0.3736, + "step": 234 + }, + { + "epoch": 0.07, + "learning_rate": 0.0002999863486597132, + "loss": 0.2485, + "step": 235 + }, + { + "epoch": 0.07, + "learning_rate": 0.00029998595584156145, + "loss": 0.3877, + "step": 236 + }, + { + "epoch": 0.07, + "learning_rate": 0.0002999855574518713, + "loss": 0.4692, + "step": 237 + }, + { + "epoch": 0.07, + "learning_rate": 0.00029998515349065753, + "loss": 0.2173, + "step": 238 + }, + { + "epoch": 0.07, + "learning_rate": 0.00029998474395793507, + "loss": 0.517, + "step": 239 + }, + { + "epoch": 0.07, + "learning_rate": 0.00029998432885371926, + "loss": 0.377, + "step": 240 + }, + { + "epoch": 0.07, + "learning_rate": 0.00029998390817802544, + "loss": 0.2557, + "step": 241 + }, + { + "epoch": 0.07, + "learning_rate": 0.00029998348193086927, + "loss": 0.2506, + "step": 242 + }, + { + "epoch": 0.07, + "learning_rate": 0.0002999830501122665, + "loss": 0.5651, + "step": 243 + }, + { + "epoch": 0.07, + "learning_rate": 0.0002999826127222333, + "loss": 0.2954, + "step": 244 + }, + { + "epoch": 0.07, + "learning_rate": 0.0002999821697607859, + "loss": 0.3545, + "step": 245 + }, + { + "epoch": 0.07, + "learning_rate": 0.00029998172122794065, + "loss": 0.3171, + "step": 246 + }, + { + "epoch": 0.07, + "learning_rate": 0.0002999812671237143, + "loss": 0.0741, + "step": 247 + }, + { + "epoch": 0.08, + "learning_rate": 0.00029998080744812365, + "loss": 0.2749, + "step": 248 + }, + { + "epoch": 0.08, + "learning_rate": 0.0002999803422011859, + "loss": 0.3257, + "step": 249 + }, + { + "epoch": 0.08, + "learning_rate": 0.0002999798713829182, + "loss": 0.3338, + "step": 250 + }, + { + "epoch": 0.08, + "learning_rate": 0.0002999793949933381, + "loss": 0.4425, + "step": 251 + }, + { + "epoch": 0.08, + "learning_rate": 0.0002999789130324632, + "loss": 0.3899, + "step": 252 + }, + { + "epoch": 0.08, + "learning_rate": 0.00029997842550031164, + "loss": 0.2719, + "step": 253 + }, + { + "epoch": 0.08, + "learning_rate": 0.00029997793239690127, + "loss": 0.2064, + "step": 254 + }, + { + "epoch": 0.08, + "learning_rate": 0.00029997743372225054, + "loss": 0.2475, + "step": 255 + }, + { + "epoch": 0.08, + "learning_rate": 0.000299976929476378, + "loss": 0.3733, + "step": 256 + }, + { + "epoch": 0.08, + "learning_rate": 0.0002999764196593023, + "loss": 0.3868, + "step": 257 + }, + { + "epoch": 0.08, + "learning_rate": 0.0002999759042710424, + "loss": 0.402, + "step": 258 + }, + { + "epoch": 0.08, + "learning_rate": 0.00029997538331161743, + "loss": 0.4802, + "step": 259 + }, + { + "epoch": 0.08, + "learning_rate": 0.0002999748567810468, + "loss": 0.3362, + "step": 260 + }, + { + "epoch": 0.08, + "learning_rate": 0.00029997432467935014, + "loss": 0.2669, + "step": 261 + }, + { + "epoch": 0.08, + "learning_rate": 0.000299973787006547, + "loss": 0.4333, + "step": 262 + }, + { + "epoch": 0.08, + "learning_rate": 0.00029997324376265745, + "loss": 0.3182, + "step": 263 + }, + { + "epoch": 0.08, + "learning_rate": 0.0002999726949477018, + "loss": 0.2728, + "step": 264 + }, + { + "epoch": 0.08, + "learning_rate": 0.0002999721405617002, + "loss": 0.3919, + "step": 265 + }, + { + "epoch": 0.08, + "learning_rate": 0.00029997158060467345, + "loss": 0.2064, + "step": 266 + }, + { + "epoch": 0.08, + "learning_rate": 0.0002999710150766422, + "loss": 0.3011, + "step": 267 + }, + { + "epoch": 0.08, + "learning_rate": 0.00029997044397762755, + "loss": 0.4111, + "step": 268 + }, + { + "epoch": 0.08, + "learning_rate": 0.00029996986730765075, + "loss": 0.3339, + "step": 269 + }, + { + "epoch": 0.08, + "learning_rate": 0.0002999692850667331, + "loss": 0.2085, + "step": 270 + }, + { + "epoch": 0.08, + "learning_rate": 0.0002999686972548963, + "loss": 0.179, + "step": 271 + }, + { + "epoch": 0.08, + "learning_rate": 0.0002999681038721622, + "loss": 0.3568, + "step": 272 + }, + { + "epoch": 0.08, + "learning_rate": 0.00029996750491855277, + "loss": 0.3835, + "step": 273 + }, + { + "epoch": 0.08, + "learning_rate": 0.0002999669003940904, + "loss": 0.3924, + "step": 274 + }, + { + "epoch": 0.08, + "learning_rate": 0.00029996629029879735, + "loss": 0.2393, + "step": 275 + }, + { + "epoch": 0.08, + "learning_rate": 0.00029996567463269646, + "loss": 0.28, + "step": 276 + }, + { + "epoch": 0.08, + "learning_rate": 0.00029996505339581047, + "loss": 0.2806, + "step": 277 + }, + { + "epoch": 0.08, + "learning_rate": 0.0002999644265881626, + "loss": 0.1921, + "step": 278 + }, + { + "epoch": 0.08, + "learning_rate": 0.00029996379420977595, + "loss": 0.3723, + "step": 279 + }, + { + "epoch": 0.09, + "learning_rate": 0.00029996315626067415, + "loss": 0.4365, + "step": 280 + }, + { + "epoch": 0.09, + "learning_rate": 0.00029996251274088087, + "loss": 0.3499, + "step": 281 + }, + { + "epoch": 0.09, + "learning_rate": 0.00029996186365042, + "loss": 0.146, + "step": 282 + }, + { + "epoch": 0.09, + "learning_rate": 0.00029996120898931566, + "loss": 0.3259, + "step": 283 + }, + { + "epoch": 0.09, + "learning_rate": 0.00029996054875759215, + "loss": 0.0345, + "step": 284 + }, + { + "epoch": 0.09, + "learning_rate": 0.00029995988295527403, + "loss": 0.3186, + "step": 285 + }, + { + "epoch": 0.09, + "learning_rate": 0.000299959211582386, + "loss": 0.3591, + "step": 286 + }, + { + "epoch": 0.09, + "learning_rate": 0.000299958534638953, + "loss": 0.2167, + "step": 287 + }, + { + "epoch": 0.09, + "learning_rate": 0.0002999578521250002, + "loss": 0.3219, + "step": 288 + }, + { + "epoch": 0.09, + "learning_rate": 0.00029995716404055295, + "loss": 0.5844, + "step": 289 + }, + { + "epoch": 0.09, + "learning_rate": 0.0002999564703856368, + "loss": 0.1811, + "step": 290 + }, + { + "epoch": 0.09, + "learning_rate": 0.00029995577116027753, + "loss": 0.4347, + "step": 291 + }, + { + "epoch": 0.09, + "learning_rate": 0.00029995506636450105, + "loss": 0.3219, + "step": 292 + }, + { + "epoch": 0.09, + "learning_rate": 0.00029995435599833363, + "loss": 0.4494, + "step": 293 + }, + { + "epoch": 0.09, + "learning_rate": 0.0002999536400618016, + "loss": 0.3272, + "step": 294 + }, + { + "epoch": 0.09, + "learning_rate": 0.0002999529185549316, + "loss": 0.3801, + "step": 295 + }, + { + "epoch": 0.09, + "learning_rate": 0.0002999521914777504, + "loss": 0.4697, + "step": 296 + }, + { + "epoch": 0.09, + "learning_rate": 0.00029995145883028503, + "loss": 0.5078, + "step": 297 + }, + { + "epoch": 0.09, + "learning_rate": 0.0002999507206125627, + "loss": 0.1618, + "step": 298 + }, + { + "epoch": 0.09, + "learning_rate": 0.0002999499768246108, + "loss": 0.2617, + "step": 299 + }, + { + "epoch": 0.09, + "learning_rate": 0.000299949227466457, + "loss": 0.3804, + "step": 300 + }, + { + "epoch": 0.09, + "learning_rate": 0.00029994847253812915, + "loss": 0.4561, + "step": 301 + }, + { + "epoch": 0.09, + "learning_rate": 0.0002999477120396552, + "loss": 0.4977, + "step": 302 + }, + { + "epoch": 0.09, + "learning_rate": 0.0002999469459710635, + "loss": 0.2029, + "step": 303 + }, + { + "epoch": 0.09, + "learning_rate": 0.0002999461743323825, + "loss": 0.305, + "step": 304 + }, + { + "epoch": 0.09, + "learning_rate": 0.0002999453971236408, + "loss": 0.5059, + "step": 305 + }, + { + "epoch": 0.09, + "learning_rate": 0.0002999446143448673, + "loss": 0.3725, + "step": 306 + }, + { + "epoch": 0.09, + "learning_rate": 0.00029994382599609113, + "loss": 0.6375, + "step": 307 + }, + { + "epoch": 0.09, + "learning_rate": 0.00029994303207734154, + "loss": 0.1973, + "step": 308 + }, + { + "epoch": 0.09, + "learning_rate": 0.0002999422325886479, + "loss": 0.2211, + "step": 309 + }, + { + "epoch": 0.09, + "learning_rate": 0.00029994142753004015, + "loss": 0.3343, + "step": 310 + }, + { + "epoch": 0.09, + "learning_rate": 0.000299940616901548, + "loss": 0.1735, + "step": 311 + }, + { + "epoch": 0.09, + "learning_rate": 0.00029993980070320165, + "loss": 0.3668, + "step": 312 + }, + { + "epoch": 0.1, + "learning_rate": 0.0002999389789350314, + "loss": 0.3392, + "step": 313 + }, + { + "epoch": 0.1, + "learning_rate": 0.00029993815159706777, + "loss": 0.1918, + "step": 314 + }, + { + "epoch": 0.1, + "learning_rate": 0.0002999373186893415, + "loss": 0.609, + "step": 315 + }, + { + "epoch": 0.1, + "learning_rate": 0.00029993648021188355, + "loss": 0.4783, + "step": 316 + }, + { + "epoch": 0.1, + "learning_rate": 0.000299935636164725, + "loss": 0.3132, + "step": 317 + }, + { + "epoch": 0.1, + "learning_rate": 0.0002999347865478973, + "loss": 0.3271, + "step": 318 + }, + { + "epoch": 0.1, + "learning_rate": 0.00029993393136143194, + "loss": 0.3123, + "step": 319 + }, + { + "epoch": 0.1, + "learning_rate": 0.00029993307060536065, + "loss": 0.2013, + "step": 320 + }, + { + "epoch": 0.1, + "learning_rate": 0.0002999322042797155, + "loss": 0.3008, + "step": 321 + }, + { + "epoch": 0.1, + "learning_rate": 0.00029993133238452867, + "loss": 0.2861, + "step": 322 + }, + { + "epoch": 0.1, + "learning_rate": 0.00029993045491983245, + "loss": 0.2908, + "step": 323 + }, + { + "epoch": 0.1, + "learning_rate": 0.00029992957188565954, + "loss": 0.1871, + "step": 324 + }, + { + "epoch": 0.1, + "learning_rate": 0.00029992868328204274, + "loss": 0.2227, + "step": 325 + }, + { + "epoch": 0.1, + "learning_rate": 0.00029992778910901496, + "loss": 0.4098, + "step": 326 + }, + { + "epoch": 0.1, + "learning_rate": 0.00029992688936660944, + "loss": 0.2893, + "step": 327 + }, + { + "epoch": 0.1, + "learning_rate": 0.0002999259840548597, + "loss": 0.2273, + "step": 328 + }, + { + "epoch": 0.1, + "learning_rate": 0.0002999250731737993, + "loss": 0.2629, + "step": 329 + }, + { + "epoch": 0.1, + "learning_rate": 0.0002999241567234621, + "loss": 0.297, + "step": 330 + }, + { + "epoch": 0.1, + "learning_rate": 0.00029992323470388204, + "loss": 0.4238, + "step": 331 + }, + { + "epoch": 0.1, + "learning_rate": 0.00029992230711509355, + "loss": 0.4894, + "step": 332 + }, + { + "epoch": 0.1, + "learning_rate": 0.00029992137395713097, + "loss": 0.4424, + "step": 333 + }, + { + "epoch": 0.1, + "learning_rate": 0.00029992043523002895, + "loss": 0.2386, + "step": 334 + }, + { + "epoch": 0.1, + "learning_rate": 0.00029991949093382246, + "loss": 0.2839, + "step": 335 + }, + { + "epoch": 0.1, + "learning_rate": 0.00029991854106854645, + "loss": 0.6452, + "step": 336 + }, + { + "epoch": 0.1, + "learning_rate": 0.00029991758563423633, + "loss": 0.2609, + "step": 337 + }, + { + "epoch": 0.1, + "learning_rate": 0.00029991662463092745, + "loss": 0.42, + "step": 338 + }, + { + "epoch": 0.1, + "learning_rate": 0.00029991565805865573, + "loss": 0.3616, + "step": 339 + }, + { + "epoch": 0.1, + "learning_rate": 0.00029991468591745687, + "loss": 0.3846, + "step": 340 + }, + { + "epoch": 0.1, + "learning_rate": 0.000299913708207367, + "loss": 0.3083, + "step": 341 + }, + { + "epoch": 0.1, + "learning_rate": 0.0002999127249284225, + "loss": 0.2291, + "step": 342 + }, + { + "epoch": 0.1, + "learning_rate": 0.00029991173608065995, + "loss": 0.3025, + "step": 343 + }, + { + "epoch": 0.1, + "learning_rate": 0.000299910741664116, + "loss": 0.3424, + "step": 344 + }, + { + "epoch": 0.1, + "learning_rate": 0.0002999097416788275, + "loss": 0.3925, + "step": 345 + }, + { + "epoch": 0.11, + "learning_rate": 0.0002999087361248318, + "loss": 0.3662, + "step": 346 + }, + { + "epoch": 0.11, + "learning_rate": 0.0002999077250021662, + "loss": 0.1759, + "step": 347 + }, + { + "epoch": 0.11, + "learning_rate": 0.00029990670831086814, + "loss": 0.2067, + "step": 348 + }, + { + "epoch": 0.11, + "learning_rate": 0.00029990568605097547, + "loss": 0.2343, + "step": 349 + }, + { + "epoch": 0.11, + "learning_rate": 0.00029990465822252616, + "loss": 0.2792, + "step": 350 + }, + { + "epoch": 0.11, + "learning_rate": 0.00029990362482555844, + "loss": 0.3089, + "step": 351 + }, + { + "epoch": 0.11, + "learning_rate": 0.0002999025858601106, + "loss": 0.3161, + "step": 352 + }, + { + "epoch": 0.11, + "learning_rate": 0.00029990154132622125, + "loss": 0.431, + "step": 353 + }, + { + "epoch": 0.11, + "learning_rate": 0.0002999004912239293, + "loss": 0.4195, + "step": 354 + }, + { + "epoch": 0.11, + "learning_rate": 0.00029989943555327363, + "loss": 0.3545, + "step": 355 + }, + { + "epoch": 0.11, + "learning_rate": 0.00029989837431429353, + "loss": 0.3528, + "step": 356 + }, + { + "epoch": 0.11, + "learning_rate": 0.0002998973075070284, + "loss": 0.4253, + "step": 357 + }, + { + "epoch": 0.11, + "learning_rate": 0.0002998962351315178, + "loss": 0.1749, + "step": 358 + }, + { + "epoch": 0.11, + "learning_rate": 0.00029989515718780173, + "loss": 0.2562, + "step": 359 + }, + { + "epoch": 0.11, + "learning_rate": 0.00029989407367592006, + "loss": 0.3759, + "step": 360 + }, + { + "epoch": 0.11, + "learning_rate": 0.0002998929845959131, + "loss": 0.204, + "step": 361 + }, + { + "epoch": 0.11, + "learning_rate": 0.00029989188994782143, + "loss": 0.264, + "step": 362 + }, + { + "epoch": 0.11, + "learning_rate": 0.0002998907897316855, + "loss": 0.5466, + "step": 363 + }, + { + "epoch": 0.11, + "learning_rate": 0.0002998896839475463, + "loss": 0.2671, + "step": 364 + }, + { + "epoch": 0.11, + "learning_rate": 0.0002998885725954449, + "loss": 0.2672, + "step": 365 + }, + { + "epoch": 0.11, + "learning_rate": 0.0002998874556754226, + "loss": 0.1921, + "step": 366 + }, + { + "epoch": 0.11, + "learning_rate": 0.0002998863331875209, + "loss": 0.2238, + "step": 367 + }, + { + "epoch": 0.11, + "learning_rate": 0.00029988520513178136, + "loss": 0.1988, + "step": 368 + }, + { + "epoch": 0.11, + "learning_rate": 0.000299884071508246, + "loss": 0.3544, + "step": 369 + }, + { + "epoch": 0.11, + "learning_rate": 0.000299882932316957, + "loss": 0.3866, + "step": 370 + }, + { + "epoch": 0.11, + "learning_rate": 0.0002998817875579565, + "loss": 0.4072, + "step": 371 + }, + { + "epoch": 0.11, + "learning_rate": 0.00029988063723128714, + "loss": 0.4454, + "step": 372 + }, + { + "epoch": 0.11, + "learning_rate": 0.00029987948133699166, + "loss": 0.2996, + "step": 373 + }, + { + "epoch": 0.11, + "learning_rate": 0.0002998783198751129, + "loss": 0.3298, + "step": 374 + }, + { + "epoch": 0.11, + "learning_rate": 0.00029987715284569415, + "loss": 0.4764, + "step": 375 + }, + { + "epoch": 0.11, + "learning_rate": 0.00029987598024877864, + "loss": 0.3028, + "step": 376 + }, + { + "epoch": 0.11, + "learning_rate": 0.00029987480208440995, + "loss": 0.4967, + "step": 377 + }, + { + "epoch": 0.11, + "learning_rate": 0.0002998736183526319, + "loss": 0.5879, + "step": 378 + }, + { + "epoch": 0.12, + "learning_rate": 0.00029987242905348845, + "loss": 0.3786, + "step": 379 + }, + { + "epoch": 0.12, + "learning_rate": 0.0002998712341870237, + "loss": 0.1929, + "step": 380 + }, + { + "epoch": 0.12, + "learning_rate": 0.00029987003375328217, + "loss": 0.3589, + "step": 381 + }, + { + "epoch": 0.12, + "learning_rate": 0.00029986882775230833, + "loss": 0.2731, + "step": 382 + }, + { + "epoch": 0.12, + "learning_rate": 0.000299867616184147, + "loss": 0.1609, + "step": 383 + }, + { + "epoch": 0.12, + "learning_rate": 0.00029986639904884326, + "loss": 0.2304, + "step": 384 + }, + { + "epoch": 0.12, + "learning_rate": 0.0002998651763464423, + "loss": 0.2167, + "step": 385 + }, + { + "epoch": 0.12, + "learning_rate": 0.00029986394807698946, + "loss": 0.3218, + "step": 386 + }, + { + "epoch": 0.12, + "learning_rate": 0.0002998627142405304, + "loss": 0.277, + "step": 387 + }, + { + "epoch": 0.12, + "learning_rate": 0.00029986147483711104, + "loss": 0.5421, + "step": 388 + }, + { + "epoch": 0.12, + "learning_rate": 0.0002998602298667773, + "loss": 0.3409, + "step": 389 + }, + { + "epoch": 0.12, + "learning_rate": 0.00029985897932957546, + "loss": 0.4092, + "step": 390 + }, + { + "epoch": 0.12, + "learning_rate": 0.0002998577232255521, + "loss": 0.3223, + "step": 391 + }, + { + "epoch": 0.12, + "learning_rate": 0.0002998564615547537, + "loss": 0.3169, + "step": 392 + }, + { + "epoch": 0.12, + "learning_rate": 0.0002998551943172272, + "loss": 0.3379, + "step": 393 + }, + { + "epoch": 0.12, + "learning_rate": 0.0002998539215130197, + "loss": 0.307, + "step": 394 + }, + { + "epoch": 0.12, + "learning_rate": 0.00029985264314217843, + "loss": 0.3132, + "step": 395 + }, + { + "epoch": 0.12, + "learning_rate": 0.00029985135920475097, + "loss": 0.2442, + "step": 396 + }, + { + "epoch": 0.12, + "learning_rate": 0.0002998500697007849, + "loss": 0.5513, + "step": 397 + }, + { + "epoch": 0.12, + "learning_rate": 0.0002998487746303282, + "loss": 0.0905, + "step": 398 + }, + { + "epoch": 0.12, + "learning_rate": 0.00029984747399342893, + "loss": 0.3658, + "step": 399 + }, + { + "epoch": 0.12, + "learning_rate": 0.0002998461677901354, + "loss": 0.3121, + "step": 400 + }, + { + "epoch": 0.12, + "learning_rate": 0.00029984485602049625, + "loss": 0.4953, + "step": 401 + }, + { + "epoch": 0.12, + "learning_rate": 0.00029984353868456006, + "loss": 0.3584, + "step": 402 + }, + { + "epoch": 0.12, + "learning_rate": 0.0002998422157823758, + "loss": 0.4015, + "step": 403 + }, + { + "epoch": 0.12, + "learning_rate": 0.00029984088731399263, + "loss": 0.2617, + "step": 404 + }, + { + "epoch": 0.12, + "learning_rate": 0.0002998395532794599, + "loss": 0.0714, + "step": 405 + }, + { + "epoch": 0.12, + "learning_rate": 0.0002998382136788272, + "loss": 0.3164, + "step": 406 + }, + { + "epoch": 0.12, + "learning_rate": 0.00029983686851214425, + "loss": 0.1292, + "step": 407 + }, + { + "epoch": 0.12, + "learning_rate": 0.000299835517779461, + "loss": 0.4262, + "step": 408 + }, + { + "epoch": 0.12, + "learning_rate": 0.00029983416148082764, + "loss": 0.2923, + "step": 409 + }, + { + "epoch": 0.12, + "learning_rate": 0.00029983279961629466, + "loss": 0.327, + "step": 410 + }, + { + "epoch": 0.12, + "learning_rate": 0.00029983143218591246, + "loss": 0.2678, + "step": 411 + }, + { + "epoch": 0.13, + "learning_rate": 0.00029983005918973195, + "loss": 0.2347, + "step": 412 + }, + { + "epoch": 0.13, + "learning_rate": 0.00029982868062780415, + "loss": 0.3129, + "step": 413 + }, + { + "epoch": 0.13, + "learning_rate": 0.00029982729650018027, + "loss": 0.5026, + "step": 414 + }, + { + "epoch": 0.13, + "learning_rate": 0.0002998259068069116, + "loss": 0.5613, + "step": 415 + }, + { + "epoch": 0.13, + "learning_rate": 0.00029982451154804987, + "loss": 0.3302, + "step": 416 + }, + { + "epoch": 0.13, + "learning_rate": 0.00029982311072364695, + "loss": 0.328, + "step": 417 + }, + { + "epoch": 0.13, + "learning_rate": 0.00029982170433375476, + "loss": 0.3657, + "step": 418 + }, + { + "epoch": 0.13, + "learning_rate": 0.0002998202923784256, + "loss": 0.2838, + "step": 419 + }, + { + "epoch": 0.13, + "learning_rate": 0.00029981887485771197, + "loss": 0.3928, + "step": 420 + }, + { + "epoch": 0.13, + "learning_rate": 0.00029981745177166643, + "loss": 0.2999, + "step": 421 + }, + { + "epoch": 0.13, + "learning_rate": 0.00029981602312034196, + "loss": 0.4211, + "step": 422 + }, + { + "epoch": 0.13, + "learning_rate": 0.0002998145889037915, + "loss": 0.3738, + "step": 423 + }, + { + "epoch": 0.13, + "learning_rate": 0.0002998131491220684, + "loss": 0.4139, + "step": 424 + }, + { + "epoch": 0.13, + "learning_rate": 0.0002998117037752261, + "loss": 0.296, + "step": 425 + }, + { + "epoch": 0.13, + "learning_rate": 0.0002998102528633184, + "loss": 0.4458, + "step": 426 + }, + { + "epoch": 0.13, + "learning_rate": 0.0002998087963863991, + "loss": 0.2338, + "step": 427 + }, + { + "epoch": 0.13, + "learning_rate": 0.0002998073343445223, + "loss": 0.2631, + "step": 428 + }, + { + "epoch": 0.13, + "learning_rate": 0.00029980586673774233, + "loss": 0.3304, + "step": 429 + }, + { + "epoch": 0.13, + "learning_rate": 0.0002998043935661137, + "loss": 0.2876, + "step": 430 + }, + { + "epoch": 0.13, + "learning_rate": 0.0002998029148296912, + "loss": 0.1625, + "step": 431 + }, + { + "epoch": 0.13, + "learning_rate": 0.0002998014305285297, + "loss": 0.3688, + "step": 432 + }, + { + "epoch": 0.13, + "learning_rate": 0.0002997999406626843, + "loss": 0.3722, + "step": 433 + }, + { + "epoch": 0.13, + "learning_rate": 0.00029979844523221047, + "loss": 0.3131, + "step": 434 + }, + { + "epoch": 0.13, + "learning_rate": 0.0002997969442371636, + "loss": 0.3079, + "step": 435 + }, + { + "epoch": 0.13, + "learning_rate": 0.0002997954376775996, + "loss": 0.2827, + "step": 436 + }, + { + "epoch": 0.13, + "learning_rate": 0.0002997939255535743, + "loss": 0.2785, + "step": 437 + }, + { + "epoch": 0.13, + "learning_rate": 0.00029979240786514395, + "loss": 0.4122, + "step": 438 + }, + { + "epoch": 0.13, + "learning_rate": 0.0002997908846123649, + "loss": 0.2062, + "step": 439 + }, + { + "epoch": 0.13, + "learning_rate": 0.00029978935579529374, + "loss": 0.4249, + "step": 440 + }, + { + "epoch": 0.13, + "learning_rate": 0.00029978782141398727, + "loss": 0.5095, + "step": 441 + }, + { + "epoch": 0.13, + "learning_rate": 0.00029978628146850255, + "loss": 0.2382, + "step": 442 + }, + { + "epoch": 0.13, + "learning_rate": 0.0002997847359588966, + "loss": 0.2783, + "step": 443 + }, + { + "epoch": 0.13, + "learning_rate": 0.00029978318488522705, + "loss": 0.342, + "step": 444 + }, + { + "epoch": 0.14, + "learning_rate": 0.00029978162824755136, + "loss": 0.2918, + "step": 445 + }, + { + "epoch": 0.14, + "learning_rate": 0.00029978006604592744, + "loss": 0.2921, + "step": 446 + }, + { + "epoch": 0.14, + "learning_rate": 0.0002997784982804133, + "loss": 0.2273, + "step": 447 + }, + { + "epoch": 0.14, + "learning_rate": 0.00029977692495106715, + "loss": 0.4954, + "step": 448 + }, + { + "epoch": 0.14, + "learning_rate": 0.00029977534605794744, + "loss": 0.2556, + "step": 449 + }, + { + "epoch": 0.14, + "learning_rate": 0.0002997737616011129, + "loss": 0.3203, + "step": 450 + }, + { + "epoch": 0.14, + "learning_rate": 0.00029977217158062223, + "loss": 0.1777, + "step": 451 + }, + { + "epoch": 0.14, + "learning_rate": 0.0002997705759965347, + "loss": 0.2739, + "step": 452 + }, + { + "epoch": 0.14, + "learning_rate": 0.0002997689748489094, + "loss": 0.3715, + "step": 453 + }, + { + "epoch": 0.14, + "learning_rate": 0.0002997673681378059, + "loss": 0.1215, + "step": 454 + }, + { + "epoch": 0.14, + "learning_rate": 0.0002997657558632838, + "loss": 0.3623, + "step": 455 + }, + { + "epoch": 0.14, + "learning_rate": 0.0002997641380254031, + "loss": 0.3727, + "step": 456 + }, + { + "epoch": 0.14, + "learning_rate": 0.00029976251462422384, + "loss": 0.4168, + "step": 457 + }, + { + "epoch": 0.14, + "learning_rate": 0.0002997608856598064, + "loss": 0.4712, + "step": 458 + }, + { + "epoch": 0.14, + "learning_rate": 0.00029975925113221114, + "loss": 0.2771, + "step": 459 + }, + { + "epoch": 0.14, + "learning_rate": 0.0002997576110414989, + "loss": 0.1335, + "step": 460 + }, + { + "epoch": 0.14, + "learning_rate": 0.0002997559653877306, + "loss": 0.1498, + "step": 461 + }, + { + "epoch": 0.14, + "learning_rate": 0.00029975431417096735, + "loss": 0.3372, + "step": 462 + }, + { + "epoch": 0.14, + "learning_rate": 0.00029975265739127046, + "loss": 0.4225, + "step": 463 + }, + { + "epoch": 0.14, + "learning_rate": 0.0002997509950487014, + "loss": 0.404, + "step": 464 + }, + { + "epoch": 0.14, + "learning_rate": 0.0002997493271433222, + "loss": 0.3955, + "step": 465 + }, + { + "epoch": 0.14, + "learning_rate": 0.0002997476536751945, + "loss": 0.6078, + "step": 466 + }, + { + "epoch": 0.14, + "learning_rate": 0.0002997459746443806, + "loss": 0.2999, + "step": 467 + }, + { + "epoch": 0.14, + "learning_rate": 0.0002997442900509429, + "loss": 0.3965, + "step": 468 + }, + { + "epoch": 0.14, + "learning_rate": 0.0002997425998949439, + "loss": 0.3885, + "step": 469 + }, + { + "epoch": 0.14, + "learning_rate": 0.0002997409041764465, + "loss": 0.1915, + "step": 470 + }, + { + "epoch": 0.14, + "learning_rate": 0.00029973920289551363, + "loss": 0.2949, + "step": 471 + }, + { + "epoch": 0.14, + "learning_rate": 0.00029973749605220843, + "loss": 0.2247, + "step": 472 + }, + { + "epoch": 0.14, + "learning_rate": 0.00029973578364659437, + "loss": 0.2409, + "step": 473 + }, + { + "epoch": 0.14, + "learning_rate": 0.00029973406567873503, + "loss": 0.4252, + "step": 474 + }, + { + "epoch": 0.14, + "learning_rate": 0.00029973234214869423, + "loss": 0.4664, + "step": 475 + }, + { + "epoch": 0.14, + "learning_rate": 0.0002997306130565361, + "loss": 0.3716, + "step": 476 + }, + { + "epoch": 0.14, + "learning_rate": 0.00029972887840232465, + "loss": 0.1988, + "step": 477 + }, + { + "epoch": 0.15, + "learning_rate": 0.0002997271381861245, + "loss": 0.282, + "step": 478 + }, + { + "epoch": 0.15, + "learning_rate": 0.0002997253924080003, + "loss": 0.2726, + "step": 479 + }, + { + "epoch": 0.15, + "learning_rate": 0.0002997236410680167, + "loss": 0.2146, + "step": 480 + }, + { + "epoch": 0.15, + "learning_rate": 0.00029972188416623895, + "loss": 0.2979, + "step": 481 + }, + { + "epoch": 0.15, + "learning_rate": 0.0002997201217027323, + "loss": 0.1986, + "step": 482 + }, + { + "epoch": 0.15, + "learning_rate": 0.00029971835367756215, + "loss": 0.2256, + "step": 483 + }, + { + "epoch": 0.15, + "learning_rate": 0.00029971658009079425, + "loss": 0.3377, + "step": 484 + }, + { + "epoch": 0.15, + "learning_rate": 0.00029971480094249437, + "loss": 0.3734, + "step": 485 + }, + { + "epoch": 0.15, + "learning_rate": 0.0002997130162327287, + "loss": 0.2543, + "step": 486 + }, + { + "epoch": 0.15, + "learning_rate": 0.00029971122596156356, + "loss": 0.4601, + "step": 487 + }, + { + "epoch": 0.15, + "learning_rate": 0.00029970943012906534, + "loss": 0.1657, + "step": 488 + }, + { + "epoch": 0.15, + "learning_rate": 0.0002997076287353008, + "loss": 0.1734, + "step": 489 + }, + { + "epoch": 0.15, + "learning_rate": 0.0002997058217803369, + "loss": 0.3586, + "step": 490 + }, + { + "epoch": 0.15, + "learning_rate": 0.0002997040092642407, + "loss": 0.3834, + "step": 491 + }, + { + "epoch": 0.15, + "learning_rate": 0.00029970219118707957, + "loss": 0.2606, + "step": 492 + }, + { + "epoch": 0.15, + "learning_rate": 0.00029970036754892106, + "loss": 0.3879, + "step": 493 + }, + { + "epoch": 0.15, + "learning_rate": 0.00029969853834983285, + "loss": 0.3575, + "step": 494 + }, + { + "epoch": 0.15, + "learning_rate": 0.00029969670358988293, + "loss": 0.3558, + "step": 495 + }, + { + "epoch": 0.15, + "learning_rate": 0.00029969486326913953, + "loss": 0.5673, + "step": 496 + }, + { + "epoch": 0.15, + "learning_rate": 0.00029969301738767086, + "loss": 0.351, + "step": 497 + }, + { + "epoch": 0.15, + "learning_rate": 0.0002996911659455456, + "loss": 0.321, + "step": 498 + }, + { + "epoch": 0.15, + "learning_rate": 0.0002996893089428324, + "loss": 0.2921, + "step": 499 + }, + { + "epoch": 0.15, + "learning_rate": 0.0002996874463796004, + "loss": 0.1399, + "step": 500 + }, + { + "epoch": 0.15, + "learning_rate": 0.0002996855782559188, + "loss": 0.2705, + "step": 501 + }, + { + "epoch": 0.15, + "learning_rate": 0.00029968370457185683, + "loss": 0.2905, + "step": 502 + }, + { + "epoch": 0.15, + "learning_rate": 0.0002996818253274842, + "loss": 0.2765, + "step": 503 + }, + { + "epoch": 0.15, + "learning_rate": 0.00029967994052287065, + "loss": 0.1394, + "step": 504 + }, + { + "epoch": 0.15, + "learning_rate": 0.0002996780501580863, + "loss": 0.1822, + "step": 505 + }, + { + "epoch": 0.15, + "learning_rate": 0.0002996761542332013, + "loss": 0.2529, + "step": 506 + }, + { + "epoch": 0.15, + "learning_rate": 0.00029967425274828605, + "loss": 0.3515, + "step": 507 + }, + { + "epoch": 0.15, + "learning_rate": 0.00029967234570341133, + "loss": 0.3273, + "step": 508 + }, + { + "epoch": 0.15, + "learning_rate": 0.0002996704330986478, + "loss": 0.3779, + "step": 509 + }, + { + "epoch": 0.15, + "learning_rate": 0.0002996685149340666, + "loss": 0.1201, + "step": 510 + }, + { + "epoch": 0.16, + "learning_rate": 0.00029966659120973896, + "loss": 0.4215, + "step": 511 + }, + { + "epoch": 0.16, + "learning_rate": 0.0002996646619257364, + "loss": 0.4635, + "step": 512 + }, + { + "epoch": 0.16, + "learning_rate": 0.00029966272708213047, + "loss": 0.1463, + "step": 513 + }, + { + "epoch": 0.16, + "learning_rate": 0.0002996607866789931, + "loss": 0.4531, + "step": 514 + }, + { + "epoch": 0.16, + "learning_rate": 0.0002996588407163964, + "loss": 0.1818, + "step": 515 + }, + { + "epoch": 0.16, + "learning_rate": 0.0002996568891944127, + "loss": 0.3224, + "step": 516 + }, + { + "epoch": 0.16, + "learning_rate": 0.0002996549321131144, + "loss": 0.3723, + "step": 517 + }, + { + "epoch": 0.16, + "learning_rate": 0.0002996529694725742, + "loss": 0.2467, + "step": 518 + }, + { + "epoch": 0.16, + "learning_rate": 0.000299651001272865, + "loss": 0.2826, + "step": 519 + }, + { + "epoch": 0.16, + "learning_rate": 0.00029964902751406007, + "loss": 0.5684, + "step": 520 + }, + { + "epoch": 0.16, + "learning_rate": 0.00029964704819623254, + "loss": 0.2713, + "step": 521 + }, + { + "epoch": 0.16, + "learning_rate": 0.000299645063319456, + "loss": 0.2355, + "step": 522 + }, + { + "epoch": 0.16, + "learning_rate": 0.00029964307288380423, + "loss": 0.3478, + "step": 523 + }, + { + "epoch": 0.16, + "learning_rate": 0.00029964107688935113, + "loss": 0.3571, + "step": 524 + }, + { + "epoch": 0.16, + "learning_rate": 0.00029963907533617077, + "loss": 0.4088, + "step": 525 + }, + { + "epoch": 0.16, + "learning_rate": 0.0002996370682243376, + "loss": 0.1707, + "step": 526 + }, + { + "epoch": 0.16, + "learning_rate": 0.0002996350555539262, + "loss": 0.2044, + "step": 527 + }, + { + "epoch": 0.16, + "learning_rate": 0.00029963303732501124, + "loss": 0.1962, + "step": 528 + }, + { + "epoch": 0.16, + "learning_rate": 0.00029963101353766775, + "loss": 0.2835, + "step": 529 + }, + { + "epoch": 0.16, + "learning_rate": 0.00029962898419197095, + "loss": 0.2094, + "step": 530 + }, + { + "epoch": 0.16, + "learning_rate": 0.0002996269492879961, + "loss": 0.1623, + "step": 531 + }, + { + "epoch": 0.16, + "learning_rate": 0.0002996249088258188, + "loss": 0.3834, + "step": 532 + }, + { + "epoch": 0.16, + "learning_rate": 0.00029962286280551506, + "loss": 0.1057, + "step": 533 + }, + { + "epoch": 0.16, + "learning_rate": 0.00029962081122716065, + "loss": 0.092, + "step": 534 + }, + { + "epoch": 0.16, + "learning_rate": 0.00029961875409083186, + "loss": 0.4468, + "step": 535 + }, + { + "epoch": 0.16, + "learning_rate": 0.00029961669139660517, + "loss": 0.2699, + "step": 536 + }, + { + "epoch": 0.16, + "learning_rate": 0.0002996146231445571, + "loss": 0.2229, + "step": 537 + }, + { + "epoch": 0.16, + "learning_rate": 0.00029961254933476446, + "loss": 0.384, + "step": 538 + }, + { + "epoch": 0.16, + "learning_rate": 0.0002996104699673044, + "loss": 0.4219, + "step": 539 + }, + { + "epoch": 0.16, + "learning_rate": 0.00029960838504225414, + "loss": 0.2765, + "step": 540 + }, + { + "epoch": 0.16, + "learning_rate": 0.0002996062945596911, + "loss": 0.4226, + "step": 541 + }, + { + "epoch": 0.16, + "learning_rate": 0.00029960419851969284, + "loss": 0.2395, + "step": 542 + }, + { + "epoch": 0.16, + "learning_rate": 0.0002996020969223374, + "loss": 0.3721, + "step": 543 + }, + { + "epoch": 0.17, + "learning_rate": 0.00029959998976770275, + "loss": 0.5709, + "step": 544 + }, + { + "epoch": 0.17, + "learning_rate": 0.0002995978770558672, + "loss": 0.2719, + "step": 545 + }, + { + "epoch": 0.17, + "learning_rate": 0.00029959575878690917, + "loss": 0.2691, + "step": 546 + }, + { + "epoch": 0.17, + "learning_rate": 0.00029959363496090744, + "loss": 0.2402, + "step": 547 + }, + { + "epoch": 0.17, + "learning_rate": 0.0002995915055779408, + "loss": 0.1479, + "step": 548 + }, + { + "epoch": 0.17, + "learning_rate": 0.0002995893706380884, + "loss": 0.2172, + "step": 549 + }, + { + "epoch": 0.17, + "learning_rate": 0.0002995872301414296, + "loss": 0.2597, + "step": 550 + }, + { + "epoch": 0.17, + "learning_rate": 0.00029958508408804376, + "loss": 0.3459, + "step": 551 + }, + { + "epoch": 0.17, + "learning_rate": 0.0002995829324780108, + "loss": 0.3623, + "step": 552 + }, + { + "epoch": 0.17, + "learning_rate": 0.0002995807753114105, + "loss": 0.2283, + "step": 553 + }, + { + "epoch": 0.17, + "learning_rate": 0.00029957861258832306, + "loss": 0.358, + "step": 554 + }, + { + "epoch": 0.17, + "learning_rate": 0.0002995764443088288, + "loss": 0.2108, + "step": 555 + }, + { + "epoch": 0.17, + "learning_rate": 0.00029957427047300824, + "loss": 0.1743, + "step": 556 + }, + { + "epoch": 0.17, + "learning_rate": 0.00029957209108094217, + "loss": 0.0729, + "step": 557 + }, + { + "epoch": 0.17, + "learning_rate": 0.0002995699061327115, + "loss": 0.3481, + "step": 558 + }, + { + "epoch": 0.17, + "learning_rate": 0.00029956771562839746, + "loss": 0.3423, + "step": 559 + }, + { + "epoch": 0.17, + "learning_rate": 0.0002995655195680814, + "loss": 0.386, + "step": 560 + }, + { + "epoch": 0.17, + "learning_rate": 0.00029956331795184484, + "loss": 0.2053, + "step": 561 + }, + { + "epoch": 0.17, + "learning_rate": 0.00029956111077976964, + "loss": 0.1772, + "step": 562 + }, + { + "epoch": 0.17, + "learning_rate": 0.0002995588980519378, + "loss": 0.2538, + "step": 563 + }, + { + "epoch": 0.17, + "learning_rate": 0.0002995566797684314, + "loss": 0.2372, + "step": 564 + }, + { + "epoch": 0.17, + "learning_rate": 0.0002995544559293329, + "loss": 0.2269, + "step": 565 + }, + { + "epoch": 0.17, + "learning_rate": 0.00029955222653472503, + "loss": 0.4162, + "step": 566 + }, + { + "epoch": 0.17, + "learning_rate": 0.0002995499915846904, + "loss": 0.364, + "step": 567 + }, + { + "epoch": 0.17, + "learning_rate": 0.00029954775107931224, + "loss": 0.432, + "step": 568 + }, + { + "epoch": 0.17, + "learning_rate": 0.0002995455050186736, + "loss": 0.3256, + "step": 569 + }, + { + "epoch": 0.17, + "learning_rate": 0.000299543253402858, + "loss": 0.4145, + "step": 570 + }, + { + "epoch": 0.17, + "learning_rate": 0.0002995409962319491, + "loss": 0.3369, + "step": 571 + }, + { + "epoch": 0.17, + "learning_rate": 0.00029953873350603067, + "loss": 0.3079, + "step": 572 + }, + { + "epoch": 0.17, + "learning_rate": 0.00029953646522518687, + "loss": 0.4847, + "step": 573 + }, + { + "epoch": 0.17, + "learning_rate": 0.00029953419138950186, + "loss": 0.3155, + "step": 574 + }, + { + "epoch": 0.17, + "learning_rate": 0.0002995319119990601, + "loss": 0.2454, + "step": 575 + }, + { + "epoch": 0.17, + "learning_rate": 0.0002995296270539463, + "loss": 0.3962, + "step": 576 + }, + { + "epoch": 0.18, + "learning_rate": 0.00029952733655424547, + "loss": 0.2893, + "step": 577 + }, + { + "epoch": 0.18, + "learning_rate": 0.0002995250405000424, + "loss": 0.4459, + "step": 578 + }, + { + "epoch": 0.18, + "learning_rate": 0.0002995227388914227, + "loss": 0.4131, + "step": 579 + }, + { + "epoch": 0.18, + "learning_rate": 0.00029952043172847166, + "loss": 0.3026, + "step": 580 + }, + { + "epoch": 0.18, + "learning_rate": 0.0002995181190112751, + "loss": 0.321, + "step": 581 + }, + { + "epoch": 0.18, + "learning_rate": 0.00029951580073991885, + "loss": 0.248, + "step": 582 + }, + { + "epoch": 0.18, + "learning_rate": 0.000299513476914489, + "loss": 0.1454, + "step": 583 + }, + { + "epoch": 0.18, + "learning_rate": 0.00029951114753507197, + "loss": 0.2537, + "step": 584 + }, + { + "epoch": 0.18, + "learning_rate": 0.00029950881260175426, + "loss": 0.233, + "step": 585 + }, + { + "epoch": 0.18, + "learning_rate": 0.00029950647211462257, + "loss": 0.4671, + "step": 586 + }, + { + "epoch": 0.18, + "learning_rate": 0.00029950412607376387, + "loss": 0.2935, + "step": 587 + }, + { + "epoch": 0.18, + "learning_rate": 0.0002995017744792653, + "loss": 0.2588, + "step": 588 + }, + { + "epoch": 0.18, + "learning_rate": 0.00029949941733121424, + "loss": 0.1436, + "step": 589 + }, + { + "epoch": 0.18, + "learning_rate": 0.0002994970546296982, + "loss": 0.38, + "step": 590 + }, + { + "epoch": 0.18, + "learning_rate": 0.000299494686374805, + "loss": 0.3124, + "step": 591 + }, + { + "epoch": 0.18, + "learning_rate": 0.0002994923125666226, + "loss": 0.2337, + "step": 592 + }, + { + "epoch": 0.18, + "learning_rate": 0.00029948993320523916, + "loss": 0.4887, + "step": 593 + }, + { + "epoch": 0.18, + "learning_rate": 0.0002994875482907431, + "loss": 0.2111, + "step": 594 + }, + { + "epoch": 0.18, + "learning_rate": 0.0002994851578232229, + "loss": 0.3063, + "step": 595 + }, + { + "epoch": 0.18, + "learning_rate": 0.0002994827618027675, + "loss": 0.1798, + "step": 596 + }, + { + "epoch": 0.18, + "learning_rate": 0.0002994803602294659, + "loss": 0.2053, + "step": 597 + }, + { + "epoch": 0.18, + "learning_rate": 0.0002994779531034072, + "loss": 0.2124, + "step": 598 + }, + { + "epoch": 0.18, + "learning_rate": 0.0002994755404246809, + "loss": 0.1837, + "step": 599 + }, + { + "epoch": 0.18, + "learning_rate": 0.0002994731221933766, + "loss": 0.224, + "step": 600 + }, + { + "epoch": 0.18, + "learning_rate": 0.0002994706984095842, + "loss": 0.0651, + "step": 601 + }, + { + "epoch": 0.18, + "learning_rate": 0.0002994682690733936, + "loss": 0.383, + "step": 602 + }, + { + "epoch": 0.18, + "learning_rate": 0.00029946583418489516, + "loss": 0.1943, + "step": 603 + }, + { + "epoch": 0.18, + "learning_rate": 0.00029946339374417927, + "loss": 0.2282, + "step": 604 + }, + { + "epoch": 0.18, + "learning_rate": 0.0002994609477513366, + "loss": 0.2863, + "step": 605 + }, + { + "epoch": 0.18, + "learning_rate": 0.00029945849620645803, + "loss": 0.4083, + "step": 606 + }, + { + "epoch": 0.18, + "learning_rate": 0.00029945603910963456, + "loss": 0.212, + "step": 607 + }, + { + "epoch": 0.18, + "learning_rate": 0.0002994535764609575, + "loss": 0.2627, + "step": 608 + }, + { + "epoch": 0.18, + "learning_rate": 0.0002994511082605184, + "loss": 0.1652, + "step": 609 + }, + { + "epoch": 0.19, + "learning_rate": 0.0002994486345084089, + "loss": 0.5203, + "step": 610 + }, + { + "epoch": 0.19, + "learning_rate": 0.0002994461552047208, + "loss": 0.3043, + "step": 611 + }, + { + "epoch": 0.19, + "learning_rate": 0.0002994436703495464, + "loss": 0.242, + "step": 612 + }, + { + "epoch": 0.19, + "learning_rate": 0.00029944117994297784, + "loss": 0.1837, + "step": 613 + }, + { + "epoch": 0.19, + "learning_rate": 0.00029943868398510766, + "loss": 0.2564, + "step": 614 + }, + { + "epoch": 0.19, + "learning_rate": 0.0002994361824760286, + "loss": 0.2774, + "step": 615 + }, + { + "epoch": 0.19, + "learning_rate": 0.0002994336754158335, + "loss": 0.2654, + "step": 616 + }, + { + "epoch": 0.19, + "learning_rate": 0.0002994311628046157, + "loss": 0.2832, + "step": 617 + }, + { + "epoch": 0.19, + "learning_rate": 0.00029942864464246833, + "loss": 0.258, + "step": 618 + }, + { + "epoch": 0.19, + "learning_rate": 0.00029942612092948506, + "loss": 0.2167, + "step": 619 + }, + { + "epoch": 0.19, + "learning_rate": 0.0002994235916657595, + "loss": 0.2065, + "step": 620 + }, + { + "epoch": 0.19, + "learning_rate": 0.00029942105685138576, + "loss": 0.2448, + "step": 621 + }, + { + "epoch": 0.19, + "learning_rate": 0.0002994185164864579, + "loss": 0.1594, + "step": 622 + }, + { + "epoch": 0.19, + "learning_rate": 0.0002994159705710704, + "loss": 0.1468, + "step": 623 + }, + { + "epoch": 0.19, + "learning_rate": 0.0002994134191053176, + "loss": 0.1639, + "step": 624 + }, + { + "epoch": 0.19, + "learning_rate": 0.0002994108620892946, + "loss": 0.1069, + "step": 625 + }, + { + "epoch": 0.19, + "learning_rate": 0.0002994082995230961, + "loss": 0.1347, + "step": 626 + }, + { + "epoch": 0.19, + "learning_rate": 0.0002994057314068175, + "loss": 0.2825, + "step": 627 + }, + { + "epoch": 0.19, + "learning_rate": 0.000299403157740554, + "loss": 0.3625, + "step": 628 + }, + { + "epoch": 0.19, + "learning_rate": 0.00029940057852440136, + "loss": 0.3256, + "step": 629 + }, + { + "epoch": 0.19, + "learning_rate": 0.0002993979937584553, + "loss": 0.2062, + "step": 630 + }, + { + "epoch": 0.19, + "learning_rate": 0.00029939540344281197, + "loss": 0.1888, + "step": 631 + }, + { + "epoch": 0.19, + "learning_rate": 0.0002993928075775674, + "loss": 0.2005, + "step": 632 + }, + { + "epoch": 0.19, + "learning_rate": 0.00029939020616281817, + "loss": 0.139, + "step": 633 + }, + { + "epoch": 0.19, + "learning_rate": 0.00029938759919866093, + "loss": 0.0329, + "step": 634 + }, + { + "epoch": 0.19, + "learning_rate": 0.00029938498668519235, + "loss": 0.222, + "step": 635 + }, + { + "epoch": 0.19, + "learning_rate": 0.00029938236862250963, + "loss": 0.3898, + "step": 636 + }, + { + "epoch": 0.19, + "learning_rate": 0.00029937974501071, + "loss": 0.3655, + "step": 637 + }, + { + "epoch": 0.19, + "learning_rate": 0.00029937711584989085, + "loss": 0.156, + "step": 638 + }, + { + "epoch": 0.19, + "learning_rate": 0.00029937448114014995, + "loss": 0.2562, + "step": 639 + }, + { + "epoch": 0.19, + "learning_rate": 0.000299371840881585, + "loss": 0.2923, + "step": 640 + }, + { + "epoch": 0.19, + "learning_rate": 0.0002993691950742943, + "loss": 0.4049, + "step": 641 + }, + { + "epoch": 0.19, + "learning_rate": 0.00029936654371837595, + "loss": 0.3547, + "step": 642 + }, + { + "epoch": 0.2, + "learning_rate": 0.0002993638868139286, + "loss": 0.3683, + "step": 643 + }, + { + "epoch": 0.2, + "learning_rate": 0.0002993612243610508, + "loss": 0.3134, + "step": 644 + }, + { + "epoch": 0.2, + "learning_rate": 0.0002993585563598415, + "loss": 0.1229, + "step": 645 + }, + { + "epoch": 0.2, + "learning_rate": 0.0002993558828103998, + "loss": 0.4321, + "step": 646 + }, + { + "epoch": 0.2, + "learning_rate": 0.0002993532037128251, + "loss": 0.3215, + "step": 647 + }, + { + "epoch": 0.2, + "learning_rate": 0.0002993505190672169, + "loss": 0.1549, + "step": 648 + }, + { + "epoch": 0.2, + "learning_rate": 0.00029934782887367477, + "loss": 0.2787, + "step": 649 + }, + { + "epoch": 0.2, + "learning_rate": 0.0002993451331322988, + "loss": 0.3185, + "step": 650 + }, + { + "epoch": 0.2, + "learning_rate": 0.0002993424318431891, + "loss": 0.3658, + "step": 651 + }, + { + "epoch": 0.2, + "learning_rate": 0.00029933972500644597, + "loss": 0.4424, + "step": 652 + }, + { + "epoch": 0.2, + "learning_rate": 0.00029933701262217, + "loss": 0.2291, + "step": 653 + }, + { + "epoch": 0.2, + "learning_rate": 0.000299334294690462, + "loss": 0.3363, + "step": 654 + }, + { + "epoch": 0.2, + "learning_rate": 0.0002993315712114228, + "loss": 0.2799, + "step": 655 + }, + { + "epoch": 0.2, + "learning_rate": 0.00029932884218515366, + "loss": 0.3773, + "step": 656 + }, + { + "epoch": 0.2, + "learning_rate": 0.0002993261076117559, + "loss": 0.4417, + "step": 657 + }, + { + "epoch": 0.2, + "learning_rate": 0.00029932336749133125, + "loss": 0.3764, + "step": 658 + }, + { + "epoch": 0.2, + "learning_rate": 0.0002993206218239813, + "loss": 0.1566, + "step": 659 + }, + { + "epoch": 0.2, + "learning_rate": 0.0002993178706098081, + "loss": 0.2881, + "step": 660 + }, + { + "epoch": 0.2, + "learning_rate": 0.0002993151138489139, + "loss": 0.2519, + "step": 661 + }, + { + "epoch": 0.2, + "learning_rate": 0.0002993123515414011, + "loss": 0.3047, + "step": 662 + }, + { + "epoch": 0.2, + "learning_rate": 0.00029930958368737235, + "loss": 0.3065, + "step": 663 + }, + { + "epoch": 0.2, + "learning_rate": 0.0002993068102869303, + "loss": 0.4237, + "step": 664 + }, + { + "epoch": 0.2, + "learning_rate": 0.0002993040313401782, + "loss": 0.1637, + "step": 665 + }, + { + "epoch": 0.2, + "learning_rate": 0.00029930124684721907, + "loss": 0.2781, + "step": 666 + }, + { + "epoch": 0.2, + "learning_rate": 0.0002992984568081565, + "loss": 0.2007, + "step": 667 + }, + { + "epoch": 0.2, + "learning_rate": 0.00029929566122309406, + "loss": 0.2458, + "step": 668 + }, + { + "epoch": 0.2, + "learning_rate": 0.0002992928600921356, + "loss": 0.0155, + "step": 669 + }, + { + "epoch": 0.2, + "learning_rate": 0.0002992900534153852, + "loss": 0.4149, + "step": 670 + }, + { + "epoch": 0.2, + "learning_rate": 0.0002992872411929471, + "loss": 0.2661, + "step": 671 + }, + { + "epoch": 0.2, + "learning_rate": 0.0002992844234249258, + "loss": 0.2798, + "step": 672 + }, + { + "epoch": 0.2, + "learning_rate": 0.00029928160011142587, + "loss": 0.4266, + "step": 673 + }, + { + "epoch": 0.2, + "learning_rate": 0.0002992787712525523, + "loss": 0.2037, + "step": 674 + }, + { + "epoch": 0.2, + "learning_rate": 0.00029927593684841015, + "loss": 0.4644, + "step": 675 + }, + { + "epoch": 0.21, + "learning_rate": 0.0002992730968991047, + "loss": 0.3323, + "step": 676 + }, + { + "epoch": 0.21, + "learning_rate": 0.0002992702514047414, + "loss": 0.3219, + "step": 677 + }, + { + "epoch": 0.21, + "learning_rate": 0.00029926740036542596, + "loss": 0.4154, + "step": 678 + }, + { + "epoch": 0.21, + "learning_rate": 0.0002992645437812644, + "loss": 0.154, + "step": 679 + }, + { + "epoch": 0.21, + "learning_rate": 0.00029926168165236275, + "loss": 0.2073, + "step": 680 + }, + { + "epoch": 0.21, + "learning_rate": 0.0002992588139788273, + "loss": 0.3378, + "step": 681 + }, + { + "epoch": 0.21, + "learning_rate": 0.0002992559407607646, + "loss": 0.4548, + "step": 682 + }, + { + "epoch": 0.21, + "learning_rate": 0.00029925306199828143, + "loss": 0.2912, + "step": 683 + }, + { + "epoch": 0.21, + "learning_rate": 0.00029925017769148466, + "loss": 0.4363, + "step": 684 + }, + { + "epoch": 0.21, + "learning_rate": 0.0002992472878404815, + "loss": 0.2653, + "step": 685 + }, + { + "epoch": 0.21, + "learning_rate": 0.0002992443924453792, + "loss": 0.3229, + "step": 686 + }, + { + "epoch": 0.21, + "learning_rate": 0.0002992414915062854, + "loss": 0.0778, + "step": 687 + }, + { + "epoch": 0.21, + "learning_rate": 0.00029923858502330786, + "loss": 0.2847, + "step": 688 + }, + { + "epoch": 0.21, + "learning_rate": 0.0002992356729965545, + "loss": 0.1495, + "step": 689 + }, + { + "epoch": 0.21, + "learning_rate": 0.0002992327554261335, + "loss": 0.4434, + "step": 690 + }, + { + "epoch": 0.21, + "learning_rate": 0.00029922983231215326, + "loss": 0.2614, + "step": 691 + }, + { + "epoch": 0.21, + "learning_rate": 0.00029922690365472243, + "loss": 0.2328, + "step": 692 + }, + { + "epoch": 0.21, + "learning_rate": 0.00029922396945394967, + "loss": 0.3486, + "step": 693 + }, + { + "epoch": 0.21, + "learning_rate": 0.00029922102970994405, + "loss": 0.3103, + "step": 694 + }, + { + "epoch": 0.21, + "learning_rate": 0.00029921808442281473, + "loss": 0.3266, + "step": 695 + }, + { + "epoch": 0.21, + "learning_rate": 0.00029921513359267124, + "loss": 0.4676, + "step": 696 + }, + { + "epoch": 0.21, + "learning_rate": 0.00029921217721962307, + "loss": 0.5129, + "step": 697 + }, + { + "epoch": 0.21, + "learning_rate": 0.00029920921530378005, + "loss": 0.4523, + "step": 698 + }, + { + "epoch": 0.21, + "learning_rate": 0.00029920624784525224, + "loss": 0.3135, + "step": 699 + }, + { + "epoch": 0.21, + "learning_rate": 0.00029920327484414983, + "loss": 0.2785, + "step": 700 + }, + { + "epoch": 0.21, + "learning_rate": 0.0002992002963005834, + "loss": 0.419, + "step": 701 + }, + { + "epoch": 0.21, + "learning_rate": 0.0002991973122146634, + "loss": 0.3511, + "step": 702 + }, + { + "epoch": 0.21, + "learning_rate": 0.0002991943225865008, + "loss": 0.2734, + "step": 703 + }, + { + "epoch": 0.21, + "learning_rate": 0.0002991913274162066, + "loss": 0.2312, + "step": 704 + }, + { + "epoch": 0.21, + "learning_rate": 0.0002991883267038921, + "loss": 0.105, + "step": 705 + }, + { + "epoch": 0.21, + "learning_rate": 0.00029918532044966885, + "loss": 0.3721, + "step": 706 + }, + { + "epoch": 0.21, + "learning_rate": 0.0002991823086536483, + "loss": 0.4562, + "step": 707 + }, + { + "epoch": 0.21, + "learning_rate": 0.0002991792913159425, + "loss": 0.3046, + "step": 708 + }, + { + "epoch": 0.22, + "learning_rate": 0.0002991762684366635, + "loss": 0.2957, + "step": 709 + }, + { + "epoch": 0.22, + "learning_rate": 0.00029917324001592355, + "loss": 0.3137, + "step": 710 + }, + { + "epoch": 0.22, + "learning_rate": 0.0002991702060538352, + "loss": 0.2167, + "step": 711 + }, + { + "epoch": 0.22, + "learning_rate": 0.00029916716655051115, + "loss": 0.2974, + "step": 712 + }, + { + "epoch": 0.22, + "learning_rate": 0.0002991641215060643, + "loss": 0.324, + "step": 713 + }, + { + "epoch": 0.22, + "learning_rate": 0.0002991610709206077, + "loss": 0.266, + "step": 714 + }, + { + "epoch": 0.22, + "learning_rate": 0.0002991580147942548, + "loss": 0.3172, + "step": 715 + }, + { + "epoch": 0.22, + "learning_rate": 0.000299154953127119, + "loss": 0.2799, + "step": 716 + }, + { + "epoch": 0.22, + "learning_rate": 0.0002991518859193141, + "loss": 0.3701, + "step": 717 + }, + { + "epoch": 0.22, + "learning_rate": 0.000299148813170954, + "loss": 0.342, + "step": 718 + }, + { + "epoch": 0.22, + "learning_rate": 0.0002991457348821529, + "loss": 0.205, + "step": 719 + }, + { + "epoch": 0.22, + "learning_rate": 0.0002991426510530251, + "loss": 0.2479, + "step": 720 + }, + { + "epoch": 0.22, + "learning_rate": 0.00029913956168368516, + "loss": 0.1703, + "step": 721 + }, + { + "epoch": 0.22, + "learning_rate": 0.0002991364667742479, + "loss": 0.427, + "step": 722 + }, + { + "epoch": 0.22, + "learning_rate": 0.00029913336632482815, + "loss": 0.2069, + "step": 723 + }, + { + "epoch": 0.22, + "learning_rate": 0.00029913026033554126, + "loss": 0.1666, + "step": 724 + }, + { + "epoch": 0.22, + "learning_rate": 0.0002991271488065025, + "loss": 0.1995, + "step": 725 + }, + { + "epoch": 0.22, + "learning_rate": 0.00029912403173782743, + "loss": 0.3251, + "step": 726 + }, + { + "epoch": 0.22, + "learning_rate": 0.0002991209091296319, + "loss": 0.2823, + "step": 727 + }, + { + "epoch": 0.22, + "learning_rate": 0.0002991177809820319, + "loss": 0.1423, + "step": 728 + }, + { + "epoch": 0.22, + "learning_rate": 0.00029911464729514366, + "loss": 0.182, + "step": 729 + }, + { + "epoch": 0.22, + "learning_rate": 0.00029911150806908346, + "loss": 0.201, + "step": 730 + }, + { + "epoch": 0.22, + "learning_rate": 0.00029910836330396804, + "loss": 0.3141, + "step": 731 + }, + { + "epoch": 0.22, + "learning_rate": 0.00029910521299991423, + "loss": 0.1276, + "step": 732 + }, + { + "epoch": 0.22, + "learning_rate": 0.00029910205715703896, + "loss": 0.3491, + "step": 733 + }, + { + "epoch": 0.22, + "learning_rate": 0.0002990988957754595, + "loss": 0.2206, + "step": 734 + }, + { + "epoch": 0.22, + "learning_rate": 0.00029909572885529327, + "loss": 0.2723, + "step": 735 + }, + { + "epoch": 0.22, + "learning_rate": 0.000299092556396658, + "loss": 0.3119, + "step": 736 + }, + { + "epoch": 0.22, + "learning_rate": 0.00029908937839967146, + "loss": 0.2072, + "step": 737 + }, + { + "epoch": 0.22, + "learning_rate": 0.00029908619486445167, + "loss": 0.2549, + "step": 738 + }, + { + "epoch": 0.22, + "learning_rate": 0.00029908300579111694, + "loss": 0.3454, + "step": 739 + }, + { + "epoch": 0.22, + "learning_rate": 0.0002990798111797857, + "loss": 0.2484, + "step": 740 + }, + { + "epoch": 0.22, + "learning_rate": 0.00029907661103057673, + "loss": 0.2278, + "step": 741 + }, + { + "epoch": 0.23, + "learning_rate": 0.0002990734053436088, + "loss": 0.3896, + "step": 742 + }, + { + "epoch": 0.23, + "learning_rate": 0.00029907019411900095, + "loss": 0.1961, + "step": 743 + }, + { + "epoch": 0.23, + "learning_rate": 0.00029906697735687253, + "loss": 0.5036, + "step": 744 + }, + { + "epoch": 0.23, + "learning_rate": 0.00029906375505734317, + "loss": 0.501, + "step": 745 + }, + { + "epoch": 0.23, + "learning_rate": 0.0002990605272205323, + "loss": 0.2001, + "step": 746 + }, + { + "epoch": 0.23, + "learning_rate": 0.00029905729384656, + "loss": 0.3499, + "step": 747 + }, + { + "epoch": 0.23, + "learning_rate": 0.00029905405493554633, + "loss": 0.324, + "step": 748 + }, + { + "epoch": 0.23, + "learning_rate": 0.0002990508104876117, + "loss": 0.2427, + "step": 749 + }, + { + "epoch": 0.23, + "learning_rate": 0.00029904756050287646, + "loss": 0.3883, + "step": 750 + }, + { + "epoch": 0.23, + "learning_rate": 0.00029904430498146146, + "loss": 0.4183, + "step": 751 + }, + { + "epoch": 0.23, + "learning_rate": 0.00029904104392348757, + "loss": 0.4166, + "step": 752 + }, + { + "epoch": 0.23, + "learning_rate": 0.000299037777329076, + "loss": 0.2737, + "step": 753 + }, + { + "epoch": 0.23, + "learning_rate": 0.000299034505198348, + "loss": 0.4867, + "step": 754 + }, + { + "epoch": 0.23, + "learning_rate": 0.0002990312275314252, + "loss": 0.3071, + "step": 755 + }, + { + "epoch": 0.23, + "learning_rate": 0.0002990279443284293, + "loss": 0.1181, + "step": 756 + }, + { + "epoch": 0.23, + "learning_rate": 0.00029902465558948233, + "loss": 0.1702, + "step": 757 + }, + { + "epoch": 0.23, + "learning_rate": 0.0002990213613147064, + "loss": 0.2976, + "step": 758 + }, + { + "epoch": 0.23, + "learning_rate": 0.00029901806150422393, + "loss": 0.3961, + "step": 759 + }, + { + "epoch": 0.23, + "learning_rate": 0.0002990147561581574, + "loss": 0.3832, + "step": 760 + }, + { + "epoch": 0.23, + "learning_rate": 0.0002990114452766297, + "loss": 0.2617, + "step": 761 + }, + { + "epoch": 0.23, + "learning_rate": 0.0002990081288597638, + "loss": 0.3486, + "step": 762 + }, + { + "epoch": 0.23, + "learning_rate": 0.00029900480690768283, + "loss": 0.3362, + "step": 763 + }, + { + "epoch": 0.23, + "learning_rate": 0.00029900147942051025, + "loss": 0.3107, + "step": 764 + }, + { + "epoch": 0.23, + "learning_rate": 0.0002989981463983697, + "loss": 0.2914, + "step": 765 + }, + { + "epoch": 0.23, + "learning_rate": 0.0002989948078413849, + "loss": 0.1994, + "step": 766 + }, + { + "epoch": 0.23, + "learning_rate": 0.0002989914637496799, + "loss": 0.4098, + "step": 767 + }, + { + "epoch": 0.23, + "learning_rate": 0.00029898811412337896, + "loss": 0.4923, + "step": 768 + }, + { + "epoch": 0.23, + "learning_rate": 0.0002989847589626065, + "loss": 0.1955, + "step": 769 + }, + { + "epoch": 0.23, + "learning_rate": 0.0002989813982674871, + "loss": 0.3696, + "step": 770 + }, + { + "epoch": 0.23, + "learning_rate": 0.00029897803203814566, + "loss": 0.3544, + "step": 771 + }, + { + "epoch": 0.23, + "learning_rate": 0.0002989746602747072, + "loss": 0.3265, + "step": 772 + }, + { + "epoch": 0.23, + "learning_rate": 0.000298971282977297, + "loss": 0.4474, + "step": 773 + }, + { + "epoch": 0.24, + "learning_rate": 0.0002989679001460404, + "loss": 0.1689, + "step": 774 + }, + { + "epoch": 0.24, + "learning_rate": 0.00029896451178106325, + "loss": 0.2232, + "step": 775 + }, + { + "epoch": 0.24, + "learning_rate": 0.00029896111788249135, + "loss": 0.1775, + "step": 776 + }, + { + "epoch": 0.24, + "learning_rate": 0.0002989577184504507, + "loss": 0.3358, + "step": 777 + }, + { + "epoch": 0.24, + "learning_rate": 0.00029895431348506753, + "loss": 0.1955, + "step": 778 + }, + { + "epoch": 0.24, + "learning_rate": 0.00029895090298646856, + "loss": 0.4043, + "step": 779 + }, + { + "epoch": 0.24, + "learning_rate": 0.00029894748695478025, + "loss": 0.2092, + "step": 780 + }, + { + "epoch": 0.24, + "learning_rate": 0.0002989440653901296, + "loss": 0.3913, + "step": 781 + }, + { + "epoch": 0.24, + "learning_rate": 0.00029894063829264376, + "loss": 0.3509, + "step": 782 + }, + { + "epoch": 0.24, + "learning_rate": 0.00029893720566244995, + "loss": 0.2773, + "step": 783 + }, + { + "epoch": 0.24, + "learning_rate": 0.00029893376749967566, + "loss": 0.1448, + "step": 784 + }, + { + "epoch": 0.24, + "learning_rate": 0.0002989303238044487, + "loss": 0.2642, + "step": 785 + }, + { + "epoch": 0.24, + "learning_rate": 0.0002989268745768969, + "loss": 0.2806, + "step": 786 + }, + { + "epoch": 0.24, + "learning_rate": 0.0002989234198171484, + "loss": 0.2678, + "step": 787 + }, + { + "epoch": 0.24, + "learning_rate": 0.00029891995952533167, + "loss": 0.1944, + "step": 788 + }, + { + "epoch": 0.24, + "learning_rate": 0.0002989164937015751, + "loss": 0.3416, + "step": 789 + }, + { + "epoch": 0.24, + "learning_rate": 0.00029891302234600747, + "loss": 0.1723, + "step": 790 + }, + { + "epoch": 0.24, + "learning_rate": 0.00029890954545875773, + "loss": 0.5298, + "step": 791 + }, + { + "epoch": 0.24, + "learning_rate": 0.0002989060630399551, + "loss": 0.3644, + "step": 792 + }, + { + "epoch": 0.24, + "learning_rate": 0.00029890257508972885, + "loss": 0.1947, + "step": 793 + }, + { + "epoch": 0.24, + "learning_rate": 0.0002988990816082086, + "loss": 0.2186, + "step": 794 + }, + { + "epoch": 0.24, + "learning_rate": 0.0002988955825955241, + "loss": 0.1525, + "step": 795 + }, + { + "epoch": 0.24, + "learning_rate": 0.0002988920780518054, + "loss": 0.4187, + "step": 796 + }, + { + "epoch": 0.24, + "learning_rate": 0.00029888856797718257, + "loss": 0.2263, + "step": 797 + }, + { + "epoch": 0.24, + "learning_rate": 0.00029888505237178603, + "loss": 0.2353, + "step": 798 + }, + { + "epoch": 0.24, + "learning_rate": 0.00029888153123574645, + "loss": 0.2691, + "step": 799 + }, + { + "epoch": 0.24, + "learning_rate": 0.00029887800456919456, + "loss": 0.3462, + "step": 800 + }, + { + "epoch": 0.24, + "learning_rate": 0.00029887447237226134, + "loss": 0.2493, + "step": 801 + }, + { + "epoch": 0.24, + "learning_rate": 0.0002988709346450781, + "loss": 0.3273, + "step": 802 + }, + { + "epoch": 0.24, + "learning_rate": 0.0002988673913877761, + "loss": 0.5431, + "step": 803 + }, + { + "epoch": 0.24, + "learning_rate": 0.0002988638426004872, + "loss": 0.1533, + "step": 804 + }, + { + "epoch": 0.24, + "learning_rate": 0.000298860288283343, + "loss": 0.5905, + "step": 805 + }, + { + "epoch": 0.24, + "learning_rate": 0.00029885672843647565, + "loss": 0.321, + "step": 806 + }, + { + "epoch": 0.25, + "learning_rate": 0.00029885316306001733, + "loss": 0.4048, + "step": 807 + }, + { + "epoch": 0.25, + "learning_rate": 0.0002988495921541005, + "loss": 0.2949, + "step": 808 + }, + { + "epoch": 0.25, + "learning_rate": 0.0002988460157188579, + "loss": 0.3131, + "step": 809 + }, + { + "epoch": 0.25, + "learning_rate": 0.00029884243375442225, + "loss": 0.1388, + "step": 810 + }, + { + "epoch": 0.25, + "learning_rate": 0.0002988388462609266, + "loss": 0.0826, + "step": 811 + }, + { + "epoch": 0.25, + "learning_rate": 0.0002988352532385044, + "loss": 0.3406, + "step": 812 + }, + { + "epoch": 0.25, + "learning_rate": 0.0002988316546872889, + "loss": 0.354, + "step": 813 + }, + { + "epoch": 0.25, + "learning_rate": 0.00029882805060741394, + "loss": 0.1797, + "step": 814 + }, + { + "epoch": 0.25, + "learning_rate": 0.0002988244409990133, + "loss": 0.2594, + "step": 815 + }, + { + "epoch": 0.25, + "learning_rate": 0.0002988208258622211, + "loss": 0.3113, + "step": 816 + }, + { + "epoch": 0.25, + "learning_rate": 0.0002988172051971717, + "loss": 0.3167, + "step": 817 + }, + { + "epoch": 0.25, + "learning_rate": 0.0002988135790039994, + "loss": 0.2237, + "step": 818 + }, + { + "epoch": 0.25, + "learning_rate": 0.0002988099472828391, + "loss": 0.2831, + "step": 819 + }, + { + "epoch": 0.25, + "learning_rate": 0.00029880631003382564, + "loss": 0.3461, + "step": 820 + }, + { + "epoch": 0.25, + "learning_rate": 0.00029880266725709417, + "loss": 0.3189, + "step": 821 + }, + { + "epoch": 0.25, + "learning_rate": 0.0002987990189527799, + "loss": 0.2852, + "step": 822 + }, + { + "epoch": 0.25, + "learning_rate": 0.0002987953651210185, + "loss": 0.2256, + "step": 823 + }, + { + "epoch": 0.25, + "learning_rate": 0.00029879170576194554, + "loss": 0.2242, + "step": 824 + }, + { + "epoch": 0.25, + "learning_rate": 0.0002987880408756971, + "loss": 0.4494, + "step": 825 + }, + { + "epoch": 0.25, + "learning_rate": 0.0002987843704624093, + "loss": 0.4021, + "step": 826 + }, + { + "epoch": 0.25, + "learning_rate": 0.00029878069452221834, + "loss": 0.4495, + "step": 827 + }, + { + "epoch": 0.25, + "learning_rate": 0.00029877701305526094, + "loss": 0.2871, + "step": 828 + }, + { + "epoch": 0.25, + "learning_rate": 0.00029877332606167376, + "loss": 0.2543, + "step": 829 + }, + { + "epoch": 0.25, + "learning_rate": 0.0002987696335415938, + "loss": 0.2894, + "step": 830 + }, + { + "epoch": 0.25, + "learning_rate": 0.0002987659354951582, + "loss": 0.3151, + "step": 831 + }, + { + "epoch": 0.25, + "learning_rate": 0.0002987622319225044, + "loss": 0.3337, + "step": 832 + }, + { + "epoch": 0.25, + "learning_rate": 0.0002987585228237699, + "loss": 0.2579, + "step": 833 + }, + { + "epoch": 0.25, + "learning_rate": 0.0002987548081990925, + "loss": 0.1255, + "step": 834 + }, + { + "epoch": 0.25, + "learning_rate": 0.00029875108804861016, + "loss": 0.3114, + "step": 835 + }, + { + "epoch": 0.25, + "learning_rate": 0.00029874736237246117, + "loss": 0.2699, + "step": 836 + }, + { + "epoch": 0.25, + "learning_rate": 0.00029874363117078385, + "loss": 0.4193, + "step": 837 + }, + { + "epoch": 0.25, + "learning_rate": 0.0002987398944437168, + "loss": 0.5109, + "step": 838 + }, + { + "epoch": 0.25, + "learning_rate": 0.0002987361521913989, + "loss": 0.3867, + "step": 839 + }, + { + "epoch": 0.26, + "learning_rate": 0.000298732404413969, + "loss": 0.2645, + "step": 840 + }, + { + "epoch": 0.26, + "learning_rate": 0.0002987286511115666, + "loss": 0.2494, + "step": 841 + }, + { + "epoch": 0.26, + "learning_rate": 0.0002987248922843308, + "loss": 0.4383, + "step": 842 + }, + { + "epoch": 0.26, + "learning_rate": 0.00029872112793240144, + "loss": 0.2563, + "step": 843 + }, + { + "epoch": 0.26, + "learning_rate": 0.0002987173580559183, + "loss": 0.2954, + "step": 844 + }, + { + "epoch": 0.26, + "learning_rate": 0.0002987135826550214, + "loss": 0.2559, + "step": 845 + }, + { + "epoch": 0.26, + "learning_rate": 0.000298709801729851, + "loss": 0.215, + "step": 846 + }, + { + "epoch": 0.26, + "learning_rate": 0.0002987060152805476, + "loss": 0.0728, + "step": 847 + }, + { + "epoch": 0.26, + "learning_rate": 0.00029870222330725173, + "loss": 0.2341, + "step": 848 + }, + { + "epoch": 0.26, + "learning_rate": 0.0002986984258101044, + "loss": 0.4142, + "step": 849 + }, + { + "epoch": 0.26, + "learning_rate": 0.0002986946227892466, + "loss": 0.4502, + "step": 850 + }, + { + "epoch": 0.26, + "learning_rate": 0.00029869081424481963, + "loss": 0.3313, + "step": 851 + }, + { + "epoch": 0.26, + "learning_rate": 0.0002986870001769649, + "loss": 0.467, + "step": 852 + }, + { + "epoch": 0.26, + "learning_rate": 0.0002986831805858241, + "loss": 0.355, + "step": 853 + }, + { + "epoch": 0.26, + "learning_rate": 0.00029867935547153924, + "loss": 0.1509, + "step": 854 + }, + { + "epoch": 0.26, + "learning_rate": 0.00029867552483425227, + "loss": 0.598, + "step": 855 + }, + { + "epoch": 0.26, + "learning_rate": 0.00029867168867410556, + "loss": 0.3371, + "step": 856 + }, + { + "epoch": 0.26, + "learning_rate": 0.00029866784699124154, + "loss": 0.2219, + "step": 857 + }, + { + "epoch": 0.26, + "learning_rate": 0.00029866399978580304, + "loss": 0.1655, + "step": 858 + }, + { + "epoch": 0.26, + "learning_rate": 0.0002986601470579328, + "loss": 0.2223, + "step": 859 + }, + { + "epoch": 0.26, + "learning_rate": 0.00029865628880777415, + "loss": 0.1926, + "step": 860 + }, + { + "epoch": 0.26, + "learning_rate": 0.0002986524250354702, + "loss": 0.2586, + "step": 861 + }, + { + "epoch": 0.26, + "learning_rate": 0.00029864855574116467, + "loss": 0.2022, + "step": 862 + }, + { + "epoch": 0.26, + "learning_rate": 0.0002986446809250012, + "loss": 0.0957, + "step": 863 + }, + { + "epoch": 0.26, + "learning_rate": 0.0002986408005871237, + "loss": 0.2782, + "step": 864 + }, + { + "epoch": 0.26, + "learning_rate": 0.0002986369147276763, + "loss": 0.31, + "step": 865 + }, + { + "epoch": 0.26, + "learning_rate": 0.00029863302334680347, + "loss": 0.1533, + "step": 866 + }, + { + "epoch": 0.26, + "learning_rate": 0.0002986291264446497, + "loss": 0.34, + "step": 867 + }, + { + "epoch": 0.26, + "learning_rate": 0.00029862522402135963, + "loss": 0.3287, + "step": 868 + }, + { + "epoch": 0.26, + "learning_rate": 0.00029862131607707836, + "loss": 0.373, + "step": 869 + }, + { + "epoch": 0.26, + "learning_rate": 0.0002986174026119511, + "loss": 0.2444, + "step": 870 + }, + { + "epoch": 0.26, + "learning_rate": 0.00029861348362612307, + "loss": 0.373, + "step": 871 + }, + { + "epoch": 0.26, + "learning_rate": 0.00029860955911973995, + "loss": 0.0425, + "step": 872 + }, + { + "epoch": 0.27, + "learning_rate": 0.0002986056290929475, + "loss": 0.3146, + "step": 873 + }, + { + "epoch": 0.27, + "learning_rate": 0.00029860169354589173, + "loss": 0.4037, + "step": 874 + }, + { + "epoch": 0.27, + "learning_rate": 0.0002985977524787188, + "loss": 0.3334, + "step": 875 + }, + { + "epoch": 0.27, + "learning_rate": 0.00029859380589157514, + "loss": 0.4214, + "step": 876 + }, + { + "epoch": 0.27, + "learning_rate": 0.00029858985378460734, + "loss": 0.3769, + "step": 877 + }, + { + "epoch": 0.27, + "learning_rate": 0.0002985858961579622, + "loss": 0.0954, + "step": 878 + }, + { + "epoch": 0.27, + "learning_rate": 0.0002985819330117867, + "loss": 0.3082, + "step": 879 + }, + { + "epoch": 0.27, + "learning_rate": 0.0002985779643462282, + "loss": 0.5508, + "step": 880 + }, + { + "epoch": 0.27, + "learning_rate": 0.0002985739901614339, + "loss": 0.2706, + "step": 881 + }, + { + "epoch": 0.27, + "learning_rate": 0.00029857001045755166, + "loss": 0.1477, + "step": 882 + }, + { + "epoch": 0.27, + "learning_rate": 0.0002985660252347292, + "loss": 0.3255, + "step": 883 + }, + { + "epoch": 0.27, + "learning_rate": 0.0002985620344931145, + "loss": 0.307, + "step": 884 + }, + { + "epoch": 0.27, + "learning_rate": 0.00029855803823285594, + "loss": 0.3799, + "step": 885 + }, + { + "epoch": 0.27, + "learning_rate": 0.0002985540364541019, + "loss": 0.2414, + "step": 886 + }, + { + "epoch": 0.27, + "learning_rate": 0.000298550029157001, + "loss": 0.194, + "step": 887 + }, + { + "epoch": 0.27, + "learning_rate": 0.00029854601634170215, + "loss": 0.2157, + "step": 888 + }, + { + "epoch": 0.27, + "learning_rate": 0.0002985419980083544, + "loss": 0.446, + "step": 889 + }, + { + "epoch": 0.27, + "learning_rate": 0.000298537974157107, + "loss": 0.3235, + "step": 890 + }, + { + "epoch": 0.27, + "learning_rate": 0.0002985339447881095, + "loss": 0.17, + "step": 891 + }, + { + "epoch": 0.27, + "learning_rate": 0.00029852990990151146, + "loss": 0.4609, + "step": 892 + }, + { + "epoch": 0.27, + "learning_rate": 0.00029852586949746286, + "loss": 0.2561, + "step": 893 + }, + { + "epoch": 0.27, + "learning_rate": 0.00029852182357611377, + "loss": 0.2575, + "step": 894 + }, + { + "epoch": 0.27, + "learning_rate": 0.0002985177721376144, + "loss": 0.0305, + "step": 895 + }, + { + "epoch": 0.27, + "learning_rate": 0.00029851371518211544, + "loss": 0.1379, + "step": 896 + }, + { + "epoch": 0.27, + "learning_rate": 0.0002985096527097674, + "loss": 0.3842, + "step": 897 + }, + { + "epoch": 0.27, + "learning_rate": 0.00029850558472072124, + "loss": 0.3285, + "step": 898 + }, + { + "epoch": 0.27, + "learning_rate": 0.0002985015112151281, + "loss": 0.3125, + "step": 899 + }, + { + "epoch": 0.27, + "learning_rate": 0.00029849743219313935, + "loss": 0.2236, + "step": 900 + }, + { + "epoch": 0.27, + "learning_rate": 0.0002984933476549064, + "loss": 0.3962, + "step": 901 + }, + { + "epoch": 0.27, + "learning_rate": 0.0002984892576005811, + "loss": 0.3097, + "step": 902 + }, + { + "epoch": 0.27, + "learning_rate": 0.0002984851620303153, + "loss": 0.0667, + "step": 903 + }, + { + "epoch": 0.27, + "learning_rate": 0.0002984810609442611, + "loss": 0.2275, + "step": 904 + }, + { + "epoch": 0.27, + "learning_rate": 0.000298476954342571, + "loss": 0.1367, + "step": 905 + }, + { + "epoch": 0.28, + "learning_rate": 0.00029847284222539737, + "loss": 0.3192, + "step": 906 + }, + { + "epoch": 0.28, + "learning_rate": 0.0002984687245928931, + "loss": 0.2483, + "step": 907 + }, + { + "epoch": 0.28, + "learning_rate": 0.0002984646014452111, + "loss": 0.1904, + "step": 908 + }, + { + "epoch": 0.28, + "learning_rate": 0.0002984604727825045, + "loss": 0.4165, + "step": 909 + }, + { + "epoch": 0.28, + "learning_rate": 0.0002984563386049267, + "loss": 0.2536, + "step": 910 + }, + { + "epoch": 0.28, + "learning_rate": 0.0002984521989126312, + "loss": 0.3993, + "step": 911 + }, + { + "epoch": 0.28, + "learning_rate": 0.0002984480537057719, + "loss": 0.1342, + "step": 912 + }, + { + "epoch": 0.28, + "learning_rate": 0.0002984439029845027, + "loss": 0.1271, + "step": 913 + }, + { + "epoch": 0.28, + "learning_rate": 0.00029843974674897787, + "loss": 0.3424, + "step": 914 + }, + { + "epoch": 0.28, + "learning_rate": 0.0002984355849993517, + "loss": 0.3562, + "step": 915 + }, + { + "epoch": 0.28, + "learning_rate": 0.00029843141773577885, + "loss": 0.3063, + "step": 916 + }, + { + "epoch": 0.28, + "learning_rate": 0.0002984272449584141, + "loss": 0.34, + "step": 917 + }, + { + "epoch": 0.28, + "learning_rate": 0.00029842306666741245, + "loss": 0.3327, + "step": 918 + }, + { + "epoch": 0.28, + "learning_rate": 0.0002984188828629291, + "loss": 0.2286, + "step": 919 + }, + { + "epoch": 0.28, + "learning_rate": 0.0002984146935451195, + "loss": 0.3671, + "step": 920 + }, + { + "epoch": 0.28, + "learning_rate": 0.00029841049871413925, + "loss": 0.244, + "step": 921 + }, + { + "epoch": 0.28, + "learning_rate": 0.00029840629837014427, + "loss": 0.272, + "step": 922 + }, + { + "epoch": 0.28, + "learning_rate": 0.0002984020925132904, + "loss": 0.2823, + "step": 923 + }, + { + "epoch": 0.28, + "learning_rate": 0.000298397881143734, + "loss": 0.2622, + "step": 924 + }, + { + "epoch": 0.28, + "learning_rate": 0.00029839366426163147, + "loss": 0.3328, + "step": 925 + }, + { + "epoch": 0.28, + "learning_rate": 0.00029838944186713954, + "loss": 0.3027, + "step": 926 + }, + { + "epoch": 0.28, + "learning_rate": 0.00029838521396041495, + "loss": 0.2234, + "step": 927 + }, + { + "epoch": 0.28, + "learning_rate": 0.0002983809805416148, + "loss": 0.0301, + "step": 928 + }, + { + "epoch": 0.28, + "learning_rate": 0.00029837674161089633, + "loss": 0.1884, + "step": 929 + }, + { + "epoch": 0.28, + "learning_rate": 0.0002983724971684171, + "loss": 0.4672, + "step": 930 + }, + { + "epoch": 0.28, + "learning_rate": 0.0002983682472143346, + "loss": 0.2784, + "step": 931 + }, + { + "epoch": 0.28, + "learning_rate": 0.00029836399174880683, + "loss": 0.2994, + "step": 932 + }, + { + "epoch": 0.28, + "learning_rate": 0.00029835973077199185, + "loss": 0.255, + "step": 933 + }, + { + "epoch": 0.28, + "learning_rate": 0.00029835546428404793, + "loss": 0.2622, + "step": 934 + }, + { + "epoch": 0.28, + "learning_rate": 0.0002983511922851336, + "loss": 0.3221, + "step": 935 + }, + { + "epoch": 0.28, + "learning_rate": 0.00029834691477540745, + "loss": 0.2978, + "step": 936 + }, + { + "epoch": 0.28, + "learning_rate": 0.00029834263175502844, + "loss": 0.1968, + "step": 937 + }, + { + "epoch": 0.28, + "learning_rate": 0.0002983383432241557, + "loss": 0.4291, + "step": 938 + }, + { + "epoch": 0.29, + "learning_rate": 0.00029833404918294855, + "loss": 0.1741, + "step": 939 + }, + { + "epoch": 0.29, + "learning_rate": 0.00029832974963156636, + "loss": 0.3613, + "step": 940 + }, + { + "epoch": 0.29, + "learning_rate": 0.00029832544457016904, + "loss": 0.1915, + "step": 941 + }, + { + "epoch": 0.29, + "learning_rate": 0.0002983211339989164, + "loss": 0.2864, + "step": 942 + }, + { + "epoch": 0.29, + "learning_rate": 0.0002983168179179685, + "loss": 0.0486, + "step": 943 + }, + { + "epoch": 0.29, + "learning_rate": 0.0002983124963274858, + "loss": 0.3624, + "step": 944 + }, + { + "epoch": 0.29, + "learning_rate": 0.0002983081692276288, + "loss": 0.2245, + "step": 945 + }, + { + "epoch": 0.29, + "learning_rate": 0.0002983038366185582, + "loss": 0.3403, + "step": 946 + }, + { + "epoch": 0.29, + "learning_rate": 0.000298299498500435, + "loss": 0.2571, + "step": 947 + }, + { + "epoch": 0.29, + "learning_rate": 0.00029829515487342026, + "loss": 0.357, + "step": 948 + }, + { + "epoch": 0.29, + "learning_rate": 0.00029829080573767546, + "loss": 0.3482, + "step": 949 + }, + { + "epoch": 0.29, + "learning_rate": 0.0002982864510933621, + "loss": 0.2411, + "step": 950 + }, + { + "epoch": 0.29, + "learning_rate": 0.0002982820909406418, + "loss": 0.1143, + "step": 951 + }, + { + "epoch": 0.29, + "learning_rate": 0.00029827772527967676, + "loss": 0.4096, + "step": 952 + }, + { + "epoch": 0.29, + "learning_rate": 0.000298273354110629, + "loss": 0.2987, + "step": 953 + }, + { + "epoch": 0.29, + "learning_rate": 0.000298268977433661, + "loss": 0.1878, + "step": 954 + }, + { + "epoch": 0.29, + "learning_rate": 0.00029826459524893523, + "loss": 0.4587, + "step": 955 + }, + { + "epoch": 0.29, + "learning_rate": 0.00029826020755661454, + "loss": 0.2755, + "step": 956 + }, + { + "epoch": 0.29, + "learning_rate": 0.00029825581435686193, + "loss": 0.211, + "step": 957 + }, + { + "epoch": 0.29, + "learning_rate": 0.00029825141564984053, + "loss": 0.2029, + "step": 958 + }, + { + "epoch": 0.29, + "learning_rate": 0.00029824701143571383, + "loss": 0.5266, + "step": 959 + }, + { + "epoch": 0.29, + "learning_rate": 0.00029824260171464534, + "loss": 0.2694, + "step": 960 + }, + { + "epoch": 0.29, + "learning_rate": 0.00029823818648679897, + "loss": 0.2594, + "step": 961 + }, + { + "epoch": 0.29, + "learning_rate": 0.00029823376575233867, + "loss": 0.2366, + "step": 962 + }, + { + "epoch": 0.29, + "learning_rate": 0.0002982293395114286, + "loss": 0.1215, + "step": 963 + }, + { + "epoch": 0.29, + "learning_rate": 0.0002982249077642333, + "loss": 0.3594, + "step": 964 + }, + { + "epoch": 0.29, + "learning_rate": 0.0002982204705109173, + "loss": 0.4075, + "step": 965 + }, + { + "epoch": 0.29, + "learning_rate": 0.0002982160277516455, + "loss": 0.1653, + "step": 966 + }, + { + "epoch": 0.29, + "learning_rate": 0.00029821157948658295, + "loss": 0.2141, + "step": 967 + }, + { + "epoch": 0.29, + "learning_rate": 0.00029820712571589477, + "loss": 0.2751, + "step": 968 + }, + { + "epoch": 0.29, + "learning_rate": 0.00029820266643974656, + "loss": 0.1869, + "step": 969 + }, + { + "epoch": 0.29, + "learning_rate": 0.00029819820165830383, + "loss": 0.3388, + "step": 970 + }, + { + "epoch": 0.29, + "learning_rate": 0.00029819373137173253, + "loss": 0.2138, + "step": 971 + }, + { + "epoch": 0.3, + "learning_rate": 0.0002981892555801987, + "loss": 0.1641, + "step": 972 + }, + { + "epoch": 0.3, + "learning_rate": 0.0002981847742838686, + "loss": 0.2002, + "step": 973 + }, + { + "epoch": 0.3, + "learning_rate": 0.00029818028748290857, + "loss": 0.27, + "step": 974 + }, + { + "epoch": 0.3, + "learning_rate": 0.00029817579517748546, + "loss": 0.3715, + "step": 975 + }, + { + "epoch": 0.3, + "learning_rate": 0.0002981712973677661, + "loss": 0.1403, + "step": 976 + }, + { + "epoch": 0.3, + "learning_rate": 0.00029816679405391755, + "loss": 0.2226, + "step": 977 + }, + { + "epoch": 0.3, + "learning_rate": 0.0002981622852361071, + "loss": 0.2808, + "step": 978 + }, + { + "epoch": 0.3, + "learning_rate": 0.0002981577709145022, + "loss": 0.3262, + "step": 979 + }, + { + "epoch": 0.3, + "learning_rate": 0.00029815325108927063, + "loss": 0.2426, + "step": 980 + }, + { + "epoch": 0.3, + "learning_rate": 0.0002981487257605802, + "loss": 0.2421, + "step": 981 + }, + { + "epoch": 0.3, + "learning_rate": 0.00029814419492859906, + "loss": 0.4084, + "step": 982 + }, + { + "epoch": 0.3, + "learning_rate": 0.0002981396585934955, + "loss": 0.3693, + "step": 983 + }, + { + "epoch": 0.3, + "learning_rate": 0.00029813511675543804, + "loss": 0.1708, + "step": 984 + }, + { + "epoch": 0.3, + "learning_rate": 0.00029813056941459546, + "loss": 0.3751, + "step": 985 + }, + { + "epoch": 0.3, + "learning_rate": 0.00029812601657113656, + "loss": 0.2527, + "step": 986 + }, + { + "epoch": 0.3, + "learning_rate": 0.00029812145822523053, + "loss": 0.2965, + "step": 987 + }, + { + "epoch": 0.3, + "learning_rate": 0.00029811689437704667, + "loss": 0.2195, + "step": 988 + }, + { + "epoch": 0.3, + "learning_rate": 0.00029811232502675454, + "loss": 0.1591, + "step": 989 + }, + { + "epoch": 0.3, + "learning_rate": 0.0002981077501745239, + "loss": 0.2568, + "step": 990 + }, + { + "epoch": 0.3, + "learning_rate": 0.0002981031698205246, + "loss": 0.4367, + "step": 991 + }, + { + "epoch": 0.3, + "learning_rate": 0.00029809858396492693, + "loss": 0.333, + "step": 992 + }, + { + "epoch": 0.3, + "learning_rate": 0.00029809399260790107, + "loss": 0.4648, + "step": 993 + }, + { + "epoch": 0.3, + "learning_rate": 0.0002980893957496177, + "loss": 0.271, + "step": 994 + }, + { + "epoch": 0.3, + "learning_rate": 0.0002980847933902476, + "loss": 0.1867, + "step": 995 + }, + { + "epoch": 0.3, + "learning_rate": 0.00029808018552996167, + "loss": 0.3938, + "step": 996 + }, + { + "epoch": 0.3, + "learning_rate": 0.00029807557216893107, + "loss": 0.2969, + "step": 997 + }, + { + "epoch": 0.3, + "learning_rate": 0.0002980709533073271, + "loss": 0.3577, + "step": 998 + }, + { + "epoch": 0.3, + "learning_rate": 0.00029806632894532155, + "loss": 0.4089, + "step": 999 + }, + { + "epoch": 0.3, + "learning_rate": 0.00029806169908308603, + "loss": 0.2866, + "step": 1000 + }, + { + "epoch": 0.3, + "learning_rate": 0.0002980570637207926, + "loss": 0.1652, + "step": 1001 + }, + { + "epoch": 0.3, + "learning_rate": 0.00029805242285861345, + "loss": 0.327, + "step": 1002 + }, + { + "epoch": 0.3, + "learning_rate": 0.0002980477764967209, + "loss": 0.1602, + "step": 1003 + }, + { + "epoch": 0.3, + "learning_rate": 0.0002980431246352876, + "loss": 0.4538, + "step": 1004 + }, + { + "epoch": 0.31, + "learning_rate": 0.0002980384672744864, + "loss": 0.2408, + "step": 1005 + }, + { + "epoch": 0.31, + "learning_rate": 0.0002980338044144902, + "loss": 0.392, + "step": 1006 + }, + { + "epoch": 0.31, + "learning_rate": 0.00029802913605547234, + "loss": 0.1815, + "step": 1007 + }, + { + "epoch": 0.31, + "learning_rate": 0.0002980244621976061, + "loss": 0.3108, + "step": 1008 + }, + { + "epoch": 0.31, + "learning_rate": 0.0002980197828410652, + "loss": 0.214, + "step": 1009 + }, + { + "epoch": 0.31, + "learning_rate": 0.00029801509798602343, + "loss": 0.2501, + "step": 1010 + }, + { + "epoch": 0.31, + "learning_rate": 0.0002980104076326548, + "loss": 0.2969, + "step": 1011 + }, + { + "epoch": 0.31, + "learning_rate": 0.0002980057117811336, + "loss": 0.3037, + "step": 1012 + }, + { + "epoch": 0.31, + "learning_rate": 0.00029800101043163423, + "loss": 0.2866, + "step": 1013 + }, + { + "epoch": 0.31, + "learning_rate": 0.0002979963035843313, + "loss": 0.4018, + "step": 1014 + }, + { + "epoch": 0.31, + "learning_rate": 0.0002979915912393997, + "loss": 0.3364, + "step": 1015 + }, + { + "epoch": 0.31, + "learning_rate": 0.00029798687339701446, + "loss": 0.213, + "step": 1016 + }, + { + "epoch": 0.31, + "learning_rate": 0.0002979821500573509, + "loss": 0.2939, + "step": 1017 + }, + { + "epoch": 0.31, + "learning_rate": 0.00029797742122058434, + "loss": 0.3376, + "step": 1018 + }, + { + "epoch": 0.31, + "learning_rate": 0.00029797268688689055, + "loss": 0.5265, + "step": 1019 + }, + { + "epoch": 0.31, + "learning_rate": 0.00029796794705644537, + "loss": 0.2292, + "step": 1020 + }, + { + "epoch": 0.31, + "learning_rate": 0.0002979632017294248, + "loss": 0.177, + "step": 1021 + }, + { + "epoch": 0.31, + "learning_rate": 0.0002979584509060053, + "loss": 0.3058, + "step": 1022 + }, + { + "epoch": 0.31, + "learning_rate": 0.0002979536945863632, + "loss": 0.2582, + "step": 1023 + }, + { + "epoch": 0.31, + "learning_rate": 0.00029794893277067523, + "loss": 0.4344, + "step": 1024 + }, + { + "epoch": 0.31, + "learning_rate": 0.0002979441654591182, + "loss": 0.3349, + "step": 1025 + }, + { + "epoch": 0.31, + "learning_rate": 0.00029793939265186935, + "loss": 0.3343, + "step": 1026 + }, + { + "epoch": 0.31, + "learning_rate": 0.0002979346143491058, + "loss": 0.4951, + "step": 1027 + }, + { + "epoch": 0.31, + "learning_rate": 0.00029792983055100525, + "loss": 0.0001, + "step": 1028 + }, + { + "epoch": 0.31, + "learning_rate": 0.0002979250412577452, + "loss": 0.2421, + "step": 1029 + }, + { + "epoch": 0.31, + "learning_rate": 0.0002979202464695037, + "loss": 0.2686, + "step": 1030 + }, + { + "epoch": 0.31, + "learning_rate": 0.00029791544618645876, + "loss": 0.2185, + "step": 1031 + }, + { + "epoch": 0.31, + "learning_rate": 0.0002979106404087888, + "loss": 0.3837, + "step": 1032 + }, + { + "epoch": 0.31, + "learning_rate": 0.0002979058291366723, + "loss": 0.2687, + "step": 1033 + }, + { + "epoch": 0.31, + "learning_rate": 0.00029790101237028795, + "loss": 0.2029, + "step": 1034 + }, + { + "epoch": 0.31, + "learning_rate": 0.00029789619010981474, + "loss": 0.3187, + "step": 1035 + }, + { + "epoch": 0.31, + "learning_rate": 0.00029789136235543174, + "loss": 0.3958, + "step": 1036 + }, + { + "epoch": 0.31, + "learning_rate": 0.0002978865291073183, + "loss": 0.2381, + "step": 1037 + }, + { + "epoch": 0.32, + "learning_rate": 0.0002978816903656541, + "loss": 0.1465, + "step": 1038 + }, + { + "epoch": 0.32, + "learning_rate": 0.0002978768461306186, + "loss": 0.3293, + "step": 1039 + }, + { + "epoch": 0.32, + "learning_rate": 0.000297871996402392, + "loss": 0.2872, + "step": 1040 + }, + { + "epoch": 0.32, + "learning_rate": 0.00029786714118115433, + "loss": 0.2983, + "step": 1041 + }, + { + "epoch": 0.32, + "learning_rate": 0.000297862280467086, + "loss": 0.49, + "step": 1042 + }, + { + "epoch": 0.32, + "learning_rate": 0.00029785741426036755, + "loss": 0.1385, + "step": 1043 + }, + { + "epoch": 0.32, + "learning_rate": 0.0002978525425611797, + "loss": 0.326, + "step": 1044 + }, + { + "epoch": 0.32, + "learning_rate": 0.00029784766536970357, + "loss": 0.1912, + "step": 1045 + }, + { + "epoch": 0.32, + "learning_rate": 0.0002978427826861202, + "loss": 0.3778, + "step": 1046 + }, + { + "epoch": 0.32, + "learning_rate": 0.000297837894510611, + "loss": 0.2689, + "step": 1047 + }, + { + "epoch": 0.32, + "learning_rate": 0.0002978330008433575, + "loss": 0.1733, + "step": 1048 + }, + { + "epoch": 0.32, + "learning_rate": 0.0002978281016845416, + "loss": 0.2042, + "step": 1049 + }, + { + "epoch": 0.32, + "learning_rate": 0.0002978231970343451, + "loss": 0.2964, + "step": 1050 + }, + { + "epoch": 0.32, + "learning_rate": 0.00029781828689295046, + "loss": 0.1608, + "step": 1051 + }, + { + "epoch": 0.32, + "learning_rate": 0.0002978133712605399, + "loss": 0.2945, + "step": 1052 + }, + { + "epoch": 0.32, + "learning_rate": 0.00029780845013729603, + "loss": 0.29, + "step": 1053 + }, + { + "epoch": 0.32, + "learning_rate": 0.0002978035235234017, + "loss": 0.2813, + "step": 1054 + }, + { + "epoch": 0.32, + "learning_rate": 0.0002977985914190398, + "loss": 0.4729, + "step": 1055 + }, + { + "epoch": 0.32, + "learning_rate": 0.0002977936538243938, + "loss": 0.258, + "step": 1056 + }, + { + "epoch": 0.32, + "learning_rate": 0.0002977887107396469, + "loss": 0.2038, + "step": 1057 + }, + { + "epoch": 0.32, + "learning_rate": 0.00029778376216498274, + "loss": 0.2652, + "step": 1058 + }, + { + "epoch": 0.32, + "learning_rate": 0.00029777880810058526, + "loss": 0.1605, + "step": 1059 + }, + { + "epoch": 0.32, + "learning_rate": 0.0002977738485466384, + "loss": 0.1611, + "step": 1060 + }, + { + "epoch": 0.32, + "learning_rate": 0.0002977688835033264, + "loss": 0.2582, + "step": 1061 + }, + { + "epoch": 0.32, + "learning_rate": 0.00029776391297083363, + "loss": 0.3598, + "step": 1062 + }, + { + "epoch": 0.32, + "learning_rate": 0.0002977589369493449, + "loss": 0.2632, + "step": 1063 + }, + { + "epoch": 0.32, + "learning_rate": 0.00029775395543904493, + "loss": 0.3757, + "step": 1064 + }, + { + "epoch": 0.32, + "learning_rate": 0.00029774896844011887, + "loss": 0.3696, + "step": 1065 + }, + { + "epoch": 0.32, + "learning_rate": 0.00029774397595275177, + "loss": 0.1559, + "step": 1066 + }, + { + "epoch": 0.32, + "learning_rate": 0.0002977389779771293, + "loss": 0.3168, + "step": 1067 + }, + { + "epoch": 0.32, + "learning_rate": 0.00029773397451343707, + "loss": 0.3899, + "step": 1068 + }, + { + "epoch": 0.32, + "learning_rate": 0.00029772896556186086, + "loss": 0.2815, + "step": 1069 + }, + { + "epoch": 0.32, + "learning_rate": 0.0002977239511225868, + "loss": 0.4508, + "step": 1070 + }, + { + "epoch": 0.33, + "learning_rate": 0.00029771893119580117, + "loss": 0.2459, + "step": 1071 + }, + { + "epoch": 0.33, + "learning_rate": 0.0002977139057816904, + "loss": 0.3609, + "step": 1072 + }, + { + "epoch": 0.33, + "learning_rate": 0.0002977088748804412, + "loss": 0.3532, + "step": 1073 + }, + { + "epoch": 0.33, + "learning_rate": 0.0002977038384922405, + "loss": 0.3497, + "step": 1074 + }, + { + "epoch": 0.33, + "learning_rate": 0.0002976987966172753, + "loss": 0.319, + "step": 1075 + }, + { + "epoch": 0.33, + "learning_rate": 0.0002976937492557329, + "loss": 0.2172, + "step": 1076 + }, + { + "epoch": 0.33, + "learning_rate": 0.00029768869640780084, + "loss": 0.3636, + "step": 1077 + }, + { + "epoch": 0.33, + "learning_rate": 0.0002976836380736668, + "loss": 0.2489, + "step": 1078 + }, + { + "epoch": 0.33, + "learning_rate": 0.00029767857425351877, + "loss": 0.4424, + "step": 1079 + }, + { + "epoch": 0.33, + "learning_rate": 0.00029767350494754464, + "loss": 0.2484, + "step": 1080 + }, + { + "epoch": 0.33, + "learning_rate": 0.00029766843015593296, + "loss": 0.2877, + "step": 1081 + }, + { + "epoch": 0.33, + "learning_rate": 0.00029766334987887206, + "loss": 0.2808, + "step": 1082 + }, + { + "epoch": 0.33, + "learning_rate": 0.00029765826411655075, + "loss": 0.4201, + "step": 1083 + }, + { + "epoch": 0.33, + "learning_rate": 0.00029765317286915794, + "loss": 0.3839, + "step": 1084 + }, + { + "epoch": 0.33, + "learning_rate": 0.00029764807613688273, + "loss": 0.4093, + "step": 1085 + }, + { + "epoch": 0.33, + "learning_rate": 0.0002976429739199145, + "loss": 0.3341, + "step": 1086 + }, + { + "epoch": 0.33, + "learning_rate": 0.00029763786621844274, + "loss": 0.3289, + "step": 1087 + }, + { + "epoch": 0.33, + "learning_rate": 0.00029763275303265716, + "loss": 0.3535, + "step": 1088 + }, + { + "epoch": 0.33, + "learning_rate": 0.00029762763436274773, + "loss": 0.2452, + "step": 1089 + }, + { + "epoch": 0.33, + "learning_rate": 0.00029762251020890463, + "loss": 0.091, + "step": 1090 + }, + { + "epoch": 0.33, + "learning_rate": 0.0002976173805713182, + "loss": 0.3739, + "step": 1091 + }, + { + "epoch": 0.33, + "learning_rate": 0.00029761224545017895, + "loss": 0.1952, + "step": 1092 + }, + { + "epoch": 0.33, + "learning_rate": 0.00029760710484567763, + "loss": 0.3398, + "step": 1093 + }, + { + "epoch": 0.33, + "learning_rate": 0.0002976019587580052, + "loss": 0.2518, + "step": 1094 + }, + { + "epoch": 0.33, + "learning_rate": 0.0002975968071873529, + "loss": 0.1673, + "step": 1095 + }, + { + "epoch": 0.33, + "learning_rate": 0.00029759165013391203, + "loss": 0.3159, + "step": 1096 + }, + { + "epoch": 0.33, + "learning_rate": 0.0002975864875978741, + "loss": 0.2788, + "step": 1097 + }, + { + "epoch": 0.33, + "learning_rate": 0.000297581319579431, + "loss": 0.3042, + "step": 1098 + }, + { + "epoch": 0.33, + "learning_rate": 0.00029757614607877464, + "loss": 0.2777, + "step": 1099 + }, + { + "epoch": 0.33, + "learning_rate": 0.00029757096709609725, + "loss": 0.2962, + "step": 1100 + }, + { + "epoch": 0.33, + "learning_rate": 0.00029756578263159117, + "loss": 0.4973, + "step": 1101 + }, + { + "epoch": 0.33, + "learning_rate": 0.00029756059268544894, + "loss": 0.3438, + "step": 1102 + }, + { + "epoch": 0.33, + "learning_rate": 0.0002975553972578635, + "loss": 0.3453, + "step": 1103 + }, + { + "epoch": 0.34, + "learning_rate": 0.0002975501963490277, + "loss": 0.3626, + "step": 1104 + }, + { + "epoch": 0.34, + "learning_rate": 0.00029754498995913475, + "loss": 0.0823, + "step": 1105 + }, + { + "epoch": 0.34, + "learning_rate": 0.00029753977808837816, + "loss": 0.2012, + "step": 1106 + }, + { + "epoch": 0.34, + "learning_rate": 0.00029753456073695146, + "loss": 0.1351, + "step": 1107 + }, + { + "epoch": 0.34, + "learning_rate": 0.0002975293379050485, + "loss": 0.1471, + "step": 1108 + }, + { + "epoch": 0.34, + "learning_rate": 0.0002975241095928632, + "loss": 0.3628, + "step": 1109 + }, + { + "epoch": 0.34, + "learning_rate": 0.0002975188758005899, + "loss": 0.1623, + "step": 1110 + }, + { + "epoch": 0.34, + "learning_rate": 0.0002975136365284229, + "loss": 0.4324, + "step": 1111 + }, + { + "epoch": 0.34, + "learning_rate": 0.0002975083917765569, + "loss": 0.3196, + "step": 1112 + }, + { + "epoch": 0.34, + "learning_rate": 0.0002975031415451867, + "loss": 0.2639, + "step": 1113 + }, + { + "epoch": 0.34, + "learning_rate": 0.0002974978858345074, + "loss": 0.1717, + "step": 1114 + }, + { + "epoch": 0.34, + "learning_rate": 0.00029749262464471416, + "loss": 0.1432, + "step": 1115 + }, + { + "epoch": 0.34, + "learning_rate": 0.0002974873579760024, + "loss": 0.3492, + "step": 1116 + }, + { + "epoch": 0.34, + "learning_rate": 0.0002974820858285678, + "loss": 0.2874, + "step": 1117 + }, + { + "epoch": 0.34, + "learning_rate": 0.0002974768082026062, + "loss": 0.1644, + "step": 1118 + }, + { + "epoch": 0.34, + "learning_rate": 0.0002974715250983136, + "loss": 0.3983, + "step": 1119 + }, + { + "epoch": 0.34, + "learning_rate": 0.0002974662365158864, + "loss": 0.1569, + "step": 1120 + }, + { + "epoch": 0.34, + "learning_rate": 0.0002974609424555209, + "loss": 0.2908, + "step": 1121 + }, + { + "epoch": 0.34, + "learning_rate": 0.0002974556429174138, + "loss": 0.2726, + "step": 1122 + }, + { + "epoch": 0.34, + "learning_rate": 0.000297450337901762, + "loss": 0.3583, + "step": 1123 + }, + { + "epoch": 0.34, + "learning_rate": 0.00029744502740876257, + "loss": 0.2787, + "step": 1124 + }, + { + "epoch": 0.34, + "learning_rate": 0.0002974397114386127, + "loss": 0.3737, + "step": 1125 + }, + { + "epoch": 0.34, + "learning_rate": 0.0002974343899915099, + "loss": 0.2393, + "step": 1126 + }, + { + "epoch": 0.34, + "learning_rate": 0.0002974290630676519, + "loss": 0.2516, + "step": 1127 + }, + { + "epoch": 0.34, + "learning_rate": 0.00029742373066723657, + "loss": 0.1069, + "step": 1128 + }, + { + "epoch": 0.34, + "learning_rate": 0.0002974183927904619, + "loss": 0.3344, + "step": 1129 + }, + { + "epoch": 0.34, + "learning_rate": 0.0002974130494375263, + "loss": 0.2736, + "step": 1130 + }, + { + "epoch": 0.34, + "learning_rate": 0.0002974077006086281, + "loss": 0.5029, + "step": 1131 + }, + { + "epoch": 0.34, + "learning_rate": 0.0002974023463039662, + "loss": 0.1002, + "step": 1132 + }, + { + "epoch": 0.34, + "learning_rate": 0.00029739698652373934, + "loss": 0.236, + "step": 1133 + }, + { + "epoch": 0.34, + "learning_rate": 0.00029739162126814667, + "loss": 0.2753, + "step": 1134 + }, + { + "epoch": 0.34, + "learning_rate": 0.00029738625053738746, + "loss": 0.1125, + "step": 1135 + }, + { + "epoch": 0.34, + "learning_rate": 0.00029738087433166125, + "loss": 0.3363, + "step": 1136 + }, + { + "epoch": 0.35, + "learning_rate": 0.0002973754926511678, + "loss": 0.452, + "step": 1137 + }, + { + "epoch": 0.35, + "learning_rate": 0.000297370105496107, + "loss": 0.1976, + "step": 1138 + }, + { + "epoch": 0.35, + "learning_rate": 0.0002973647128666788, + "loss": 0.3582, + "step": 1139 + }, + { + "epoch": 0.35, + "learning_rate": 0.0002973593147630838, + "loss": 0.173, + "step": 1140 + }, + { + "epoch": 0.35, + "learning_rate": 0.0002973539111855223, + "loss": 0.3876, + "step": 1141 + }, + { + "epoch": 0.35, + "learning_rate": 0.0002973485021341952, + "loss": 0.1884, + "step": 1142 + }, + { + "epoch": 0.35, + "learning_rate": 0.0002973430876093033, + "loss": 0.3877, + "step": 1143 + }, + { + "epoch": 0.35, + "learning_rate": 0.0002973376676110478, + "loss": 0.1421, + "step": 1144 + }, + { + "epoch": 0.35, + "learning_rate": 0.0002973322421396299, + "loss": 0.3675, + "step": 1145 + }, + { + "epoch": 0.35, + "learning_rate": 0.00029732681119525137, + "loss": 0.398, + "step": 1146 + }, + { + "epoch": 0.35, + "learning_rate": 0.00029732137477811373, + "loss": 0.243, + "step": 1147 + }, + { + "epoch": 0.35, + "learning_rate": 0.00029731593288841916, + "loss": 0.2215, + "step": 1148 + }, + { + "epoch": 0.35, + "learning_rate": 0.0002973104855263696, + "loss": 0.1239, + "step": 1149 + }, + { + "epoch": 0.35, + "learning_rate": 0.00029730503269216754, + "loss": 0.2609, + "step": 1150 + }, + { + "epoch": 0.35, + "learning_rate": 0.0002972995743860154, + "loss": 0.3498, + "step": 1151 + }, + { + "epoch": 0.35, + "learning_rate": 0.0002972941106081161, + "loss": 0.3149, + "step": 1152 + }, + { + "epoch": 0.35, + "learning_rate": 0.0002972886413586725, + "loss": 0.3199, + "step": 1153 + }, + { + "epoch": 0.35, + "learning_rate": 0.0002972831666378878, + "loss": 0.328, + "step": 1154 + }, + { + "epoch": 0.35, + "learning_rate": 0.00029727768644596537, + "loss": 0.1542, + "step": 1155 + }, + { + "epoch": 0.35, + "learning_rate": 0.00029727220078310874, + "loss": 0.1548, + "step": 1156 + }, + { + "epoch": 0.35, + "learning_rate": 0.0002972667096495218, + "loss": 0.2062, + "step": 1157 + }, + { + "epoch": 0.35, + "learning_rate": 0.0002972612130454084, + "loss": 0.504, + "step": 1158 + }, + { + "epoch": 0.35, + "learning_rate": 0.00029725571097097275, + "loss": 0.3046, + "step": 1159 + }, + { + "epoch": 0.35, + "learning_rate": 0.0002972502034264193, + "loss": 0.2044, + "step": 1160 + }, + { + "epoch": 0.35, + "learning_rate": 0.0002972446904119526, + "loss": 0.2991, + "step": 1161 + }, + { + "epoch": 0.35, + "learning_rate": 0.00029723917192777744, + "loss": 0.3112, + "step": 1162 + }, + { + "epoch": 0.35, + "learning_rate": 0.0002972336479740988, + "loss": 0.2388, + "step": 1163 + }, + { + "epoch": 0.35, + "learning_rate": 0.0002972281185511219, + "loss": 0.2511, + "step": 1164 + }, + { + "epoch": 0.35, + "learning_rate": 0.00029722258365905217, + "loss": 0.2506, + "step": 1165 + }, + { + "epoch": 0.35, + "learning_rate": 0.00029721704329809515, + "loss": 0.0758, + "step": 1166 + }, + { + "epoch": 0.35, + "learning_rate": 0.0002972114974684567, + "loss": 0.2755, + "step": 1167 + }, + { + "epoch": 0.35, + "learning_rate": 0.0002972059461703428, + "loss": 0.2632, + "step": 1168 + }, + { + "epoch": 0.35, + "learning_rate": 0.00029720038940395967, + "loss": 0.1529, + "step": 1169 + }, + { + "epoch": 0.36, + "learning_rate": 0.0002971948271695138, + "loss": 0.3049, + "step": 1170 + }, + { + "epoch": 0.36, + "learning_rate": 0.00029718925946721166, + "loss": 0.2729, + "step": 1171 + }, + { + "epoch": 0.36, + "learning_rate": 0.0002971836862972602, + "loss": 0.2231, + "step": 1172 + }, + { + "epoch": 0.36, + "learning_rate": 0.0002971781076598664, + "loss": 0.2673, + "step": 1173 + }, + { + "epoch": 0.36, + "learning_rate": 0.00029717252355523753, + "loss": 0.2893, + "step": 1174 + }, + { + "epoch": 0.36, + "learning_rate": 0.0002971669339835809, + "loss": 0.1073, + "step": 1175 + }, + { + "epoch": 0.36, + "learning_rate": 0.00029716133894510435, + "loss": 0.0919, + "step": 1176 + }, + { + "epoch": 0.36, + "learning_rate": 0.0002971557384400155, + "loss": 0.3493, + "step": 1177 + }, + { + "epoch": 0.36, + "learning_rate": 0.0002971501324685226, + "loss": 0.0997, + "step": 1178 + }, + { + "epoch": 0.36, + "learning_rate": 0.0002971445210308337, + "loss": 0.2081, + "step": 1179 + }, + { + "epoch": 0.36, + "learning_rate": 0.00029713890412715743, + "loss": 0.3059, + "step": 1180 + }, + { + "epoch": 0.36, + "learning_rate": 0.0002971332817577023, + "loss": 0.336, + "step": 1181 + }, + { + "epoch": 0.36, + "learning_rate": 0.0002971276539226772, + "loss": 0.3506, + "step": 1182 + }, + { + "epoch": 0.36, + "learning_rate": 0.00029712202062229123, + "loss": 0.2015, + "step": 1183 + }, + { + "epoch": 0.36, + "learning_rate": 0.0002971163818567536, + "loss": 0.1809, + "step": 1184 + }, + { + "epoch": 0.36, + "learning_rate": 0.00029711073762627387, + "loss": 0.0883, + "step": 1185 + }, + { + "epoch": 0.36, + "learning_rate": 0.00029710508793106156, + "loss": 0.2532, + "step": 1186 + }, + { + "epoch": 0.36, + "learning_rate": 0.00029709943277132664, + "loss": 0.3142, + "step": 1187 + }, + { + "epoch": 0.36, + "learning_rate": 0.0002970937721472792, + "loss": 0.2882, + "step": 1188 + }, + { + "epoch": 0.36, + "learning_rate": 0.0002970881060591294, + "loss": 0.3583, + "step": 1189 + }, + { + "epoch": 0.36, + "learning_rate": 0.0002970824345070878, + "loss": 0.2575, + "step": 1190 + }, + { + "epoch": 0.36, + "learning_rate": 0.0002970767574913651, + "loss": 0.4113, + "step": 1191 + }, + { + "epoch": 0.36, + "learning_rate": 0.0002970710750121722, + "loss": 0.3148, + "step": 1192 + }, + { + "epoch": 0.36, + "learning_rate": 0.0002970653870697201, + "loss": 0.2744, + "step": 1193 + }, + { + "epoch": 0.36, + "learning_rate": 0.00029705969366422016, + "loss": 0.3491, + "step": 1194 + }, + { + "epoch": 0.36, + "learning_rate": 0.0002970539947958838, + "loss": 0.1166, + "step": 1195 + }, + { + "epoch": 0.36, + "learning_rate": 0.0002970482904649228, + "loss": 0.3173, + "step": 1196 + }, + { + "epoch": 0.36, + "learning_rate": 0.000297042580671549, + "loss": 0.3236, + "step": 1197 + }, + { + "epoch": 0.36, + "learning_rate": 0.00029703686541597454, + "loss": 0.3401, + "step": 1198 + }, + { + "epoch": 0.36, + "learning_rate": 0.00029703114469841174, + "loss": 0.3872, + "step": 1199 + }, + { + "epoch": 0.36, + "learning_rate": 0.00029702541851907305, + "loss": 0.2197, + "step": 1200 + }, + { + "epoch": 0.36, + "learning_rate": 0.00029701968687817124, + "loss": 0.2567, + "step": 1201 + }, + { + "epoch": 0.36, + "learning_rate": 0.00029701394977591915, + "loss": 0.2395, + "step": 1202 + }, + { + "epoch": 0.37, + "learning_rate": 0.00029700820721253, + "loss": 0.2249, + "step": 1203 + }, + { + "epoch": 0.37, + "learning_rate": 0.000297002459188217, + "loss": 0.2791, + "step": 1204 + }, + { + "epoch": 0.37, + "learning_rate": 0.00029699670570319374, + "loss": 0.2251, + "step": 1205 + }, + { + "epoch": 0.37, + "learning_rate": 0.00029699094675767395, + "loss": 0.2566, + "step": 1206 + }, + { + "epoch": 0.37, + "learning_rate": 0.0002969851823518715, + "loss": 0.2271, + "step": 1207 + }, + { + "epoch": 0.37, + "learning_rate": 0.00029697941248600055, + "loss": 0.2225, + "step": 1208 + }, + { + "epoch": 0.37, + "learning_rate": 0.00029697363716027546, + "loss": 0.071, + "step": 1209 + }, + { + "epoch": 0.37, + "learning_rate": 0.00029696785637491074, + "loss": 0.1807, + "step": 1210 + }, + { + "epoch": 0.37, + "learning_rate": 0.0002969620701301211, + "loss": 0.3086, + "step": 1211 + }, + { + "epoch": 0.37, + "learning_rate": 0.0002969562784261216, + "loss": 0.4453, + "step": 1212 + }, + { + "epoch": 0.37, + "learning_rate": 0.00029695048126312726, + "loss": 0.2389, + "step": 1213 + }, + { + "epoch": 0.37, + "learning_rate": 0.00029694467864135345, + "loss": 0.4797, + "step": 1214 + }, + { + "epoch": 0.37, + "learning_rate": 0.00029693887056101575, + "loss": 0.1506, + "step": 1215 + }, + { + "epoch": 0.37, + "learning_rate": 0.00029693305702232993, + "loss": 0.1264, + "step": 1216 + }, + { + "epoch": 0.37, + "learning_rate": 0.0002969272380255119, + "loss": 0.2647, + "step": 1217 + }, + { + "epoch": 0.37, + "learning_rate": 0.0002969214135707778, + "loss": 0.1784, + "step": 1218 + }, + { + "epoch": 0.37, + "learning_rate": 0.00029691558365834407, + "loss": 0.3888, + "step": 1219 + }, + { + "epoch": 0.37, + "learning_rate": 0.00029690974828842725, + "loss": 0.4533, + "step": 1220 + }, + { + "epoch": 0.37, + "learning_rate": 0.00029690390746124406, + "loss": 0.1062, + "step": 1221 + }, + { + "epoch": 0.37, + "learning_rate": 0.0002968980611770115, + "loss": 0.4002, + "step": 1222 + }, + { + "epoch": 0.37, + "learning_rate": 0.00029689220943594676, + "loss": 0.3731, + "step": 1223 + }, + { + "epoch": 0.37, + "learning_rate": 0.0002968863522382672, + "loss": 0.1414, + "step": 1224 + }, + { + "epoch": 0.37, + "learning_rate": 0.00029688048958419037, + "loss": 0.3628, + "step": 1225 + }, + { + "epoch": 0.37, + "learning_rate": 0.00029687462147393414, + "loss": 0.4786, + "step": 1226 + }, + { + "epoch": 0.37, + "learning_rate": 0.0002968687479077163, + "loss": 0.2193, + "step": 1227 + }, + { + "epoch": 0.37, + "learning_rate": 0.0002968628688857553, + "loss": 0.2831, + "step": 1228 + }, + { + "epoch": 0.37, + "learning_rate": 0.00029685698440826933, + "loss": 0.2398, + "step": 1229 + }, + { + "epoch": 0.37, + "learning_rate": 0.000296851094475477, + "loss": 0.2529, + "step": 1230 + }, + { + "epoch": 0.37, + "learning_rate": 0.0002968451990875972, + "loss": 0.4112, + "step": 1231 + }, + { + "epoch": 0.37, + "learning_rate": 0.0002968392982448489, + "loss": 0.1895, + "step": 1232 + }, + { + "epoch": 0.37, + "learning_rate": 0.00029683339194745126, + "loss": 0.429, + "step": 1233 + }, + { + "epoch": 0.37, + "learning_rate": 0.00029682748019562366, + "loss": 0.4093, + "step": 1234 + }, + { + "epoch": 0.37, + "learning_rate": 0.00029682156298958573, + "loss": 0.121, + "step": 1235 + }, + { + "epoch": 0.38, + "learning_rate": 0.00029681564032955736, + "loss": 0.1331, + "step": 1236 + }, + { + "epoch": 0.38, + "learning_rate": 0.0002968097122157584, + "loss": 0.3481, + "step": 1237 + }, + { + "epoch": 0.38, + "learning_rate": 0.00029680377864840926, + "loss": 0.4508, + "step": 1238 + }, + { + "epoch": 0.38, + "learning_rate": 0.0002967978396277301, + "loss": 0.3433, + "step": 1239 + }, + { + "epoch": 0.38, + "learning_rate": 0.0002967918951539418, + "loss": 0.1888, + "step": 1240 + }, + { + "epoch": 0.38, + "learning_rate": 0.00029678594522726493, + "loss": 0.3134, + "step": 1241 + }, + { + "epoch": 0.38, + "learning_rate": 0.00029677998984792075, + "loss": 0.188, + "step": 1242 + }, + { + "epoch": 0.38, + "learning_rate": 0.00029677402901613035, + "loss": 0.2568, + "step": 1243 + }, + { + "epoch": 0.38, + "learning_rate": 0.00029676806273211517, + "loss": 0.1592, + "step": 1244 + }, + { + "epoch": 0.38, + "learning_rate": 0.0002967620909960969, + "loss": 0.4471, + "step": 1245 + }, + { + "epoch": 0.38, + "learning_rate": 0.0002967561138082972, + "loss": 0.2207, + "step": 1246 + }, + { + "epoch": 0.38, + "learning_rate": 0.0002967501311689383, + "loss": 0.1948, + "step": 1247 + }, + { + "epoch": 0.38, + "learning_rate": 0.00029674414307824244, + "loss": 0.2478, + "step": 1248 + }, + { + "epoch": 0.38, + "learning_rate": 0.0002967381495364319, + "loss": 0.126, + "step": 1249 + }, + { + "epoch": 0.38, + "learning_rate": 0.00029673215054372946, + "loss": 0.204, + "step": 1250 + }, + { + "epoch": 0.38, + "learning_rate": 0.00029672614610035786, + "loss": 0.4296, + "step": 1251 + }, + { + "epoch": 0.38, + "learning_rate": 0.00029672013620654023, + "loss": 0.0881, + "step": 1252 + }, + { + "epoch": 0.38, + "learning_rate": 0.00029671412086249986, + "loss": 0.3226, + "step": 1253 + }, + { + "epoch": 0.38, + "learning_rate": 0.00029670810006846005, + "loss": 0.4581, + "step": 1254 + }, + { + "epoch": 0.38, + "learning_rate": 0.00029670207382464456, + "loss": 0.426, + "step": 1255 + }, + { + "epoch": 0.38, + "learning_rate": 0.00029669604213127725, + "loss": 0.2199, + "step": 1256 + }, + { + "epoch": 0.38, + "learning_rate": 0.00029669000498858215, + "loss": 0.3364, + "step": 1257 + }, + { + "epoch": 0.38, + "learning_rate": 0.00029668396239678353, + "loss": 0.0943, + "step": 1258 + }, + { + "epoch": 0.38, + "learning_rate": 0.0002966779143561058, + "loss": 0.2012, + "step": 1259 + }, + { + "epoch": 0.38, + "learning_rate": 0.0002966718608667738, + "loss": 0.2538, + "step": 1260 + }, + { + "epoch": 0.38, + "learning_rate": 0.0002966658019290122, + "loss": 0.2524, + "step": 1261 + }, + { + "epoch": 0.38, + "learning_rate": 0.0002966597375430461, + "loss": 0.2027, + "step": 1262 + }, + { + "epoch": 0.38, + "learning_rate": 0.0002966536677091009, + "loss": 0.3939, + "step": 1263 + }, + { + "epoch": 0.38, + "learning_rate": 0.00029664759242740204, + "loss": 0.2256, + "step": 1264 + }, + { + "epoch": 0.38, + "learning_rate": 0.0002966415116981751, + "loss": 0.2954, + "step": 1265 + }, + { + "epoch": 0.38, + "learning_rate": 0.00029663542552164603, + "loss": 0.1404, + "step": 1266 + }, + { + "epoch": 0.38, + "learning_rate": 0.0002966293338980409, + "loss": 0.3788, + "step": 1267 + }, + { + "epoch": 0.39, + "learning_rate": 0.00029662323682758606, + "loss": 0.4256, + "step": 1268 + }, + { + "epoch": 0.39, + "learning_rate": 0.00029661713431050786, + "loss": 0.3142, + "step": 1269 + }, + { + "epoch": 0.39, + "learning_rate": 0.0002966110263470331, + "loss": 0.1264, + "step": 1270 + }, + { + "epoch": 0.39, + "learning_rate": 0.00029660491293738867, + "loss": 0.239, + "step": 1271 + }, + { + "epoch": 0.39, + "learning_rate": 0.00029659879408180164, + "loss": 0.2729, + "step": 1272 + }, + { + "epoch": 0.39, + "learning_rate": 0.0002965926697804993, + "loss": 0.3574, + "step": 1273 + }, + { + "epoch": 0.39, + "learning_rate": 0.0002965865400337091, + "loss": 0.2101, + "step": 1274 + }, + { + "epoch": 0.39, + "learning_rate": 0.0002965804048416589, + "loss": 0.2988, + "step": 1275 + }, + { + "epoch": 0.39, + "learning_rate": 0.00029657426420457637, + "loss": 0.1564, + "step": 1276 + }, + { + "epoch": 0.39, + "learning_rate": 0.0002965681181226899, + "loss": 0.179, + "step": 1277 + }, + { + "epoch": 0.39, + "learning_rate": 0.0002965619665962276, + "loss": 0.2958, + "step": 1278 + }, + { + "epoch": 0.39, + "learning_rate": 0.000296555809625418, + "loss": 0.2905, + "step": 1279 + }, + { + "epoch": 0.39, + "learning_rate": 0.0002965496472104898, + "loss": 0.3293, + "step": 1280 + }, + { + "epoch": 0.39, + "learning_rate": 0.000296543479351672, + "loss": 0.3049, + "step": 1281 + }, + { + "epoch": 0.39, + "learning_rate": 0.00029653730604919373, + "loss": 0.4284, + "step": 1282 + }, + { + "epoch": 0.39, + "learning_rate": 0.00029653112730328423, + "loss": 0.4334, + "step": 1283 + }, + { + "epoch": 0.39, + "learning_rate": 0.000296524943114173, + "loss": 0.4971, + "step": 1284 + }, + { + "epoch": 0.39, + "learning_rate": 0.0002965187534820899, + "loss": 0.1467, + "step": 1285 + }, + { + "epoch": 0.39, + "learning_rate": 0.0002965125584072647, + "loss": 0.2174, + "step": 1286 + }, + { + "epoch": 0.39, + "learning_rate": 0.0002965063578899276, + "loss": 0.3223, + "step": 1287 + }, + { + "epoch": 0.39, + "learning_rate": 0.00029650015193030893, + "loss": 0.3447, + "step": 1288 + }, + { + "epoch": 0.39, + "learning_rate": 0.00029649394052863923, + "loss": 0.1574, + "step": 1289 + }, + { + "epoch": 0.39, + "learning_rate": 0.0002964877236851493, + "loss": 0.264, + "step": 1290 + }, + { + "epoch": 0.39, + "learning_rate": 0.0002964815014000699, + "loss": 0.452, + "step": 1291 + }, + { + "epoch": 0.39, + "learning_rate": 0.00029647527367363235, + "loss": 0.1628, + "step": 1292 + }, + { + "epoch": 0.39, + "learning_rate": 0.0002964690405060679, + "loss": 0.4697, + "step": 1293 + }, + { + "epoch": 0.39, + "learning_rate": 0.0002964628018976081, + "loss": 0.293, + "step": 1294 + }, + { + "epoch": 0.39, + "learning_rate": 0.00029645655784848475, + "loss": 0.2422, + "step": 1295 + }, + { + "epoch": 0.39, + "learning_rate": 0.00029645030835892967, + "loss": 0.2475, + "step": 1296 + }, + { + "epoch": 0.39, + "learning_rate": 0.0002964440534291752, + "loss": 0.3514, + "step": 1297 + }, + { + "epoch": 0.39, + "learning_rate": 0.00029643779305945354, + "loss": 0.5646, + "step": 1298 + }, + { + "epoch": 0.39, + "learning_rate": 0.00029643152724999724, + "loss": 0.4192, + "step": 1299 + }, + { + "epoch": 0.39, + "learning_rate": 0.00029642525600103917, + "loss": 0.3455, + "step": 1300 + }, + { + "epoch": 0.4, + "learning_rate": 0.0002964189793128122, + "loss": 0.3523, + "step": 1301 + }, + { + "epoch": 0.4, + "learning_rate": 0.0002964126971855496, + "loss": 0.3214, + "step": 1302 + }, + { + "epoch": 0.4, + "learning_rate": 0.0002964064096194845, + "loss": 0.1744, + "step": 1303 + }, + { + "epoch": 0.4, + "learning_rate": 0.00029640011661485075, + "loss": 0.2651, + "step": 1304 + }, + { + "epoch": 0.4, + "learning_rate": 0.00029639381817188196, + "loss": 0.1123, + "step": 1305 + }, + { + "epoch": 0.4, + "learning_rate": 0.0002963875142908121, + "loss": 0.2294, + "step": 1306 + }, + { + "epoch": 0.4, + "learning_rate": 0.0002963812049718753, + "loss": 0.1689, + "step": 1307 + }, + { + "epoch": 0.4, + "learning_rate": 0.0002963748902153061, + "loss": 0.3738, + "step": 1308 + }, + { + "epoch": 0.4, + "learning_rate": 0.0002963685700213389, + "loss": 0.1853, + "step": 1309 + }, + { + "epoch": 0.4, + "learning_rate": 0.0002963622443902086, + "loss": 0.2779, + "step": 1310 + }, + { + "epoch": 0.4, + "learning_rate": 0.0002963559133221501, + "loss": 0.3452, + "step": 1311 + }, + { + "epoch": 0.4, + "learning_rate": 0.0002963495768173986, + "loss": 0.1872, + "step": 1312 + }, + { + "epoch": 0.4, + "learning_rate": 0.0002963432348761895, + "loss": 0.4343, + "step": 1313 + }, + { + "epoch": 0.4, + "learning_rate": 0.0002963368874987583, + "loss": 0.2362, + "step": 1314 + }, + { + "epoch": 0.4, + "learning_rate": 0.00029633053468534096, + "loss": 0.179, + "step": 1315 + }, + { + "epoch": 0.4, + "learning_rate": 0.00029632417643617333, + "loss": 0.274, + "step": 1316 + }, + { + "epoch": 0.4, + "learning_rate": 0.0002963178127514916, + "loss": 0.2406, + "step": 1317 + }, + { + "epoch": 0.4, + "learning_rate": 0.00029631144363153223, + "loss": 0.2242, + "step": 1318 + }, + { + "epoch": 0.4, + "learning_rate": 0.0002963050690765318, + "loss": 0.2934, + "step": 1319 + }, + { + "epoch": 0.4, + "learning_rate": 0.0002962986890867271, + "loss": 0.2068, + "step": 1320 + }, + { + "epoch": 0.4, + "learning_rate": 0.00029629230366235506, + "loss": 0.2605, + "step": 1321 + }, + { + "epoch": 0.4, + "learning_rate": 0.000296285912803653, + "loss": 0.2402, + "step": 1322 + }, + { + "epoch": 0.4, + "learning_rate": 0.0002962795165108582, + "loss": 0.2616, + "step": 1323 + }, + { + "epoch": 0.4, + "learning_rate": 0.0002962731147842084, + "loss": 0.2272, + "step": 1324 + }, + { + "epoch": 0.4, + "learning_rate": 0.00029626670762394127, + "loss": 0.2791, + "step": 1325 + }, + { + "epoch": 0.4, + "learning_rate": 0.00029626029503029486, + "loss": 0.3122, + "step": 1326 + }, + { + "epoch": 0.4, + "learning_rate": 0.0002962538770035074, + "loss": 0.2994, + "step": 1327 + }, + { + "epoch": 0.4, + "learning_rate": 0.00029624745354381733, + "loss": 0.2191, + "step": 1328 + }, + { + "epoch": 0.4, + "learning_rate": 0.00029624102465146326, + "loss": 0.2085, + "step": 1329 + }, + { + "epoch": 0.4, + "learning_rate": 0.0002962345903266839, + "loss": 0.2751, + "step": 1330 + }, + { + "epoch": 0.4, + "learning_rate": 0.00029622815056971837, + "loss": 0.2281, + "step": 1331 + }, + { + "epoch": 0.4, + "learning_rate": 0.00029622170538080584, + "loss": 0.2943, + "step": 1332 + }, + { + "epoch": 0.4, + "learning_rate": 0.0002962152547601858, + "loss": 0.3107, + "step": 1333 + }, + { + "epoch": 0.41, + "learning_rate": 0.00029620879870809777, + "loss": 0.2572, + "step": 1334 + }, + { + "epoch": 0.41, + "learning_rate": 0.00029620233722478165, + "loss": 0.2104, + "step": 1335 + }, + { + "epoch": 0.41, + "learning_rate": 0.0002961958703104774, + "loss": 0.3423, + "step": 1336 + }, + { + "epoch": 0.41, + "learning_rate": 0.00029618939796542524, + "loss": 0.1747, + "step": 1337 + }, + { + "epoch": 0.41, + "learning_rate": 0.00029618292018986574, + "loss": 0.1385, + "step": 1338 + }, + { + "epoch": 0.41, + "learning_rate": 0.0002961764369840394, + "loss": 0.2965, + "step": 1339 + }, + { + "epoch": 0.41, + "learning_rate": 0.0002961699483481871, + "loss": 0.1608, + "step": 1340 + }, + { + "epoch": 0.41, + "learning_rate": 0.00029616345428254984, + "loss": 0.1914, + "step": 1341 + }, + { + "epoch": 0.41, + "learning_rate": 0.00029615695478736885, + "loss": 0.4196, + "step": 1342 + }, + { + "epoch": 0.41, + "learning_rate": 0.0002961504498628856, + "loss": 0.3067, + "step": 1343 + }, + { + "epoch": 0.41, + "learning_rate": 0.00029614393950934176, + "loss": 0.1065, + "step": 1344 + }, + { + "epoch": 0.41, + "learning_rate": 0.0002961374237269791, + "loss": 0.3306, + "step": 1345 + }, + { + "epoch": 0.41, + "learning_rate": 0.0002961309025160397, + "loss": 0.0247, + "step": 1346 + }, + { + "epoch": 0.41, + "learning_rate": 0.0002961243758767658, + "loss": 0.3364, + "step": 1347 + }, + { + "epoch": 0.41, + "learning_rate": 0.00029611784380939983, + "loss": 0.2108, + "step": 1348 + }, + { + "epoch": 0.41, + "learning_rate": 0.00029611130631418446, + "loss": 0.2044, + "step": 1349 + }, + { + "epoch": 0.41, + "learning_rate": 0.00029610476339136254, + "loss": 0.3439, + "step": 1350 + }, + { + "epoch": 0.41, + "learning_rate": 0.00029609821504117704, + "loss": 0.2419, + "step": 1351 + }, + { + "epoch": 0.41, + "learning_rate": 0.00029609166126387136, + "loss": 0.1019, + "step": 1352 + }, + { + "epoch": 0.41, + "learning_rate": 0.00029608510205968885, + "loss": 0.332, + "step": 1353 + }, + { + "epoch": 0.41, + "learning_rate": 0.0002960785374288732, + "loss": 0.2942, + "step": 1354 + }, + { + "epoch": 0.41, + "learning_rate": 0.00029607196737166825, + "loss": 0.2483, + "step": 1355 + }, + { + "epoch": 0.41, + "learning_rate": 0.00029606539188831805, + "loss": 0.1645, + "step": 1356 + }, + { + "epoch": 0.41, + "learning_rate": 0.0002960588109790669, + "loss": 0.2791, + "step": 1357 + }, + { + "epoch": 0.41, + "learning_rate": 0.0002960522246441592, + "loss": 0.2799, + "step": 1358 + }, + { + "epoch": 0.41, + "learning_rate": 0.00029604563288383967, + "loss": 0.2376, + "step": 1359 + }, + { + "epoch": 0.41, + "learning_rate": 0.00029603903569835313, + "loss": 0.1061, + "step": 1360 + }, + { + "epoch": 0.41, + "learning_rate": 0.00029603243308794474, + "loss": 0.1928, + "step": 1361 + }, + { + "epoch": 0.41, + "learning_rate": 0.00029602582505285964, + "loss": 0.3585, + "step": 1362 + }, + { + "epoch": 0.41, + "learning_rate": 0.0002960192115933434, + "loss": 0.178, + "step": 1363 + }, + { + "epoch": 0.41, + "learning_rate": 0.0002960125927096416, + "loss": 0.4452, + "step": 1364 + }, + { + "epoch": 0.41, + "learning_rate": 0.00029600596840200015, + "loss": 0.3869, + "step": 1365 + }, + { + "epoch": 0.41, + "learning_rate": 0.00029599933867066516, + "loss": 0.2023, + "step": 1366 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002959927035158829, + "loss": 0.3678, + "step": 1367 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002959860629378998, + "loss": 0.1359, + "step": 1368 + }, + { + "epoch": 0.42, + "learning_rate": 0.00029597941693696253, + "loss": 0.1157, + "step": 1369 + }, + { + "epoch": 0.42, + "learning_rate": 0.000295972765513318, + "loss": 0.2145, + "step": 1370 + }, + { + "epoch": 0.42, + "learning_rate": 0.00029596610866721335, + "loss": 0.1947, + "step": 1371 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002959594463988958, + "loss": 0.215, + "step": 1372 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002959527787086128, + "loss": 0.3155, + "step": 1373 + }, + { + "epoch": 0.42, + "learning_rate": 0.00029594610559661213, + "loss": 0.1856, + "step": 1374 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002959394270631415, + "loss": 0.2009, + "step": 1375 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002959327431084493, + "loss": 0.2801, + "step": 1376 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002959260537327835, + "loss": 0.4042, + "step": 1377 + }, + { + "epoch": 0.42, + "learning_rate": 0.00029591935893639277, + "loss": 0.378, + "step": 1378 + }, + { + "epoch": 0.42, + "learning_rate": 0.00029591265871952577, + "loss": 0.4197, + "step": 1379 + }, + { + "epoch": 0.42, + "learning_rate": 0.00029590595308243134, + "loss": 0.1953, + "step": 1380 + }, + { + "epoch": 0.42, + "learning_rate": 0.00029589924202535865, + "loss": 0.2923, + "step": 1381 + }, + { + "epoch": 0.42, + "learning_rate": 0.000295892525548557, + "loss": 0.1972, + "step": 1382 + }, + { + "epoch": 0.42, + "learning_rate": 0.00029588580365227577, + "loss": 0.2984, + "step": 1383 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002958790763367648, + "loss": 0.1803, + "step": 1384 + }, + { + "epoch": 0.42, + "learning_rate": 0.00029587234360227394, + "loss": 0.3282, + "step": 1385 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002958656054490533, + "loss": 0.296, + "step": 1386 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002958588618773531, + "loss": 0.309, + "step": 1387 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002958521128874239, + "loss": 0.2411, + "step": 1388 + }, + { + "epoch": 0.42, + "learning_rate": 0.00029584535847951645, + "loss": 0.2656, + "step": 1389 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002958385986538816, + "loss": 0.1958, + "step": 1390 + }, + { + "epoch": 0.42, + "learning_rate": 0.00029583183341077055, + "loss": 0.21, + "step": 1391 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002958250627504345, + "loss": 0.175, + "step": 1392 + }, + { + "epoch": 0.42, + "learning_rate": 0.00029581828667312496, + "loss": 0.2099, + "step": 1393 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002958115051790937, + "loss": 0.0958, + "step": 1394 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002958047182685926, + "loss": 0.3187, + "step": 1395 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002957979259418738, + "loss": 0.1397, + "step": 1396 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002957911281991896, + "loss": 0.2258, + "step": 1397 + }, + { + "epoch": 0.42, + "learning_rate": 0.00029578432504079257, + "loss": 0.3243, + "step": 1398 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002957775164669353, + "loss": 0.3058, + "step": 1399 + }, + { + "epoch": 0.43, + "learning_rate": 0.0002957707024778708, + "loss": 0.3556, + "step": 1400 + }, + { + "epoch": 0.43, + "learning_rate": 0.00029576388307385215, + "loss": 0.1081, + "step": 1401 + }, + { + "epoch": 0.43, + "learning_rate": 0.0002957570582551327, + "loss": 0.1427, + "step": 1402 + }, + { + "epoch": 0.43, + "learning_rate": 0.000295750228021966, + "loss": 0.3631, + "step": 1403 + }, + { + "epoch": 0.43, + "learning_rate": 0.0002957433923746057, + "loss": 0.219, + "step": 1404 + }, + { + "epoch": 0.43, + "learning_rate": 0.0002957365513133058, + "loss": 0.3122, + "step": 1405 + }, + { + "epoch": 0.43, + "learning_rate": 0.00029572970483832036, + "loss": 0.3059, + "step": 1406 + }, + { + "epoch": 0.43, + "learning_rate": 0.00029572285294990374, + "loss": 0.1331, + "step": 1407 + }, + { + "epoch": 0.43, + "learning_rate": 0.0002957159956483105, + "loss": 0.3305, + "step": 1408 + }, + { + "epoch": 0.43, + "learning_rate": 0.0002957091329337953, + "loss": 0.0518, + "step": 1409 + }, + { + "epoch": 0.43, + "learning_rate": 0.000295702264806613, + "loss": 0.3776, + "step": 1410 + }, + { + "epoch": 0.43, + "learning_rate": 0.000295695391267019, + "loss": 0.2985, + "step": 1411 + }, + { + "epoch": 0.43, + "learning_rate": 0.0002956885123152684, + "loss": 0.2693, + "step": 1412 + }, + { + "epoch": 0.43, + "learning_rate": 0.00029568162795161676, + "loss": 0.1771, + "step": 1413 + }, + { + "epoch": 0.43, + "learning_rate": 0.0002956747381763199, + "loss": 0.259, + "step": 1414 + }, + { + "epoch": 0.43, + "learning_rate": 0.0002956678429896336, + "loss": 0.392, + "step": 1415 + }, + { + "epoch": 0.43, + "learning_rate": 0.0002956609423918142, + "loss": 0.365, + "step": 1416 + }, + { + "epoch": 0.43, + "learning_rate": 0.00029565403638311794, + "loss": 0.3014, + "step": 1417 + }, + { + "epoch": 0.43, + "learning_rate": 0.0002956471249638013, + "loss": 0.362, + "step": 1418 + }, + { + "epoch": 0.43, + "learning_rate": 0.0002956402081341211, + "loss": 0.2639, + "step": 1419 + }, + { + "epoch": 0.43, + "learning_rate": 0.00029563328589433426, + "loss": 0.1552, + "step": 1420 + }, + { + "epoch": 0.43, + "learning_rate": 0.0002956263582446979, + "loss": 0.4239, + "step": 1421 + }, + { + "epoch": 0.43, + "learning_rate": 0.0002956194251854694, + "loss": 0.2124, + "step": 1422 + }, + { + "epoch": 0.43, + "learning_rate": 0.0002956124867169063, + "loss": 0.3118, + "step": 1423 + }, + { + "epoch": 0.43, + "learning_rate": 0.0002956055428392663, + "loss": 0.3133, + "step": 1424 + }, + { + "epoch": 0.43, + "learning_rate": 0.0002955985935528073, + "loss": 0.3846, + "step": 1425 + }, + { + "epoch": 0.43, + "learning_rate": 0.00029559163885778763, + "loss": 0.3456, + "step": 1426 + }, + { + "epoch": 0.43, + "learning_rate": 0.00029558467875446544, + "loss": 0.3449, + "step": 1427 + }, + { + "epoch": 0.43, + "learning_rate": 0.00029557771324309945, + "loss": 0.3846, + "step": 1428 + }, + { + "epoch": 0.43, + "learning_rate": 0.0002955707423239482, + "loss": 0.4196, + "step": 1429 + }, + { + "epoch": 0.43, + "learning_rate": 0.0002955637659972708, + "loss": 0.297, + "step": 1430 + }, + { + "epoch": 0.43, + "learning_rate": 0.00029555678426332636, + "loss": 0.2481, + "step": 1431 + }, + { + "epoch": 0.43, + "learning_rate": 0.0002955497971223742, + "loss": 0.2012, + "step": 1432 + }, + { + "epoch": 0.44, + "learning_rate": 0.00029554280457467396, + "loss": 0.2302, + "step": 1433 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002955358066204853, + "loss": 0.3291, + "step": 1434 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002955288032600682, + "loss": 0.1786, + "step": 1435 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002955217944936828, + "loss": 0.2285, + "step": 1436 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002955147803215895, + "loss": 0.3216, + "step": 1437 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002955077607440488, + "loss": 0.1524, + "step": 1438 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002955007357613215, + "loss": 0.3723, + "step": 1439 + }, + { + "epoch": 0.44, + "learning_rate": 0.00029549370537366857, + "loss": 0.192, + "step": 1440 + }, + { + "epoch": 0.44, + "learning_rate": 0.00029548666958135116, + "loss": 0.2998, + "step": 1441 + }, + { + "epoch": 0.44, + "learning_rate": 0.00029547962838463054, + "loss": 0.1838, + "step": 1442 + }, + { + "epoch": 0.44, + "learning_rate": 0.00029547258178376837, + "loss": 0.191, + "step": 1443 + }, + { + "epoch": 0.44, + "learning_rate": 0.00029546552977902644, + "loss": 0.2441, + "step": 1444 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002954584723706666, + "loss": 0.3127, + "step": 1445 + }, + { + "epoch": 0.44, + "learning_rate": 0.00029545140955895106, + "loss": 0.3181, + "step": 1446 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002954443413441422, + "loss": 0.2749, + "step": 1447 + }, + { + "epoch": 0.44, + "learning_rate": 0.00029543726772650255, + "loss": 0.3765, + "step": 1448 + }, + { + "epoch": 0.44, + "learning_rate": 0.00029543018870629485, + "loss": 0.4265, + "step": 1449 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002954231042837822, + "loss": 0.2235, + "step": 1450 + }, + { + "epoch": 0.44, + "learning_rate": 0.00029541601445922766, + "loss": 0.3498, + "step": 1451 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002954089192328946, + "loss": 0.2881, + "step": 1452 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002954018186050466, + "loss": 0.3153, + "step": 1453 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002953947125759474, + "loss": 0.2107, + "step": 1454 + }, + { + "epoch": 0.44, + "learning_rate": 0.00029538760114586105, + "loss": 0.4518, + "step": 1455 + }, + { + "epoch": 0.44, + "learning_rate": 0.00029538048431505164, + "loss": 0.1184, + "step": 1456 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002953733620837835, + "loss": 0.1483, + "step": 1457 + }, + { + "epoch": 0.44, + "learning_rate": 0.00029536623445232136, + "loss": 0.1925, + "step": 1458 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002953591014209298, + "loss": 0.3289, + "step": 1459 + }, + { + "epoch": 0.44, + "learning_rate": 0.00029535196298987394, + "loss": 0.2754, + "step": 1460 + }, + { + "epoch": 0.44, + "learning_rate": 0.00029534481915941886, + "loss": 0.3737, + "step": 1461 + }, + { + "epoch": 0.44, + "learning_rate": 0.00029533766992983, + "loss": 0.2797, + "step": 1462 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002953305153013729, + "loss": 0.3239, + "step": 1463 + }, + { + "epoch": 0.44, + "learning_rate": 0.00029532335527431334, + "loss": 0.4888, + "step": 1464 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002953161898489173, + "loss": 0.2427, + "step": 1465 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002953090190254509, + "loss": 0.3419, + "step": 1466 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002953018428041806, + "loss": 0.4325, + "step": 1467 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002952946611853729, + "loss": 0.2913, + "step": 1468 + }, + { + "epoch": 0.45, + "learning_rate": 0.00029528747416929463, + "loss": 0.332, + "step": 1469 + }, + { + "epoch": 0.45, + "learning_rate": 0.00029528028175621274, + "loss": 0.2977, + "step": 1470 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002952730839463944, + "loss": 0.2402, + "step": 1471 + }, + { + "epoch": 0.45, + "learning_rate": 0.00029526588074010705, + "loss": 0.2589, + "step": 1472 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002952586721376182, + "loss": 0.2131, + "step": 1473 + }, + { + "epoch": 0.45, + "learning_rate": 0.00029525145813919564, + "loss": 0.297, + "step": 1474 + }, + { + "epoch": 0.45, + "learning_rate": 0.00029524423874510735, + "loss": 0.4237, + "step": 1475 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002952370139556215, + "loss": 0.2751, + "step": 1476 + }, + { + "epoch": 0.45, + "learning_rate": 0.00029522978377100647, + "loss": 0.0939, + "step": 1477 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002952225481915309, + "loss": 0.1014, + "step": 1478 + }, + { + "epoch": 0.45, + "learning_rate": 0.00029521530721746353, + "loss": 0.1495, + "step": 1479 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002952080608490733, + "loss": 0.1863, + "step": 1480 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002952008090866295, + "loss": 0.432, + "step": 1481 + }, + { + "epoch": 0.45, + "learning_rate": 0.00029519355193040136, + "loss": 0.284, + "step": 1482 + }, + { + "epoch": 0.45, + "learning_rate": 0.00029518628938065856, + "loss": 0.2404, + "step": 1483 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002951790214376709, + "loss": 0.2782, + "step": 1484 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002951717481017083, + "loss": 0.4125, + "step": 1485 + }, + { + "epoch": 0.45, + "learning_rate": 0.00029516446937304097, + "loss": 0.2794, + "step": 1486 + }, + { + "epoch": 0.45, + "learning_rate": 0.00029515718525193927, + "loss": 0.1577, + "step": 1487 + }, + { + "epoch": 0.45, + "learning_rate": 0.00029514989573867386, + "loss": 0.2785, + "step": 1488 + }, + { + "epoch": 0.45, + "learning_rate": 0.00029514260083351546, + "loss": 0.2661, + "step": 1489 + }, + { + "epoch": 0.45, + "learning_rate": 0.00029513530053673506, + "loss": 0.3852, + "step": 1490 + }, + { + "epoch": 0.45, + "learning_rate": 0.00029512799484860384, + "loss": 0.164, + "step": 1491 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002951206837693932, + "loss": 0.363, + "step": 1492 + }, + { + "epoch": 0.45, + "learning_rate": 0.00029511336729937473, + "loss": 0.2935, + "step": 1493 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002951060454388202, + "loss": 0.4491, + "step": 1494 + }, + { + "epoch": 0.45, + "learning_rate": 0.00029509871818800157, + "loss": 0.2144, + "step": 1495 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002950913855471911, + "loss": 0.2352, + "step": 1496 + }, + { + "epoch": 0.45, + "learning_rate": 0.00029508404751666116, + "loss": 0.4346, + "step": 1497 + }, + { + "epoch": 0.45, + "learning_rate": 0.00029507670409668424, + "loss": 0.359, + "step": 1498 + }, + { + "epoch": 0.46, + "learning_rate": 0.00029506935528753324, + "loss": 0.3834, + "step": 1499 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002950620010894811, + "loss": 0.35, + "step": 1500 + }, + { + "epoch": 0.46, + "learning_rate": 0.000295054641502801, + "loss": 0.384, + "step": 1501 + }, + { + "epoch": 0.46, + "learning_rate": 0.00029504727652776634, + "loss": 0.4987, + "step": 1502 + }, + { + "epoch": 0.46, + "learning_rate": 0.00029503990616465067, + "loss": 0.0797, + "step": 1503 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002950325304137279, + "loss": 0.174, + "step": 1504 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002950251492752718, + "loss": 0.0468, + "step": 1505 + }, + { + "epoch": 0.46, + "learning_rate": 0.00029501776274955683, + "loss": 0.2689, + "step": 1506 + }, + { + "epoch": 0.46, + "learning_rate": 0.00029501037083685716, + "loss": 0.206, + "step": 1507 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002950029735374475, + "loss": 0.2429, + "step": 1508 + }, + { + "epoch": 0.46, + "learning_rate": 0.00029499557085160247, + "loss": 0.2554, + "step": 1509 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002949881627795973, + "loss": 0.2931, + "step": 1510 + }, + { + "epoch": 0.46, + "learning_rate": 0.00029498074932170703, + "loss": 0.2803, + "step": 1511 + }, + { + "epoch": 0.46, + "learning_rate": 0.00029497333047820705, + "loss": 0.2717, + "step": 1512 + }, + { + "epoch": 0.46, + "learning_rate": 0.00029496590624937297, + "loss": 0.2346, + "step": 1513 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002949584766354806, + "loss": 0.2544, + "step": 1514 + }, + { + "epoch": 0.46, + "learning_rate": 0.00029495104163680593, + "loss": 0.202, + "step": 1515 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002949436012536251, + "loss": 0.3041, + "step": 1516 + }, + { + "epoch": 0.46, + "learning_rate": 0.00029493615548621454, + "loss": 0.1258, + "step": 1517 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002949287043348508, + "loss": 0.3768, + "step": 1518 + }, + { + "epoch": 0.46, + "learning_rate": 0.00029492124779981075, + "loss": 0.4258, + "step": 1519 + }, + { + "epoch": 0.46, + "learning_rate": 0.00029491378588137126, + "loss": 0.2839, + "step": 1520 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002949063185798096, + "loss": 0.1417, + "step": 1521 + }, + { + "epoch": 0.46, + "learning_rate": 0.00029489884589540313, + "loss": 0.1888, + "step": 1522 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002948913678284295, + "loss": 0.0993, + "step": 1523 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002948838843791664, + "loss": 0.2891, + "step": 1524 + }, + { + "epoch": 0.46, + "learning_rate": 0.00029487639554789187, + "loss": 0.1256, + "step": 1525 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002948689013348841, + "loss": 0.361, + "step": 1526 + }, + { + "epoch": 0.46, + "learning_rate": 0.00029486140174042147, + "loss": 0.2919, + "step": 1527 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002948538967647825, + "loss": 0.2055, + "step": 1528 + }, + { + "epoch": 0.46, + "learning_rate": 0.00029484638640824614, + "loss": 0.2445, + "step": 1529 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002948388706710913, + "loss": 0.2786, + "step": 1530 + }, + { + "epoch": 0.46, + "learning_rate": 0.00029483134955359707, + "loss": 0.1127, + "step": 1531 + }, + { + "epoch": 0.47, + "learning_rate": 0.00029482382305604296, + "loss": 0.4242, + "step": 1532 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002948162911787085, + "loss": 0.3489, + "step": 1533 + }, + { + "epoch": 0.47, + "learning_rate": 0.00029480875392187347, + "loss": 0.1562, + "step": 1534 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002948012112858179, + "loss": 0.3839, + "step": 1535 + }, + { + "epoch": 0.47, + "learning_rate": 0.000294793663270822, + "loss": 0.2368, + "step": 1536 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002947861098771661, + "loss": 0.3223, + "step": 1537 + }, + { + "epoch": 0.47, + "learning_rate": 0.00029477855110513076, + "loss": 0.246, + "step": 1538 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002947709869549968, + "loss": 0.1221, + "step": 1539 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002947634174270452, + "loss": 0.2034, + "step": 1540 + }, + { + "epoch": 0.47, + "learning_rate": 0.00029475584252155725, + "loss": 0.2863, + "step": 1541 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002947482622388142, + "loss": 0.4638, + "step": 1542 + }, + { + "epoch": 0.47, + "learning_rate": 0.00029474067657909764, + "loss": 0.269, + "step": 1543 + }, + { + "epoch": 0.47, + "learning_rate": 0.00029473308554268945, + "loss": 0.3791, + "step": 1544 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002947254891298715, + "loss": 0.1553, + "step": 1545 + }, + { + "epoch": 0.47, + "learning_rate": 0.00029471788734092606, + "loss": 0.2044, + "step": 1546 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002947102801761355, + "loss": 0.3578, + "step": 1547 + }, + { + "epoch": 0.47, + "learning_rate": 0.00029470266763578236, + "loss": 0.2478, + "step": 1548 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002946950497201495, + "loss": 0.2923, + "step": 1549 + }, + { + "epoch": 0.47, + "learning_rate": 0.00029468742642951986, + "loss": 0.3726, + "step": 1550 + }, + { + "epoch": 0.47, + "learning_rate": 0.00029467979776417655, + "loss": 0.5185, + "step": 1551 + }, + { + "epoch": 0.47, + "learning_rate": 0.00029467216372440307, + "loss": 0.3456, + "step": 1552 + }, + { + "epoch": 0.47, + "learning_rate": 0.000294664524310483, + "loss": 0.328, + "step": 1553 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002946568795227, + "loss": 0.3352, + "step": 1554 + }, + { + "epoch": 0.47, + "learning_rate": 0.00029464922936133815, + "loss": 0.388, + "step": 1555 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002946415738266816, + "loss": 0.2405, + "step": 1556 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002946339129190148, + "loss": 0.3925, + "step": 1557 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002946262466386222, + "loss": 0.2795, + "step": 1558 + }, + { + "epoch": 0.47, + "learning_rate": 0.00029461857498578865, + "loss": 0.2927, + "step": 1559 + }, + { + "epoch": 0.47, + "learning_rate": 0.00029461089796079914, + "loss": 0.2759, + "step": 1560 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002946032155639389, + "loss": 0.4036, + "step": 1561 + }, + { + "epoch": 0.47, + "learning_rate": 0.00029459552779549316, + "loss": 0.103, + "step": 1562 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002945878346557477, + "loss": 0.3789, + "step": 1563 + }, + { + "epoch": 0.47, + "learning_rate": 0.00029458013614498804, + "loss": 0.3293, + "step": 1564 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002945724322635004, + "loss": 0.2893, + "step": 1565 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002945647230115708, + "loss": 0.3492, + "step": 1566 + }, + { + "epoch": 0.48, + "learning_rate": 0.00029455700838948575, + "loss": 0.0982, + "step": 1567 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002945492883975317, + "loss": 0.4754, + "step": 1568 + }, + { + "epoch": 0.48, + "learning_rate": 0.00029454156303599543, + "loss": 0.2101, + "step": 1569 + }, + { + "epoch": 0.48, + "learning_rate": 0.000294533832305164, + "loss": 0.2007, + "step": 1570 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002945260962053245, + "loss": 0.302, + "step": 1571 + }, + { + "epoch": 0.48, + "learning_rate": 0.00029451835473676445, + "loss": 0.2931, + "step": 1572 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002945106078997712, + "loss": 0.1154, + "step": 1573 + }, + { + "epoch": 0.48, + "learning_rate": 0.00029450285569463275, + "loss": 0.3683, + "step": 1574 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002944950981216369, + "loss": 0.2305, + "step": 1575 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002944873351810719, + "loss": 0.1063, + "step": 1576 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002944795668732261, + "loss": 0.0388, + "step": 1577 + }, + { + "epoch": 0.48, + "learning_rate": 0.00029447179319838803, + "loss": 0.2466, + "step": 1578 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002944640141568465, + "loss": 0.1594, + "step": 1579 + }, + { + "epoch": 0.48, + "learning_rate": 0.00029445622974889055, + "loss": 0.3182, + "step": 1580 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002944484399748093, + "loss": 0.3085, + "step": 1581 + }, + { + "epoch": 0.48, + "learning_rate": 0.000294440644834892, + "loss": 0.3983, + "step": 1582 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002944328443294284, + "loss": 0.3938, + "step": 1583 + }, + { + "epoch": 0.48, + "learning_rate": 0.00029442503845870813, + "loss": 0.1276, + "step": 1584 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002944172272230212, + "loss": 0.2155, + "step": 1585 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002944094106226578, + "loss": 0.4954, + "step": 1586 + }, + { + "epoch": 0.48, + "learning_rate": 0.00029440158865790824, + "loss": 0.2417, + "step": 1587 + }, + { + "epoch": 0.48, + "learning_rate": 0.00029439376132906316, + "loss": 0.3916, + "step": 1588 + }, + { + "epoch": 0.48, + "learning_rate": 0.00029438592863641324, + "loss": 0.3094, + "step": 1589 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002943780905802495, + "loss": 0.3086, + "step": 1590 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002943702471608631, + "loss": 0.2039, + "step": 1591 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002943623983785453, + "loss": 0.3783, + "step": 1592 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002943545442335878, + "loss": 0.18, + "step": 1593 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002943466847262823, + "loss": 0.2965, + "step": 1594 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002943388198569208, + "loss": 0.3282, + "step": 1595 + }, + { + "epoch": 0.48, + "learning_rate": 0.00029433094962579535, + "loss": 0.2572, + "step": 1596 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002943230740331984, + "loss": 0.2609, + "step": 1597 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002943151930794224, + "loss": 0.2669, + "step": 1598 + }, + { + "epoch": 0.49, + "learning_rate": 0.00029430730676476024, + "loss": 0.2585, + "step": 1599 + }, + { + "epoch": 0.49, + "learning_rate": 0.00029429941508950485, + "loss": 0.18, + "step": 1600 + }, + { + "epoch": 0.49, + "learning_rate": 0.00029429151805394926, + "loss": 0.2206, + "step": 1601 + }, + { + "epoch": 0.49, + "learning_rate": 0.00029428361565838696, + "loss": 0.3159, + "step": 1602 + }, + { + "epoch": 0.49, + "learning_rate": 0.00029427570790311144, + "loss": 0.341, + "step": 1603 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002942677947884164, + "loss": 0.2141, + "step": 1604 + }, + { + "epoch": 0.49, + "learning_rate": 0.00029425987631459587, + "loss": 0.2033, + "step": 1605 + }, + { + "epoch": 0.49, + "learning_rate": 0.00029425195248194405, + "loss": 0.2822, + "step": 1606 + }, + { + "epoch": 0.49, + "learning_rate": 0.00029424402329075516, + "loss": 0.1612, + "step": 1607 + }, + { + "epoch": 0.49, + "learning_rate": 0.00029423608874132377, + "loss": 0.3472, + "step": 1608 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002942281488339447, + "loss": 0.1743, + "step": 1609 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002942202035689128, + "loss": 0.2686, + "step": 1610 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002942122529465233, + "loss": 0.1995, + "step": 1611 + }, + { + "epoch": 0.49, + "learning_rate": 0.00029420429696707143, + "loss": 0.446, + "step": 1612 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002941963356308529, + "loss": 0.1584, + "step": 1613 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002941883689381633, + "loss": 0.2495, + "step": 1614 + }, + { + "epoch": 0.49, + "learning_rate": 0.00029418039688929867, + "loss": 0.1637, + "step": 1615 + }, + { + "epoch": 0.49, + "learning_rate": 0.00029417241948455504, + "loss": 0.1845, + "step": 1616 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002941644367242288, + "loss": 0.2457, + "step": 1617 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002941564486086165, + "loss": 0.2406, + "step": 1618 + }, + { + "epoch": 0.49, + "learning_rate": 0.00029414845513801494, + "loss": 0.3969, + "step": 1619 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002941404563127209, + "loss": 0.4207, + "step": 1620 + }, + { + "epoch": 0.49, + "learning_rate": 0.00029413245213303165, + "loss": 0.3595, + "step": 1621 + }, + { + "epoch": 0.49, + "learning_rate": 0.00029412444259924446, + "loss": 0.1899, + "step": 1622 + }, + { + "epoch": 0.49, + "learning_rate": 0.00029411642771165686, + "loss": 0.374, + "step": 1623 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002941084074705666, + "loss": 0.2549, + "step": 1624 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002941003818762716, + "loss": 0.3674, + "step": 1625 + }, + { + "epoch": 0.49, + "learning_rate": 0.00029409235092906996, + "loss": 0.1308, + "step": 1626 + }, + { + "epoch": 0.49, + "learning_rate": 0.00029408431462926005, + "loss": 0.3371, + "step": 1627 + }, + { + "epoch": 0.49, + "learning_rate": 0.00029407627297714034, + "loss": 0.1599, + "step": 1628 + }, + { + "epoch": 0.49, + "learning_rate": 0.00029406822597300965, + "loss": 0.3132, + "step": 1629 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002940601736171668, + "loss": 0.3197, + "step": 1630 + }, + { + "epoch": 0.5, + "learning_rate": 0.00029405211590991095, + "loss": 0.1463, + "step": 1631 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002940440528515414, + "loss": 0.1995, + "step": 1632 + }, + { + "epoch": 0.5, + "learning_rate": 0.00029403598444235776, + "loss": 0.1711, + "step": 1633 + }, + { + "epoch": 0.5, + "learning_rate": 0.00029402791068265964, + "loss": 0.2138, + "step": 1634 + }, + { + "epoch": 0.5, + "learning_rate": 0.00029401983157274704, + "loss": 0.3376, + "step": 1635 + }, + { + "epoch": 0.5, + "learning_rate": 0.00029401174711292, + "loss": 0.1361, + "step": 1636 + }, + { + "epoch": 0.5, + "learning_rate": 0.00029400365730347883, + "loss": 0.1845, + "step": 1637 + }, + { + "epoch": 0.5, + "learning_rate": 0.00029399556214472413, + "loss": 0.3565, + "step": 1638 + }, + { + "epoch": 0.5, + "learning_rate": 0.00029398746163695653, + "loss": 0.4043, + "step": 1639 + }, + { + "epoch": 0.5, + "learning_rate": 0.00029397935578047695, + "loss": 0.3053, + "step": 1640 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002939712445755866, + "loss": 0.3048, + "step": 1641 + }, + { + "epoch": 0.5, + "learning_rate": 0.00029396312802258657, + "loss": 0.2179, + "step": 1642 + }, + { + "epoch": 0.5, + "learning_rate": 0.00029395500612177863, + "loss": 0.3258, + "step": 1643 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002939468788734643, + "loss": 0.1433, + "step": 1644 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002939387462779455, + "loss": 0.2505, + "step": 1645 + }, + { + "epoch": 0.5, + "learning_rate": 0.00029393060833552445, + "loss": 0.3189, + "step": 1646 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002939224650465034, + "loss": 0.2461, + "step": 1647 + }, + { + "epoch": 0.5, + "learning_rate": 0.00029391431641118473, + "loss": 0.3148, + "step": 1648 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002939061624298713, + "loss": 0.3786, + "step": 1649 + }, + { + "epoch": 0.5, + "learning_rate": 0.00029389800310286596, + "loss": 0.2087, + "step": 1650 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002938898384304717, + "loss": 0.2936, + "step": 1651 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002938816684129919, + "loss": 0.1656, + "step": 1652 + }, + { + "epoch": 0.5, + "learning_rate": 0.00029387349305073007, + "loss": 0.0771, + "step": 1653 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002938653123439899, + "loss": 0.134, + "step": 1654 + }, + { + "epoch": 0.5, + "learning_rate": 0.00029385712629307524, + "loss": 0.3098, + "step": 1655 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002938489348982902, + "loss": 0.2677, + "step": 1656 + }, + { + "epoch": 0.5, + "learning_rate": 0.00029384073815993906, + "loss": 0.1637, + "step": 1657 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002938325360783263, + "loss": 0.348, + "step": 1658 + }, + { + "epoch": 0.5, + "learning_rate": 0.00029382432865375665, + "loss": 0.2089, + "step": 1659 + }, + { + "epoch": 0.5, + "learning_rate": 0.00029381611588653487, + "loss": 0.1811, + "step": 1660 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002938078977769662, + "loss": 0.3811, + "step": 1661 + }, + { + "epoch": 0.5, + "learning_rate": 0.00029379967432535583, + "loss": 0.4252, + "step": 1662 + }, + { + "epoch": 0.5, + "learning_rate": 0.00029379144553200924, + "loss": 0.2892, + "step": 1663 + }, + { + "epoch": 0.51, + "learning_rate": 0.00029378321139723207, + "loss": 0.2829, + "step": 1664 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002937749719213302, + "loss": 0.4034, + "step": 1665 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002937667271046099, + "loss": 0.1469, + "step": 1666 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002937584769473772, + "loss": 0.1851, + "step": 1667 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002937502214499386, + "loss": 0.2249, + "step": 1668 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002937419606126009, + "loss": 0.3667, + "step": 1669 + }, + { + "epoch": 0.51, + "learning_rate": 0.00029373369443567077, + "loss": 0.2854, + "step": 1670 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002937254229194555, + "loss": 0.2905, + "step": 1671 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002937171460642622, + "loss": 0.2118, + "step": 1672 + }, + { + "epoch": 0.51, + "learning_rate": 0.00029370886387039834, + "loss": 0.1213, + "step": 1673 + }, + { + "epoch": 0.51, + "learning_rate": 0.00029370057633817167, + "loss": 0.3235, + "step": 1674 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002936922834678899, + "loss": 0.1727, + "step": 1675 + }, + { + "epoch": 0.51, + "learning_rate": 0.00029368398525986126, + "loss": 0.1682, + "step": 1676 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002936756817143939, + "loss": 0.4145, + "step": 1677 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002936673728317963, + "loss": 0.3664, + "step": 1678 + }, + { + "epoch": 0.51, + "learning_rate": 0.00029365905861237704, + "loss": 0.3116, + "step": 1679 + }, + { + "epoch": 0.51, + "learning_rate": 0.00029365073905644507, + "loss": 0.2281, + "step": 1680 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002936424141643094, + "loss": 0.2421, + "step": 1681 + }, + { + "epoch": 0.51, + "learning_rate": 0.00029363408393627923, + "loss": 0.1655, + "step": 1682 + }, + { + "epoch": 0.51, + "learning_rate": 0.00029362574837266407, + "loss": 0.0844, + "step": 1683 + }, + { + "epoch": 0.51, + "learning_rate": 0.00029361740747377356, + "loss": 0.2977, + "step": 1684 + }, + { + "epoch": 0.51, + "learning_rate": 0.00029360906123991756, + "loss": 0.2071, + "step": 1685 + }, + { + "epoch": 0.51, + "learning_rate": 0.000293600709671406, + "loss": 0.3733, + "step": 1686 + }, + { + "epoch": 0.51, + "learning_rate": 0.00029359235276854914, + "loss": 0.2254, + "step": 1687 + }, + { + "epoch": 0.51, + "learning_rate": 0.00029358399053165755, + "loss": 0.3213, + "step": 1688 + }, + { + "epoch": 0.51, + "learning_rate": 0.00029357562296104166, + "loss": 0.2065, + "step": 1689 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002935672500570125, + "loss": 0.3372, + "step": 1690 + }, + { + "epoch": 0.51, + "learning_rate": 0.00029355887181988096, + "loss": 0.2746, + "step": 1691 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002935504882499584, + "loss": 0.2462, + "step": 1692 + }, + { + "epoch": 0.51, + "learning_rate": 0.00029354209934755605, + "loss": 0.3108, + "step": 1693 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002935337051129857, + "loss": 0.2581, + "step": 1694 + }, + { + "epoch": 0.51, + "learning_rate": 0.00029352530554655913, + "loss": 0.1682, + "step": 1695 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002935169006485883, + "loss": 0.2696, + "step": 1696 + }, + { + "epoch": 0.52, + "learning_rate": 0.00029350849041938544, + "loss": 0.1737, + "step": 1697 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002935000748592631, + "loss": 0.264, + "step": 1698 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002934916539685337, + "loss": 0.1505, + "step": 1699 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002934832277475102, + "loss": 0.2411, + "step": 1700 + }, + { + "epoch": 0.52, + "learning_rate": 0.00029347479619650554, + "loss": 0.2302, + "step": 1701 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002934663593158329, + "loss": 0.2335, + "step": 1702 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002934579171058057, + "loss": 0.043, + "step": 1703 + }, + { + "epoch": 0.52, + "learning_rate": 0.00029344946956673764, + "loss": 0.4307, + "step": 1704 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002934410166989424, + "loss": 0.3111, + "step": 1705 + }, + { + "epoch": 0.52, + "learning_rate": 0.000293432558502734, + "loss": 0.3435, + "step": 1706 + }, + { + "epoch": 0.52, + "learning_rate": 0.00029342409497842674, + "loss": 0.2399, + "step": 1707 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002934156261263349, + "loss": 0.2959, + "step": 1708 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002934071519467731, + "loss": 0.2863, + "step": 1709 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002933986724400562, + "loss": 0.354, + "step": 1710 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002933901876064991, + "loss": 0.2712, + "step": 1711 + }, + { + "epoch": 0.52, + "learning_rate": 0.000293381697446417, + "loss": 0.2795, + "step": 1712 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002933732019601253, + "loss": 0.4045, + "step": 1713 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002933647011479396, + "loss": 0.1364, + "step": 1714 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002933561950101757, + "loss": 0.3542, + "step": 1715 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002933476835471495, + "loss": 0.1299, + "step": 1716 + }, + { + "epoch": 0.52, + "learning_rate": 0.00029333916675917726, + "loss": 0.193, + "step": 1717 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002933306446465753, + "loss": 0.2628, + "step": 1718 + }, + { + "epoch": 0.52, + "learning_rate": 0.00029332211720966016, + "loss": 0.2151, + "step": 1719 + }, + { + "epoch": 0.52, + "learning_rate": 0.00029331358444874864, + "loss": 0.2057, + "step": 1720 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002933050463641578, + "loss": 0.295, + "step": 1721 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002932965029562047, + "loss": 0.2486, + "step": 1722 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002932879542252067, + "loss": 0.2549, + "step": 1723 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002932794001714814, + "loss": 0.1619, + "step": 1724 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002932708407953466, + "loss": 0.289, + "step": 1725 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002932622760971202, + "loss": 0.256, + "step": 1726 + }, + { + "epoch": 0.52, + "learning_rate": 0.00029325370607712027, + "loss": 0.3175, + "step": 1727 + }, + { + "epoch": 0.52, + "learning_rate": 0.00029324513073566527, + "loss": 0.1346, + "step": 1728 + }, + { + "epoch": 0.52, + "learning_rate": 0.00029323655007307373, + "loss": 0.1361, + "step": 1729 + }, + { + "epoch": 0.53, + "learning_rate": 0.00029322796408966445, + "loss": 0.2314, + "step": 1730 + }, + { + "epoch": 0.53, + "learning_rate": 0.00029321937278575626, + "loss": 0.2486, + "step": 1731 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002932107761616684, + "loss": 0.3394, + "step": 1732 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002932021742177201, + "loss": 0.4331, + "step": 1733 + }, + { + "epoch": 0.53, + "learning_rate": 0.00029319356695423104, + "loss": 0.4352, + "step": 1734 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002931849543715208, + "loss": 0.3157, + "step": 1735 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002931763364699095, + "loss": 0.373, + "step": 1736 + }, + { + "epoch": 0.53, + "learning_rate": 0.00029316771324971704, + "loss": 0.1119, + "step": 1737 + }, + { + "epoch": 0.53, + "learning_rate": 0.00029315908471126397, + "loss": 0.2445, + "step": 1738 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002931504508548707, + "loss": 0.3022, + "step": 1739 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002931418116808579, + "loss": 0.3943, + "step": 1740 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002931331671895466, + "loss": 0.1757, + "step": 1741 + }, + { + "epoch": 0.53, + "learning_rate": 0.00029312451738125787, + "loss": 0.222, + "step": 1742 + }, + { + "epoch": 0.53, + "learning_rate": 0.00029311586225631305, + "loss": 0.1426, + "step": 1743 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002931072018150336, + "loss": 0.2361, + "step": 1744 + }, + { + "epoch": 0.53, + "learning_rate": 0.00029309853605774126, + "loss": 0.2634, + "step": 1745 + }, + { + "epoch": 0.53, + "learning_rate": 0.00029308986498475793, + "loss": 0.4707, + "step": 1746 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002930811885964058, + "loss": 0.0675, + "step": 1747 + }, + { + "epoch": 0.53, + "learning_rate": 0.00029307250689300697, + "loss": 0.3394, + "step": 1748 + }, + { + "epoch": 0.53, + "learning_rate": 0.00029306381987488417, + "loss": 0.361, + "step": 1749 + }, + { + "epoch": 0.53, + "learning_rate": 0.00029305512754235994, + "loss": 0.1413, + "step": 1750 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002930464298957572, + "loss": 0.3305, + "step": 1751 + }, + { + "epoch": 0.53, + "learning_rate": 0.00029303772693539914, + "loss": 0.1752, + "step": 1752 + }, + { + "epoch": 0.53, + "learning_rate": 0.000293029018661609, + "loss": 0.2549, + "step": 1753 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002930203050747102, + "loss": 0.1887, + "step": 1754 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002930115861750264, + "loss": 0.1983, + "step": 1755 + }, + { + "epoch": 0.53, + "learning_rate": 0.00029300286196288166, + "loss": 0.2689, + "step": 1756 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002929941324385999, + "loss": 0.2104, + "step": 1757 + }, + { + "epoch": 0.53, + "learning_rate": 0.00029298539760250545, + "loss": 0.3662, + "step": 1758 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002929766574549228, + "loss": 0.5166, + "step": 1759 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002929679119961766, + "loss": 0.268, + "step": 1760 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002929591612265917, + "loss": 0.1503, + "step": 1761 + }, + { + "epoch": 0.53, + "learning_rate": 0.00029295040514649317, + "loss": 0.4117, + "step": 1762 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002929416437562063, + "loss": 0.2704, + "step": 1763 + }, + { + "epoch": 0.54, + "learning_rate": 0.00029293287705605656, + "loss": 0.3453, + "step": 1764 + }, + { + "epoch": 0.54, + "learning_rate": 0.00029292410504636957, + "loss": 0.3973, + "step": 1765 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002929153277274712, + "loss": 0.2841, + "step": 1766 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002929065450996875, + "loss": 0.2332, + "step": 1767 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002928977571633447, + "loss": 0.1952, + "step": 1768 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002928889639187693, + "loss": 0.2351, + "step": 1769 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002928801653662879, + "loss": 0.267, + "step": 1770 + }, + { + "epoch": 0.54, + "learning_rate": 0.00029287136150622735, + "loss": 0.2546, + "step": 1771 + }, + { + "epoch": 0.54, + "learning_rate": 0.00029286255233891464, + "loss": 0.2763, + "step": 1772 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002928537378646771, + "loss": 0.3231, + "step": 1773 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002928449180838421, + "loss": 0.0946, + "step": 1774 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002928360929967373, + "loss": 0.2822, + "step": 1775 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002928272626036905, + "loss": 0.3929, + "step": 1776 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002928184269050297, + "loss": 0.1814, + "step": 1777 + }, + { + "epoch": 0.54, + "learning_rate": 0.00029280958590108323, + "loss": 0.3385, + "step": 1778 + }, + { + "epoch": 0.54, + "learning_rate": 0.00029280073959217934, + "loss": 0.066, + "step": 1779 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002927918879786468, + "loss": 0.2809, + "step": 1780 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002927830310608143, + "loss": 0.0748, + "step": 1781 + }, + { + "epoch": 0.54, + "learning_rate": 0.00029277416883901095, + "loss": 0.1194, + "step": 1782 + }, + { + "epoch": 0.54, + "learning_rate": 0.00029276530131356585, + "loss": 0.179, + "step": 1783 + }, + { + "epoch": 0.54, + "learning_rate": 0.00029275642848480853, + "loss": 0.2202, + "step": 1784 + }, + { + "epoch": 0.54, + "learning_rate": 0.00029274755035306845, + "loss": 0.1528, + "step": 1785 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002927386669186756, + "loss": 0.2398, + "step": 1786 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002927297781819597, + "loss": 0.1214, + "step": 1787 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002927208841432512, + "loss": 0.2905, + "step": 1788 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002927119848028804, + "loss": 0.2248, + "step": 1789 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002927030801611778, + "loss": 0.2592, + "step": 1790 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002926941702184742, + "loss": 0.3307, + "step": 1791 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002926852549751007, + "loss": 0.4024, + "step": 1792 + }, + { + "epoch": 0.54, + "learning_rate": 0.00029267633443138836, + "loss": 0.2335, + "step": 1793 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002926674085876686, + "loss": 0.3413, + "step": 1794 + }, + { + "epoch": 0.55, + "learning_rate": 0.00029265847744427303, + "loss": 0.3006, + "step": 1795 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002926495410015333, + "loss": 0.3663, + "step": 1796 + }, + { + "epoch": 0.55, + "learning_rate": 0.00029264059925978144, + "loss": 0.1595, + "step": 1797 + }, + { + "epoch": 0.55, + "learning_rate": 0.00029263165221934967, + "loss": 0.2729, + "step": 1798 + }, + { + "epoch": 0.55, + "learning_rate": 0.00029262269988057025, + "loss": 0.2582, + "step": 1799 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002926137422437757, + "loss": 0.2758, + "step": 1800 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002926047793092989, + "loss": 0.285, + "step": 1801 + }, + { + "epoch": 0.55, + "learning_rate": 0.00029259581107747275, + "loss": 0.2293, + "step": 1802 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002925868375486304, + "loss": 0.2636, + "step": 1803 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002925778587231051, + "loss": 0.2176, + "step": 1804 + }, + { + "epoch": 0.55, + "learning_rate": 0.00029256887460123046, + "loss": 0.2073, + "step": 1805 + }, + { + "epoch": 0.55, + "learning_rate": 0.00029255988518334027, + "loss": 0.3329, + "step": 1806 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002925508904697683, + "loss": 0.3212, + "step": 1807 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002925418904608489, + "loss": 0.1044, + "step": 1808 + }, + { + "epoch": 0.55, + "learning_rate": 0.00029253288515691616, + "loss": 0.3573, + "step": 1809 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002925238745583048, + "loss": 0.0903, + "step": 1810 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002925148586653494, + "loss": 0.187, + "step": 1811 + }, + { + "epoch": 0.55, + "learning_rate": 0.00029250583747838485, + "loss": 0.261, + "step": 1812 + }, + { + "epoch": 0.55, + "learning_rate": 0.00029249681099774644, + "loss": 0.2489, + "step": 1813 + }, + { + "epoch": 0.55, + "learning_rate": 0.00029248777922376935, + "loss": 0.4501, + "step": 1814 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002924787421567891, + "loss": 0.2202, + "step": 1815 + }, + { + "epoch": 0.55, + "learning_rate": 0.00029246969979714136, + "loss": 0.2226, + "step": 1816 + }, + { + "epoch": 0.55, + "learning_rate": 0.00029246065214516207, + "loss": 0.2908, + "step": 1817 + }, + { + "epoch": 0.55, + "learning_rate": 0.00029245159920118736, + "loss": 0.2637, + "step": 1818 + }, + { + "epoch": 0.55, + "learning_rate": 0.00029244254096555346, + "loss": 0.3328, + "step": 1819 + }, + { + "epoch": 0.55, + "learning_rate": 0.00029243347743859683, + "loss": 0.2955, + "step": 1820 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002924244086206542, + "loss": 0.2782, + "step": 1821 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002924153345120625, + "loss": 0.3605, + "step": 1822 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002924062551131587, + "loss": 0.3518, + "step": 1823 + }, + { + "epoch": 0.55, + "learning_rate": 0.00029239717042428017, + "loss": 0.0498, + "step": 1824 + }, + { + "epoch": 0.55, + "learning_rate": 0.00029238808044576434, + "loss": 0.368, + "step": 1825 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002923789851779488, + "loss": 0.1863, + "step": 1826 + }, + { + "epoch": 0.55, + "learning_rate": 0.00029236988462117157, + "loss": 0.1469, + "step": 1827 + }, + { + "epoch": 0.56, + "learning_rate": 0.00029236077877577057, + "loss": 0.2805, + "step": 1828 + }, + { + "epoch": 0.56, + "learning_rate": 0.00029235166764208413, + "loss": 0.1966, + "step": 1829 + }, + { + "epoch": 0.56, + "learning_rate": 0.00029234255122045065, + "loss": 0.3393, + "step": 1830 + }, + { + "epoch": 0.56, + "learning_rate": 0.00029233342951120886, + "loss": 0.47, + "step": 1831 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002923243025146975, + "loss": 0.1976, + "step": 1832 + }, + { + "epoch": 0.56, + "learning_rate": 0.00029231517023125566, + "loss": 0.312, + "step": 1833 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002923060326612226, + "loss": 0.2675, + "step": 1834 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002922968898049378, + "loss": 0.1861, + "step": 1835 + }, + { + "epoch": 0.56, + "learning_rate": 0.00029228774166274073, + "loss": 0.246, + "step": 1836 + }, + { + "epoch": 0.56, + "learning_rate": 0.00029227858823497135, + "loss": 0.287, + "step": 1837 + }, + { + "epoch": 0.56, + "learning_rate": 0.00029226942952196964, + "loss": 0.2968, + "step": 1838 + }, + { + "epoch": 0.56, + "learning_rate": 0.00029226026552407584, + "loss": 0.3254, + "step": 1839 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002922510962416303, + "loss": 0.2417, + "step": 1840 + }, + { + "epoch": 0.56, + "learning_rate": 0.00029224192167497375, + "loss": 0.216, + "step": 1841 + }, + { + "epoch": 0.56, + "learning_rate": 0.00029223274182444694, + "loss": 0.2091, + "step": 1842 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002922235566903908, + "loss": 0.2453, + "step": 1843 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002922143662731466, + "loss": 0.438, + "step": 1844 + }, + { + "epoch": 0.56, + "learning_rate": 0.00029220517057305576, + "loss": 0.1645, + "step": 1845 + }, + { + "epoch": 0.56, + "learning_rate": 0.00029219596959045987, + "loss": 0.2165, + "step": 1846 + }, + { + "epoch": 0.56, + "learning_rate": 0.00029218676332570066, + "loss": 0.3568, + "step": 1847 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002921775517791202, + "loss": 0.1404, + "step": 1848 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002921683349510606, + "loss": 0.151, + "step": 1849 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002921591128418642, + "loss": 0.2076, + "step": 1850 + }, + { + "epoch": 0.56, + "learning_rate": 0.00029214988545187367, + "loss": 0.2121, + "step": 1851 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002921406527814318, + "loss": 0.2642, + "step": 1852 + }, + { + "epoch": 0.56, + "learning_rate": 0.00029213141483088145, + "loss": 0.1925, + "step": 1853 + }, + { + "epoch": 0.56, + "learning_rate": 0.00029212217160056586, + "loss": 0.1627, + "step": 1854 + }, + { + "epoch": 0.56, + "learning_rate": 0.00029211292309082836, + "loss": 0.1025, + "step": 1855 + }, + { + "epoch": 0.56, + "learning_rate": 0.00029210366930201246, + "loss": 0.2079, + "step": 1856 + }, + { + "epoch": 0.56, + "learning_rate": 0.00029209441023446203, + "loss": 0.3006, + "step": 1857 + }, + { + "epoch": 0.56, + "learning_rate": 0.00029208514588852097, + "loss": 0.2754, + "step": 1858 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002920758762645333, + "loss": 0.0574, + "step": 1859 + }, + { + "epoch": 0.56, + "learning_rate": 0.00029206660136284353, + "loss": 0.2271, + "step": 1860 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002920573211837961, + "loss": 0.119, + "step": 1861 + }, + { + "epoch": 0.57, + "learning_rate": 0.00029204803572773576, + "loss": 0.2214, + "step": 1862 + }, + { + "epoch": 0.57, + "learning_rate": 0.00029203874499500747, + "loss": 0.3398, + "step": 1863 + }, + { + "epoch": 0.57, + "learning_rate": 0.00029202944898595634, + "loss": 0.1581, + "step": 1864 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002920201477009277, + "loss": 0.3694, + "step": 1865 + }, + { + "epoch": 0.57, + "learning_rate": 0.00029201084114026697, + "loss": 0.3016, + "step": 1866 + }, + { + "epoch": 0.57, + "learning_rate": 0.00029200152930431995, + "loss": 0.4154, + "step": 1867 + }, + { + "epoch": 0.57, + "learning_rate": 0.00029199221219343256, + "loss": 0.2293, + "step": 1868 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002919828898079509, + "loss": 0.1542, + "step": 1869 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002919735621482212, + "loss": 0.3505, + "step": 1870 + }, + { + "epoch": 0.57, + "learning_rate": 0.00029196422921459, + "loss": 0.2958, + "step": 1871 + }, + { + "epoch": 0.57, + "learning_rate": 0.000291954891007404, + "loss": 0.2408, + "step": 1872 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002919455475270101, + "loss": 0.2094, + "step": 1873 + }, + { + "epoch": 0.57, + "learning_rate": 0.00029193619877375534, + "loss": 0.181, + "step": 1874 + }, + { + "epoch": 0.57, + "learning_rate": 0.000291926844747987, + "loss": 0.3006, + "step": 1875 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002919174854500526, + "loss": 0.1731, + "step": 1876 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002919081208802998, + "loss": 0.3105, + "step": 1877 + }, + { + "epoch": 0.57, + "learning_rate": 0.00029189875103907643, + "loss": 0.1065, + "step": 1878 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002918893759267306, + "loss": 0.127, + "step": 1879 + }, + { + "epoch": 0.57, + "learning_rate": 0.00029187999554361054, + "loss": 0.1069, + "step": 1880 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002918706098900647, + "loss": 0.1772, + "step": 1881 + }, + { + "epoch": 0.57, + "learning_rate": 0.00029186121896644163, + "loss": 0.1517, + "step": 1882 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002918518227730904, + "loss": 0.2422, + "step": 1883 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002918424213103599, + "loss": 0.1662, + "step": 1884 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002918330145785994, + "loss": 0.1921, + "step": 1885 + }, + { + "epoch": 0.57, + "learning_rate": 0.00029182360257815835, + "loss": 0.1529, + "step": 1886 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002918141853093863, + "loss": 0.253, + "step": 1887 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002918047627726332, + "loss": 0.1485, + "step": 1888 + }, + { + "epoch": 0.57, + "learning_rate": 0.00029179533496824896, + "loss": 0.041, + "step": 1889 + }, + { + "epoch": 0.57, + "learning_rate": 0.00029178590189658386, + "loss": 0.1449, + "step": 1890 + }, + { + "epoch": 0.57, + "learning_rate": 0.00029177646355798827, + "loss": 0.3484, + "step": 1891 + }, + { + "epoch": 0.57, + "learning_rate": 0.00029176701995281285, + "loss": 0.2447, + "step": 1892 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002917575710814083, + "loss": 0.1739, + "step": 1893 + }, + { + "epoch": 0.58, + "learning_rate": 0.0002917481169441257, + "loss": 0.3498, + "step": 1894 + }, + { + "epoch": 0.58, + "learning_rate": 0.0002917386575413163, + "loss": 0.2539, + "step": 1895 + }, + { + "epoch": 0.58, + "learning_rate": 0.00029172919287333136, + "loss": 0.3209, + "step": 1896 + }, + { + "epoch": 0.58, + "learning_rate": 0.0002917197229405225, + "loss": 0.4159, + "step": 1897 + }, + { + "epoch": 0.58, + "learning_rate": 0.0002917102477432416, + "loss": 0.3969, + "step": 1898 + }, + { + "epoch": 0.58, + "learning_rate": 0.0002917007672818405, + "loss": 0.3017, + "step": 1899 + }, + { + "epoch": 0.58, + "learning_rate": 0.0002916912815566715, + "loss": 0.1974, + "step": 1900 + }, + { + "epoch": 0.58, + "learning_rate": 0.0002916817905680868, + "loss": 0.1618, + "step": 1901 + }, + { + "epoch": 0.58, + "learning_rate": 0.0002916722943164391, + "loss": 0.0767, + "step": 1902 + }, + { + "epoch": 0.58, + "learning_rate": 0.0002916627928020811, + "loss": 0.3935, + "step": 1903 + }, + { + "epoch": 0.58, + "learning_rate": 0.0002916532860253658, + "loss": 0.188, + "step": 1904 + }, + { + "epoch": 0.58, + "learning_rate": 0.00029164377398664627, + "loss": 0.1811, + "step": 1905 + }, + { + "epoch": 0.58, + "learning_rate": 0.00029163425668627595, + "loss": 0.1736, + "step": 1906 + }, + { + "epoch": 0.58, + "learning_rate": 0.0002916247341246083, + "loss": 0.2487, + "step": 1907 + }, + { + "epoch": 0.58, + "learning_rate": 0.00029161520630199714, + "loss": 0.115, + "step": 1908 + }, + { + "epoch": 0.58, + "learning_rate": 0.0002916056732187963, + "loss": 0.2255, + "step": 1909 + }, + { + "epoch": 0.58, + "learning_rate": 0.00029159613487536, + "loss": 0.2397, + "step": 1910 + }, + { + "epoch": 0.58, + "learning_rate": 0.0002915865912720425, + "loss": 0.31, + "step": 1911 + }, + { + "epoch": 0.58, + "learning_rate": 0.0002915770424091983, + "loss": 0.1988, + "step": 1912 + }, + { + "epoch": 0.58, + "learning_rate": 0.0002915674882871822, + "loss": 0.4858, + "step": 1913 + }, + { + "epoch": 0.58, + "learning_rate": 0.000291557928906349, + "loss": 0.4887, + "step": 1914 + }, + { + "epoch": 0.58, + "learning_rate": 0.0002915483642670539, + "loss": 0.2873, + "step": 1915 + }, + { + "epoch": 0.58, + "learning_rate": 0.0002915387943696521, + "loss": 0.0864, + "step": 1916 + }, + { + "epoch": 0.58, + "learning_rate": 0.0002915292192144992, + "loss": 0.1039, + "step": 1917 + }, + { + "epoch": 0.58, + "learning_rate": 0.0002915196388019508, + "loss": 0.4565, + "step": 1918 + }, + { + "epoch": 0.58, + "learning_rate": 0.0002915100531323628, + "loss": 0.1106, + "step": 1919 + }, + { + "epoch": 0.58, + "learning_rate": 0.00029150046220609136, + "loss": 0.1282, + "step": 1920 + }, + { + "epoch": 0.58, + "learning_rate": 0.0002914908660234927, + "loss": 0.194, + "step": 1921 + }, + { + "epoch": 0.58, + "learning_rate": 0.00029148126458492326, + "loss": 0.3163, + "step": 1922 + }, + { + "epoch": 0.58, + "learning_rate": 0.00029147165789073973, + "loss": 0.3475, + "step": 1923 + }, + { + "epoch": 0.58, + "learning_rate": 0.000291462045941299, + "loss": 0.2512, + "step": 1924 + }, + { + "epoch": 0.58, + "learning_rate": 0.0002914524287369581, + "loss": 0.2025, + "step": 1925 + }, + { + "epoch": 0.58, + "learning_rate": 0.0002914428062780742, + "loss": 0.111, + "step": 1926 + }, + { + "epoch": 0.59, + "learning_rate": 0.00029143317856500486, + "loss": 0.1849, + "step": 1927 + }, + { + "epoch": 0.59, + "learning_rate": 0.00029142354559810767, + "loss": 0.334, + "step": 1928 + }, + { + "epoch": 0.59, + "learning_rate": 0.00029141390737774057, + "loss": 0.3098, + "step": 1929 + }, + { + "epoch": 0.59, + "learning_rate": 0.00029140426390426144, + "loss": 0.261, + "step": 1930 + }, + { + "epoch": 0.59, + "learning_rate": 0.0002913946151780286, + "loss": 0.2775, + "step": 1931 + }, + { + "epoch": 0.59, + "learning_rate": 0.0002913849611994004, + "loss": 0.2843, + "step": 1932 + }, + { + "epoch": 0.59, + "learning_rate": 0.0002913753019687355, + "loss": 0.3453, + "step": 1933 + }, + { + "epoch": 0.59, + "learning_rate": 0.00029136563748639275, + "loss": 0.2209, + "step": 1934 + }, + { + "epoch": 0.59, + "learning_rate": 0.00029135596775273106, + "loss": 0.2948, + "step": 1935 + }, + { + "epoch": 0.59, + "learning_rate": 0.0002913462927681098, + "loss": 0.1759, + "step": 1936 + }, + { + "epoch": 0.59, + "learning_rate": 0.00029133661253288813, + "loss": 0.2421, + "step": 1937 + }, + { + "epoch": 0.59, + "learning_rate": 0.00029132692704742587, + "loss": 0.4065, + "step": 1938 + }, + { + "epoch": 0.59, + "learning_rate": 0.0002913172363120826, + "loss": 0.2618, + "step": 1939 + }, + { + "epoch": 0.59, + "learning_rate": 0.0002913075403272185, + "loss": 0.233, + "step": 1940 + }, + { + "epoch": 0.59, + "learning_rate": 0.0002912978390931936, + "loss": 0.311, + "step": 1941 + }, + { + "epoch": 0.59, + "learning_rate": 0.0002912881326103684, + "loss": 0.1311, + "step": 1942 + }, + { + "epoch": 0.59, + "learning_rate": 0.00029127842087910334, + "loss": 0.3583, + "step": 1943 + }, + { + "epoch": 0.59, + "learning_rate": 0.00029126870389975924, + "loss": 0.1296, + "step": 1944 + }, + { + "epoch": 0.59, + "learning_rate": 0.00029125898167269715, + "loss": 0.1133, + "step": 1945 + }, + { + "epoch": 0.59, + "learning_rate": 0.00029124925419827803, + "loss": 0.3254, + "step": 1946 + }, + { + "epoch": 0.59, + "learning_rate": 0.00029123952147686337, + "loss": 0.4281, + "step": 1947 + }, + { + "epoch": 0.59, + "learning_rate": 0.00029122978350881463, + "loss": 0.1678, + "step": 1948 + }, + { + "epoch": 0.59, + "learning_rate": 0.0002912200402944936, + "loss": 0.0841, + "step": 1949 + }, + { + "epoch": 0.59, + "learning_rate": 0.00029121029183426224, + "loss": 0.1999, + "step": 1950 + }, + { + "epoch": 0.59, + "learning_rate": 0.00029120053812848256, + "loss": 0.1661, + "step": 1951 + }, + { + "epoch": 0.59, + "learning_rate": 0.00029119077917751703, + "loss": 0.1401, + "step": 1952 + }, + { + "epoch": 0.59, + "learning_rate": 0.0002911810149817281, + "loss": 0.2997, + "step": 1953 + }, + { + "epoch": 0.59, + "learning_rate": 0.0002911712455414784, + "loss": 0.1919, + "step": 1954 + }, + { + "epoch": 0.59, + "learning_rate": 0.0002911614708571309, + "loss": 0.0866, + "step": 1955 + }, + { + "epoch": 0.59, + "learning_rate": 0.0002911516909290488, + "loss": 0.266, + "step": 1956 + }, + { + "epoch": 0.59, + "learning_rate": 0.0002911419057575952, + "loss": 0.2658, + "step": 1957 + }, + { + "epoch": 0.59, + "learning_rate": 0.0002911321153431338, + "loss": 0.268, + "step": 1958 + }, + { + "epoch": 0.59, + "learning_rate": 0.00029112231968602813, + "loss": 0.2653, + "step": 1959 + }, + { + "epoch": 0.6, + "learning_rate": 0.00029111251878664217, + "loss": 0.3122, + "step": 1960 + }, + { + "epoch": 0.6, + "learning_rate": 0.0002911027126453398, + "loss": 0.2765, + "step": 1961 + }, + { + "epoch": 0.6, + "learning_rate": 0.0002910929012624856, + "loss": 0.2144, + "step": 1962 + }, + { + "epoch": 0.6, + "learning_rate": 0.0002910830846384438, + "loss": 0.3085, + "step": 1963 + }, + { + "epoch": 0.6, + "learning_rate": 0.00029107326277357914, + "loss": 0.3016, + "step": 1964 + }, + { + "epoch": 0.6, + "learning_rate": 0.00029106343566825645, + "loss": 0.3617, + "step": 1965 + }, + { + "epoch": 0.6, + "learning_rate": 0.0002910536033228408, + "loss": 0.3215, + "step": 1966 + }, + { + "epoch": 0.6, + "learning_rate": 0.0002910437657376974, + "loss": 0.2536, + "step": 1967 + }, + { + "epoch": 0.6, + "learning_rate": 0.00029103392291319173, + "loss": 0.3661, + "step": 1968 + }, + { + "epoch": 0.6, + "learning_rate": 0.00029102407484968946, + "loss": 0.2418, + "step": 1969 + }, + { + "epoch": 0.6, + "learning_rate": 0.0002910142215475563, + "loss": 0.2747, + "step": 1970 + }, + { + "epoch": 0.6, + "learning_rate": 0.00029100436300715833, + "loss": 0.2469, + "step": 1971 + }, + { + "epoch": 0.6, + "learning_rate": 0.00029099449922886174, + "loss": 0.2915, + "step": 1972 + }, + { + "epoch": 0.6, + "learning_rate": 0.000290984630213033, + "loss": 0.3442, + "step": 1973 + }, + { + "epoch": 0.6, + "learning_rate": 0.0002909747559600387, + "loss": 0.3356, + "step": 1974 + }, + { + "epoch": 0.6, + "learning_rate": 0.00029096487647024556, + "loss": 0.1776, + "step": 1975 + }, + { + "epoch": 0.6, + "learning_rate": 0.0002909549917440207, + "loss": 0.2703, + "step": 1976 + }, + { + "epoch": 0.6, + "learning_rate": 0.0002909451017817312, + "loss": 0.3472, + "step": 1977 + }, + { + "epoch": 0.6, + "learning_rate": 0.0002909352065837445, + "loss": 0.3789, + "step": 1978 + }, + { + "epoch": 0.6, + "learning_rate": 0.0002909253061504282, + "loss": 0.4946, + "step": 1979 + }, + { + "epoch": 0.6, + "learning_rate": 0.00029091540048215, + "loss": 0.0983, + "step": 1980 + }, + { + "epoch": 0.6, + "learning_rate": 0.0002909054895792779, + "loss": 0.3091, + "step": 1981 + }, + { + "epoch": 0.6, + "learning_rate": 0.0002908955734421801, + "loss": 0.2296, + "step": 1982 + }, + { + "epoch": 0.6, + "learning_rate": 0.0002908856520712249, + "loss": 0.2374, + "step": 1983 + }, + { + "epoch": 0.6, + "learning_rate": 0.0002908757254667808, + "loss": 0.2646, + "step": 1984 + }, + { + "epoch": 0.6, + "learning_rate": 0.00029086579362921667, + "loss": 0.4312, + "step": 1985 + }, + { + "epoch": 0.6, + "learning_rate": 0.0002908558565589014, + "loss": 0.2981, + "step": 1986 + }, + { + "epoch": 0.6, + "learning_rate": 0.00029084591425620413, + "loss": 0.3897, + "step": 1987 + }, + { + "epoch": 0.6, + "learning_rate": 0.0002908359667214941, + "loss": 0.3564, + "step": 1988 + }, + { + "epoch": 0.6, + "learning_rate": 0.000290826013955141, + "loss": 0.3288, + "step": 1989 + }, + { + "epoch": 0.6, + "learning_rate": 0.0002908160559575144, + "loss": 0.1348, + "step": 1990 + }, + { + "epoch": 0.6, + "learning_rate": 0.00029080609272898433, + "loss": 0.3561, + "step": 1991 + }, + { + "epoch": 0.6, + "learning_rate": 0.0002907961242699207, + "loss": 0.0464, + "step": 1992 + }, + { + "epoch": 0.61, + "learning_rate": 0.00029078615058069404, + "loss": 0.36, + "step": 1993 + }, + { + "epoch": 0.61, + "learning_rate": 0.0002907761716616747, + "loss": 0.2961, + "step": 1994 + }, + { + "epoch": 0.61, + "learning_rate": 0.0002907661875132334, + "loss": 0.3437, + "step": 1995 + }, + { + "epoch": 0.61, + "learning_rate": 0.00029075619813574103, + "loss": 0.297, + "step": 1996 + }, + { + "epoch": 0.61, + "learning_rate": 0.0002907462035295686, + "loss": 0.1672, + "step": 1997 + }, + { + "epoch": 0.61, + "learning_rate": 0.0002907362036950876, + "loss": 0.2305, + "step": 1998 + }, + { + "epoch": 0.61, + "learning_rate": 0.0002907261986326692, + "loss": 0.185, + "step": 1999 + }, + { + "epoch": 0.61, + "learning_rate": 0.00029071618834268525, + "loss": 0.135, + "step": 2000 + }, + { + "epoch": 0.61, + "learning_rate": 0.00029070617282550754, + "loss": 0.3249, + "step": 2001 + }, + { + "epoch": 0.61, + "learning_rate": 0.00029069615208150815, + "loss": 0.2952, + "step": 2002 + }, + { + "epoch": 0.61, + "learning_rate": 0.0002906861261110593, + "loss": 0.2556, + "step": 2003 + }, + { + "epoch": 0.61, + "learning_rate": 0.00029067609491453334, + "loss": 0.3139, + "step": 2004 + }, + { + "epoch": 0.61, + "learning_rate": 0.0002906660584923031, + "loss": 0.1327, + "step": 2005 + }, + { + "epoch": 0.61, + "learning_rate": 0.0002906560168447412, + "loss": 0.4226, + "step": 2006 + }, + { + "epoch": 0.61, + "learning_rate": 0.0002906459699722208, + "loss": 0.1889, + "step": 2007 + }, + { + "epoch": 0.61, + "learning_rate": 0.000290635917875115, + "loss": 0.0995, + "step": 2008 + }, + { + "epoch": 0.61, + "learning_rate": 0.0002906258605537973, + "loss": 0.2354, + "step": 2009 + }, + { + "epoch": 0.61, + "learning_rate": 0.0002906157980086413, + "loss": 0.3073, + "step": 2010 + }, + { + "epoch": 0.61, + "learning_rate": 0.0002906057302400207, + "loss": 0.5341, + "step": 2011 + }, + { + "epoch": 0.61, + "learning_rate": 0.00029059565724830953, + "loss": 0.1667, + "step": 2012 + }, + { + "epoch": 0.61, + "learning_rate": 0.00029058557903388204, + "loss": 0.4069, + "step": 2013 + }, + { + "epoch": 0.61, + "learning_rate": 0.0002905754955971125, + "loss": 0.1942, + "step": 2014 + }, + { + "epoch": 0.61, + "learning_rate": 0.0002905654069383756, + "loss": 0.2601, + "step": 2015 + }, + { + "epoch": 0.61, + "learning_rate": 0.00029055531305804597, + "loss": 0.3224, + "step": 2016 + }, + { + "epoch": 0.61, + "learning_rate": 0.00029054521395649864, + "loss": 0.2658, + "step": 2017 + }, + { + "epoch": 0.61, + "learning_rate": 0.0002905351096341088, + "loss": 0.2879, + "step": 2018 + }, + { + "epoch": 0.61, + "learning_rate": 0.00029052500009125174, + "loss": 0.2601, + "step": 2019 + }, + { + "epoch": 0.61, + "learning_rate": 0.000290514885328303, + "loss": 0.3574, + "step": 2020 + }, + { + "epoch": 0.61, + "learning_rate": 0.0002905047653456383, + "loss": 0.2322, + "step": 2021 + }, + { + "epoch": 0.61, + "learning_rate": 0.00029049464014363366, + "loss": 0.4208, + "step": 2022 + }, + { + "epoch": 0.61, + "learning_rate": 0.0002904845097226651, + "loss": 0.2584, + "step": 2023 + }, + { + "epoch": 0.61, + "learning_rate": 0.00029047437408310893, + "loss": 0.2094, + "step": 2024 + }, + { + "epoch": 0.61, + "learning_rate": 0.00029046423322534175, + "loss": 0.1953, + "step": 2025 + }, + { + "epoch": 0.62, + "learning_rate": 0.0002904540871497402, + "loss": 0.1669, + "step": 2026 + }, + { + "epoch": 0.62, + "learning_rate": 0.00029044393585668125, + "loss": 0.2679, + "step": 2027 + }, + { + "epoch": 0.62, + "learning_rate": 0.0002904337793465419, + "loss": 0.2149, + "step": 2028 + }, + { + "epoch": 0.62, + "learning_rate": 0.0002904236176196994, + "loss": 0.2087, + "step": 2029 + }, + { + "epoch": 0.62, + "learning_rate": 0.00029041345067653136, + "loss": 0.279, + "step": 2030 + }, + { + "epoch": 0.62, + "learning_rate": 0.00029040327851741535, + "loss": 0.2369, + "step": 2031 + }, + { + "epoch": 0.62, + "learning_rate": 0.0002903931011427293, + "loss": 0.2415, + "step": 2032 + }, + { + "epoch": 0.62, + "learning_rate": 0.00029038291855285124, + "loss": 0.2846, + "step": 2033 + }, + { + "epoch": 0.62, + "learning_rate": 0.00029037273074815936, + "loss": 0.1907, + "step": 2034 + }, + { + "epoch": 0.62, + "learning_rate": 0.00029036253772903226, + "loss": 0.2177, + "step": 2035 + }, + { + "epoch": 0.62, + "learning_rate": 0.00029035233949584844, + "loss": 0.234, + "step": 2036 + }, + { + "epoch": 0.62, + "learning_rate": 0.0002903421360489868, + "loss": 0.2223, + "step": 2037 + }, + { + "epoch": 0.62, + "learning_rate": 0.0002903319273888264, + "loss": 0.1903, + "step": 2038 + }, + { + "epoch": 0.62, + "learning_rate": 0.0002903217135157464, + "loss": 0.1627, + "step": 2039 + }, + { + "epoch": 0.62, + "learning_rate": 0.00029031149443012615, + "loss": 0.3138, + "step": 2040 + }, + { + "epoch": 0.62, + "learning_rate": 0.00029030127013234545, + "loss": 0.1485, + "step": 2041 + }, + { + "epoch": 0.62, + "learning_rate": 0.0002902910406227839, + "loss": 0.3189, + "step": 2042 + }, + { + "epoch": 0.62, + "learning_rate": 0.00029028080590182164, + "loss": 0.3512, + "step": 2043 + }, + { + "epoch": 0.62, + "learning_rate": 0.00029027056596983884, + "loss": 0.407, + "step": 2044 + }, + { + "epoch": 0.62, + "learning_rate": 0.00029026032082721584, + "loss": 0.3548, + "step": 2045 + }, + { + "epoch": 0.62, + "learning_rate": 0.0002902500704743332, + "loss": 0.2699, + "step": 2046 + }, + { + "epoch": 0.62, + "learning_rate": 0.00029023981491157173, + "loss": 0.2564, + "step": 2047 + }, + { + "epoch": 0.62, + "learning_rate": 0.00029022955413931243, + "loss": 0.3234, + "step": 2048 + }, + { + "epoch": 0.62, + "learning_rate": 0.00029021928815793635, + "loss": 0.3827, + "step": 2049 + }, + { + "epoch": 0.62, + "learning_rate": 0.0002902090169678249, + "loss": 0.2275, + "step": 2050 + }, + { + "epoch": 0.62, + "learning_rate": 0.0002901987405693597, + "loss": 0.4564, + "step": 2051 + }, + { + "epoch": 0.62, + "learning_rate": 0.0002901884589629224, + "loss": 0.2218, + "step": 2052 + }, + { + "epoch": 0.62, + "learning_rate": 0.0002901781721488949, + "loss": 0.2228, + "step": 2053 + }, + { + "epoch": 0.62, + "learning_rate": 0.0002901678801276594, + "loss": 0.1578, + "step": 2054 + }, + { + "epoch": 0.62, + "learning_rate": 0.00029015758289959825, + "loss": 0.1601, + "step": 2055 + }, + { + "epoch": 0.62, + "learning_rate": 0.0002901472804650938, + "loss": 0.2123, + "step": 2056 + }, + { + "epoch": 0.62, + "learning_rate": 0.0002901369728245289, + "loss": 0.1674, + "step": 2057 + }, + { + "epoch": 0.62, + "learning_rate": 0.00029012665997828647, + "loss": 0.1568, + "step": 2058 + }, + { + "epoch": 0.63, + "learning_rate": 0.00029011634192674943, + "loss": 0.2364, + "step": 2059 + }, + { + "epoch": 0.63, + "learning_rate": 0.0002901060186703013, + "loss": 0.3734, + "step": 2060 + }, + { + "epoch": 0.63, + "learning_rate": 0.00029009569020932533, + "loss": 0.152, + "step": 2061 + }, + { + "epoch": 0.63, + "learning_rate": 0.00029008535654420533, + "loss": 0.2337, + "step": 2062 + }, + { + "epoch": 0.63, + "learning_rate": 0.0002900750176753251, + "loss": 0.2528, + "step": 2063 + }, + { + "epoch": 0.63, + "learning_rate": 0.0002900646736030688, + "loss": 0.1789, + "step": 2064 + }, + { + "epoch": 0.63, + "learning_rate": 0.00029005432432782055, + "loss": 0.3341, + "step": 2065 + }, + { + "epoch": 0.63, + "learning_rate": 0.0002900439698499649, + "loss": 0.2544, + "step": 2066 + }, + { + "epoch": 0.63, + "learning_rate": 0.0002900336101698864, + "loss": 0.2642, + "step": 2067 + }, + { + "epoch": 0.63, + "learning_rate": 0.00029002324528797, + "loss": 0.196, + "step": 2068 + }, + { + "epoch": 0.63, + "learning_rate": 0.00029001287520460055, + "loss": 0.2847, + "step": 2069 + }, + { + "epoch": 0.63, + "learning_rate": 0.0002900024999201634, + "loss": 0.2101, + "step": 2070 + }, + { + "epoch": 0.63, + "learning_rate": 0.00028999211943504403, + "loss": 0.156, + "step": 2071 + }, + { + "epoch": 0.63, + "learning_rate": 0.0002899817337496278, + "loss": 0.3996, + "step": 2072 + }, + { + "epoch": 0.63, + "learning_rate": 0.0002899713428643008, + "loss": 0.3294, + "step": 2073 + }, + { + "epoch": 0.63, + "learning_rate": 0.0002899609467794488, + "loss": 0.2909, + "step": 2074 + }, + { + "epoch": 0.63, + "learning_rate": 0.000289950545495458, + "loss": 0.3412, + "step": 2075 + }, + { + "epoch": 0.63, + "learning_rate": 0.00028994013901271495, + "loss": 0.3636, + "step": 2076 + }, + { + "epoch": 0.63, + "learning_rate": 0.00028992972733160604, + "loss": 0.2887, + "step": 2077 + }, + { + "epoch": 0.63, + "learning_rate": 0.0002899193104525181, + "loss": 0.1099, + "step": 2078 + }, + { + "epoch": 0.63, + "learning_rate": 0.00028990888837583813, + "loss": 0.2923, + "step": 2079 + }, + { + "epoch": 0.63, + "learning_rate": 0.00028989846110195325, + "loss": 0.3496, + "step": 2080 + }, + { + "epoch": 0.63, + "learning_rate": 0.0002898880286312507, + "loss": 0.2362, + "step": 2081 + }, + { + "epoch": 0.63, + "learning_rate": 0.00028987759096411815, + "loss": 0.2818, + "step": 2082 + }, + { + "epoch": 0.63, + "learning_rate": 0.0002898671481009433, + "loss": 0.2228, + "step": 2083 + }, + { + "epoch": 0.63, + "learning_rate": 0.000289856700042114, + "loss": 0.2404, + "step": 2084 + }, + { + "epoch": 0.63, + "learning_rate": 0.0002898462467880185, + "loss": 0.249, + "step": 2085 + }, + { + "epoch": 0.63, + "learning_rate": 0.000289835788339045, + "loss": 0.3787, + "step": 2086 + }, + { + "epoch": 0.63, + "learning_rate": 0.000289825324695582, + "loss": 0.3229, + "step": 2087 + }, + { + "epoch": 0.63, + "learning_rate": 0.00028981485585801825, + "loss": 0.2444, + "step": 2088 + }, + { + "epoch": 0.63, + "learning_rate": 0.0002898043818267426, + "loss": 0.3079, + "step": 2089 + }, + { + "epoch": 0.63, + "learning_rate": 0.00028979390260214417, + "loss": 0.2206, + "step": 2090 + }, + { + "epoch": 0.63, + "learning_rate": 0.0002897834181846121, + "loss": 0.4328, + "step": 2091 + }, + { + "epoch": 0.64, + "learning_rate": 0.00028977292857453605, + "loss": 0.2989, + "step": 2092 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002897624337723055, + "loss": 0.2382, + "step": 2093 + }, + { + "epoch": 0.64, + "learning_rate": 0.00028975193377831046, + "loss": 0.1452, + "step": 2094 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002897414285929409, + "loss": 0.3865, + "step": 2095 + }, + { + "epoch": 0.64, + "learning_rate": 0.00028973091821658705, + "loss": 0.2085, + "step": 2096 + }, + { + "epoch": 0.64, + "learning_rate": 0.00028972040264963936, + "loss": 0.2957, + "step": 2097 + }, + { + "epoch": 0.64, + "learning_rate": 0.00028970988189248837, + "loss": 0.3706, + "step": 2098 + }, + { + "epoch": 0.64, + "learning_rate": 0.00028969935594552504, + "loss": 0.0666, + "step": 2099 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002896888248091402, + "loss": 0.1943, + "step": 2100 + }, + { + "epoch": 0.64, + "learning_rate": 0.00028967828848372527, + "loss": 0.3739, + "step": 2101 + }, + { + "epoch": 0.64, + "learning_rate": 0.00028966774696967146, + "loss": 0.2828, + "step": 2102 + }, + { + "epoch": 0.64, + "learning_rate": 0.00028965720026737047, + "loss": 0.2575, + "step": 2103 + }, + { + "epoch": 0.64, + "learning_rate": 0.00028964664837721396, + "loss": 0.233, + "step": 2104 + }, + { + "epoch": 0.64, + "learning_rate": 0.000289636091299594, + "loss": 0.2298, + "step": 2105 + }, + { + "epoch": 0.64, + "learning_rate": 0.00028962552903490275, + "loss": 0.2106, + "step": 2106 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002896149615835325, + "loss": 0.2942, + "step": 2107 + }, + { + "epoch": 0.64, + "learning_rate": 0.00028960438894587594, + "loss": 0.241, + "step": 2108 + }, + { + "epoch": 0.64, + "learning_rate": 0.00028959381112232565, + "loss": 0.2932, + "step": 2109 + }, + { + "epoch": 0.64, + "learning_rate": 0.00028958322811327463, + "loss": 0.1218, + "step": 2110 + }, + { + "epoch": 0.64, + "learning_rate": 0.000289572639919116, + "loss": 0.2971, + "step": 2111 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002895620465402431, + "loss": 0.3583, + "step": 2112 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002895514479770495, + "loss": 0.2912, + "step": 2113 + }, + { + "epoch": 0.64, + "learning_rate": 0.00028954084422992877, + "loss": 0.2658, + "step": 2114 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002895302352992749, + "loss": 0.2315, + "step": 2115 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002895196211854819, + "loss": 0.2584, + "step": 2116 + }, + { + "epoch": 0.64, + "learning_rate": 0.00028950900188894416, + "loss": 0.1992, + "step": 2117 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002894983774100561, + "loss": 0.3014, + "step": 2118 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002894877477492124, + "loss": 0.3449, + "step": 2119 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002894771129068079, + "loss": 0.1688, + "step": 2120 + }, + { + "epoch": 0.64, + "learning_rate": 0.00028946647288323766, + "loss": 0.3424, + "step": 2121 + }, + { + "epoch": 0.64, + "learning_rate": 0.000289455827678897, + "loss": 0.0806, + "step": 2122 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002894451772941812, + "loss": 0.2518, + "step": 2123 + }, + { + "epoch": 0.64, + "learning_rate": 0.00028943452172948604, + "loss": 0.2594, + "step": 2124 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002894238609852073, + "loss": 0.2004, + "step": 2125 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028941319506174093, + "loss": 0.1579, + "step": 2126 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028940252395948324, + "loss": 0.2112, + "step": 2127 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028939184767883057, + "loss": 0.4063, + "step": 2128 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002893811662201795, + "loss": 0.1858, + "step": 2129 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028937047958392683, + "loss": 0.196, + "step": 2130 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002893597877704696, + "loss": 0.0839, + "step": 2131 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028934909078020486, + "loss": 0.222, + "step": 2132 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028933838861353007, + "loss": 0.1792, + "step": 2133 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002893276812708427, + "loss": 0.2623, + "step": 2134 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002893169687525406, + "loss": 0.2668, + "step": 2135 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002893062510590217, + "loss": 0.3443, + "step": 2136 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028929552819068397, + "loss": 0.1931, + "step": 2137 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002892848001479259, + "loss": 0.2402, + "step": 2138 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028927406693114596, + "loss": 0.2735, + "step": 2139 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028926332854074284, + "loss": 0.2428, + "step": 2140 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028925258497711546, + "loss": 0.1069, + "step": 2141 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002892418362406629, + "loss": 0.1721, + "step": 2142 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002892310823317845, + "loss": 0.1699, + "step": 2143 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028922032325087965, + "loss": 0.2209, + "step": 2144 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028920955899834795, + "loss": 0.1692, + "step": 2145 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028919878957458947, + "loss": 0.0921, + "step": 2146 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002891880149800041, + "loss": 0.2417, + "step": 2147 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028917723521499223, + "loss": 0.188, + "step": 2148 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028916645027995415, + "loss": 0.2479, + "step": 2149 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002891556601752905, + "loss": 0.185, + "step": 2150 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002891448649014022, + "loss": 0.2642, + "step": 2151 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002891340644586902, + "loss": 0.0475, + "step": 2152 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028912325884755566, + "loss": 0.2792, + "step": 2153 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002891124480684001, + "loss": 0.2301, + "step": 2154 + }, + { + "epoch": 0.65, + "learning_rate": 0.000289101632121625, + "loss": 0.1342, + "step": 2155 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028909081100763224, + "loss": 0.3833, + "step": 2156 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028907998472682366, + "loss": 0.2393, + "step": 2157 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002890691532796015, + "loss": 0.2558, + "step": 2158 + }, + { + "epoch": 0.66, + "learning_rate": 0.00028905831666636817, + "loss": 0.2953, + "step": 2159 + }, + { + "epoch": 0.66, + "learning_rate": 0.00028904747488752607, + "loss": 0.1786, + "step": 2160 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002890366279434781, + "loss": 0.2859, + "step": 2161 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002890257758346271, + "loss": 0.4017, + "step": 2162 + }, + { + "epoch": 0.66, + "learning_rate": 0.00028901491856137625, + "loss": 0.2644, + "step": 2163 + }, + { + "epoch": 0.66, + "learning_rate": 0.00028900405612412877, + "loss": 0.2269, + "step": 2164 + }, + { + "epoch": 0.66, + "learning_rate": 0.00028899318852328826, + "loss": 0.189, + "step": 2165 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002889823157592584, + "loss": 0.3139, + "step": 2166 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002889714378324431, + "loss": 0.2782, + "step": 2167 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002889605547432464, + "loss": 0.2221, + "step": 2168 + }, + { + "epoch": 0.66, + "learning_rate": 0.00028894966649207257, + "loss": 0.286, + "step": 2169 + }, + { + "epoch": 0.66, + "learning_rate": 0.00028893877307932606, + "loss": 0.234, + "step": 2170 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002889278745054117, + "loss": 0.3447, + "step": 2171 + }, + { + "epoch": 0.66, + "learning_rate": 0.00028891697077073405, + "loss": 0.3069, + "step": 2172 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002889060618756984, + "loss": 0.4429, + "step": 2173 + }, + { + "epoch": 0.66, + "learning_rate": 0.00028889514782070983, + "loss": 0.1042, + "step": 2174 + }, + { + "epoch": 0.66, + "learning_rate": 0.00028888422860617386, + "loss": 0.3361, + "step": 2175 + }, + { + "epoch": 0.66, + "learning_rate": 0.00028887330423249606, + "loss": 0.023, + "step": 2176 + }, + { + "epoch": 0.66, + "learning_rate": 0.00028886237470008224, + "loss": 0.1892, + "step": 2177 + }, + { + "epoch": 0.66, + "learning_rate": 0.00028885144000933844, + "loss": 0.207, + "step": 2178 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002888405001606708, + "loss": 0.2621, + "step": 2179 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002888295551544857, + "loss": 0.28, + "step": 2180 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002888186049911898, + "loss": 0.2465, + "step": 2181 + }, + { + "epoch": 0.66, + "learning_rate": 0.00028880764967118976, + "loss": 0.4876, + "step": 2182 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002887966891948926, + "loss": 0.2097, + "step": 2183 + }, + { + "epoch": 0.66, + "learning_rate": 0.00028878572356270547, + "loss": 0.3362, + "step": 2184 + }, + { + "epoch": 0.66, + "learning_rate": 0.00028877475277503566, + "loss": 0.2765, + "step": 2185 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002887637768322908, + "loss": 0.1307, + "step": 2186 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002887527957348785, + "loss": 0.3246, + "step": 2187 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002887418094832068, + "loss": 0.2496, + "step": 2188 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002887308180776836, + "loss": 0.1594, + "step": 2189 + }, + { + "epoch": 0.66, + "learning_rate": 0.00028871982151871747, + "loss": 0.3247, + "step": 2190 + }, + { + "epoch": 0.67, + "learning_rate": 0.00028870881980671664, + "loss": 0.5083, + "step": 2191 + }, + { + "epoch": 0.67, + "learning_rate": 0.00028869781294209003, + "loss": 0.2161, + "step": 2192 + }, + { + "epoch": 0.67, + "learning_rate": 0.00028868680092524634, + "loss": 0.1117, + "step": 2193 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002886757837565947, + "loss": 0.2273, + "step": 2194 + }, + { + "epoch": 0.67, + "learning_rate": 0.00028866476143654435, + "loss": 0.4372, + "step": 2195 + }, + { + "epoch": 0.67, + "learning_rate": 0.00028865373396550474, + "loss": 0.3961, + "step": 2196 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002886427013438855, + "loss": 0.2009, + "step": 2197 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002886316635720965, + "loss": 0.3717, + "step": 2198 + }, + { + "epoch": 0.67, + "learning_rate": 0.00028862062065054773, + "loss": 0.3502, + "step": 2199 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002886095725796494, + "loss": 0.1152, + "step": 2200 + }, + { + "epoch": 0.67, + "learning_rate": 0.00028859851935981193, + "loss": 0.2331, + "step": 2201 + }, + { + "epoch": 0.67, + "learning_rate": 0.00028858746099144584, + "loss": 0.1267, + "step": 2202 + }, + { + "epoch": 0.67, + "learning_rate": 0.00028857639747496204, + "loss": 0.2809, + "step": 2203 + }, + { + "epoch": 0.67, + "learning_rate": 0.00028856532881077143, + "loss": 0.5856, + "step": 2204 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002885542549992852, + "loss": 0.3441, + "step": 2205 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002885431760409147, + "loss": 0.2123, + "step": 2206 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002885320919360715, + "loss": 0.3094, + "step": 2207 + }, + { + "epoch": 0.67, + "learning_rate": 0.00028852100268516727, + "loss": 0.2092, + "step": 2208 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002885099082886141, + "loss": 0.1889, + "step": 2209 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002884988087468239, + "loss": 0.2273, + "step": 2210 + }, + { + "epoch": 0.67, + "learning_rate": 0.00028848770406020915, + "loss": 0.0473, + "step": 2211 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002884765942291823, + "loss": 0.2593, + "step": 2212 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002884654792541561, + "loss": 0.3744, + "step": 2213 + }, + { + "epoch": 0.67, + "learning_rate": 0.00028845435913554335, + "loss": 0.2797, + "step": 2214 + }, + { + "epoch": 0.67, + "learning_rate": 0.00028844323387375716, + "loss": 0.3436, + "step": 2215 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002884321034692109, + "loss": 0.2315, + "step": 2216 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002884209679223178, + "loss": 0.2713, + "step": 2217 + }, + { + "epoch": 0.67, + "learning_rate": 0.00028840982723349177, + "loss": 0.3557, + "step": 2218 + }, + { + "epoch": 0.67, + "learning_rate": 0.00028839868140314646, + "loss": 0.1741, + "step": 2219 + }, + { + "epoch": 0.67, + "learning_rate": 0.00028838753043169605, + "loss": 0.1037, + "step": 2220 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002883763743195547, + "loss": 0.3386, + "step": 2221 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002883652130671368, + "loss": 0.122, + "step": 2222 + }, + { + "epoch": 0.67, + "learning_rate": 0.000288354046674857, + "loss": 0.0992, + "step": 2223 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002883428751431301, + "loss": 0.1162, + "step": 2224 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002883316984723711, + "loss": 0.393, + "step": 2225 + }, + { + "epoch": 0.68, + "learning_rate": 0.00028832051666299505, + "loss": 0.1881, + "step": 2226 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002883093297154175, + "loss": 0.4579, + "step": 2227 + }, + { + "epoch": 0.68, + "learning_rate": 0.00028829813763005393, + "loss": 0.3657, + "step": 2228 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002882869404073201, + "loss": 0.1946, + "step": 2229 + }, + { + "epoch": 0.68, + "learning_rate": 0.00028827573804763196, + "loss": 0.2025, + "step": 2230 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002882645305514057, + "loss": 0.252, + "step": 2231 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002882533179190575, + "loss": 0.2377, + "step": 2232 + }, + { + "epoch": 0.68, + "learning_rate": 0.000288242100151004, + "loss": 0.1884, + "step": 2233 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002882308772476619, + "loss": 0.3037, + "step": 2234 + }, + { + "epoch": 0.68, + "learning_rate": 0.000288219649209448, + "loss": 0.2095, + "step": 2235 + }, + { + "epoch": 0.68, + "learning_rate": 0.00028820841603677955, + "loss": 0.2796, + "step": 2236 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002881971777300736, + "loss": 0.4928, + "step": 2237 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002881859342897479, + "loss": 0.2571, + "step": 2238 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002881746857162199, + "loss": 0.2727, + "step": 2239 + }, + { + "epoch": 0.68, + "learning_rate": 0.00028816343200990753, + "loss": 0.1297, + "step": 2240 + }, + { + "epoch": 0.68, + "learning_rate": 0.00028815217317122884, + "loss": 0.1976, + "step": 2241 + }, + { + "epoch": 0.68, + "learning_rate": 0.000288140909200602, + "loss": 0.3672, + "step": 2242 + }, + { + "epoch": 0.68, + "learning_rate": 0.00028812964009844547, + "loss": 0.2956, + "step": 2243 + }, + { + "epoch": 0.68, + "learning_rate": 0.00028811836586517797, + "loss": 0.0831, + "step": 2244 + }, + { + "epoch": 0.68, + "learning_rate": 0.00028810708650121815, + "loss": 0.222, + "step": 2245 + }, + { + "epoch": 0.68, + "learning_rate": 0.000288095802006985, + "loss": 0.3752, + "step": 2246 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002880845123828978, + "loss": 0.1161, + "step": 2247 + }, + { + "epoch": 0.68, + "learning_rate": 0.00028807321762937596, + "loss": 0.3533, + "step": 2248 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002880619177468389, + "loss": 0.1835, + "step": 2249 + }, + { + "epoch": 0.68, + "learning_rate": 0.00028805061273570656, + "loss": 0.3007, + "step": 2250 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002880393025963987, + "loss": 0.2778, + "step": 2251 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002880279873293356, + "loss": 0.3493, + "step": 2252 + }, + { + "epoch": 0.68, + "learning_rate": 0.00028801666693493744, + "loss": 0.352, + "step": 2253 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002880053414136249, + "loss": 0.1785, + "step": 2254 + }, + { + "epoch": 0.68, + "learning_rate": 0.00028799401076581865, + "loss": 0.3449, + "step": 2255 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002879826749919395, + "loss": 0.2181, + "step": 2256 + }, + { + "epoch": 0.69, + "learning_rate": 0.00028797133409240867, + "loss": 0.3418, + "step": 2257 + }, + { + "epoch": 0.69, + "learning_rate": 0.00028795998806764735, + "loss": 0.1986, + "step": 2258 + }, + { + "epoch": 0.69, + "learning_rate": 0.000287948636918077, + "loss": 0.3454, + "step": 2259 + }, + { + "epoch": 0.69, + "learning_rate": 0.00028793728064411937, + "loss": 0.1458, + "step": 2260 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002879259192461962, + "loss": 0.2994, + "step": 2261 + }, + { + "epoch": 0.69, + "learning_rate": 0.00028791455272472965, + "loss": 0.1364, + "step": 2262 + }, + { + "epoch": 0.69, + "learning_rate": 0.00028790318108014193, + "loss": 0.2571, + "step": 2263 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002878918043128554, + "loss": 0.39, + "step": 2264 + }, + { + "epoch": 0.69, + "learning_rate": 0.00028788042242329265, + "loss": 0.4226, + "step": 2265 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002878690354118766, + "loss": 0.2179, + "step": 2266 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002878576432790301, + "loss": 0.3487, + "step": 2267 + }, + { + "epoch": 0.69, + "learning_rate": 0.00028784624602517654, + "loss": 0.0784, + "step": 2268 + }, + { + "epoch": 0.69, + "learning_rate": 0.00028783484365073905, + "loss": 0.2921, + "step": 2269 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002878234361561414, + "loss": 0.2718, + "step": 2270 + }, + { + "epoch": 0.69, + "learning_rate": 0.00028781202354180726, + "loss": 0.0946, + "step": 2271 + }, + { + "epoch": 0.69, + "learning_rate": 0.00028780060580816053, + "loss": 0.2097, + "step": 2272 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002877891829556254, + "loss": 0.1387, + "step": 2273 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002877777549846262, + "loss": 0.1482, + "step": 2274 + }, + { + "epoch": 0.69, + "learning_rate": 0.00028776632189558745, + "loss": 0.3785, + "step": 2275 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002877548836889338, + "loss": 0.2298, + "step": 2276 + }, + { + "epoch": 0.69, + "learning_rate": 0.00028774344036509023, + "loss": 0.4382, + "step": 2277 + }, + { + "epoch": 0.69, + "learning_rate": 0.00028773199192448173, + "loss": 0.3168, + "step": 2278 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002877205383675337, + "loss": 0.1655, + "step": 2279 + }, + { + "epoch": 0.69, + "learning_rate": 0.00028770907969467147, + "loss": 0.2669, + "step": 2280 + }, + { + "epoch": 0.69, + "learning_rate": 0.00028769761590632074, + "loss": 0.1088, + "step": 2281 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002876861470029074, + "loss": 0.2163, + "step": 2282 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002876746729848575, + "loss": 0.2921, + "step": 2283 + }, + { + "epoch": 0.69, + "learning_rate": 0.00028766319385259713, + "loss": 0.268, + "step": 2284 + }, + { + "epoch": 0.69, + "learning_rate": 0.00028765170960655287, + "loss": 0.1042, + "step": 2285 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002876402202471513, + "loss": 0.4105, + "step": 2286 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002876287257748191, + "loss": 0.286, + "step": 2287 + }, + { + "epoch": 0.69, + "learning_rate": 0.00028761722618998335, + "loss": 0.2007, + "step": 2288 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002876057214930712, + "loss": 0.1585, + "step": 2289 + }, + { + "epoch": 0.7, + "learning_rate": 0.00028759421168451005, + "loss": 0.2759, + "step": 2290 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002875826967647274, + "loss": 0.1763, + "step": 2291 + }, + { + "epoch": 0.7, + "learning_rate": 0.000287571176734151, + "loss": 0.3029, + "step": 2292 + }, + { + "epoch": 0.7, + "learning_rate": 0.00028755965159320886, + "loss": 0.1643, + "step": 2293 + }, + { + "epoch": 0.7, + "learning_rate": 0.000287548121342329, + "loss": 0.2311, + "step": 2294 + }, + { + "epoch": 0.7, + "learning_rate": 0.00028753658598193986, + "loss": 0.2428, + "step": 2295 + }, + { + "epoch": 0.7, + "learning_rate": 0.00028752504551246983, + "loss": 0.3139, + "step": 2296 + }, + { + "epoch": 0.7, + "learning_rate": 0.00028751349993434765, + "loss": 0.3878, + "step": 2297 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002875019492480022, + "loss": 0.1835, + "step": 2298 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002874903934538626, + "loss": 0.3283, + "step": 2299 + }, + { + "epoch": 0.7, + "learning_rate": 0.000287478832552358, + "loss": 0.2655, + "step": 2300 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002874672665439179, + "loss": 0.1229, + "step": 2301 + }, + { + "epoch": 0.7, + "learning_rate": 0.00028745569542897205, + "loss": 0.321, + "step": 2302 + }, + { + "epoch": 0.7, + "learning_rate": 0.00028744411920795024, + "loss": 0.2315, + "step": 2303 + }, + { + "epoch": 0.7, + "learning_rate": 0.00028743253788128233, + "loss": 0.3216, + "step": 2304 + }, + { + "epoch": 0.7, + "learning_rate": 0.00028742095144939874, + "loss": 0.2566, + "step": 2305 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002874093599127298, + "loss": 0.1839, + "step": 2306 + }, + { + "epoch": 0.7, + "learning_rate": 0.00028739776327170604, + "loss": 0.3802, + "step": 2307 + }, + { + "epoch": 0.7, + "learning_rate": 0.00028738616152675827, + "loss": 0.4218, + "step": 2308 + }, + { + "epoch": 0.7, + "learning_rate": 0.00028737455467831756, + "loss": 0.2013, + "step": 2309 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002873629427268149, + "loss": 0.128, + "step": 2310 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002873513256726818, + "loss": 0.1239, + "step": 2311 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002873397035163497, + "loss": 0.4136, + "step": 2312 + }, + { + "epoch": 0.7, + "learning_rate": 0.00028732807625825036, + "loss": 0.3143, + "step": 2313 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002873164438988157, + "loss": 0.2253, + "step": 2314 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002873048064384778, + "loss": 0.6165, + "step": 2315 + }, + { + "epoch": 0.7, + "learning_rate": 0.000287293163877669, + "loss": 0.2949, + "step": 2316 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002872815162168218, + "loss": 0.2973, + "step": 2317 + }, + { + "epoch": 0.7, + "learning_rate": 0.00028726986345636884, + "loss": 0.4118, + "step": 2318 + }, + { + "epoch": 0.7, + "learning_rate": 0.000287258205596743, + "loss": 0.3028, + "step": 2319 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002872465426383773, + "loss": 0.3699, + "step": 2320 + }, + { + "epoch": 0.7, + "learning_rate": 0.000287234874581705, + "loss": 0.2402, + "step": 2321 + }, + { + "epoch": 0.71, + "learning_rate": 0.00028722320142715954, + "loss": 0.1611, + "step": 2322 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002872115231751746, + "loss": 0.3623, + "step": 2323 + }, + { + "epoch": 0.71, + "learning_rate": 0.00028719983982618394, + "loss": 0.4318, + "step": 2324 + }, + { + "epoch": 0.71, + "learning_rate": 0.00028718815138062154, + "loss": 0.411, + "step": 2325 + }, + { + "epoch": 0.71, + "learning_rate": 0.00028717645783892166, + "loss": 0.2506, + "step": 2326 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002871647592015186, + "loss": 0.1508, + "step": 2327 + }, + { + "epoch": 0.71, + "learning_rate": 0.00028715305546884697, + "loss": 0.26, + "step": 2328 + }, + { + "epoch": 0.71, + "learning_rate": 0.00028714134664134153, + "loss": 0.2502, + "step": 2329 + }, + { + "epoch": 0.71, + "learning_rate": 0.00028712963271943724, + "loss": 0.3457, + "step": 2330 + }, + { + "epoch": 0.71, + "learning_rate": 0.00028711791370356917, + "loss": 0.3628, + "step": 2331 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002871061895941728, + "loss": 0.2857, + "step": 2332 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002870944603916835, + "loss": 0.2297, + "step": 2333 + }, + { + "epoch": 0.71, + "learning_rate": 0.000287082726096537, + "loss": 0.2671, + "step": 2334 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002870709867091692, + "loss": 0.2219, + "step": 2335 + }, + { + "epoch": 0.71, + "learning_rate": 0.00028705924223001625, + "loss": 0.4238, + "step": 2336 + }, + { + "epoch": 0.71, + "learning_rate": 0.00028704749265951433, + "loss": 0.2338, + "step": 2337 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002870357379981, + "loss": 0.2755, + "step": 2338 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002870239782462098, + "loss": 0.166, + "step": 2339 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002870122134042807, + "loss": 0.3466, + "step": 2340 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002870004434727496, + "loss": 0.1339, + "step": 2341 + }, + { + "epoch": 0.71, + "learning_rate": 0.00028698866845205376, + "loss": 0.3264, + "step": 2342 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002869768883426306, + "loss": 0.1759, + "step": 2343 + }, + { + "epoch": 0.71, + "learning_rate": 0.00028696510314491777, + "loss": 0.1876, + "step": 2344 + }, + { + "epoch": 0.71, + "learning_rate": 0.000286953312859353, + "loss": 0.2871, + "step": 2345 + }, + { + "epoch": 0.71, + "learning_rate": 0.00028694151748637426, + "loss": 0.1806, + "step": 2346 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002869297170264197, + "loss": 0.2895, + "step": 2347 + }, + { + "epoch": 0.71, + "learning_rate": 0.00028691791147992775, + "loss": 0.3637, + "step": 2348 + }, + { + "epoch": 0.71, + "learning_rate": 0.00028690610084733685, + "loss": 0.2857, + "step": 2349 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002868942851290858, + "loss": 0.0464, + "step": 2350 + }, + { + "epoch": 0.71, + "learning_rate": 0.00028688246432561346, + "loss": 0.4376, + "step": 2351 + }, + { + "epoch": 0.71, + "learning_rate": 0.000286870638437359, + "loss": 0.4993, + "step": 2352 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002868588074647617, + "loss": 0.1392, + "step": 2353 + }, + { + "epoch": 0.71, + "learning_rate": 0.000286846971408261, + "loss": 0.1963, + "step": 2354 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002868351302682967, + "loss": 0.3059, + "step": 2355 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002868232840453085, + "loss": 0.2506, + "step": 2356 + }, + { + "epoch": 0.72, + "learning_rate": 0.00028681143273973656, + "loss": 0.2321, + "step": 2357 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002867995763520211, + "loss": 0.2557, + "step": 2358 + }, + { + "epoch": 0.72, + "learning_rate": 0.00028678771488260243, + "loss": 0.5061, + "step": 2359 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002867758483319214, + "loss": 0.3687, + "step": 2360 + }, + { + "epoch": 0.72, + "learning_rate": 0.00028676397670041867, + "loss": 0.1397, + "step": 2361 + }, + { + "epoch": 0.72, + "learning_rate": 0.00028675209998853523, + "loss": 0.2158, + "step": 2362 + }, + { + "epoch": 0.72, + "learning_rate": 0.00028674021819671234, + "loss": 0.3773, + "step": 2363 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002867283313253913, + "loss": 0.1872, + "step": 2364 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002867164393750137, + "loss": 0.1862, + "step": 2365 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002867045423460214, + "loss": 0.2293, + "step": 2366 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002866926402388561, + "loss": 0.3582, + "step": 2367 + }, + { + "epoch": 0.72, + "learning_rate": 0.00028668073305396013, + "loss": 0.2238, + "step": 2368 + }, + { + "epoch": 0.72, + "learning_rate": 0.00028666882079177565, + "loss": 0.1499, + "step": 2369 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002866569034527454, + "loss": 0.2204, + "step": 2370 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002866449810373118, + "loss": 0.3417, + "step": 2371 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002866330535459179, + "loss": 0.2349, + "step": 2372 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002866211209790068, + "loss": 0.1945, + "step": 2373 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002866091833370216, + "loss": 0.2949, + "step": 2374 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002865972406204059, + "loss": 0.408, + "step": 2375 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002865852928296033, + "loss": 0.3832, + "step": 2376 + }, + { + "epoch": 0.72, + "learning_rate": 0.00028657333996505755, + "loss": 0.1489, + "step": 2377 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002865613820272127, + "loss": 0.0311, + "step": 2378 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002865494190165131, + "loss": 0.2701, + "step": 2379 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002865374509334029, + "loss": 0.2397, + "step": 2380 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002865254777783268, + "loss": 0.1889, + "step": 2381 + }, + { + "epoch": 0.72, + "learning_rate": 0.00028651349955172953, + "loss": 0.4901, + "step": 2382 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002865015162540561, + "loss": 0.417, + "step": 2383 + }, + { + "epoch": 0.72, + "learning_rate": 0.00028648952788575167, + "loss": 0.3234, + "step": 2384 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002864775344472615, + "loss": 0.3799, + "step": 2385 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002864655359390312, + "loss": 0.0394, + "step": 2386 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002864535323615063, + "loss": 0.0823, + "step": 2387 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002864415237151329, + "loss": 0.1259, + "step": 2388 + }, + { + "epoch": 0.73, + "learning_rate": 0.000286429510000357, + "loss": 0.3381, + "step": 2389 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002864174912176249, + "loss": 0.2694, + "step": 2390 + }, + { + "epoch": 0.73, + "learning_rate": 0.00028640546736738296, + "loss": 0.1879, + "step": 2391 + }, + { + "epoch": 0.73, + "learning_rate": 0.000286393438450078, + "loss": 0.2285, + "step": 2392 + }, + { + "epoch": 0.73, + "learning_rate": 0.00028638140446615674, + "loss": 0.3317, + "step": 2393 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002863693654160662, + "loss": 0.3492, + "step": 2394 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002863573213002537, + "loss": 0.2729, + "step": 2395 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002863452721191665, + "loss": 0.2763, + "step": 2396 + }, + { + "epoch": 0.73, + "learning_rate": 0.00028633321787325237, + "loss": 0.2695, + "step": 2397 + }, + { + "epoch": 0.73, + "learning_rate": 0.00028632115856295896, + "loss": 0.2273, + "step": 2398 + }, + { + "epoch": 0.73, + "learning_rate": 0.00028630909418873423, + "loss": 0.082, + "step": 2399 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002862970247510264, + "loss": 0.2405, + "step": 2400 + }, + { + "epoch": 0.73, + "learning_rate": 0.00028628495025028377, + "loss": 0.271, + "step": 2401 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002862728706869549, + "loss": 0.2416, + "step": 2402 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002862607860614885, + "loss": 0.1304, + "step": 2403 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002862486963743335, + "loss": 0.1423, + "step": 2404 + }, + { + "epoch": 0.73, + "learning_rate": 0.00028623660162593894, + "loss": 0.1282, + "step": 2405 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002862245018167542, + "loss": 0.1624, + "step": 2406 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002862123969472286, + "loss": 0.1764, + "step": 2407 + }, + { + "epoch": 0.73, + "learning_rate": 0.00028620028701781193, + "loss": 0.2475, + "step": 2408 + }, + { + "epoch": 0.73, + "learning_rate": 0.000286188172028954, + "loss": 0.3073, + "step": 2409 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002861760519811049, + "loss": 0.3634, + "step": 2410 + }, + { + "epoch": 0.73, + "learning_rate": 0.00028616392687471476, + "loss": 0.2312, + "step": 2411 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002861517967102341, + "loss": 0.1431, + "step": 2412 + }, + { + "epoch": 0.73, + "learning_rate": 0.00028613966148811334, + "loss": 0.1989, + "step": 2413 + }, + { + "epoch": 0.73, + "learning_rate": 0.00028612752120880345, + "loss": 0.2999, + "step": 2414 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002861153758727553, + "loss": 0.0608, + "step": 2415 + }, + { + "epoch": 0.73, + "learning_rate": 0.00028610322548042015, + "loss": 0.327, + "step": 2416 + }, + { + "epoch": 0.73, + "learning_rate": 0.00028609107003224924, + "loss": 0.3088, + "step": 2417 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002860789095286942, + "loss": 0.1438, + "step": 2418 + }, + { + "epoch": 0.73, + "learning_rate": 0.00028606674397020664, + "loss": 0.1453, + "step": 2419 + }, + { + "epoch": 0.73, + "learning_rate": 0.00028605457335723864, + "loss": 0.3434, + "step": 2420 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002860423976902422, + "loss": 0.2146, + "step": 2421 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002860302169696696, + "loss": 0.3015, + "step": 2422 + }, + { + "epoch": 0.74, + "learning_rate": 0.00028601803119597335, + "loss": 0.2598, + "step": 2423 + }, + { + "epoch": 0.74, + "learning_rate": 0.00028600584036960613, + "loss": 0.1453, + "step": 2424 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002859936444910208, + "loss": 0.1685, + "step": 2425 + }, + { + "epoch": 0.74, + "learning_rate": 0.00028598144356067033, + "loss": 0.209, + "step": 2426 + }, + { + "epoch": 0.74, + "learning_rate": 0.00028596923757900797, + "loss": 0.3207, + "step": 2427 + }, + { + "epoch": 0.74, + "learning_rate": 0.00028595702654648716, + "loss": 0.1782, + "step": 2428 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002859448104635616, + "loss": 0.1517, + "step": 2429 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002859325893306849, + "loss": 0.1806, + "step": 2430 + }, + { + "epoch": 0.74, + "learning_rate": 0.00028592036314831117, + "loss": 0.2877, + "step": 2431 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002859081319168945, + "loss": 0.1717, + "step": 2432 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002858958956368893, + "loss": 0.1475, + "step": 2433 + }, + { + "epoch": 0.74, + "learning_rate": 0.00028588365430875004, + "loss": 0.3255, + "step": 2434 + }, + { + "epoch": 0.74, + "learning_rate": 0.00028587140793293156, + "loss": 0.2828, + "step": 2435 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002858591565098887, + "loss": 0.217, + "step": 2436 + }, + { + "epoch": 0.74, + "learning_rate": 0.00028584690004007657, + "loss": 0.2412, + "step": 2437 + }, + { + "epoch": 0.74, + "learning_rate": 0.00028583463852395046, + "loss": 0.2234, + "step": 2438 + }, + { + "epoch": 0.74, + "learning_rate": 0.00028582237196196587, + "loss": 0.1458, + "step": 2439 + }, + { + "epoch": 0.74, + "learning_rate": 0.00028581010035457844, + "loss": 0.2847, + "step": 2440 + }, + { + "epoch": 0.74, + "learning_rate": 0.00028579782370224407, + "loss": 0.1574, + "step": 2441 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002857855420054188, + "loss": 0.2458, + "step": 2442 + }, + { + "epoch": 0.74, + "learning_rate": 0.00028577325526455875, + "loss": 0.1318, + "step": 2443 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002857609634801205, + "loss": 0.3089, + "step": 2444 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002857486666525605, + "loss": 0.2163, + "step": 2445 + }, + { + "epoch": 0.74, + "learning_rate": 0.00028573636478233566, + "loss": 0.2351, + "step": 2446 + }, + { + "epoch": 0.74, + "learning_rate": 0.00028572405786990294, + "loss": 0.2186, + "step": 2447 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002857117459157194, + "loss": 0.2132, + "step": 2448 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002856994289202425, + "loss": 0.1743, + "step": 2449 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002856871068839298, + "loss": 0.313, + "step": 2450 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002856747798072389, + "loss": 0.1392, + "step": 2451 + }, + { + "epoch": 0.74, + "learning_rate": 0.00028566244769062785, + "loss": 0.1828, + "step": 2452 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002856501105345547, + "loss": 0.305, + "step": 2453 + }, + { + "epoch": 0.75, + "learning_rate": 0.0002856377683394777, + "loss": 0.2123, + "step": 2454 + }, + { + "epoch": 0.75, + "learning_rate": 0.0002856254211058554, + "loss": 0.2277, + "step": 2455 + }, + { + "epoch": 0.75, + "learning_rate": 0.0002856130688341464, + "loss": 0.2361, + "step": 2456 + }, + { + "epoch": 0.75, + "learning_rate": 0.00028560071152480956, + "loss": 0.3193, + "step": 2457 + }, + { + "epoch": 0.75, + "learning_rate": 0.0002855883491783039, + "loss": 0.2176, + "step": 2458 + }, + { + "epoch": 0.75, + "learning_rate": 0.00028557598179508874, + "loss": 0.2991, + "step": 2459 + }, + { + "epoch": 0.75, + "learning_rate": 0.00028556360937562344, + "loss": 0.2926, + "step": 2460 + }, + { + "epoch": 0.75, + "learning_rate": 0.00028555123192036756, + "loss": 0.2226, + "step": 2461 + }, + { + "epoch": 0.75, + "learning_rate": 0.00028553884942978086, + "loss": 0.4158, + "step": 2462 + }, + { + "epoch": 0.75, + "learning_rate": 0.00028552646190432347, + "loss": 0.2615, + "step": 2463 + }, + { + "epoch": 0.75, + "learning_rate": 0.00028551406934445537, + "loss": 0.3113, + "step": 2464 + }, + { + "epoch": 0.75, + "learning_rate": 0.000285501671750637, + "loss": 0.3156, + "step": 2465 + }, + { + "epoch": 0.75, + "learning_rate": 0.0002854892691233289, + "loss": 0.0859, + "step": 2466 + }, + { + "epoch": 0.75, + "learning_rate": 0.00028547686146299176, + "loss": 0.3284, + "step": 2467 + }, + { + "epoch": 0.75, + "learning_rate": 0.00028546444877008655, + "loss": 0.2538, + "step": 2468 + }, + { + "epoch": 0.75, + "learning_rate": 0.00028545203104507423, + "loss": 0.175, + "step": 2469 + }, + { + "epoch": 0.75, + "learning_rate": 0.00028543960828841624, + "loss": 0.1089, + "step": 2470 + }, + { + "epoch": 0.75, + "learning_rate": 0.00028542718050057395, + "loss": 0.2567, + "step": 2471 + }, + { + "epoch": 0.75, + "learning_rate": 0.000285414747682009, + "loss": 0.2362, + "step": 2472 + }, + { + "epoch": 0.75, + "learning_rate": 0.00028540230983318334, + "loss": 0.4656, + "step": 2473 + }, + { + "epoch": 0.75, + "learning_rate": 0.0002853898669545589, + "loss": 0.4329, + "step": 2474 + }, + { + "epoch": 0.75, + "learning_rate": 0.00028537741904659795, + "loss": 0.3118, + "step": 2475 + }, + { + "epoch": 0.75, + "learning_rate": 0.00028536496610976286, + "loss": 0.3089, + "step": 2476 + }, + { + "epoch": 0.75, + "learning_rate": 0.0002853525081445162, + "loss": 0.1194, + "step": 2477 + }, + { + "epoch": 0.75, + "learning_rate": 0.0002853400451513208, + "loss": 0.1869, + "step": 2478 + }, + { + "epoch": 0.75, + "learning_rate": 0.00028532757713063965, + "loss": 0.2219, + "step": 2479 + }, + { + "epoch": 0.75, + "learning_rate": 0.0002853151040829358, + "loss": 0.1605, + "step": 2480 + }, + { + "epoch": 0.75, + "learning_rate": 0.00028530262600867266, + "loss": 0.3385, + "step": 2481 + }, + { + "epoch": 0.75, + "learning_rate": 0.0002852901429083137, + "loss": 0.1001, + "step": 2482 + }, + { + "epoch": 0.75, + "learning_rate": 0.0002852776547823227, + "loss": 0.1294, + "step": 2483 + }, + { + "epoch": 0.75, + "learning_rate": 0.00028526516163116357, + "loss": 0.5129, + "step": 2484 + }, + { + "epoch": 0.75, + "learning_rate": 0.0002852526634553003, + "loss": 0.3074, + "step": 2485 + }, + { + "epoch": 0.75, + "learning_rate": 0.00028524016025519716, + "loss": 0.2502, + "step": 2486 + }, + { + "epoch": 0.76, + "learning_rate": 0.00028522765203131873, + "loss": 0.2607, + "step": 2487 + }, + { + "epoch": 0.76, + "learning_rate": 0.00028521513878412955, + "loss": 0.3722, + "step": 2488 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002852026205140945, + "loss": 0.3353, + "step": 2489 + }, + { + "epoch": 0.76, + "learning_rate": 0.00028519009722167857, + "loss": 0.3443, + "step": 2490 + }, + { + "epoch": 0.76, + "learning_rate": 0.00028517756890734697, + "loss": 0.2284, + "step": 2491 + }, + { + "epoch": 0.76, + "learning_rate": 0.00028516503557156505, + "loss": 0.2451, + "step": 2492 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002851524972147985, + "loss": 0.3064, + "step": 2493 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002851399538375129, + "loss": 0.3499, + "step": 2494 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002851274054401744, + "loss": 0.3286, + "step": 2495 + }, + { + "epoch": 0.76, + "learning_rate": 0.00028511485202324904, + "loss": 0.3517, + "step": 2496 + }, + { + "epoch": 0.76, + "learning_rate": 0.00028510229358720313, + "loss": 0.1303, + "step": 2497 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002850897301325032, + "loss": 0.0878, + "step": 2498 + }, + { + "epoch": 0.76, + "learning_rate": 0.00028507716165961593, + "loss": 0.2626, + "step": 2499 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002850645881690082, + "loss": 0.3046, + "step": 2500 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002850520096611471, + "loss": 0.3793, + "step": 2501 + }, + { + "epoch": 0.76, + "learning_rate": 0.00028503942613649995, + "loss": 0.1084, + "step": 2502 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002850268375955341, + "loss": 0.2113, + "step": 2503 + }, + { + "epoch": 0.76, + "learning_rate": 0.00028501424403871707, + "loss": 0.1279, + "step": 2504 + }, + { + "epoch": 0.76, + "learning_rate": 0.00028500164546651694, + "loss": 0.1393, + "step": 2505 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002849890418794015, + "loss": 0.3027, + "step": 2506 + }, + { + "epoch": 0.76, + "learning_rate": 0.00028497643327783905, + "loss": 0.27, + "step": 2507 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002849638196622979, + "loss": 0.1822, + "step": 2508 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002849512010332466, + "loss": 0.1452, + "step": 2509 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002849385773911539, + "loss": 0.21, + "step": 2510 + }, + { + "epoch": 0.76, + "learning_rate": 0.00028492594873648884, + "loss": 0.438, + "step": 2511 + }, + { + "epoch": 0.76, + "learning_rate": 0.00028491331506972045, + "loss": 0.1076, + "step": 2512 + }, + { + "epoch": 0.76, + "learning_rate": 0.00028490067639131794, + "loss": 0.4603, + "step": 2513 + }, + { + "epoch": 0.76, + "learning_rate": 0.000284888032701751, + "loss": 0.129, + "step": 2514 + }, + { + "epoch": 0.76, + "learning_rate": 0.00028487538400148916, + "loss": 0.1634, + "step": 2515 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002848627302910023, + "loss": 0.0546, + "step": 2516 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002848500715707605, + "loss": 0.292, + "step": 2517 + }, + { + "epoch": 0.76, + "learning_rate": 0.000284837407841234, + "loss": 0.1587, + "step": 2518 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002848247391028932, + "loss": 0.2358, + "step": 2519 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002848120653562087, + "loss": 0.3936, + "step": 2520 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002847993866016513, + "loss": 0.1853, + "step": 2521 + }, + { + "epoch": 0.77, + "learning_rate": 0.000284786702839692, + "loss": 0.2624, + "step": 2522 + }, + { + "epoch": 0.77, + "learning_rate": 0.000284774014070802, + "loss": 0.3168, + "step": 2523 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002847613202954525, + "loss": 0.3174, + "step": 2524 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002847486215141152, + "loss": 0.3391, + "step": 2525 + }, + { + "epoch": 0.77, + "learning_rate": 0.00028473591772726174, + "loss": 0.2767, + "step": 2526 + }, + { + "epoch": 0.77, + "learning_rate": 0.00028472320893536404, + "loss": 0.2851, + "step": 2527 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002847104951388942, + "loss": 0.2464, + "step": 2528 + }, + { + "epoch": 0.77, + "learning_rate": 0.00028469777633832456, + "loss": 0.1894, + "step": 2529 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002846850525341274, + "loss": 0.1279, + "step": 2530 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002846723237267756, + "loss": 0.3391, + "step": 2531 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002846595899167419, + "loss": 0.4101, + "step": 2532 + }, + { + "epoch": 0.77, + "learning_rate": 0.00028464685110449927, + "loss": 0.3756, + "step": 2533 + }, + { + "epoch": 0.77, + "learning_rate": 0.000284634107290521, + "loss": 0.2937, + "step": 2534 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002846213584752804, + "loss": 0.1586, + "step": 2535 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002846086046592512, + "loss": 0.1591, + "step": 2536 + }, + { + "epoch": 0.77, + "learning_rate": 0.000284595845842907, + "loss": 0.2577, + "step": 2537 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002845830820267219, + "loss": 0.2831, + "step": 2538 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002845703132111699, + "loss": 0.2121, + "step": 2539 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002845575393967254, + "loss": 0.1888, + "step": 2540 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002845447605838629, + "loss": 0.1766, + "step": 2541 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002845319767730571, + "loss": 0.1523, + "step": 2542 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002845191879647829, + "loss": 0.3325, + "step": 2543 + }, + { + "epoch": 0.77, + "learning_rate": 0.00028450639415951526, + "loss": 0.15, + "step": 2544 + }, + { + "epoch": 0.77, + "learning_rate": 0.00028449359535772956, + "loss": 0.1572, + "step": 2545 + }, + { + "epoch": 0.77, + "learning_rate": 0.00028448079155990127, + "loss": 0.2961, + "step": 2546 + }, + { + "epoch": 0.77, + "learning_rate": 0.00028446798276650587, + "loss": 0.2128, + "step": 2547 + }, + { + "epoch": 0.77, + "learning_rate": 0.00028445516897801916, + "loss": 0.2533, + "step": 2548 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002844423501949173, + "loss": 0.3955, + "step": 2549 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002844295264176763, + "loss": 0.1414, + "step": 2550 + }, + { + "epoch": 0.77, + "learning_rate": 0.00028441669764677266, + "loss": 0.2914, + "step": 2551 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002844038638826829, + "loss": 0.3946, + "step": 2552 + }, + { + "epoch": 0.78, + "learning_rate": 0.00028439102512588374, + "loss": 0.3383, + "step": 2553 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002843781813768521, + "loss": 0.2657, + "step": 2554 + }, + { + "epoch": 0.78, + "learning_rate": 0.00028436533263606503, + "loss": 0.1603, + "step": 2555 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002843524789039999, + "loss": 0.2567, + "step": 2556 + }, + { + "epoch": 0.78, + "learning_rate": 0.00028433962018113414, + "loss": 0.2055, + "step": 2557 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002843267564679454, + "loss": 0.3409, + "step": 2558 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002843138877649117, + "loss": 0.2079, + "step": 2559 + }, + { + "epoch": 0.78, + "learning_rate": 0.00028430101407251083, + "loss": 0.2987, + "step": 2560 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002842881353912212, + "loss": 0.2349, + "step": 2561 + }, + { + "epoch": 0.78, + "learning_rate": 0.000284275251721521, + "loss": 0.0914, + "step": 2562 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002842623630638891, + "loss": 0.4576, + "step": 2563 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002842494694188041, + "loss": 0.2337, + "step": 2564 + }, + { + "epoch": 0.78, + "learning_rate": 0.00028423657078674496, + "loss": 0.266, + "step": 2565 + }, + { + "epoch": 0.78, + "learning_rate": 0.00028422366716819087, + "loss": 0.3541, + "step": 2566 + }, + { + "epoch": 0.78, + "learning_rate": 0.00028421075856362117, + "loss": 0.1284, + "step": 2567 + }, + { + "epoch": 0.78, + "learning_rate": 0.00028419784497351534, + "loss": 0.2697, + "step": 2568 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002841849263983531, + "loss": 0.201, + "step": 2569 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002841720028386144, + "loss": 0.2682, + "step": 2570 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002841590742947792, + "loss": 0.2495, + "step": 2571 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002841461407673278, + "loss": 0.3568, + "step": 2572 + }, + { + "epoch": 0.78, + "learning_rate": 0.00028413320225674065, + "loss": 0.3132, + "step": 2573 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002841202587634984, + "loss": 0.1652, + "step": 2574 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002841073102880818, + "loss": 0.2361, + "step": 2575 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002840943568309719, + "loss": 0.0557, + "step": 2576 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002840813983926499, + "loss": 0.2375, + "step": 2577 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002840684349735972, + "loss": 0.2281, + "step": 2578 + }, + { + "epoch": 0.78, + "learning_rate": 0.00028405546657429517, + "loss": 0.2889, + "step": 2579 + }, + { + "epoch": 0.78, + "learning_rate": 0.00028404249319522574, + "loss": 0.2733, + "step": 2580 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002840295148368707, + "loss": 0.3435, + "step": 2581 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002840165314997123, + "loss": 0.0879, + "step": 2582 + }, + { + "epoch": 0.78, + "learning_rate": 0.00028400354318423274, + "loss": 0.1987, + "step": 2583 + }, + { + "epoch": 0.78, + "learning_rate": 0.00028399054989091446, + "loss": 0.2304, + "step": 2584 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002839775516202403, + "loss": 0.2919, + "step": 2585 + }, + { + "epoch": 0.79, + "learning_rate": 0.00028396454837269284, + "loss": 0.2287, + "step": 2586 + }, + { + "epoch": 0.79, + "learning_rate": 0.0002839515401487553, + "loss": 0.3576, + "step": 2587 + }, + { + "epoch": 0.79, + "learning_rate": 0.0002839385269489109, + "loss": 0.3834, + "step": 2588 + }, + { + "epoch": 0.79, + "learning_rate": 0.00028392550877364293, + "loss": 0.2523, + "step": 2589 + }, + { + "epoch": 0.79, + "learning_rate": 0.0002839124856234351, + "loss": 0.1565, + "step": 2590 + }, + { + "epoch": 0.79, + "learning_rate": 0.00028389945749877107, + "loss": 0.2351, + "step": 2591 + }, + { + "epoch": 0.79, + "learning_rate": 0.0002838864244001349, + "loss": 0.303, + "step": 2592 + }, + { + "epoch": 0.79, + "learning_rate": 0.0002838733863280107, + "loss": 0.1637, + "step": 2593 + }, + { + "epoch": 0.79, + "learning_rate": 0.00028386034328288273, + "loss": 0.2744, + "step": 2594 + }, + { + "epoch": 0.79, + "learning_rate": 0.00028384729526523554, + "loss": 0.3423, + "step": 2595 + }, + { + "epoch": 0.79, + "learning_rate": 0.00028383424227555385, + "loss": 0.1597, + "step": 2596 + }, + { + "epoch": 0.79, + "learning_rate": 0.00028382118431432256, + "loss": 0.2284, + "step": 2597 + }, + { + "epoch": 0.79, + "learning_rate": 0.00028380812138202664, + "loss": 0.3278, + "step": 2598 + }, + { + "epoch": 0.79, + "learning_rate": 0.00028379505347915143, + "loss": 0.3565, + "step": 2599 + }, + { + "epoch": 0.79, + "learning_rate": 0.00028378198060618234, + "loss": 0.2924, + "step": 2600 + }, + { + "epoch": 0.79, + "learning_rate": 0.00028376890276360497, + "loss": 0.1676, + "step": 2601 + }, + { + "epoch": 0.79, + "learning_rate": 0.00028375581995190516, + "loss": 0.3149, + "step": 2602 + }, + { + "epoch": 0.79, + "learning_rate": 0.00028374273217156884, + "loss": 0.2699, + "step": 2603 + }, + { + "epoch": 0.79, + "learning_rate": 0.0002837296394230822, + "loss": 0.1266, + "step": 2604 + }, + { + "epoch": 0.79, + "learning_rate": 0.0002837165417069317, + "loss": 0.4045, + "step": 2605 + }, + { + "epoch": 0.79, + "learning_rate": 0.0002837034390236037, + "loss": 0.258, + "step": 2606 + }, + { + "epoch": 0.79, + "learning_rate": 0.00028369033137358506, + "loss": 0.2908, + "step": 2607 + }, + { + "epoch": 0.79, + "learning_rate": 0.00028367721875736273, + "loss": 0.2548, + "step": 2608 + }, + { + "epoch": 0.79, + "learning_rate": 0.00028366410117542366, + "loss": 0.0907, + "step": 2609 + }, + { + "epoch": 0.79, + "learning_rate": 0.00028365097862825513, + "loss": 0.1327, + "step": 2610 + }, + { + "epoch": 0.79, + "learning_rate": 0.00028363785111634477, + "loss": 0.2357, + "step": 2611 + }, + { + "epoch": 0.79, + "learning_rate": 0.0002836247186401801, + "loss": 0.2082, + "step": 2612 + }, + { + "epoch": 0.79, + "learning_rate": 0.00028361158120024897, + "loss": 0.2264, + "step": 2613 + }, + { + "epoch": 0.79, + "learning_rate": 0.0002835984387970394, + "loss": 0.2228, + "step": 2614 + }, + { + "epoch": 0.79, + "learning_rate": 0.00028358529143103966, + "loss": 0.2715, + "step": 2615 + }, + { + "epoch": 0.79, + "learning_rate": 0.00028357213910273806, + "loss": 0.1989, + "step": 2616 + }, + { + "epoch": 0.79, + "learning_rate": 0.00028355898181262317, + "loss": 0.2547, + "step": 2617 + }, + { + "epoch": 0.79, + "learning_rate": 0.00028354581956118374, + "loss": 0.2238, + "step": 2618 + }, + { + "epoch": 0.8, + "learning_rate": 0.00028353265234890875, + "loss": 0.1643, + "step": 2619 + }, + { + "epoch": 0.8, + "learning_rate": 0.00028351948017628733, + "loss": 0.2952, + "step": 2620 + }, + { + "epoch": 0.8, + "learning_rate": 0.00028350630304380874, + "loss": 0.1857, + "step": 2621 + }, + { + "epoch": 0.8, + "learning_rate": 0.00028349312095196253, + "loss": 0.1306, + "step": 2622 + }, + { + "epoch": 0.8, + "learning_rate": 0.00028347993390123835, + "loss": 0.259, + "step": 2623 + }, + { + "epoch": 0.8, + "learning_rate": 0.000283466741892126, + "loss": 0.3032, + "step": 2624 + }, + { + "epoch": 0.8, + "learning_rate": 0.00028345354492511564, + "loss": 0.3668, + "step": 2625 + }, + { + "epoch": 0.8, + "learning_rate": 0.0002834403430006974, + "loss": 0.3183, + "step": 2626 + }, + { + "epoch": 0.8, + "learning_rate": 0.0002834271361193617, + "loss": 0.3194, + "step": 2627 + }, + { + "epoch": 0.8, + "learning_rate": 0.00028341392428159917, + "loss": 0.2337, + "step": 2628 + }, + { + "epoch": 0.8, + "learning_rate": 0.0002834007074879006, + "loss": 0.349, + "step": 2629 + }, + { + "epoch": 0.8, + "learning_rate": 0.00028338748573875696, + "loss": 0.4203, + "step": 2630 + }, + { + "epoch": 0.8, + "learning_rate": 0.00028337425903465936, + "loss": 0.2658, + "step": 2631 + }, + { + "epoch": 0.8, + "learning_rate": 0.00028336102737609915, + "loss": 0.0937, + "step": 2632 + }, + { + "epoch": 0.8, + "learning_rate": 0.0002833477907635679, + "loss": 0.0448, + "step": 2633 + }, + { + "epoch": 0.8, + "learning_rate": 0.0002833345491975572, + "loss": 0.2447, + "step": 2634 + }, + { + "epoch": 0.8, + "learning_rate": 0.000283321302678559, + "loss": 0.2931, + "step": 2635 + }, + { + "epoch": 0.8, + "learning_rate": 0.0002833080512070654, + "loss": 0.1493, + "step": 2636 + }, + { + "epoch": 0.8, + "learning_rate": 0.0002832947947835686, + "loss": 0.1988, + "step": 2637 + }, + { + "epoch": 0.8, + "learning_rate": 0.00028328153340856106, + "loss": 0.2227, + "step": 2638 + }, + { + "epoch": 0.8, + "learning_rate": 0.00028326826708253544, + "loss": 0.3022, + "step": 2639 + }, + { + "epoch": 0.8, + "learning_rate": 0.0002832549958059845, + "loss": 0.3969, + "step": 2640 + }, + { + "epoch": 0.8, + "learning_rate": 0.00028324171957940116, + "loss": 0.247, + "step": 2641 + }, + { + "epoch": 0.8, + "learning_rate": 0.0002832284384032787, + "loss": 0.2796, + "step": 2642 + }, + { + "epoch": 0.8, + "learning_rate": 0.00028321515227811044, + "loss": 0.079, + "step": 2643 + }, + { + "epoch": 0.8, + "learning_rate": 0.00028320186120438993, + "loss": 0.1519, + "step": 2644 + }, + { + "epoch": 0.8, + "learning_rate": 0.0002831885651826109, + "loss": 0.2059, + "step": 2645 + }, + { + "epoch": 0.8, + "learning_rate": 0.00028317526421326724, + "loss": 0.2823, + "step": 2646 + }, + { + "epoch": 0.8, + "learning_rate": 0.00028316195829685304, + "loss": 0.2665, + "step": 2647 + }, + { + "epoch": 0.8, + "learning_rate": 0.0002831486474338626, + "loss": 0.2914, + "step": 2648 + }, + { + "epoch": 0.8, + "learning_rate": 0.00028313533162479035, + "loss": 0.288, + "step": 2649 + }, + { + "epoch": 0.8, + "learning_rate": 0.00028312201087013093, + "loss": 0.2685, + "step": 2650 + }, + { + "epoch": 0.8, + "learning_rate": 0.0002831086851703792, + "loss": 0.3462, + "step": 2651 + }, + { + "epoch": 0.81, + "learning_rate": 0.00028309535452603014, + "loss": 0.1232, + "step": 2652 + }, + { + "epoch": 0.81, + "learning_rate": 0.000283082018937579, + "loss": 0.2011, + "step": 2653 + }, + { + "epoch": 0.81, + "learning_rate": 0.00028306867840552103, + "loss": 0.2888, + "step": 2654 + }, + { + "epoch": 0.81, + "learning_rate": 0.00028305533293035195, + "loss": 0.4717, + "step": 2655 + }, + { + "epoch": 0.81, + "learning_rate": 0.00028304198251256744, + "loss": 0.1238, + "step": 2656 + }, + { + "epoch": 0.81, + "learning_rate": 0.0002830286271526634, + "loss": 0.2843, + "step": 2657 + }, + { + "epoch": 0.81, + "learning_rate": 0.000283015266851136, + "loss": 0.1643, + "step": 2658 + }, + { + "epoch": 0.81, + "learning_rate": 0.0002830019016084814, + "loss": 0.2778, + "step": 2659 + }, + { + "epoch": 0.81, + "learning_rate": 0.0002829885314251963, + "loss": 0.3133, + "step": 2660 + }, + { + "epoch": 0.81, + "learning_rate": 0.00028297515630177717, + "loss": 0.2423, + "step": 2661 + }, + { + "epoch": 0.81, + "learning_rate": 0.00028296177623872096, + "loss": 0.2861, + "step": 2662 + }, + { + "epoch": 0.81, + "learning_rate": 0.0002829483912365246, + "loss": 0.166, + "step": 2663 + }, + { + "epoch": 0.81, + "learning_rate": 0.00028293500129568546, + "loss": 0.1727, + "step": 2664 + }, + { + "epoch": 0.81, + "learning_rate": 0.00028292160641670087, + "loss": 0.4252, + "step": 2665 + }, + { + "epoch": 0.81, + "learning_rate": 0.0002829082066000683, + "loss": 0.1348, + "step": 2666 + }, + { + "epoch": 0.81, + "learning_rate": 0.00028289480184628566, + "loss": 0.2926, + "step": 2667 + }, + { + "epoch": 0.81, + "learning_rate": 0.0002828813921558508, + "loss": 0.2686, + "step": 2668 + }, + { + "epoch": 0.81, + "learning_rate": 0.0002828679775292619, + "loss": 0.1731, + "step": 2669 + }, + { + "epoch": 0.81, + "learning_rate": 0.00028285455796701734, + "loss": 0.285, + "step": 2670 + }, + { + "epoch": 0.81, + "learning_rate": 0.0002828411334696155, + "loss": 0.2841, + "step": 2671 + }, + { + "epoch": 0.81, + "learning_rate": 0.0002828277040375551, + "loss": 0.3598, + "step": 2672 + }, + { + "epoch": 0.81, + "learning_rate": 0.000282814269671335, + "loss": 0.346, + "step": 2673 + }, + { + "epoch": 0.81, + "learning_rate": 0.00028280083037145426, + "loss": 0.2712, + "step": 2674 + }, + { + "epoch": 0.81, + "learning_rate": 0.0002827873861384121, + "loss": 0.3283, + "step": 2675 + }, + { + "epoch": 0.81, + "learning_rate": 0.00028277393697270797, + "loss": 0.1587, + "step": 2676 + }, + { + "epoch": 0.81, + "learning_rate": 0.0002827604828748414, + "loss": 0.1677, + "step": 2677 + }, + { + "epoch": 0.81, + "learning_rate": 0.00028274702384531225, + "loss": 0.2849, + "step": 2678 + }, + { + "epoch": 0.81, + "learning_rate": 0.00028273355988462046, + "loss": 0.4161, + "step": 2679 + }, + { + "epoch": 0.81, + "learning_rate": 0.00028272009099326607, + "loss": 0.1037, + "step": 2680 + }, + { + "epoch": 0.81, + "learning_rate": 0.00028270661717174955, + "loss": 0.3018, + "step": 2681 + }, + { + "epoch": 0.81, + "learning_rate": 0.0002826931384205714, + "loss": 0.3055, + "step": 2682 + }, + { + "epoch": 0.81, + "learning_rate": 0.0002826796547402322, + "loss": 0.1702, + "step": 2683 + }, + { + "epoch": 0.81, + "learning_rate": 0.00028266616613123295, + "loss": 0.3672, + "step": 2684 + }, + { + "epoch": 0.82, + "learning_rate": 0.0002826526725940746, + "loss": 0.2113, + "step": 2685 + }, + { + "epoch": 0.82, + "learning_rate": 0.00028263917412925854, + "loss": 0.3316, + "step": 2686 + }, + { + "epoch": 0.82, + "learning_rate": 0.0002826256707372861, + "loss": 0.1709, + "step": 2687 + }, + { + "epoch": 0.82, + "learning_rate": 0.00028261216241865894, + "loss": 0.1832, + "step": 2688 + }, + { + "epoch": 0.82, + "learning_rate": 0.0002825986491738788, + "loss": 0.2113, + "step": 2689 + }, + { + "epoch": 0.82, + "learning_rate": 0.0002825851310034477, + "loss": 0.263, + "step": 2690 + }, + { + "epoch": 0.82, + "learning_rate": 0.0002825716079078678, + "loss": 0.2749, + "step": 2691 + }, + { + "epoch": 0.82, + "learning_rate": 0.0002825580798876414, + "loss": 0.2663, + "step": 2692 + }, + { + "epoch": 0.82, + "learning_rate": 0.0002825445469432711, + "loss": 0.3025, + "step": 2693 + }, + { + "epoch": 0.82, + "learning_rate": 0.00028253100907525946, + "loss": 0.1546, + "step": 2694 + }, + { + "epoch": 0.82, + "learning_rate": 0.00028251746628410956, + "loss": 0.2793, + "step": 2695 + }, + { + "epoch": 0.82, + "learning_rate": 0.0002825039185703244, + "loss": 0.2866, + "step": 2696 + }, + { + "epoch": 0.82, + "learning_rate": 0.00028249036593440717, + "loss": 0.249, + "step": 2697 + }, + { + "epoch": 0.82, + "learning_rate": 0.0002824768083768614, + "loss": 0.3001, + "step": 2698 + }, + { + "epoch": 0.82, + "learning_rate": 0.00028246324589819075, + "loss": 0.1807, + "step": 2699 + }, + { + "epoch": 0.82, + "learning_rate": 0.0002824496784988989, + "loss": 0.2062, + "step": 2700 + }, + { + "epoch": 0.82, + "learning_rate": 0.00028243610617948985, + "loss": 0.096, + "step": 2701 + }, + { + "epoch": 0.82, + "learning_rate": 0.00028242252894046787, + "loss": 0.1426, + "step": 2702 + }, + { + "epoch": 0.82, + "learning_rate": 0.0002824089467823373, + "loss": 0.1888, + "step": 2703 + }, + { + "epoch": 0.82, + "learning_rate": 0.0002823953597056026, + "loss": 0.1842, + "step": 2704 + }, + { + "epoch": 0.82, + "learning_rate": 0.00028238176771076855, + "loss": 0.3438, + "step": 2705 + }, + { + "epoch": 0.82, + "learning_rate": 0.00028236817079834, + "loss": 0.3248, + "step": 2706 + }, + { + "epoch": 0.82, + "learning_rate": 0.0002823545689688221, + "loss": 0.1331, + "step": 2707 + }, + { + "epoch": 0.82, + "learning_rate": 0.0002823409622227201, + "loss": 0.1972, + "step": 2708 + }, + { + "epoch": 0.82, + "learning_rate": 0.00028232735056053944, + "loss": 0.0681, + "step": 2709 + }, + { + "epoch": 0.82, + "learning_rate": 0.0002823137339827857, + "loss": 0.4132, + "step": 2710 + }, + { + "epoch": 0.82, + "learning_rate": 0.0002823001124899648, + "loss": 0.3165, + "step": 2711 + }, + { + "epoch": 0.82, + "learning_rate": 0.0002822864860825827, + "loss": 0.1851, + "step": 2712 + }, + { + "epoch": 0.82, + "learning_rate": 0.0002822728547611455, + "loss": 0.2708, + "step": 2713 + }, + { + "epoch": 0.82, + "learning_rate": 0.0002822592185261597, + "loss": 0.1629, + "step": 2714 + }, + { + "epoch": 0.82, + "learning_rate": 0.00028224557737813183, + "loss": 0.19, + "step": 2715 + }, + { + "epoch": 0.82, + "learning_rate": 0.00028223193131756845, + "loss": 0.1157, + "step": 2716 + }, + { + "epoch": 0.82, + "learning_rate": 0.0002822182803449766, + "loss": 0.3271, + "step": 2717 + }, + { + "epoch": 0.83, + "learning_rate": 0.00028220462446086343, + "loss": 0.2251, + "step": 2718 + }, + { + "epoch": 0.83, + "learning_rate": 0.00028219096366573616, + "loss": 0.2614, + "step": 2719 + }, + { + "epoch": 0.83, + "learning_rate": 0.00028217729796010217, + "loss": 0.3591, + "step": 2720 + }, + { + "epoch": 0.83, + "learning_rate": 0.00028216362734446917, + "loss": 0.3745, + "step": 2721 + }, + { + "epoch": 0.83, + "learning_rate": 0.000282149951819345, + "loss": 0.2142, + "step": 2722 + }, + { + "epoch": 0.83, + "learning_rate": 0.0002821362713852377, + "loss": 0.148, + "step": 2723 + }, + { + "epoch": 0.83, + "learning_rate": 0.0002821225860426553, + "loss": 0.272, + "step": 2724 + }, + { + "epoch": 0.83, + "learning_rate": 0.00028210889579210634, + "loss": 0.2098, + "step": 2725 + }, + { + "epoch": 0.83, + "learning_rate": 0.0002820952006340993, + "loss": 0.231, + "step": 2726 + }, + { + "epoch": 0.83, + "learning_rate": 0.0002820815005691429, + "loss": 0.3557, + "step": 2727 + }, + { + "epoch": 0.83, + "learning_rate": 0.0002820677955977461, + "loss": 0.3056, + "step": 2728 + }, + { + "epoch": 0.83, + "learning_rate": 0.00028205408572041797, + "loss": 0.4672, + "step": 2729 + }, + { + "epoch": 0.83, + "learning_rate": 0.0002820403709376678, + "loss": 0.3447, + "step": 2730 + }, + { + "epoch": 0.83, + "learning_rate": 0.00028202665125000504, + "loss": 0.2766, + "step": 2731 + }, + { + "epoch": 0.83, + "learning_rate": 0.0002820129266579394, + "loss": 0.1602, + "step": 2732 + }, + { + "epoch": 0.83, + "learning_rate": 0.00028199919716198065, + "loss": 0.133, + "step": 2733 + }, + { + "epoch": 0.83, + "learning_rate": 0.00028198546276263876, + "loss": 0.2877, + "step": 2734 + }, + { + "epoch": 0.83, + "learning_rate": 0.000281971723460424, + "loss": 0.3307, + "step": 2735 + }, + { + "epoch": 0.83, + "learning_rate": 0.00028195797925584676, + "loss": 0.3042, + "step": 2736 + }, + { + "epoch": 0.83, + "learning_rate": 0.0002819442301494175, + "loss": 0.3478, + "step": 2737 + }, + { + "epoch": 0.83, + "learning_rate": 0.000281930476141647, + "loss": 0.3621, + "step": 2738 + }, + { + "epoch": 0.83, + "learning_rate": 0.00028191671723304623, + "loss": 0.2547, + "step": 2739 + }, + { + "epoch": 0.83, + "learning_rate": 0.00028190295342412627, + "loss": 0.1766, + "step": 2740 + }, + { + "epoch": 0.83, + "learning_rate": 0.0002818891847153983, + "loss": 0.3334, + "step": 2741 + }, + { + "epoch": 0.83, + "learning_rate": 0.000281875411107374, + "loss": 0.1989, + "step": 2742 + }, + { + "epoch": 0.83, + "learning_rate": 0.00028186163260056487, + "loss": 0.2245, + "step": 2743 + }, + { + "epoch": 0.83, + "learning_rate": 0.00028184784919548276, + "loss": 0.2581, + "step": 2744 + }, + { + "epoch": 0.83, + "learning_rate": 0.0002818340608926397, + "loss": 0.2093, + "step": 2745 + }, + { + "epoch": 0.83, + "learning_rate": 0.0002818202676925478, + "loss": 0.1968, + "step": 2746 + }, + { + "epoch": 0.83, + "learning_rate": 0.0002818064695957196, + "loss": 0.2583, + "step": 2747 + }, + { + "epoch": 0.83, + "learning_rate": 0.00028179266660266754, + "loss": 0.0873, + "step": 2748 + }, + { + "epoch": 0.83, + "learning_rate": 0.0002817788587139044, + "loss": 0.3419, + "step": 2749 + }, + { + "epoch": 0.83, + "learning_rate": 0.00028176504592994313, + "loss": 0.1565, + "step": 2750 + }, + { + "epoch": 0.84, + "learning_rate": 0.0002817512282512968, + "loss": 0.1243, + "step": 2751 + }, + { + "epoch": 0.84, + "learning_rate": 0.00028173740567847866, + "loss": 0.1802, + "step": 2752 + }, + { + "epoch": 0.84, + "learning_rate": 0.0002817235782120022, + "loss": 0.2511, + "step": 2753 + }, + { + "epoch": 0.84, + "learning_rate": 0.0002817097458523811, + "loss": 0.2849, + "step": 2754 + }, + { + "epoch": 0.84, + "learning_rate": 0.0002816959086001292, + "loss": 0.1832, + "step": 2755 + }, + { + "epoch": 0.84, + "learning_rate": 0.0002816820664557605, + "loss": 0.171, + "step": 2756 + }, + { + "epoch": 0.84, + "learning_rate": 0.00028166821941978914, + "loss": 0.3566, + "step": 2757 + }, + { + "epoch": 0.84, + "learning_rate": 0.0002816543674927296, + "loss": 0.4093, + "step": 2758 + }, + { + "epoch": 0.84, + "learning_rate": 0.00028164051067509633, + "loss": 0.132, + "step": 2759 + }, + { + "epoch": 0.84, + "learning_rate": 0.0002816266489674041, + "loss": 0.1953, + "step": 2760 + }, + { + "epoch": 0.84, + "learning_rate": 0.00028161278237016793, + "loss": 0.1812, + "step": 2761 + }, + { + "epoch": 0.84, + "learning_rate": 0.00028159891088390285, + "loss": 0.2771, + "step": 2762 + }, + { + "epoch": 0.84, + "learning_rate": 0.00028158503450912407, + "loss": 0.1141, + "step": 2763 + }, + { + "epoch": 0.84, + "learning_rate": 0.00028157115324634714, + "loss": 0.2457, + "step": 2764 + }, + { + "epoch": 0.84, + "learning_rate": 0.00028155726709608775, + "loss": 0.3757, + "step": 2765 + }, + { + "epoch": 0.84, + "learning_rate": 0.0002815433760588616, + "loss": 0.2743, + "step": 2766 + }, + { + "epoch": 0.84, + "learning_rate": 0.0002815294801351848, + "loss": 0.1273, + "step": 2767 + }, + { + "epoch": 0.84, + "learning_rate": 0.00028151557932557355, + "loss": 0.1151, + "step": 2768 + }, + { + "epoch": 0.84, + "learning_rate": 0.0002815016736305442, + "loss": 0.109, + "step": 2769 + }, + { + "epoch": 0.84, + "learning_rate": 0.0002814877630506133, + "loss": 0.2279, + "step": 2770 + }, + { + "epoch": 0.84, + "learning_rate": 0.0002814738475862976, + "loss": 0.1263, + "step": 2771 + }, + { + "epoch": 0.84, + "learning_rate": 0.00028145992723811394, + "loss": 0.0822, + "step": 2772 + }, + { + "epoch": 0.84, + "learning_rate": 0.0002814460020065795, + "loss": 0.3049, + "step": 2773 + }, + { + "epoch": 0.84, + "learning_rate": 0.0002814320718922116, + "loss": 0.3453, + "step": 2774 + }, + { + "epoch": 0.84, + "learning_rate": 0.0002814181368955276, + "loss": 0.2781, + "step": 2775 + }, + { + "epoch": 0.84, + "learning_rate": 0.0002814041970170452, + "loss": 0.2899, + "step": 2776 + }, + { + "epoch": 0.84, + "learning_rate": 0.00028139025225728224, + "loss": 0.1927, + "step": 2777 + }, + { + "epoch": 0.84, + "learning_rate": 0.0002813763026167567, + "loss": 0.1704, + "step": 2778 + }, + { + "epoch": 0.84, + "learning_rate": 0.00028136234809598683, + "loss": 0.2539, + "step": 2779 + }, + { + "epoch": 0.84, + "learning_rate": 0.0002813483886954909, + "loss": 0.247, + "step": 2780 + }, + { + "epoch": 0.84, + "learning_rate": 0.00028133442441578745, + "loss": 0.3486, + "step": 2781 + }, + { + "epoch": 0.84, + "learning_rate": 0.0002813204552573953, + "loss": 0.358, + "step": 2782 + }, + { + "epoch": 0.84, + "learning_rate": 0.0002813064812208334, + "loss": 0.2872, + "step": 2783 + }, + { + "epoch": 0.85, + "learning_rate": 0.00028129250230662066, + "loss": 0.1107, + "step": 2784 + }, + { + "epoch": 0.85, + "learning_rate": 0.0002812785185152766, + "loss": 0.2275, + "step": 2785 + }, + { + "epoch": 0.85, + "learning_rate": 0.00028126452984732046, + "loss": 0.2844, + "step": 2786 + }, + { + "epoch": 0.85, + "learning_rate": 0.00028125053630327195, + "loss": 0.4154, + "step": 2787 + }, + { + "epoch": 0.85, + "learning_rate": 0.00028123653788365095, + "loss": 0.1475, + "step": 2788 + }, + { + "epoch": 0.85, + "learning_rate": 0.00028122253458897744, + "loss": 0.1892, + "step": 2789 + }, + { + "epoch": 0.85, + "learning_rate": 0.00028120852641977157, + "loss": 0.2618, + "step": 2790 + }, + { + "epoch": 0.85, + "learning_rate": 0.0002811945133765537, + "loss": 0.3244, + "step": 2791 + }, + { + "epoch": 0.85, + "learning_rate": 0.00028118049545984434, + "loss": 0.2594, + "step": 2792 + }, + { + "epoch": 0.85, + "learning_rate": 0.0002811664726701643, + "loss": 0.1388, + "step": 2793 + }, + { + "epoch": 0.85, + "learning_rate": 0.00028115244500803444, + "loss": 0.2609, + "step": 2794 + }, + { + "epoch": 0.85, + "learning_rate": 0.0002811384124739758, + "loss": 0.1731, + "step": 2795 + }, + { + "epoch": 0.85, + "learning_rate": 0.0002811243750685097, + "loss": 0.2302, + "step": 2796 + }, + { + "epoch": 0.85, + "learning_rate": 0.00028111033279215765, + "loss": 0.1927, + "step": 2797 + }, + { + "epoch": 0.85, + "learning_rate": 0.0002810962856454412, + "loss": 0.2406, + "step": 2798 + }, + { + "epoch": 0.85, + "learning_rate": 0.0002810822336288822, + "loss": 0.1888, + "step": 2799 + }, + { + "epoch": 0.85, + "learning_rate": 0.00028106817674300254, + "loss": 0.2021, + "step": 2800 + }, + { + "epoch": 0.85, + "learning_rate": 0.0002810541149883245, + "loss": 0.1487, + "step": 2801 + }, + { + "epoch": 0.85, + "learning_rate": 0.0002810400483653704, + "loss": 0.2595, + "step": 2802 + }, + { + "epoch": 0.85, + "learning_rate": 0.00028102597687466277, + "loss": 0.2433, + "step": 2803 + }, + { + "epoch": 0.85, + "learning_rate": 0.00028101190051672435, + "loss": 0.2186, + "step": 2804 + }, + { + "epoch": 0.85, + "learning_rate": 0.00028099781929207803, + "loss": 0.1346, + "step": 2805 + }, + { + "epoch": 0.85, + "learning_rate": 0.0002809837332012468, + "loss": 0.3087, + "step": 2806 + }, + { + "epoch": 0.85, + "learning_rate": 0.00028096964224475407, + "loss": 0.4218, + "step": 2807 + }, + { + "epoch": 0.85, + "learning_rate": 0.00028095554642312316, + "loss": 0.0904, + "step": 2808 + }, + { + "epoch": 0.85, + "learning_rate": 0.00028094144573687774, + "loss": 0.1866, + "step": 2809 + }, + { + "epoch": 0.85, + "learning_rate": 0.0002809273401865416, + "loss": 0.2224, + "step": 2810 + }, + { + "epoch": 0.85, + "learning_rate": 0.00028091322977263867, + "loss": 0.3812, + "step": 2811 + }, + { + "epoch": 0.85, + "learning_rate": 0.0002808991144956932, + "loss": 0.3513, + "step": 2812 + }, + { + "epoch": 0.85, + "learning_rate": 0.0002808849943562294, + "loss": 0.2629, + "step": 2813 + }, + { + "epoch": 0.85, + "learning_rate": 0.00028087086935477194, + "loss": 0.1646, + "step": 2814 + }, + { + "epoch": 0.85, + "learning_rate": 0.0002808567394918455, + "loss": 0.2088, + "step": 2815 + }, + { + "epoch": 0.86, + "learning_rate": 0.00028084260476797485, + "loss": 0.1178, + "step": 2816 + }, + { + "epoch": 0.86, + "learning_rate": 0.0002808284651836852, + "loss": 0.2981, + "step": 2817 + }, + { + "epoch": 0.86, + "learning_rate": 0.00028081432073950163, + "loss": 0.2812, + "step": 2818 + }, + { + "epoch": 0.86, + "learning_rate": 0.0002808001714359497, + "loss": 0.2207, + "step": 2819 + }, + { + "epoch": 0.86, + "learning_rate": 0.000280786017273555, + "loss": 0.4173, + "step": 2820 + }, + { + "epoch": 0.86, + "learning_rate": 0.0002807718582528432, + "loss": 0.3902, + "step": 2821 + }, + { + "epoch": 0.86, + "learning_rate": 0.00028075769437434044, + "loss": 0.1241, + "step": 2822 + }, + { + "epoch": 0.86, + "learning_rate": 0.0002807435256385727, + "loss": 0.1963, + "step": 2823 + }, + { + "epoch": 0.86, + "learning_rate": 0.00028072935204606647, + "loss": 0.3101, + "step": 2824 + }, + { + "epoch": 0.86, + "learning_rate": 0.0002807151735973481, + "loss": 0.4083, + "step": 2825 + }, + { + "epoch": 0.86, + "learning_rate": 0.00028070099029294443, + "loss": 0.3701, + "step": 2826 + }, + { + "epoch": 0.86, + "learning_rate": 0.00028068680213338217, + "loss": 0.1228, + "step": 2827 + }, + { + "epoch": 0.86, + "learning_rate": 0.00028067260911918845, + "loss": 0.1671, + "step": 2828 + }, + { + "epoch": 0.86, + "learning_rate": 0.00028065841125089053, + "loss": 0.28, + "step": 2829 + }, + { + "epoch": 0.86, + "learning_rate": 0.0002806442085290158, + "loss": 0.2628, + "step": 2830 + }, + { + "epoch": 0.86, + "learning_rate": 0.00028063000095409176, + "loss": 0.2091, + "step": 2831 + }, + { + "epoch": 0.86, + "learning_rate": 0.00028061578852664633, + "loss": 0.4458, + "step": 2832 + }, + { + "epoch": 0.86, + "learning_rate": 0.00028060157124720737, + "loss": 0.3483, + "step": 2833 + }, + { + "epoch": 0.86, + "learning_rate": 0.000280587349116303, + "loss": 0.2457, + "step": 2834 + }, + { + "epoch": 0.86, + "learning_rate": 0.00028057312213446156, + "loss": 0.1704, + "step": 2835 + }, + { + "epoch": 0.86, + "learning_rate": 0.00028055889030221154, + "loss": 0.2389, + "step": 2836 + }, + { + "epoch": 0.86, + "learning_rate": 0.0002805446536200816, + "loss": 0.3695, + "step": 2837 + }, + { + "epoch": 0.86, + "learning_rate": 0.0002805304120886006, + "loss": 0.0928, + "step": 2838 + }, + { + "epoch": 0.86, + "learning_rate": 0.0002805161657082976, + "loss": 0.4602, + "step": 2839 + }, + { + "epoch": 0.86, + "learning_rate": 0.0002805019144797017, + "loss": 0.3345, + "step": 2840 + }, + { + "epoch": 0.86, + "learning_rate": 0.0002804876584033424, + "loss": 0.15, + "step": 2841 + }, + { + "epoch": 0.86, + "learning_rate": 0.0002804733974797493, + "loss": 0.1458, + "step": 2842 + }, + { + "epoch": 0.86, + "learning_rate": 0.0002804591317094521, + "loss": 0.1689, + "step": 2843 + }, + { + "epoch": 0.86, + "learning_rate": 0.00028044486109298064, + "loss": 0.223, + "step": 2844 + }, + { + "epoch": 0.86, + "learning_rate": 0.00028043058563086517, + "loss": 0.2854, + "step": 2845 + }, + { + "epoch": 0.86, + "learning_rate": 0.0002804163053236359, + "loss": 0.342, + "step": 2846 + }, + { + "epoch": 0.86, + "learning_rate": 0.0002804020201718234, + "loss": 0.2238, + "step": 2847 + }, + { + "epoch": 0.86, + "learning_rate": 0.00028038773017595816, + "loss": 0.2057, + "step": 2848 + }, + { + "epoch": 0.87, + "learning_rate": 0.00028037343533657115, + "loss": 0.3458, + "step": 2849 + }, + { + "epoch": 0.87, + "learning_rate": 0.0002803591356541933, + "loss": 0.2681, + "step": 2850 + }, + { + "epoch": 0.87, + "learning_rate": 0.00028034483112935585, + "loss": 0.1759, + "step": 2851 + }, + { + "epoch": 0.87, + "learning_rate": 0.0002803305217625902, + "loss": 0.1148, + "step": 2852 + }, + { + "epoch": 0.87, + "learning_rate": 0.0002803162075544278, + "loss": 0.3498, + "step": 2853 + }, + { + "epoch": 0.87, + "learning_rate": 0.0002803018885054005, + "loss": 0.3244, + "step": 2854 + }, + { + "epoch": 0.87, + "learning_rate": 0.00028028756461604006, + "loss": 0.2269, + "step": 2855 + }, + { + "epoch": 0.87, + "learning_rate": 0.0002802732358868787, + "loss": 0.171, + "step": 2856 + }, + { + "epoch": 0.87, + "learning_rate": 0.0002802589023184487, + "loss": 0.2071, + "step": 2857 + }, + { + "epoch": 0.87, + "learning_rate": 0.00028024456391128244, + "loss": 0.2708, + "step": 2858 + }, + { + "epoch": 0.87, + "learning_rate": 0.00028023022066591254, + "loss": 0.2348, + "step": 2859 + }, + { + "epoch": 0.87, + "learning_rate": 0.00028021587258287184, + "loss": 0.1686, + "step": 2860 + }, + { + "epoch": 0.87, + "learning_rate": 0.00028020151966269335, + "loss": 0.4509, + "step": 2861 + }, + { + "epoch": 0.87, + "learning_rate": 0.0002801871619059102, + "loss": 0.1086, + "step": 2862 + }, + { + "epoch": 0.87, + "learning_rate": 0.00028017279931305577, + "loss": 0.2836, + "step": 2863 + }, + { + "epoch": 0.87, + "learning_rate": 0.0002801584318846636, + "loss": 0.2156, + "step": 2864 + }, + { + "epoch": 0.87, + "learning_rate": 0.0002801440596212673, + "loss": 0.2417, + "step": 2865 + }, + { + "epoch": 0.87, + "learning_rate": 0.0002801296825234009, + "loss": 0.34, + "step": 2866 + }, + { + "epoch": 0.87, + "learning_rate": 0.0002801153005915983, + "loss": 0.2535, + "step": 2867 + }, + { + "epoch": 0.87, + "learning_rate": 0.00028010091382639393, + "loss": 0.2968, + "step": 2868 + }, + { + "epoch": 0.87, + "learning_rate": 0.00028008652222832214, + "loss": 0.0441, + "step": 2869 + }, + { + "epoch": 0.87, + "learning_rate": 0.00028007212579791746, + "loss": 0.1775, + "step": 2870 + }, + { + "epoch": 0.87, + "learning_rate": 0.0002800577245357148, + "loss": 0.2941, + "step": 2871 + }, + { + "epoch": 0.87, + "learning_rate": 0.000280043318442249, + "loss": 0.2132, + "step": 2872 + }, + { + "epoch": 0.87, + "learning_rate": 0.0002800289075180553, + "loss": 0.3304, + "step": 2873 + }, + { + "epoch": 0.87, + "learning_rate": 0.000280014491763669, + "loss": 0.3152, + "step": 2874 + }, + { + "epoch": 0.87, + "learning_rate": 0.0002800000711796256, + "loss": 0.3198, + "step": 2875 + }, + { + "epoch": 0.87, + "learning_rate": 0.00027998564576646077, + "loss": 0.2334, + "step": 2876 + }, + { + "epoch": 0.87, + "learning_rate": 0.0002799712155247104, + "loss": 0.2312, + "step": 2877 + }, + { + "epoch": 0.87, + "learning_rate": 0.0002799567804549105, + "loss": 0.4106, + "step": 2878 + }, + { + "epoch": 0.87, + "learning_rate": 0.00027994234055759724, + "loss": 0.3122, + "step": 2879 + }, + { + "epoch": 0.87, + "learning_rate": 0.0002799278958333072, + "loss": 0.285, + "step": 2880 + }, + { + "epoch": 0.87, + "learning_rate": 0.0002799134462825767, + "loss": 0.331, + "step": 2881 + }, + { + "epoch": 0.88, + "learning_rate": 0.00027989899190594274, + "loss": 0.3346, + "step": 2882 + }, + { + "epoch": 0.88, + "learning_rate": 0.0002798845327039421, + "loss": 0.425, + "step": 2883 + }, + { + "epoch": 0.88, + "learning_rate": 0.000279870068677112, + "loss": 0.2932, + "step": 2884 + }, + { + "epoch": 0.88, + "learning_rate": 0.0002798555998259897, + "loss": 0.2167, + "step": 2885 + }, + { + "epoch": 0.88, + "learning_rate": 0.00027984112615111263, + "loss": 0.2228, + "step": 2886 + }, + { + "epoch": 0.88, + "learning_rate": 0.00027982664765301846, + "loss": 0.2081, + "step": 2887 + }, + { + "epoch": 0.88, + "learning_rate": 0.0002798121643322451, + "loss": 0.3677, + "step": 2888 + }, + { + "epoch": 0.88, + "learning_rate": 0.0002797976761893304, + "loss": 0.1987, + "step": 2889 + }, + { + "epoch": 0.88, + "learning_rate": 0.0002797831832248128, + "loss": 0.2948, + "step": 2890 + }, + { + "epoch": 0.88, + "learning_rate": 0.00027976868543923045, + "loss": 0.254, + "step": 2891 + }, + { + "epoch": 0.88, + "learning_rate": 0.000279754182833122, + "loss": 0.3475, + "step": 2892 + }, + { + "epoch": 0.88, + "learning_rate": 0.00027973967540702614, + "loss": 0.271, + "step": 2893 + }, + { + "epoch": 0.88, + "learning_rate": 0.0002797251631614818, + "loss": 0.2212, + "step": 2894 + }, + { + "epoch": 0.88, + "learning_rate": 0.0002797106460970281, + "loss": 0.1665, + "step": 2895 + }, + { + "epoch": 0.88, + "learning_rate": 0.0002796961242142042, + "loss": 0.1631, + "step": 2896 + }, + { + "epoch": 0.88, + "learning_rate": 0.00027968159751354965, + "loss": 0.2122, + "step": 2897 + }, + { + "epoch": 0.88, + "learning_rate": 0.0002796670659956041, + "loss": 0.232, + "step": 2898 + }, + { + "epoch": 0.88, + "learning_rate": 0.0002796525296609072, + "loss": 0.1682, + "step": 2899 + }, + { + "epoch": 0.88, + "learning_rate": 0.00027963798850999907, + "loss": 0.3035, + "step": 2900 + }, + { + "epoch": 0.88, + "learning_rate": 0.0002796234425434198, + "loss": 0.3432, + "step": 2901 + }, + { + "epoch": 0.88, + "learning_rate": 0.0002796088917617098, + "loss": 0.1065, + "step": 2902 + }, + { + "epoch": 0.88, + "learning_rate": 0.00027959433616540956, + "loss": 0.3399, + "step": 2903 + }, + { + "epoch": 0.88, + "learning_rate": 0.0002795797757550597, + "loss": 0.2427, + "step": 2904 + }, + { + "epoch": 0.88, + "learning_rate": 0.0002795652105312012, + "loss": 0.2693, + "step": 2905 + }, + { + "epoch": 0.88, + "learning_rate": 0.0002795506404943751, + "loss": 0.25, + "step": 2906 + }, + { + "epoch": 0.88, + "learning_rate": 0.0002795360656451225, + "loss": 0.3361, + "step": 2907 + }, + { + "epoch": 0.88, + "learning_rate": 0.000279521485983985, + "loss": 0.3724, + "step": 2908 + }, + { + "epoch": 0.88, + "learning_rate": 0.0002795069015115042, + "loss": 0.2386, + "step": 2909 + }, + { + "epoch": 0.88, + "learning_rate": 0.0002794923122282217, + "loss": 0.1653, + "step": 2910 + }, + { + "epoch": 0.88, + "learning_rate": 0.00027947771813467947, + "loss": 0.3096, + "step": 2911 + }, + { + "epoch": 0.88, + "learning_rate": 0.0002794631192314198, + "loss": 0.3544, + "step": 2912 + }, + { + "epoch": 0.88, + "learning_rate": 0.0002794485155189849, + "loss": 0.3441, + "step": 2913 + }, + { + "epoch": 0.88, + "learning_rate": 0.0002794339069979172, + "loss": 0.2595, + "step": 2914 + }, + { + "epoch": 0.89, + "learning_rate": 0.00027941929366875944, + "loss": 0.2483, + "step": 2915 + }, + { + "epoch": 0.89, + "learning_rate": 0.0002794046755320544, + "loss": 0.2754, + "step": 2916 + }, + { + "epoch": 0.89, + "learning_rate": 0.00027939005258834516, + "loss": 0.108, + "step": 2917 + }, + { + "epoch": 0.89, + "learning_rate": 0.00027937542483817495, + "loss": 0.3532, + "step": 2918 + }, + { + "epoch": 0.89, + "learning_rate": 0.00027936079228208703, + "loss": 0.2185, + "step": 2919 + }, + { + "epoch": 0.89, + "learning_rate": 0.0002793461549206251, + "loss": 0.2841, + "step": 2920 + }, + { + "epoch": 0.89, + "learning_rate": 0.0002793315127543327, + "loss": 0.1025, + "step": 2921 + }, + { + "epoch": 0.89, + "learning_rate": 0.00027931686578375393, + "loss": 0.2887, + "step": 2922 + }, + { + "epoch": 0.89, + "learning_rate": 0.00027930221400943283, + "loss": 0.4155, + "step": 2923 + }, + { + "epoch": 0.89, + "learning_rate": 0.00027928755743191364, + "loss": 0.3773, + "step": 2924 + }, + { + "epoch": 0.89, + "learning_rate": 0.00027927289605174084, + "loss": 0.3285, + "step": 2925 + }, + { + "epoch": 0.89, + "learning_rate": 0.000279258229869459, + "loss": 0.2393, + "step": 2926 + }, + { + "epoch": 0.89, + "learning_rate": 0.00027924355888561306, + "loss": 0.4374, + "step": 2927 + }, + { + "epoch": 0.89, + "learning_rate": 0.0002792288831007478, + "loss": 0.1063, + "step": 2928 + }, + { + "epoch": 0.89, + "learning_rate": 0.00027921420251540854, + "loss": 0.4349, + "step": 2929 + }, + { + "epoch": 0.89, + "learning_rate": 0.0002791995171301406, + "loss": 0.1938, + "step": 2930 + }, + { + "epoch": 0.89, + "learning_rate": 0.00027918482694548944, + "loss": 0.2145, + "step": 2931 + }, + { + "epoch": 0.89, + "learning_rate": 0.0002791701319620008, + "loss": 0.229, + "step": 2932 + }, + { + "epoch": 0.89, + "learning_rate": 0.00027915543218022055, + "loss": 0.2036, + "step": 2933 + }, + { + "epoch": 0.89, + "learning_rate": 0.00027914072760069474, + "loss": 0.1923, + "step": 2934 + }, + { + "epoch": 0.89, + "learning_rate": 0.00027912601822396957, + "loss": 0.2781, + "step": 2935 + }, + { + "epoch": 0.89, + "learning_rate": 0.0002791113040505915, + "loss": 0.2242, + "step": 2936 + }, + { + "epoch": 0.89, + "learning_rate": 0.00027909658508110715, + "loss": 0.1302, + "step": 2937 + }, + { + "epoch": 0.89, + "learning_rate": 0.00027908186131606316, + "loss": 0.3349, + "step": 2938 + }, + { + "epoch": 0.89, + "learning_rate": 0.0002790671327560066, + "loss": 0.2332, + "step": 2939 + }, + { + "epoch": 0.89, + "learning_rate": 0.0002790523994014845, + "loss": 0.3597, + "step": 2940 + }, + { + "epoch": 0.89, + "learning_rate": 0.00027903766125304423, + "loss": 0.2724, + "step": 2941 + }, + { + "epoch": 0.89, + "learning_rate": 0.0002790229183112333, + "loss": 0.2508, + "step": 2942 + }, + { + "epoch": 0.89, + "learning_rate": 0.00027900817057659926, + "loss": 0.1282, + "step": 2943 + }, + { + "epoch": 0.89, + "learning_rate": 0.00027899341804969, + "loss": 0.2823, + "step": 2944 + }, + { + "epoch": 0.89, + "learning_rate": 0.0002789786607310535, + "loss": 0.3713, + "step": 2945 + }, + { + "epoch": 0.89, + "learning_rate": 0.000278963898621238, + "loss": 0.2512, + "step": 2946 + }, + { + "epoch": 0.89, + "learning_rate": 0.0002789491317207919, + "loss": 0.1621, + "step": 2947 + }, + { + "epoch": 0.9, + "learning_rate": 0.00027893436003026363, + "loss": 0.1696, + "step": 2948 + }, + { + "epoch": 0.9, + "learning_rate": 0.000278919583550202, + "loss": 0.0798, + "step": 2949 + }, + { + "epoch": 0.9, + "learning_rate": 0.0002789048022811559, + "loss": 0.4016, + "step": 2950 + }, + { + "epoch": 0.9, + "learning_rate": 0.00027889001622367443, + "loss": 0.0911, + "step": 2951 + }, + { + "epoch": 0.9, + "learning_rate": 0.00027887522537830677, + "loss": 0.2245, + "step": 2952 + }, + { + "epoch": 0.9, + "learning_rate": 0.00027886042974560246, + "loss": 0.1771, + "step": 2953 + }, + { + "epoch": 0.9, + "learning_rate": 0.00027884562932611103, + "loss": 0.2591, + "step": 2954 + }, + { + "epoch": 0.9, + "learning_rate": 0.00027883082412038233, + "loss": 0.2654, + "step": 2955 + }, + { + "epoch": 0.9, + "learning_rate": 0.0002788160141289663, + "loss": 0.2631, + "step": 2956 + }, + { + "epoch": 0.9, + "learning_rate": 0.0002788011993524131, + "loss": 0.338, + "step": 2957 + }, + { + "epoch": 0.9, + "learning_rate": 0.000278786379791273, + "loss": 0.0909, + "step": 2958 + }, + { + "epoch": 0.9, + "learning_rate": 0.00027877155544609665, + "loss": 0.1327, + "step": 2959 + }, + { + "epoch": 0.9, + "learning_rate": 0.0002787567263174346, + "loss": 0.1947, + "step": 2960 + }, + { + "epoch": 0.9, + "learning_rate": 0.0002787418924058377, + "loss": 0.302, + "step": 2961 + }, + { + "epoch": 0.9, + "learning_rate": 0.00027872705371185705, + "loss": 0.348, + "step": 2962 + }, + { + "epoch": 0.9, + "learning_rate": 0.0002787122102360439, + "loss": 0.2807, + "step": 2963 + }, + { + "epoch": 0.9, + "learning_rate": 0.00027869736197894953, + "loss": 0.2256, + "step": 2964 + }, + { + "epoch": 0.9, + "learning_rate": 0.0002786825089411255, + "loss": 0.4669, + "step": 2965 + }, + { + "epoch": 0.9, + "learning_rate": 0.0002786676511231237, + "loss": 0.2389, + "step": 2966 + }, + { + "epoch": 0.9, + "learning_rate": 0.000278652788525496, + "loss": 0.2763, + "step": 2967 + }, + { + "epoch": 0.9, + "learning_rate": 0.00027863792114879443, + "loss": 0.4711, + "step": 2968 + }, + { + "epoch": 0.9, + "learning_rate": 0.00027862304899357133, + "loss": 0.2104, + "step": 2969 + }, + { + "epoch": 0.9, + "learning_rate": 0.00027860817206037914, + "loss": 0.2793, + "step": 2970 + }, + { + "epoch": 0.9, + "learning_rate": 0.00027859329034977053, + "loss": 0.2689, + "step": 2971 + }, + { + "epoch": 0.9, + "learning_rate": 0.0002785784038622983, + "loss": 0.4088, + "step": 2972 + }, + { + "epoch": 0.9, + "learning_rate": 0.00027856351259851535, + "loss": 0.0908, + "step": 2973 + }, + { + "epoch": 0.9, + "learning_rate": 0.0002785486165589749, + "loss": 0.1338, + "step": 2974 + }, + { + "epoch": 0.9, + "learning_rate": 0.0002785337157442304, + "loss": 0.271, + "step": 2975 + }, + { + "epoch": 0.9, + "learning_rate": 0.0002785188101548352, + "loss": 0.4213, + "step": 2976 + }, + { + "epoch": 0.9, + "learning_rate": 0.0002785038997913431, + "loss": 0.2711, + "step": 2977 + }, + { + "epoch": 0.9, + "learning_rate": 0.000278488984654308, + "loss": 0.0954, + "step": 2978 + }, + { + "epoch": 0.9, + "learning_rate": 0.00027847406474428384, + "loss": 0.0943, + "step": 2979 + }, + { + "epoch": 0.9, + "learning_rate": 0.000278459140061825, + "loss": 0.2283, + "step": 2980 + }, + { + "epoch": 0.91, + "learning_rate": 0.00027844421060748575, + "loss": 0.1015, + "step": 2981 + }, + { + "epoch": 0.91, + "learning_rate": 0.00027842927638182076, + "loss": 0.1019, + "step": 2982 + }, + { + "epoch": 0.91, + "learning_rate": 0.00027841433738538476, + "loss": 0.1867, + "step": 2983 + }, + { + "epoch": 0.91, + "learning_rate": 0.0002783993936187327, + "loss": 0.2826, + "step": 2984 + }, + { + "epoch": 0.91, + "learning_rate": 0.0002783844450824197, + "loss": 0.2547, + "step": 2985 + }, + { + "epoch": 0.91, + "learning_rate": 0.00027836949177700106, + "loss": 0.2411, + "step": 2986 + }, + { + "epoch": 0.91, + "learning_rate": 0.0002783545337030322, + "loss": 0.3398, + "step": 2987 + }, + { + "epoch": 0.91, + "learning_rate": 0.00027833957086106887, + "loss": 0.2708, + "step": 2988 + }, + { + "epoch": 0.91, + "learning_rate": 0.0002783246032516668, + "loss": 0.2745, + "step": 2989 + }, + { + "epoch": 0.91, + "learning_rate": 0.000278309630875382, + "loss": 0.1331, + "step": 2990 + }, + { + "epoch": 0.91, + "learning_rate": 0.0002782946537327707, + "loss": 0.1087, + "step": 2991 + }, + { + "epoch": 0.91, + "learning_rate": 0.0002782796718243893, + "loss": 0.102, + "step": 2992 + }, + { + "epoch": 0.91, + "learning_rate": 0.0002782646851507942, + "loss": 0.2893, + "step": 2993 + }, + { + "epoch": 0.91, + "learning_rate": 0.00027824969371254216, + "loss": 0.1501, + "step": 2994 + }, + { + "epoch": 0.91, + "learning_rate": 0.00027823469751019016, + "loss": 0.2838, + "step": 2995 + }, + { + "epoch": 0.91, + "learning_rate": 0.00027821969654429513, + "loss": 0.2471, + "step": 2996 + }, + { + "epoch": 0.91, + "learning_rate": 0.0002782046908154144, + "loss": 0.1883, + "step": 2997 + }, + { + "epoch": 0.91, + "learning_rate": 0.0002781896803241054, + "loss": 0.332, + "step": 2998 + }, + { + "epoch": 0.91, + "learning_rate": 0.00027817466507092564, + "loss": 0.0579, + "step": 2999 + }, + { + "epoch": 0.91, + "learning_rate": 0.00027815964505643296, + "loss": 0.2257, + "step": 3000 + }, + { + "epoch": 0.91, + "learning_rate": 0.0002781446202811853, + "loss": 0.3317, + "step": 3001 + }, + { + "epoch": 0.91, + "learning_rate": 0.00027812959074574074, + "loss": 0.1632, + "step": 3002 + }, + { + "epoch": 0.91, + "learning_rate": 0.0002781145564506576, + "loss": 0.4839, + "step": 3003 + }, + { + "epoch": 0.91, + "learning_rate": 0.0002780995173964945, + "loss": 0.3565, + "step": 3004 + }, + { + "epoch": 0.91, + "learning_rate": 0.00027808447358380987, + "loss": 0.2017, + "step": 3005 + }, + { + "epoch": 0.91, + "learning_rate": 0.00027806942501316267, + "loss": 0.1961, + "step": 3006 + }, + { + "epoch": 0.91, + "learning_rate": 0.0002780543716851119, + "loss": 0.1499, + "step": 3007 + }, + { + "epoch": 0.91, + "learning_rate": 0.00027803931360021676, + "loss": 0.2741, + "step": 3008 + }, + { + "epoch": 0.91, + "learning_rate": 0.00027802425075903656, + "loss": 0.176, + "step": 3009 + }, + { + "epoch": 0.91, + "learning_rate": 0.0002780091831621309, + "loss": 0.0317, + "step": 3010 + }, + { + "epoch": 0.91, + "learning_rate": 0.0002779941108100594, + "loss": 0.202, + "step": 3011 + }, + { + "epoch": 0.91, + "learning_rate": 0.00027797903370338213, + "loss": 0.1281, + "step": 3012 + }, + { + "epoch": 0.91, + "learning_rate": 0.000277963951842659, + "loss": 0.2853, + "step": 3013 + }, + { + "epoch": 0.92, + "learning_rate": 0.0002779488652284503, + "loss": 0.2241, + "step": 3014 + }, + { + "epoch": 0.92, + "learning_rate": 0.00027793377386131646, + "loss": 0.0949, + "step": 3015 + }, + { + "epoch": 0.92, + "learning_rate": 0.0002779186777418181, + "loss": 0.0699, + "step": 3016 + }, + { + "epoch": 0.92, + "learning_rate": 0.000277903576870516, + "loss": 0.2135, + "step": 3017 + }, + { + "epoch": 0.92, + "learning_rate": 0.00027788847124797104, + "loss": 0.1131, + "step": 3018 + }, + { + "epoch": 0.92, + "learning_rate": 0.00027787336087474446, + "loss": 0.2153, + "step": 3019 + }, + { + "epoch": 0.92, + "learning_rate": 0.0002778582457513975, + "loss": 0.2979, + "step": 3020 + }, + { + "epoch": 0.92, + "learning_rate": 0.0002778431258784916, + "loss": 0.1881, + "step": 3021 + }, + { + "epoch": 0.92, + "learning_rate": 0.0002778280012565885, + "loss": 0.2618, + "step": 3022 + }, + { + "epoch": 0.92, + "learning_rate": 0.00027781287188625005, + "loss": 0.3402, + "step": 3023 + }, + { + "epoch": 0.92, + "learning_rate": 0.00027779773776803816, + "loss": 0.2915, + "step": 3024 + }, + { + "epoch": 0.92, + "learning_rate": 0.0002777825989025151, + "loss": 0.1825, + "step": 3025 + }, + { + "epoch": 0.92, + "learning_rate": 0.00027776745529024317, + "loss": 0.2974, + "step": 3026 + }, + { + "epoch": 0.92, + "learning_rate": 0.00027775230693178503, + "loss": 0.2263, + "step": 3027 + }, + { + "epoch": 0.92, + "learning_rate": 0.00027773715382770325, + "loss": 0.3638, + "step": 3028 + }, + { + "epoch": 0.92, + "learning_rate": 0.00027772199597856083, + "loss": 0.2331, + "step": 3029 + }, + { + "epoch": 0.92, + "learning_rate": 0.00027770683338492077, + "loss": 0.3896, + "step": 3030 + }, + { + "epoch": 0.92, + "learning_rate": 0.0002776916660473464, + "loss": 0.185, + "step": 3031 + }, + { + "epoch": 0.92, + "learning_rate": 0.00027767649396640107, + "loss": 0.2154, + "step": 3032 + }, + { + "epoch": 0.92, + "learning_rate": 0.00027766131714264836, + "loss": 0.1165, + "step": 3033 + }, + { + "epoch": 0.92, + "learning_rate": 0.0002776461355766521, + "loss": 0.3209, + "step": 3034 + }, + { + "epoch": 0.92, + "learning_rate": 0.0002776309492689762, + "loss": 0.2406, + "step": 3035 + }, + { + "epoch": 0.92, + "learning_rate": 0.00027761575822018487, + "loss": 0.1598, + "step": 3036 + }, + { + "epoch": 0.92, + "learning_rate": 0.00027760056243084227, + "loss": 0.4342, + "step": 3037 + }, + { + "epoch": 0.92, + "learning_rate": 0.00027758536190151305, + "loss": 0.3291, + "step": 3038 + }, + { + "epoch": 0.92, + "learning_rate": 0.0002775701566327617, + "loss": 0.1664, + "step": 3039 + }, + { + "epoch": 0.92, + "learning_rate": 0.00027755494662515316, + "loss": 0.1478, + "step": 3040 + }, + { + "epoch": 0.92, + "learning_rate": 0.0002775397318792524, + "loss": 0.2573, + "step": 3041 + }, + { + "epoch": 0.92, + "learning_rate": 0.0002775245123956246, + "loss": 0.1603, + "step": 3042 + }, + { + "epoch": 0.92, + "learning_rate": 0.0002775092881748352, + "loss": 0.2765, + "step": 3043 + }, + { + "epoch": 0.92, + "learning_rate": 0.00027749405921744956, + "loss": 0.1426, + "step": 3044 + }, + { + "epoch": 0.92, + "learning_rate": 0.0002774788255240335, + "loss": 0.2049, + "step": 3045 + }, + { + "epoch": 0.92, + "learning_rate": 0.00027746358709515297, + "loss": 0.2093, + "step": 3046 + }, + { + "epoch": 0.93, + "learning_rate": 0.00027744834393137396, + "loss": 0.0998, + "step": 3047 + }, + { + "epoch": 0.93, + "learning_rate": 0.00027743309603326267, + "loss": 0.4802, + "step": 3048 + }, + { + "epoch": 0.93, + "learning_rate": 0.0002774178434013856, + "loss": 0.3505, + "step": 3049 + }, + { + "epoch": 0.93, + "learning_rate": 0.0002774025860363092, + "loss": 0.2626, + "step": 3050 + }, + { + "epoch": 0.93, + "learning_rate": 0.0002773873239386004, + "loss": 0.3235, + "step": 3051 + }, + { + "epoch": 0.93, + "learning_rate": 0.0002773720571088261, + "loss": 0.1511, + "step": 3052 + }, + { + "epoch": 0.93, + "learning_rate": 0.00027735678554755336, + "loss": 0.1635, + "step": 3053 + }, + { + "epoch": 0.93, + "learning_rate": 0.00027734150925534955, + "loss": 0.2067, + "step": 3054 + }, + { + "epoch": 0.93, + "learning_rate": 0.00027732622823278206, + "loss": 0.2992, + "step": 3055 + }, + { + "epoch": 0.93, + "learning_rate": 0.0002773109424804186, + "loss": 0.3273, + "step": 3056 + }, + { + "epoch": 0.93, + "learning_rate": 0.0002772956519988269, + "loss": 0.3501, + "step": 3057 + }, + { + "epoch": 0.93, + "learning_rate": 0.00027728035678857506, + "loss": 0.1885, + "step": 3058 + }, + { + "epoch": 0.93, + "learning_rate": 0.0002772650568502312, + "loss": 0.3637, + "step": 3059 + }, + { + "epoch": 0.93, + "learning_rate": 0.0002772497521843637, + "loss": 0.0859, + "step": 3060 + }, + { + "epoch": 0.93, + "learning_rate": 0.00027723444279154107, + "loss": 0.1379, + "step": 3061 + }, + { + "epoch": 0.93, + "learning_rate": 0.00027721912867233195, + "loss": 0.4228, + "step": 3062 + }, + { + "epoch": 0.93, + "learning_rate": 0.0002772038098273053, + "loss": 0.1792, + "step": 3063 + }, + { + "epoch": 0.93, + "learning_rate": 0.0002771884862570302, + "loss": 0.1487, + "step": 3064 + }, + { + "epoch": 0.93, + "learning_rate": 0.00027717315796207574, + "loss": 0.1903, + "step": 3065 + }, + { + "epoch": 0.93, + "learning_rate": 0.00027715782494301135, + "loss": 0.2113, + "step": 3066 + }, + { + "epoch": 0.93, + "learning_rate": 0.0002771424872004067, + "loss": 0.3263, + "step": 3067 + }, + { + "epoch": 0.93, + "learning_rate": 0.00027712714473483155, + "loss": 0.1446, + "step": 3068 + }, + { + "epoch": 0.93, + "learning_rate": 0.00027711179754685573, + "loss": 0.1741, + "step": 3069 + }, + { + "epoch": 0.93, + "learning_rate": 0.00027709644563704935, + "loss": 0.3087, + "step": 3070 + }, + { + "epoch": 0.93, + "learning_rate": 0.00027708108900598275, + "loss": 0.2163, + "step": 3071 + }, + { + "epoch": 0.93, + "learning_rate": 0.0002770657276542263, + "loss": 0.2652, + "step": 3072 + }, + { + "epoch": 0.93, + "learning_rate": 0.00027705036158235074, + "loss": 0.0829, + "step": 3073 + }, + { + "epoch": 0.93, + "learning_rate": 0.00027703499079092685, + "loss": 0.4331, + "step": 3074 + }, + { + "epoch": 0.93, + "learning_rate": 0.00027701961528052557, + "loss": 0.2564, + "step": 3075 + }, + { + "epoch": 0.93, + "learning_rate": 0.000277004235051718, + "loss": 0.2909, + "step": 3076 + }, + { + "epoch": 0.93, + "learning_rate": 0.0002769888501050756, + "loss": 0.3197, + "step": 3077 + }, + { + "epoch": 0.93, + "learning_rate": 0.0002769734604411698, + "loss": 0.2132, + "step": 3078 + }, + { + "epoch": 0.93, + "learning_rate": 0.00027695806606057227, + "loss": 0.2108, + "step": 3079 + }, + { + "epoch": 0.94, + "learning_rate": 0.0002769426669638549, + "loss": 0.2105, + "step": 3080 + }, + { + "epoch": 0.94, + "learning_rate": 0.00027692726315158966, + "loss": 0.3406, + "step": 3081 + }, + { + "epoch": 0.94, + "learning_rate": 0.0002769118546243489, + "loss": 0.2024, + "step": 3082 + }, + { + "epoch": 0.94, + "learning_rate": 0.0002768964413827048, + "loss": 0.1227, + "step": 3083 + }, + { + "epoch": 0.94, + "learning_rate": 0.0002768810234272301, + "loss": 0.0645, + "step": 3084 + }, + { + "epoch": 0.94, + "learning_rate": 0.0002768656007584974, + "loss": 0.1705, + "step": 3085 + }, + { + "epoch": 0.94, + "learning_rate": 0.0002768501733770797, + "loss": 0.0636, + "step": 3086 + }, + { + "epoch": 0.94, + "learning_rate": 0.00027683474128355006, + "loss": 0.2961, + "step": 3087 + }, + { + "epoch": 0.94, + "learning_rate": 0.0002768193044784817, + "loss": 0.1309, + "step": 3088 + }, + { + "epoch": 0.94, + "learning_rate": 0.00027680386296244803, + "loss": 0.1639, + "step": 3089 + }, + { + "epoch": 0.94, + "learning_rate": 0.0002767884167360228, + "loss": 0.2948, + "step": 3090 + }, + { + "epoch": 0.94, + "learning_rate": 0.0002767729657997796, + "loss": 0.3198, + "step": 3091 + }, + { + "epoch": 0.94, + "learning_rate": 0.00027675751015429254, + "loss": 0.2826, + "step": 3092 + }, + { + "epoch": 0.94, + "learning_rate": 0.00027674204980013566, + "loss": 0.1946, + "step": 3093 + }, + { + "epoch": 0.94, + "learning_rate": 0.0002767265847378833, + "loss": 0.2728, + "step": 3094 + }, + { + "epoch": 0.94, + "learning_rate": 0.0002767111149681099, + "loss": 0.2506, + "step": 3095 + }, + { + "epoch": 0.94, + "learning_rate": 0.0002766956404913902, + "loss": 0.2833, + "step": 3096 + }, + { + "epoch": 0.94, + "learning_rate": 0.000276680161308299, + "loss": 0.1963, + "step": 3097 + }, + { + "epoch": 0.94, + "learning_rate": 0.0002766646774194113, + "loss": 0.2564, + "step": 3098 + }, + { + "epoch": 0.94, + "learning_rate": 0.00027664918882530225, + "loss": 0.4011, + "step": 3099 + }, + { + "epoch": 0.94, + "learning_rate": 0.00027663369552654725, + "loss": 0.2793, + "step": 3100 + }, + { + "epoch": 0.94, + "learning_rate": 0.0002766181975237218, + "loss": 0.2063, + "step": 3101 + }, + { + "epoch": 0.94, + "learning_rate": 0.00027660269481740156, + "loss": 0.1428, + "step": 3102 + }, + { + "epoch": 0.94, + "learning_rate": 0.0002765871874081625, + "loss": 0.1065, + "step": 3103 + }, + { + "epoch": 0.94, + "learning_rate": 0.0002765716752965807, + "loss": 0.1902, + "step": 3104 + }, + { + "epoch": 0.94, + "learning_rate": 0.00027655615848323225, + "loss": 0.1396, + "step": 3105 + }, + { + "epoch": 0.94, + "learning_rate": 0.0002765406369686937, + "loss": 0.1874, + "step": 3106 + }, + { + "epoch": 0.94, + "learning_rate": 0.00027652511075354155, + "loss": 0.1404, + "step": 3107 + }, + { + "epoch": 0.94, + "learning_rate": 0.00027650957983835255, + "loss": 0.2913, + "step": 3108 + }, + { + "epoch": 0.94, + "learning_rate": 0.0002764940442237037, + "loss": 0.1374, + "step": 3109 + }, + { + "epoch": 0.94, + "learning_rate": 0.000276478503910172, + "loss": 0.3058, + "step": 3110 + }, + { + "epoch": 0.94, + "learning_rate": 0.0002764629588983348, + "loss": 0.1291, + "step": 3111 + }, + { + "epoch": 0.94, + "learning_rate": 0.00027644740918876943, + "loss": 0.0493, + "step": 3112 + }, + { + "epoch": 0.95, + "learning_rate": 0.00027643185478205373, + "loss": 0.1412, + "step": 3113 + }, + { + "epoch": 0.95, + "learning_rate": 0.00027641629567876536, + "loss": 0.2605, + "step": 3114 + }, + { + "epoch": 0.95, + "learning_rate": 0.00027640073187948225, + "loss": 0.2372, + "step": 3115 + }, + { + "epoch": 0.95, + "learning_rate": 0.0002763851633847827, + "loss": 0.2423, + "step": 3116 + }, + { + "epoch": 0.95, + "learning_rate": 0.00027636959019524494, + "loss": 0.2252, + "step": 3117 + }, + { + "epoch": 0.95, + "learning_rate": 0.0002763540123114474, + "loss": 0.3157, + "step": 3118 + }, + { + "epoch": 0.95, + "learning_rate": 0.0002763384297339689, + "loss": 0.4746, + "step": 3119 + }, + { + "epoch": 0.95, + "learning_rate": 0.0002763228424633882, + "loss": 0.2849, + "step": 3120 + }, + { + "epoch": 0.95, + "learning_rate": 0.00027630725050028436, + "loss": 0.112, + "step": 3121 + }, + { + "epoch": 0.95, + "learning_rate": 0.00027629165384523654, + "loss": 0.2601, + "step": 3122 + }, + { + "epoch": 0.95, + "learning_rate": 0.00027627605249882413, + "loss": 0.2981, + "step": 3123 + }, + { + "epoch": 0.95, + "learning_rate": 0.00027626044646162664, + "loss": 0.0742, + "step": 3124 + }, + { + "epoch": 0.95, + "learning_rate": 0.00027624483573422394, + "loss": 0.3298, + "step": 3125 + }, + { + "epoch": 0.95, + "learning_rate": 0.00027622922031719565, + "loss": 0.1262, + "step": 3126 + }, + { + "epoch": 0.95, + "learning_rate": 0.0002762136002111221, + "loss": 0.2643, + "step": 3127 + }, + { + "epoch": 0.95, + "learning_rate": 0.00027619797541658333, + "loss": 0.4341, + "step": 3128 + }, + { + "epoch": 0.95, + "learning_rate": 0.00027618234593415993, + "loss": 0.0843, + "step": 3129 + }, + { + "epoch": 0.95, + "learning_rate": 0.0002761667117644324, + "loss": 0.4072, + "step": 3130 + }, + { + "epoch": 0.95, + "learning_rate": 0.00027615107290798144, + "loss": 0.2317, + "step": 3131 + }, + { + "epoch": 0.95, + "learning_rate": 0.0002761354293653881, + "loss": 0.2567, + "step": 3132 + }, + { + "epoch": 0.95, + "learning_rate": 0.0002761197811372334, + "loss": 0.2248, + "step": 3133 + }, + { + "epoch": 0.95, + "learning_rate": 0.00027610412822409876, + "loss": 0.1167, + "step": 3134 + }, + { + "epoch": 0.95, + "learning_rate": 0.0002760884706265655, + "loss": 0.3215, + "step": 3135 + }, + { + "epoch": 0.95, + "learning_rate": 0.00027607280834521534, + "loss": 0.1403, + "step": 3136 + }, + { + "epoch": 0.95, + "learning_rate": 0.00027605714138063, + "loss": 0.4083, + "step": 3137 + }, + { + "epoch": 0.95, + "learning_rate": 0.00027604146973339154, + "loss": 0.2584, + "step": 3138 + }, + { + "epoch": 0.95, + "learning_rate": 0.0002760257934040821, + "loss": 0.1251, + "step": 3139 + }, + { + "epoch": 0.95, + "learning_rate": 0.00027601011239328406, + "loss": 0.2409, + "step": 3140 + }, + { + "epoch": 0.95, + "learning_rate": 0.0002759944267015798, + "loss": 0.2455, + "step": 3141 + }, + { + "epoch": 0.95, + "learning_rate": 0.0002759787363295521, + "loss": 0.1347, + "step": 3142 + }, + { + "epoch": 0.95, + "learning_rate": 0.0002759630412777837, + "loss": 0.2037, + "step": 3143 + }, + { + "epoch": 0.95, + "learning_rate": 0.0002759473415468577, + "loss": 0.3219, + "step": 3144 + }, + { + "epoch": 0.95, + "learning_rate": 0.0002759316371373574, + "loss": 0.217, + "step": 3145 + }, + { + "epoch": 0.96, + "learning_rate": 0.000275915928049866, + "loss": 0.2135, + "step": 3146 + }, + { + "epoch": 0.96, + "learning_rate": 0.0002759002142849672, + "loss": 0.3352, + "step": 3147 + }, + { + "epoch": 0.96, + "learning_rate": 0.0002758844958432445, + "loss": 0.2236, + "step": 3148 + }, + { + "epoch": 0.96, + "learning_rate": 0.00027586877272528206, + "loss": 0.0996, + "step": 3149 + }, + { + "epoch": 0.96, + "learning_rate": 0.0002758530449316638, + "loss": 0.2285, + "step": 3150 + }, + { + "epoch": 0.96, + "learning_rate": 0.0002758373124629739, + "loss": 0.2598, + "step": 3151 + }, + { + "epoch": 0.96, + "learning_rate": 0.00027582157531979694, + "loss": 0.2387, + "step": 3152 + }, + { + "epoch": 0.96, + "learning_rate": 0.00027580583350271737, + "loss": 0.2015, + "step": 3153 + }, + { + "epoch": 0.96, + "learning_rate": 0.0002757900870123201, + "loss": 0.1831, + "step": 3154 + }, + { + "epoch": 0.96, + "learning_rate": 0.0002757743358491899, + "loss": 0.2014, + "step": 3155 + }, + { + "epoch": 0.96, + "learning_rate": 0.0002757585800139119, + "loss": 0.1008, + "step": 3156 + }, + { + "epoch": 0.96, + "learning_rate": 0.0002757428195070715, + "loss": 0.132, + "step": 3157 + }, + { + "epoch": 0.96, + "learning_rate": 0.0002757270543292541, + "loss": 0.3459, + "step": 3158 + }, + { + "epoch": 0.96, + "learning_rate": 0.0002757112844810453, + "loss": 0.1536, + "step": 3159 + }, + { + "epoch": 0.96, + "learning_rate": 0.00027569550996303096, + "loss": 0.1862, + "step": 3160 + }, + { + "epoch": 0.96, + "learning_rate": 0.000275679730775797, + "loss": 0.298, + "step": 3161 + }, + { + "epoch": 0.96, + "learning_rate": 0.00027566394691992956, + "loss": 0.2687, + "step": 3162 + }, + { + "epoch": 0.96, + "learning_rate": 0.0002756481583960151, + "loss": 0.3277, + "step": 3163 + }, + { + "epoch": 0.96, + "learning_rate": 0.00027563236520463996, + "loss": 0.2488, + "step": 3164 + }, + { + "epoch": 0.96, + "learning_rate": 0.00027561656734639085, + "loss": 0.1432, + "step": 3165 + }, + { + "epoch": 0.96, + "learning_rate": 0.0002756007648218546, + "loss": 0.2672, + "step": 3166 + }, + { + "epoch": 0.96, + "learning_rate": 0.0002755849576316183, + "loss": 0.1289, + "step": 3167 + }, + { + "epoch": 0.96, + "learning_rate": 0.0002755691457762691, + "loss": 0.2648, + "step": 3168 + }, + { + "epoch": 0.96, + "learning_rate": 0.0002755533292563944, + "loss": 0.202, + "step": 3169 + }, + { + "epoch": 0.96, + "learning_rate": 0.0002755375080725816, + "loss": 0.1639, + "step": 3170 + }, + { + "epoch": 0.96, + "learning_rate": 0.00027552168222541855, + "loss": 0.2008, + "step": 3171 + }, + { + "epoch": 0.96, + "learning_rate": 0.00027550585171549316, + "loss": 0.3806, + "step": 3172 + }, + { + "epoch": 0.96, + "learning_rate": 0.00027549001654339337, + "loss": 0.3185, + "step": 3173 + }, + { + "epoch": 0.96, + "learning_rate": 0.0002754741767097074, + "loss": 0.3633, + "step": 3174 + }, + { + "epoch": 0.96, + "learning_rate": 0.00027545833221502385, + "loss": 0.3652, + "step": 3175 + }, + { + "epoch": 0.96, + "learning_rate": 0.00027544248305993107, + "loss": 0.3253, + "step": 3176 + }, + { + "epoch": 0.96, + "learning_rate": 0.00027542662924501796, + "loss": 0.1065, + "step": 3177 + }, + { + "epoch": 0.96, + "learning_rate": 0.00027541077077087336, + "loss": 0.3297, + "step": 3178 + }, + { + "epoch": 0.97, + "learning_rate": 0.0002753949076380863, + "loss": 0.284, + "step": 3179 + }, + { + "epoch": 0.97, + "learning_rate": 0.0002753790398472463, + "loss": 0.2489, + "step": 3180 + }, + { + "epoch": 0.97, + "learning_rate": 0.00027536316739894257, + "loss": 0.3624, + "step": 3181 + }, + { + "epoch": 0.97, + "learning_rate": 0.0002753472902937648, + "loss": 0.1805, + "step": 3182 + }, + { + "epoch": 0.97, + "learning_rate": 0.0002753314085323027, + "loss": 0.1206, + "step": 3183 + }, + { + "epoch": 0.97, + "learning_rate": 0.00027531552211514635, + "loss": 0.2407, + "step": 3184 + }, + { + "epoch": 0.97, + "learning_rate": 0.00027529963104288587, + "loss": 0.1456, + "step": 3185 + }, + { + "epoch": 0.97, + "learning_rate": 0.0002752837353161115, + "loss": 0.2735, + "step": 3186 + }, + { + "epoch": 0.97, + "learning_rate": 0.00027526783493541377, + "loss": 0.277, + "step": 3187 + }, + { + "epoch": 0.97, + "learning_rate": 0.00027525192990138334, + "loss": 0.1549, + "step": 3188 + }, + { + "epoch": 0.97, + "learning_rate": 0.000275236020214611, + "loss": 0.0884, + "step": 3189 + }, + { + "epoch": 0.97, + "learning_rate": 0.00027522010587568773, + "loss": 0.2218, + "step": 3190 + }, + { + "epoch": 0.97, + "learning_rate": 0.00027520418688520477, + "loss": 0.2646, + "step": 3191 + }, + { + "epoch": 0.97, + "learning_rate": 0.00027518826324375345, + "loss": 0.2875, + "step": 3192 + }, + { + "epoch": 0.97, + "learning_rate": 0.0002751723349519252, + "loss": 0.1993, + "step": 3193 + }, + { + "epoch": 0.97, + "learning_rate": 0.00027515640201031174, + "loss": 0.437, + "step": 3194 + }, + { + "epoch": 0.97, + "learning_rate": 0.000275140464419505, + "loss": 0.2915, + "step": 3195 + }, + { + "epoch": 0.97, + "learning_rate": 0.00027512452218009703, + "loss": 0.2211, + "step": 3196 + }, + { + "epoch": 0.97, + "learning_rate": 0.00027510857529267996, + "loss": 0.286, + "step": 3197 + }, + { + "epoch": 0.97, + "learning_rate": 0.00027509262375784616, + "loss": 0.1374, + "step": 3198 + }, + { + "epoch": 0.97, + "learning_rate": 0.00027507666757618825, + "loss": 0.3067, + "step": 3199 + }, + { + "epoch": 0.97, + "learning_rate": 0.0002750607067482989, + "loss": 0.282, + "step": 3200 + }, + { + "epoch": 0.97, + "learning_rate": 0.00027504474127477106, + "loss": 0.185, + "step": 3201 + }, + { + "epoch": 0.97, + "learning_rate": 0.00027502877115619773, + "loss": 0.2296, + "step": 3202 + }, + { + "epoch": 0.97, + "learning_rate": 0.00027501279639317226, + "loss": 0.2104, + "step": 3203 + }, + { + "epoch": 0.97, + "learning_rate": 0.000274996816986288, + "loss": 0.2042, + "step": 3204 + }, + { + "epoch": 0.97, + "learning_rate": 0.00027498083293613844, + "loss": 0.1803, + "step": 3205 + }, + { + "epoch": 0.97, + "learning_rate": 0.00027496484424331755, + "loss": 0.287, + "step": 3206 + }, + { + "epoch": 0.97, + "learning_rate": 0.0002749488509084191, + "loss": 0.258, + "step": 3207 + }, + { + "epoch": 0.97, + "learning_rate": 0.00027493285293203726, + "loss": 0.0719, + "step": 3208 + }, + { + "epoch": 0.97, + "learning_rate": 0.0002749168503147663, + "loss": 0.1313, + "step": 3209 + }, + { + "epoch": 0.97, + "learning_rate": 0.00027490084305720067, + "loss": 0.233, + "step": 3210 + }, + { + "epoch": 0.97, + "learning_rate": 0.00027488483115993496, + "loss": 0.2434, + "step": 3211 + }, + { + "epoch": 0.98, + "learning_rate": 0.000274868814623564, + "loss": 0.1712, + "step": 3212 + }, + { + "epoch": 0.98, + "learning_rate": 0.0002748527934486828, + "loss": 0.2347, + "step": 3213 + }, + { + "epoch": 0.98, + "learning_rate": 0.00027483676763588637, + "loss": 0.1736, + "step": 3214 + }, + { + "epoch": 0.98, + "learning_rate": 0.00027482073718577017, + "loss": 0.1758, + "step": 3215 + }, + { + "epoch": 0.98, + "learning_rate": 0.0002748047020989296, + "loss": 0.139, + "step": 3216 + }, + { + "epoch": 0.98, + "learning_rate": 0.0002747886623759603, + "loss": 0.3934, + "step": 3217 + }, + { + "epoch": 0.98, + "learning_rate": 0.0002747726180174582, + "loss": 0.2685, + "step": 3218 + }, + { + "epoch": 0.98, + "learning_rate": 0.00027475656902401917, + "loss": 0.2739, + "step": 3219 + }, + { + "epoch": 0.98, + "learning_rate": 0.00027474051539623946, + "loss": 0.4702, + "step": 3220 + }, + { + "epoch": 0.98, + "learning_rate": 0.00027472445713471545, + "loss": 0.1935, + "step": 3221 + }, + { + "epoch": 0.98, + "learning_rate": 0.0002747083942400436, + "loss": 0.288, + "step": 3222 + }, + { + "epoch": 0.98, + "learning_rate": 0.0002746923267128205, + "loss": 0.078, + "step": 3223 + }, + { + "epoch": 0.98, + "learning_rate": 0.00027467625455364326, + "loss": 0.3898, + "step": 3224 + }, + { + "epoch": 0.98, + "learning_rate": 0.0002746601777631087, + "loss": 0.0758, + "step": 3225 + }, + { + "epoch": 0.98, + "learning_rate": 0.0002746440963418141, + "loss": 0.3802, + "step": 3226 + }, + { + "epoch": 0.98, + "learning_rate": 0.00027462801029035685, + "loss": 0.1821, + "step": 3227 + }, + { + "epoch": 0.98, + "learning_rate": 0.00027461191960933445, + "loss": 0.1343, + "step": 3228 + }, + { + "epoch": 0.98, + "learning_rate": 0.00027459582429934475, + "loss": 0.1599, + "step": 3229 + }, + { + "epoch": 0.98, + "learning_rate": 0.0002745797243609855, + "loss": 0.3983, + "step": 3230 + }, + { + "epoch": 0.98, + "learning_rate": 0.0002745636197948548, + "loss": 0.1683, + "step": 3231 + }, + { + "epoch": 0.98, + "learning_rate": 0.0002745475106015509, + "loss": 0.2656, + "step": 3232 + }, + { + "epoch": 0.98, + "learning_rate": 0.00027453139678167225, + "loss": 0.2235, + "step": 3233 + }, + { + "epoch": 0.98, + "learning_rate": 0.00027451527833581736, + "loss": 0.3542, + "step": 3234 + }, + { + "epoch": 0.98, + "learning_rate": 0.00027449915526458503, + "loss": 0.1997, + "step": 3235 + }, + { + "epoch": 0.98, + "learning_rate": 0.00027448302756857417, + "loss": 0.2391, + "step": 3236 + }, + { + "epoch": 0.98, + "learning_rate": 0.0002744668952483839, + "loss": 0.2282, + "step": 3237 + }, + { + "epoch": 0.98, + "learning_rate": 0.0002744507583046134, + "loss": 0.2222, + "step": 3238 + }, + { + "epoch": 0.98, + "learning_rate": 0.00027443461673786227, + "loss": 0.2152, + "step": 3239 + }, + { + "epoch": 0.98, + "learning_rate": 0.00027441847054873, + "loss": 0.3113, + "step": 3240 + }, + { + "epoch": 0.98, + "learning_rate": 0.0002744023197378164, + "loss": 0.2279, + "step": 3241 + }, + { + "epoch": 0.98, + "learning_rate": 0.0002743861643057214, + "loss": 0.1478, + "step": 3242 + }, + { + "epoch": 0.98, + "learning_rate": 0.0002743700042530452, + "loss": 0.1816, + "step": 3243 + }, + { + "epoch": 0.98, + "learning_rate": 0.000274353839580388, + "loss": 0.2872, + "step": 3244 + }, + { + "epoch": 0.99, + "learning_rate": 0.00027433767028835036, + "loss": 0.2878, + "step": 3245 + }, + { + "epoch": 0.99, + "learning_rate": 0.0002743214963775329, + "loss": 0.2918, + "step": 3246 + }, + { + "epoch": 0.99, + "learning_rate": 0.0002743053178485364, + "loss": 0.1381, + "step": 3247 + }, + { + "epoch": 0.99, + "learning_rate": 0.00027428913470196187, + "loss": 0.0711, + "step": 3248 + }, + { + "epoch": 0.99, + "learning_rate": 0.00027427294693841043, + "loss": 0.2049, + "step": 3249 + }, + { + "epoch": 0.99, + "learning_rate": 0.00027425675455848347, + "loss": 0.2856, + "step": 3250 + }, + { + "epoch": 0.99, + "learning_rate": 0.00027424055756278243, + "loss": 0.1846, + "step": 3251 + }, + { + "epoch": 0.99, + "learning_rate": 0.00027422435595190906, + "loss": 0.4586, + "step": 3252 + }, + { + "epoch": 0.99, + "learning_rate": 0.0002742081497264651, + "loss": 0.3633, + "step": 3253 + }, + { + "epoch": 0.99, + "learning_rate": 0.00027419193888705265, + "loss": 0.2142, + "step": 3254 + }, + { + "epoch": 0.99, + "learning_rate": 0.0002741757234342739, + "loss": 0.2346, + "step": 3255 + }, + { + "epoch": 0.99, + "learning_rate": 0.00027415950336873103, + "loss": 0.2727, + "step": 3256 + }, + { + "epoch": 0.99, + "learning_rate": 0.0002741432786910268, + "loss": 0.2768, + "step": 3257 + }, + { + "epoch": 0.99, + "learning_rate": 0.0002741270494017638, + "loss": 0.0553, + "step": 3258 + }, + { + "epoch": 0.99, + "learning_rate": 0.0002741108155015449, + "loss": 0.2747, + "step": 3259 + }, + { + "epoch": 0.99, + "learning_rate": 0.00027409457699097324, + "loss": 0.1087, + "step": 3260 + }, + { + "epoch": 0.99, + "learning_rate": 0.00027407833387065186, + "loss": 0.2803, + "step": 3261 + }, + { + "epoch": 0.99, + "learning_rate": 0.00027406208614118424, + "loss": 0.2811, + "step": 3262 + }, + { + "epoch": 0.99, + "learning_rate": 0.00027404583380317396, + "loss": 0.297, + "step": 3263 + }, + { + "epoch": 0.99, + "learning_rate": 0.0002740295768572247, + "loss": 0.242, + "step": 3264 + }, + { + "epoch": 0.99, + "learning_rate": 0.0002740133153039403, + "loss": 0.2259, + "step": 3265 + }, + { + "epoch": 0.99, + "learning_rate": 0.00027399704914392504, + "loss": 0.3109, + "step": 3266 + }, + { + "epoch": 0.99, + "learning_rate": 0.0002739807783777829, + "loss": 0.2662, + "step": 3267 + }, + { + "epoch": 0.99, + "learning_rate": 0.00027396450300611855, + "loss": 0.2297, + "step": 3268 + }, + { + "epoch": 0.99, + "learning_rate": 0.00027394822302953634, + "loss": 0.2364, + "step": 3269 + }, + { + "epoch": 0.99, + "learning_rate": 0.0002739319384486411, + "loss": 0.2125, + "step": 3270 + }, + { + "epoch": 0.99, + "learning_rate": 0.00027391564926403783, + "loss": 0.294, + "step": 3271 + }, + { + "epoch": 0.99, + "learning_rate": 0.00027389935547633156, + "loss": 0.2445, + "step": 3272 + }, + { + "epoch": 0.99, + "learning_rate": 0.0002738830570861276, + "loss": 0.1727, + "step": 3273 + }, + { + "epoch": 0.99, + "learning_rate": 0.0002738667540940313, + "loss": 0.1416, + "step": 3274 + }, + { + "epoch": 0.99, + "learning_rate": 0.0002738504465006483, + "loss": 0.302, + "step": 3275 + }, + { + "epoch": 0.99, + "learning_rate": 0.0002738341343065845, + "loss": 0.3265, + "step": 3276 + }, + { + "epoch": 0.99, + "learning_rate": 0.00027381781751244564, + "loss": 0.0901, + "step": 3277 + }, + { + "epoch": 1.0, + "learning_rate": 0.000273801496118838, + "loss": 0.2115, + "step": 3278 + }, + { + "epoch": 1.0, + "learning_rate": 0.0002737851701263678, + "loss": 0.2371, + "step": 3279 + }, + { + "epoch": 1.0, + "learning_rate": 0.00027376883953564155, + "loss": 0.2913, + "step": 3280 + }, + { + "epoch": 1.0, + "learning_rate": 0.00027375250434726587, + "loss": 0.1525, + "step": 3281 + }, + { + "epoch": 1.0, + "learning_rate": 0.0002737361645618475, + "loss": 0.3679, + "step": 3282 + }, + { + "epoch": 1.0, + "learning_rate": 0.00027371982017999354, + "loss": 0.1511, + "step": 3283 + }, + { + "epoch": 1.0, + "learning_rate": 0.0002737034712023111, + "loss": 0.2491, + "step": 3284 + }, + { + "epoch": 1.0, + "learning_rate": 0.00027368711762940735, + "loss": 0.2417, + "step": 3285 + }, + { + "epoch": 1.0, + "learning_rate": 0.00027367075946188997, + "loss": 0.0439, + "step": 3286 + }, + { + "epoch": 1.0, + "learning_rate": 0.0002736543967003665, + "loss": 0.3527, + "step": 3287 + }, + { + "epoch": 1.0, + "learning_rate": 0.0002736380293454448, + "loss": 0.1656, + "step": 3288 + }, + { + "epoch": 1.0, + "learning_rate": 0.00027362165739773286, + "loss": 0.2286, + "step": 3289 + }, + { + "epoch": 1.0, + "learning_rate": 0.0002736052808578389, + "loss": 0.1586, + "step": 3290 + }, + { + "epoch": 1.0, + "learning_rate": 0.0002735888997263712, + "loss": 0.29, + "step": 3291 + }, + { + "epoch": 1.0, + "learning_rate": 0.00027357251400393835, + "loss": 0.0726, + "step": 3292 + }, + { + "epoch": 1.0, + "learning_rate": 0.0002735561236911489, + "loss": 0.235, + "step": 3293 + }, + { + "epoch": 1.0, + "learning_rate": 0.00027353972878861187, + "loss": 0.12, + "step": 3294 + }, + { + "epoch": 1.0, + "learning_rate": 0.00027352332929693614, + "loss": 0.1328, + "step": 3295 + }, + { + "epoch": 1.0, + "learning_rate": 0.00027350692521673094, + "loss": 0.168, + "step": 3296 + }, + { + "epoch": 1.0, + "learning_rate": 0.00027349051654860574, + "loss": 0.2157, + "step": 3297 + }, + { + "epoch": 1.0, + "learning_rate": 0.00027347410329316987, + "loss": 0.0155, + "step": 3298 + }, + { + "epoch": 1.0, + "learning_rate": 0.00027345768545103323, + "loss": 0.1662, + "step": 3299 + }, + { + "epoch": 1.0, + "learning_rate": 0.00027344126302280555, + "loss": 0.1112, + "step": 3300 + }, + { + "epoch": 1.0, + "learning_rate": 0.00027342483600909694, + "loss": 0.1342, + "step": 3301 + }, + { + "epoch": 1.0, + "learning_rate": 0.00027340840441051766, + "loss": 0.1215, + "step": 3302 + }, + { + "epoch": 1.0, + "learning_rate": 0.000273391968227678, + "loss": 0.0831, + "step": 3303 + }, + { + "epoch": 1.0, + "learning_rate": 0.0002733755274611886, + "loss": 0.3257, + "step": 3304 + }, + { + "epoch": 1.0, + "learning_rate": 0.0002733590821116601, + "loss": 0.1576, + "step": 3305 + }, + { + "epoch": 1.0, + "learning_rate": 0.00027334263217970354, + "loss": 0.2056, + "step": 3306 + }, + { + "epoch": 1.0, + "learning_rate": 0.0002733261776659298, + "loss": 0.12, + "step": 3307 + }, + { + "epoch": 1.0, + "learning_rate": 0.00027330971857095026, + "loss": 0.2468, + "step": 3308 + }, + { + "epoch": 1.0, + "learning_rate": 0.00027329325489537626, + "loss": 0.1675, + "step": 3309 + }, + { + "epoch": 1.01, + "learning_rate": 0.00027327678663981945, + "loss": 0.0973, + "step": 3310 + }, + { + "epoch": 1.01, + "learning_rate": 0.00027326031380489146, + "loss": 0.1616, + "step": 3311 + }, + { + "epoch": 1.01, + "learning_rate": 0.0002732438363912043, + "loss": 0.2437, + "step": 3312 + }, + { + "epoch": 1.01, + "learning_rate": 0.00027322735439937, + "loss": 0.0661, + "step": 3313 + }, + { + "epoch": 1.01, + "learning_rate": 0.00027321086783000086, + "loss": 0.118, + "step": 3314 + }, + { + "epoch": 1.01, + "learning_rate": 0.0002731943766837093, + "loss": 0.0296, + "step": 3315 + }, + { + "epoch": 1.01, + "learning_rate": 0.0002731778809611079, + "loss": 0.1171, + "step": 3316 + }, + { + "epoch": 1.01, + "learning_rate": 0.0002731613806628094, + "loss": 0.3711, + "step": 3317 + }, + { + "epoch": 1.01, + "learning_rate": 0.00027314487578942683, + "loss": 0.2328, + "step": 3318 + }, + { + "epoch": 1.01, + "learning_rate": 0.00027312836634157327, + "loss": 0.1042, + "step": 3319 + }, + { + "epoch": 1.01, + "learning_rate": 0.0002731118523198619, + "loss": 0.1437, + "step": 3320 + }, + { + "epoch": 1.01, + "learning_rate": 0.00027309533372490627, + "loss": 0.2858, + "step": 3321 + }, + { + "epoch": 1.01, + "learning_rate": 0.00027307881055731994, + "loss": 0.3004, + "step": 3322 + }, + { + "epoch": 1.01, + "learning_rate": 0.0002730622828177168, + "loss": 0.2877, + "step": 3323 + }, + { + "epoch": 1.01, + "learning_rate": 0.0002730457505067107, + "loss": 0.2514, + "step": 3324 + }, + { + "epoch": 1.01, + "learning_rate": 0.0002730292136249157, + "loss": 0.158, + "step": 3325 + }, + { + "epoch": 1.01, + "learning_rate": 0.0002730126721729463, + "loss": 0.154, + "step": 3326 + }, + { + "epoch": 1.01, + "learning_rate": 0.00027299612615141685, + "loss": 0.0426, + "step": 3327 + }, + { + "epoch": 1.01, + "learning_rate": 0.00027297957556094197, + "loss": 0.1141, + "step": 3328 + }, + { + "epoch": 1.01, + "learning_rate": 0.0002729630204021365, + "loss": 0.1253, + "step": 3329 + }, + { + "epoch": 1.01, + "learning_rate": 0.0002729464606756154, + "loss": 0.1381, + "step": 3330 + }, + { + "epoch": 1.01, + "learning_rate": 0.00027292989638199385, + "loss": 0.2154, + "step": 3331 + }, + { + "epoch": 1.01, + "learning_rate": 0.0002729133275218871, + "loss": 0.3223, + "step": 3332 + }, + { + "epoch": 1.01, + "learning_rate": 0.0002728967540959107, + "loss": 0.1823, + "step": 3333 + }, + { + "epoch": 1.01, + "learning_rate": 0.0002728801761046803, + "loss": 0.1217, + "step": 3334 + }, + { + "epoch": 1.01, + "learning_rate": 0.00027286359354881164, + "loss": 0.2968, + "step": 3335 + }, + { + "epoch": 1.01, + "learning_rate": 0.0002728470064289208, + "loss": 0.1094, + "step": 3336 + }, + { + "epoch": 1.01, + "learning_rate": 0.00027283041474562397, + "loss": 0.255, + "step": 3337 + }, + { + "epoch": 1.01, + "learning_rate": 0.00027281381849953736, + "loss": 0.2227, + "step": 3338 + }, + { + "epoch": 1.01, + "learning_rate": 0.00027279721769127757, + "loss": 0.1967, + "step": 3339 + }, + { + "epoch": 1.01, + "learning_rate": 0.00027278061232146125, + "loss": 0.0713, + "step": 3340 + }, + { + "epoch": 1.01, + "learning_rate": 0.0002727640023907052, + "loss": 0.0947, + "step": 3341 + }, + { + "epoch": 1.01, + "learning_rate": 0.0002727473878996265, + "loss": 0.1423, + "step": 3342 + }, + { + "epoch": 1.02, + "learning_rate": 0.00027273076884884223, + "loss": 0.1496, + "step": 3343 + }, + { + "epoch": 1.02, + "learning_rate": 0.00027271414523896985, + "loss": 0.1354, + "step": 3344 + }, + { + "epoch": 1.02, + "learning_rate": 0.00027269751707062684, + "loss": 0.0356, + "step": 3345 + }, + { + "epoch": 1.02, + "learning_rate": 0.0002726808843444309, + "loss": 0.1584, + "step": 3346 + }, + { + "epoch": 1.02, + "learning_rate": 0.0002726642470609998, + "loss": 0.0764, + "step": 3347 + }, + { + "epoch": 1.02, + "learning_rate": 0.00027264760522095164, + "loss": 0.1286, + "step": 3348 + }, + { + "epoch": 1.02, + "learning_rate": 0.00027263095882490467, + "loss": 0.2448, + "step": 3349 + }, + { + "epoch": 1.02, + "learning_rate": 0.0002726143078734771, + "loss": 0.1866, + "step": 3350 + }, + { + "epoch": 1.02, + "learning_rate": 0.00027259765236728765, + "loss": 0.2669, + "step": 3351 + }, + { + "epoch": 1.02, + "learning_rate": 0.0002725809923069549, + "loss": 0.1869, + "step": 3352 + }, + { + "epoch": 1.02, + "learning_rate": 0.00027256432769309774, + "loss": 0.2645, + "step": 3353 + }, + { + "epoch": 1.02, + "learning_rate": 0.00027254765852633523, + "loss": 0.0488, + "step": 3354 + }, + { + "epoch": 1.02, + "learning_rate": 0.0002725309848072866, + "loss": 0.0975, + "step": 3355 + }, + { + "epoch": 1.02, + "learning_rate": 0.00027251430653657115, + "loss": 0.1217, + "step": 3356 + }, + { + "epoch": 1.02, + "learning_rate": 0.0002724976237148085, + "loss": 0.1601, + "step": 3357 + }, + { + "epoch": 1.02, + "learning_rate": 0.00027248093634261837, + "loss": 0.158, + "step": 3358 + }, + { + "epoch": 1.02, + "learning_rate": 0.00027246424442062056, + "loss": 0.1234, + "step": 3359 + }, + { + "epoch": 1.02, + "learning_rate": 0.00027244754794943527, + "loss": 0.085, + "step": 3360 + }, + { + "epoch": 1.02, + "learning_rate": 0.00027243084692968263, + "loss": 0.1505, + "step": 3361 + }, + { + "epoch": 1.02, + "learning_rate": 0.000272414141361983, + "loss": 0.1158, + "step": 3362 + }, + { + "epoch": 1.02, + "learning_rate": 0.00027239743124695705, + "loss": 0.1963, + "step": 3363 + }, + { + "epoch": 1.02, + "learning_rate": 0.0002723807165852255, + "loss": 0.172, + "step": 3364 + }, + { + "epoch": 1.02, + "learning_rate": 0.0002723639973774091, + "loss": 0.2331, + "step": 3365 + }, + { + "epoch": 1.02, + "learning_rate": 0.0002723472736241291, + "loss": 0.2871, + "step": 3366 + }, + { + "epoch": 1.02, + "learning_rate": 0.0002723305453260066, + "loss": 0.2539, + "step": 3367 + }, + { + "epoch": 1.02, + "learning_rate": 0.0002723138124836632, + "loss": 0.1854, + "step": 3368 + }, + { + "epoch": 1.02, + "learning_rate": 0.00027229707509772026, + "loss": 0.1978, + "step": 3369 + }, + { + "epoch": 1.02, + "learning_rate": 0.0002722803331687996, + "loss": 0.1874, + "step": 3370 + }, + { + "epoch": 1.02, + "learning_rate": 0.00027226358669752316, + "loss": 0.2274, + "step": 3371 + }, + { + "epoch": 1.02, + "learning_rate": 0.00027224683568451304, + "loss": 0.2186, + "step": 3372 + }, + { + "epoch": 1.02, + "learning_rate": 0.0002722300801303914, + "loss": 0.1261, + "step": 3373 + }, + { + "epoch": 1.02, + "learning_rate": 0.00027221332003578074, + "loss": 0.186, + "step": 3374 + }, + { + "epoch": 1.02, + "learning_rate": 0.0002721965554013037, + "loss": 0.0777, + "step": 3375 + }, + { + "epoch": 1.03, + "learning_rate": 0.0002721797862275829, + "loss": 0.2326, + "step": 3376 + }, + { + "epoch": 1.03, + "learning_rate": 0.0002721630125152413, + "loss": 0.2971, + "step": 3377 + }, + { + "epoch": 1.03, + "learning_rate": 0.0002721462342649021, + "loss": 0.2964, + "step": 3378 + }, + { + "epoch": 1.03, + "learning_rate": 0.00027212945147718845, + "loss": 0.1038, + "step": 3379 + }, + { + "epoch": 1.03, + "learning_rate": 0.00027211266415272384, + "loss": 0.2724, + "step": 3380 + }, + { + "epoch": 1.03, + "learning_rate": 0.0002720958722921318, + "loss": 0.0877, + "step": 3381 + }, + { + "epoch": 1.03, + "learning_rate": 0.0002720790758960362, + "loss": 0.1163, + "step": 3382 + }, + { + "epoch": 1.03, + "learning_rate": 0.00027206227496506086, + "loss": 0.2903, + "step": 3383 + }, + { + "epoch": 1.03, + "learning_rate": 0.00027204546949983, + "loss": 0.1815, + "step": 3384 + }, + { + "epoch": 1.03, + "learning_rate": 0.0002720286595009679, + "loss": 0.0009, + "step": 3385 + }, + { + "epoch": 1.03, + "learning_rate": 0.0002720118449690988, + "loss": 0.2752, + "step": 3386 + }, + { + "epoch": 1.03, + "learning_rate": 0.0002719950259048476, + "loss": 0.2314, + "step": 3387 + }, + { + "epoch": 1.03, + "learning_rate": 0.0002719782023088389, + "loss": 0.0968, + "step": 3388 + }, + { + "epoch": 1.03, + "learning_rate": 0.00027196137418169765, + "loss": 0.1484, + "step": 3389 + }, + { + "epoch": 1.03, + "learning_rate": 0.00027194454152404895, + "loss": 0.1342, + "step": 3390 + }, + { + "epoch": 1.03, + "learning_rate": 0.00027192770433651824, + "loss": 0.1346, + "step": 3391 + }, + { + "epoch": 1.03, + "learning_rate": 0.0002719108626197308, + "loss": 0.1536, + "step": 3392 + }, + { + "epoch": 1.03, + "learning_rate": 0.0002718940163743123, + "loss": 0.2062, + "step": 3393 + }, + { + "epoch": 1.03, + "learning_rate": 0.00027187716560088854, + "loss": 0.3212, + "step": 3394 + }, + { + "epoch": 1.03, + "learning_rate": 0.00027186031030008556, + "loss": 0.1734, + "step": 3395 + }, + { + "epoch": 1.03, + "learning_rate": 0.0002718434504725293, + "loss": 0.2323, + "step": 3396 + }, + { + "epoch": 1.03, + "learning_rate": 0.00027182658611884624, + "loss": 0.1788, + "step": 3397 + }, + { + "epoch": 1.03, + "learning_rate": 0.00027180971723966276, + "loss": 0.2083, + "step": 3398 + }, + { + "epoch": 1.03, + "learning_rate": 0.0002717928438356054, + "loss": 0.0924, + "step": 3399 + }, + { + "epoch": 1.03, + "learning_rate": 0.0002717759659073011, + "loss": 0.15, + "step": 3400 + }, + { + "epoch": 1.03, + "learning_rate": 0.0002717590834553768, + "loss": 0.1883, + "step": 3401 + }, + { + "epoch": 1.03, + "learning_rate": 0.00027174219648045956, + "loss": 0.2759, + "step": 3402 + }, + { + "epoch": 1.03, + "learning_rate": 0.0002717253049831768, + "loss": 0.1687, + "step": 3403 + }, + { + "epoch": 1.03, + "learning_rate": 0.00027170840896415584, + "loss": 0.2356, + "step": 3404 + }, + { + "epoch": 1.03, + "learning_rate": 0.0002716915084240244, + "loss": 0.1936, + "step": 3405 + }, + { + "epoch": 1.03, + "learning_rate": 0.0002716746033634103, + "loss": 0.0651, + "step": 3406 + }, + { + "epoch": 1.03, + "learning_rate": 0.0002716576937829415, + "loss": 0.2496, + "step": 3407 + }, + { + "epoch": 1.03, + "learning_rate": 0.00027164077968324616, + "loss": 0.1205, + "step": 3408 + }, + { + "epoch": 1.04, + "learning_rate": 0.00027162386106495244, + "loss": 0.2588, + "step": 3409 + }, + { + "epoch": 1.04, + "learning_rate": 0.00027160693792868906, + "loss": 0.244, + "step": 3410 + }, + { + "epoch": 1.04, + "learning_rate": 0.00027159001027508454, + "loss": 0.2204, + "step": 3411 + }, + { + "epoch": 1.04, + "learning_rate": 0.00027157307810476766, + "loss": 0.1623, + "step": 3412 + }, + { + "epoch": 1.04, + "learning_rate": 0.0002715561414183674, + "loss": 0.2434, + "step": 3413 + }, + { + "epoch": 1.04, + "learning_rate": 0.000271539200216513, + "loss": 0.2131, + "step": 3414 + }, + { + "epoch": 1.04, + "learning_rate": 0.00027152225449983376, + "loss": 0.1353, + "step": 3415 + }, + { + "epoch": 1.04, + "learning_rate": 0.0002715053042689591, + "loss": 0.1493, + "step": 3416 + }, + { + "epoch": 1.04, + "learning_rate": 0.00027148834952451866, + "loss": 0.18, + "step": 3417 + }, + { + "epoch": 1.04, + "learning_rate": 0.00027147139026714235, + "loss": 0.1453, + "step": 3418 + }, + { + "epoch": 1.04, + "learning_rate": 0.00027145442649746004, + "loss": 0.2393, + "step": 3419 + }, + { + "epoch": 1.04, + "learning_rate": 0.00027143745821610197, + "loss": 0.1629, + "step": 3420 + }, + { + "epoch": 1.04, + "learning_rate": 0.00027142048542369844, + "loss": 0.2177, + "step": 3421 + }, + { + "epoch": 1.04, + "learning_rate": 0.00027140350812088, + "loss": 0.1553, + "step": 3422 + }, + { + "epoch": 1.04, + "learning_rate": 0.0002713865263082772, + "loss": 0.2831, + "step": 3423 + }, + { + "epoch": 1.04, + "learning_rate": 0.00027136953998652087, + "loss": 0.1238, + "step": 3424 + }, + { + "epoch": 1.04, + "learning_rate": 0.0002713525491562421, + "loss": 0.0598, + "step": 3425 + }, + { + "epoch": 1.04, + "learning_rate": 0.00027133555381807196, + "loss": 0.1356, + "step": 3426 + }, + { + "epoch": 1.04, + "learning_rate": 0.00027131855397264184, + "loss": 0.1114, + "step": 3427 + }, + { + "epoch": 1.04, + "learning_rate": 0.0002713015496205832, + "loss": 0.1725, + "step": 3428 + }, + { + "epoch": 1.04, + "learning_rate": 0.00027128454076252764, + "loss": 0.1527, + "step": 3429 + }, + { + "epoch": 1.04, + "learning_rate": 0.0002712675273991071, + "loss": 0.1478, + "step": 3430 + }, + { + "epoch": 1.04, + "learning_rate": 0.00027125050953095357, + "loss": 0.1048, + "step": 3431 + }, + { + "epoch": 1.04, + "learning_rate": 0.00027123348715869913, + "loss": 0.1467, + "step": 3432 + }, + { + "epoch": 1.04, + "learning_rate": 0.0002712164602829762, + "loss": 0.1195, + "step": 3433 + }, + { + "epoch": 1.04, + "learning_rate": 0.00027119942890441715, + "loss": 0.2043, + "step": 3434 + }, + { + "epoch": 1.04, + "learning_rate": 0.00027118239302365477, + "loss": 0.1906, + "step": 3435 + }, + { + "epoch": 1.04, + "learning_rate": 0.0002711653526413219, + "loss": 0.0735, + "step": 3436 + }, + { + "epoch": 1.04, + "learning_rate": 0.00027114830775805144, + "loss": 0.0968, + "step": 3437 + }, + { + "epoch": 1.04, + "learning_rate": 0.00027113125837447663, + "loss": 0.2562, + "step": 3438 + }, + { + "epoch": 1.04, + "learning_rate": 0.0002711142044912308, + "loss": 0.1465, + "step": 3439 + }, + { + "epoch": 1.04, + "learning_rate": 0.0002710971461089474, + "loss": 0.1476, + "step": 3440 + }, + { + "epoch": 1.04, + "learning_rate": 0.0002710800832282602, + "loss": 0.0644, + "step": 3441 + }, + { + "epoch": 1.05, + "learning_rate": 0.0002710630158498029, + "loss": 0.1899, + "step": 3442 + }, + { + "epoch": 1.05, + "learning_rate": 0.00027104594397420965, + "loss": 0.2403, + "step": 3443 + }, + { + "epoch": 1.05, + "learning_rate": 0.0002710288676021145, + "loss": 0.1924, + "step": 3444 + }, + { + "epoch": 1.05, + "learning_rate": 0.0002710117867341518, + "loss": 0.0795, + "step": 3445 + }, + { + "epoch": 1.05, + "learning_rate": 0.00027099470137095615, + "loss": 0.1538, + "step": 3446 + }, + { + "epoch": 1.05, + "learning_rate": 0.00027097761151316215, + "loss": 0.0977, + "step": 3447 + }, + { + "epoch": 1.05, + "learning_rate": 0.0002709605171614046, + "loss": 0.1705, + "step": 3448 + }, + { + "epoch": 1.05, + "learning_rate": 0.0002709434183163186, + "loss": 0.4089, + "step": 3449 + }, + { + "epoch": 1.05, + "learning_rate": 0.00027092631497853924, + "loss": 0.1601, + "step": 3450 + }, + { + "epoch": 1.05, + "learning_rate": 0.0002709092071487019, + "loss": 0.2171, + "step": 3451 + }, + { + "epoch": 1.05, + "learning_rate": 0.0002708920948274421, + "loss": 0.2063, + "step": 3452 + }, + { + "epoch": 1.05, + "learning_rate": 0.0002708749780153955, + "loss": 0.1823, + "step": 3453 + }, + { + "epoch": 1.05, + "learning_rate": 0.0002708578567131979, + "loss": 0.2004, + "step": 3454 + }, + { + "epoch": 1.05, + "learning_rate": 0.00027084073092148534, + "loss": 0.1014, + "step": 3455 + }, + { + "epoch": 1.05, + "learning_rate": 0.00027082360064089396, + "loss": 0.1065, + "step": 3456 + }, + { + "epoch": 1.05, + "learning_rate": 0.0002708064658720602, + "loss": 0.1989, + "step": 3457 + }, + { + "epoch": 1.05, + "learning_rate": 0.0002707893266156204, + "loss": 0.196, + "step": 3458 + }, + { + "epoch": 1.05, + "learning_rate": 0.00027077218287221137, + "loss": 0.1608, + "step": 3459 + }, + { + "epoch": 1.05, + "learning_rate": 0.0002707550346424699, + "loss": 0.0986, + "step": 3460 + }, + { + "epoch": 1.05, + "learning_rate": 0.00027073788192703304, + "loss": 0.2536, + "step": 3461 + }, + { + "epoch": 1.05, + "learning_rate": 0.0002707207247265379, + "loss": 0.215, + "step": 3462 + }, + { + "epoch": 1.05, + "learning_rate": 0.0002707035630416219, + "loss": 0.2487, + "step": 3463 + }, + { + "epoch": 1.05, + "learning_rate": 0.00027068639687292245, + "loss": 0.1762, + "step": 3464 + }, + { + "epoch": 1.05, + "learning_rate": 0.00027066922622107726, + "loss": 0.1521, + "step": 3465 + }, + { + "epoch": 1.05, + "learning_rate": 0.0002706520510867241, + "loss": 0.2344, + "step": 3466 + }, + { + "epoch": 1.05, + "learning_rate": 0.0002706348714705012, + "loss": 0.1818, + "step": 3467 + }, + { + "epoch": 1.05, + "learning_rate": 0.00027061768737304646, + "loss": 0.1322, + "step": 3468 + }, + { + "epoch": 1.05, + "learning_rate": 0.00027060049879499843, + "loss": 0.1189, + "step": 3469 + }, + { + "epoch": 1.05, + "learning_rate": 0.0002705833057369955, + "loss": 0.2243, + "step": 3470 + }, + { + "epoch": 1.05, + "learning_rate": 0.00027056610819967635, + "loss": 0.1345, + "step": 3471 + }, + { + "epoch": 1.05, + "learning_rate": 0.00027054890618367986, + "loss": 0.1527, + "step": 3472 + }, + { + "epoch": 1.05, + "learning_rate": 0.000270531699689645, + "loss": 0.2254, + "step": 3473 + }, + { + "epoch": 1.05, + "learning_rate": 0.00027051448871821094, + "loss": 0.1804, + "step": 3474 + }, + { + "epoch": 1.06, + "learning_rate": 0.0002704972732700171, + "loss": 0.3136, + "step": 3475 + }, + { + "epoch": 1.06, + "learning_rate": 0.0002704800533457028, + "loss": 0.1994, + "step": 3476 + }, + { + "epoch": 1.06, + "learning_rate": 0.0002704628289459079, + "loss": 0.2257, + "step": 3477 + }, + { + "epoch": 1.06, + "learning_rate": 0.00027044560007127215, + "loss": 0.1882, + "step": 3478 + }, + { + "epoch": 1.06, + "learning_rate": 0.0002704283667224356, + "loss": 0.2816, + "step": 3479 + }, + { + "epoch": 1.06, + "learning_rate": 0.0002704111289000383, + "loss": 0.1083, + "step": 3480 + }, + { + "epoch": 1.06, + "learning_rate": 0.0002703938866047207, + "loss": 0.2249, + "step": 3481 + }, + { + "epoch": 1.06, + "learning_rate": 0.00027037663983712324, + "loss": 0.1456, + "step": 3482 + }, + { + "epoch": 1.06, + "learning_rate": 0.00027035938859788663, + "loss": 0.1586, + "step": 3483 + }, + { + "epoch": 1.06, + "learning_rate": 0.0002703421328876517, + "loss": 0.2103, + "step": 3484 + }, + { + "epoch": 1.06, + "learning_rate": 0.0002703248727070594, + "loss": 0.1177, + "step": 3485 + }, + { + "epoch": 1.06, + "learning_rate": 0.00027030760805675096, + "loss": 0.113, + "step": 3486 + }, + { + "epoch": 1.06, + "learning_rate": 0.00027029033893736766, + "loss": 0.1922, + "step": 3487 + }, + { + "epoch": 1.06, + "learning_rate": 0.00027027306534955104, + "loss": 0.1268, + "step": 3488 + }, + { + "epoch": 1.06, + "learning_rate": 0.0002702557872939428, + "loss": 0.2549, + "step": 3489 + }, + { + "epoch": 1.06, + "learning_rate": 0.0002702385047711846, + "loss": 0.07, + "step": 3490 + }, + { + "epoch": 1.06, + "learning_rate": 0.00027022121778191854, + "loss": 0.1017, + "step": 3491 + }, + { + "epoch": 1.06, + "learning_rate": 0.00027020392632678687, + "loss": 0.1589, + "step": 3492 + }, + { + "epoch": 1.06, + "learning_rate": 0.00027018663040643176, + "loss": 0.2737, + "step": 3493 + }, + { + "epoch": 1.06, + "learning_rate": 0.0002701693300214958, + "loss": 0.1311, + "step": 3494 + }, + { + "epoch": 1.06, + "learning_rate": 0.00027015202517262165, + "loss": 0.1355, + "step": 3495 + }, + { + "epoch": 1.06, + "learning_rate": 0.0002701347158604521, + "loss": 0.294, + "step": 3496 + }, + { + "epoch": 1.06, + "learning_rate": 0.0002701174020856301, + "loss": 0.3069, + "step": 3497 + }, + { + "epoch": 1.06, + "learning_rate": 0.0002701000838487989, + "loss": 0.2206, + "step": 3498 + }, + { + "epoch": 1.06, + "learning_rate": 0.00027008276115060174, + "loss": 0.1438, + "step": 3499 + }, + { + "epoch": 1.06, + "learning_rate": 0.00027006543399168216, + "loss": 0.327, + "step": 3500 + }, + { + "epoch": 1.06, + "learning_rate": 0.0002700481023726838, + "loss": 0.1438, + "step": 3501 + }, + { + "epoch": 1.06, + "learning_rate": 0.0002700307662942505, + "loss": 0.2474, + "step": 3502 + }, + { + "epoch": 1.06, + "learning_rate": 0.0002700134257570262, + "loss": 0.1585, + "step": 3503 + }, + { + "epoch": 1.06, + "learning_rate": 0.00026999608076165506, + "loss": 0.2062, + "step": 3504 + }, + { + "epoch": 1.06, + "learning_rate": 0.0002699787313087814, + "loss": 0.2665, + "step": 3505 + }, + { + "epoch": 1.06, + "learning_rate": 0.0002699613773990497, + "loss": 0.1386, + "step": 3506 + }, + { + "epoch": 1.06, + "learning_rate": 0.00026994401903310467, + "loss": 0.1833, + "step": 3507 + }, + { + "epoch": 1.07, + "learning_rate": 0.000269926656211591, + "loss": 0.2064, + "step": 3508 + }, + { + "epoch": 1.07, + "learning_rate": 0.0002699092889351538, + "loss": 0.1216, + "step": 3509 + }, + { + "epoch": 1.07, + "learning_rate": 0.0002698919172044381, + "loss": 0.1928, + "step": 3510 + }, + { + "epoch": 1.07, + "learning_rate": 0.00026987454102008927, + "loss": 0.1997, + "step": 3511 + }, + { + "epoch": 1.07, + "learning_rate": 0.00026985716038275273, + "loss": 0.0409, + "step": 3512 + }, + { + "epoch": 1.07, + "learning_rate": 0.00026983977529307417, + "loss": 0.1358, + "step": 3513 + }, + { + "epoch": 1.07, + "learning_rate": 0.00026982238575169943, + "loss": 0.2152, + "step": 3514 + }, + { + "epoch": 1.07, + "learning_rate": 0.0002698049917592744, + "loss": 0.1408, + "step": 3515 + }, + { + "epoch": 1.07, + "learning_rate": 0.00026978759331644525, + "loss": 0.1888, + "step": 3516 + }, + { + "epoch": 1.07, + "learning_rate": 0.00026977019042385834, + "loss": 0.2008, + "step": 3517 + }, + { + "epoch": 1.07, + "learning_rate": 0.00026975278308216004, + "loss": 0.1871, + "step": 3518 + }, + { + "epoch": 1.07, + "learning_rate": 0.000269735371291997, + "loss": 0.0356, + "step": 3519 + }, + { + "epoch": 1.07, + "learning_rate": 0.000269717955054016, + "loss": 0.1143, + "step": 3520 + }, + { + "epoch": 1.07, + "learning_rate": 0.00026970053436886415, + "loss": 0.1413, + "step": 3521 + }, + { + "epoch": 1.07, + "learning_rate": 0.0002696831092371884, + "loss": 0.2605, + "step": 3522 + }, + { + "epoch": 1.07, + "learning_rate": 0.0002696656796596361, + "loss": 0.1637, + "step": 3523 + }, + { + "epoch": 1.07, + "learning_rate": 0.00026964824563685476, + "loss": 0.1947, + "step": 3524 + }, + { + "epoch": 1.07, + "learning_rate": 0.0002696308071694919, + "loss": 0.0882, + "step": 3525 + }, + { + "epoch": 1.07, + "learning_rate": 0.0002696133642581955, + "loss": 0.2384, + "step": 3526 + }, + { + "epoch": 1.07, + "learning_rate": 0.0002695959169036133, + "loss": 0.2175, + "step": 3527 + }, + { + "epoch": 1.07, + "learning_rate": 0.0002695784651063934, + "loss": 0.2742, + "step": 3528 + }, + { + "epoch": 1.07, + "learning_rate": 0.0002695610088671843, + "loss": 0.2204, + "step": 3529 + }, + { + "epoch": 1.07, + "learning_rate": 0.0002695435481866343, + "loss": 0.1551, + "step": 3530 + }, + { + "epoch": 1.07, + "learning_rate": 0.00026952608306539204, + "loss": 0.1122, + "step": 3531 + }, + { + "epoch": 1.07, + "learning_rate": 0.0002695086135041063, + "loss": 0.124, + "step": 3532 + }, + { + "epoch": 1.07, + "learning_rate": 0.000269491139503426, + "loss": 0.1622, + "step": 3533 + }, + { + "epoch": 1.07, + "learning_rate": 0.0002694736610640003, + "loss": 0.0543, + "step": 3534 + }, + { + "epoch": 1.07, + "learning_rate": 0.0002694561781864785, + "loss": 0.1253, + "step": 3535 + }, + { + "epoch": 1.07, + "learning_rate": 0.0002694386908715099, + "loss": 0.1036, + "step": 3536 + }, + { + "epoch": 1.07, + "learning_rate": 0.00026942119911974423, + "loss": 0.267, + "step": 3537 + }, + { + "epoch": 1.07, + "learning_rate": 0.00026940370293183116, + "loss": 0.2265, + "step": 3538 + }, + { + "epoch": 1.07, + "learning_rate": 0.00026938620230842076, + "loss": 0.2456, + "step": 3539 + }, + { + "epoch": 1.07, + "learning_rate": 0.00026936869725016296, + "loss": 0.3714, + "step": 3540 + }, + { + "epoch": 1.08, + "learning_rate": 0.0002693511877577081, + "loss": 0.2474, + "step": 3541 + }, + { + "epoch": 1.08, + "learning_rate": 0.00026933367383170665, + "loss": 0.1709, + "step": 3542 + }, + { + "epoch": 1.08, + "learning_rate": 0.0002693161554728092, + "loss": 0.1516, + "step": 3543 + }, + { + "epoch": 1.08, + "learning_rate": 0.0002692986326816664, + "loss": 0.1371, + "step": 3544 + }, + { + "epoch": 1.08, + "learning_rate": 0.00026928110545892925, + "loss": 0.0294, + "step": 3545 + }, + { + "epoch": 1.08, + "learning_rate": 0.0002692635738052488, + "loss": 0.1036, + "step": 3546 + }, + { + "epoch": 1.08, + "learning_rate": 0.00026924603772127636, + "loss": 0.1972, + "step": 3547 + }, + { + "epoch": 1.08, + "learning_rate": 0.0002692284972076633, + "loss": 0.107, + "step": 3548 + }, + { + "epoch": 1.08, + "learning_rate": 0.0002692109522650612, + "loss": 0.1463, + "step": 3549 + }, + { + "epoch": 1.08, + "learning_rate": 0.0002691934028941218, + "loss": 0.1722, + "step": 3550 + }, + { + "epoch": 1.08, + "learning_rate": 0.00026917584909549706, + "loss": 0.2558, + "step": 3551 + }, + { + "epoch": 1.08, + "learning_rate": 0.00026915829086983893, + "loss": 0.0222, + "step": 3552 + }, + { + "epoch": 1.08, + "learning_rate": 0.00026914072821779977, + "loss": 0.249, + "step": 3553 + }, + { + "epoch": 1.08, + "learning_rate": 0.0002691231611400319, + "loss": 0.248, + "step": 3554 + }, + { + "epoch": 1.08, + "learning_rate": 0.00026910558963718793, + "loss": 0.2919, + "step": 3555 + }, + { + "epoch": 1.08, + "learning_rate": 0.00026908801370992063, + "loss": 0.1375, + "step": 3556 + }, + { + "epoch": 1.08, + "learning_rate": 0.0002690704333588828, + "loss": 0.1546, + "step": 3557 + }, + { + "epoch": 1.08, + "learning_rate": 0.00026905284858472754, + "loss": 0.2869, + "step": 3558 + }, + { + "epoch": 1.08, + "learning_rate": 0.0002690352593881081, + "loss": 0.2706, + "step": 3559 + }, + { + "epoch": 1.08, + "learning_rate": 0.0002690176657696778, + "loss": 0.3337, + "step": 3560 + }, + { + "epoch": 1.08, + "learning_rate": 0.0002690000677300903, + "loss": 0.3202, + "step": 3561 + }, + { + "epoch": 1.08, + "learning_rate": 0.00026898246526999914, + "loss": 0.3265, + "step": 3562 + }, + { + "epoch": 1.08, + "learning_rate": 0.00026896485839005837, + "loss": 0.0931, + "step": 3563 + }, + { + "epoch": 1.08, + "learning_rate": 0.000268947247090922, + "loss": 0.263, + "step": 3564 + }, + { + "epoch": 1.08, + "learning_rate": 0.00026892963137324416, + "loss": 0.1148, + "step": 3565 + }, + { + "epoch": 1.08, + "learning_rate": 0.00026891201123767936, + "loss": 0.1303, + "step": 3566 + }, + { + "epoch": 1.08, + "learning_rate": 0.00026889438668488197, + "loss": 0.1031, + "step": 3567 + }, + { + "epoch": 1.08, + "learning_rate": 0.0002688767577155068, + "loss": 0.1718, + "step": 3568 + }, + { + "epoch": 1.08, + "learning_rate": 0.00026885912433020865, + "loss": 0.1485, + "step": 3569 + }, + { + "epoch": 1.08, + "learning_rate": 0.0002688414865296426, + "loss": 0.1781, + "step": 3570 + }, + { + "epoch": 1.08, + "learning_rate": 0.0002688238443144638, + "loss": 0.1434, + "step": 3571 + }, + { + "epoch": 1.08, + "learning_rate": 0.0002688061976853277, + "loss": 0.1919, + "step": 3572 + }, + { + "epoch": 1.08, + "learning_rate": 0.0002687885466428897, + "loss": 0.2997, + "step": 3573 + }, + { + "epoch": 1.09, + "learning_rate": 0.00026877089118780556, + "loss": 0.1916, + "step": 3574 + }, + { + "epoch": 1.09, + "learning_rate": 0.00026875323132073106, + "loss": 0.1202, + "step": 3575 + }, + { + "epoch": 1.09, + "learning_rate": 0.0002687355670423223, + "loss": 0.1297, + "step": 3576 + }, + { + "epoch": 1.09, + "learning_rate": 0.0002687178983532354, + "loss": 0.1427, + "step": 3577 + }, + { + "epoch": 1.09, + "learning_rate": 0.00026870022525412674, + "loss": 0.262, + "step": 3578 + }, + { + "epoch": 1.09, + "learning_rate": 0.0002686825477456527, + "loss": 0.2571, + "step": 3579 + }, + { + "epoch": 1.09, + "learning_rate": 0.0002686648658284702, + "loss": 0.2699, + "step": 3580 + }, + { + "epoch": 1.09, + "learning_rate": 0.00026864717950323585, + "loss": 0.0886, + "step": 3581 + }, + { + "epoch": 1.09, + "learning_rate": 0.00026862948877060666, + "loss": 0.1484, + "step": 3582 + }, + { + "epoch": 1.09, + "learning_rate": 0.00026861179363123986, + "loss": 0.2065, + "step": 3583 + }, + { + "epoch": 1.09, + "learning_rate": 0.0002685940940857928, + "loss": 0.1554, + "step": 3584 + }, + { + "epoch": 1.09, + "learning_rate": 0.0002685763901349229, + "loss": 0.1517, + "step": 3585 + }, + { + "epoch": 1.09, + "learning_rate": 0.00026855868177928777, + "loss": 0.2371, + "step": 3586 + }, + { + "epoch": 1.09, + "learning_rate": 0.0002685409690195453, + "loss": 0.1919, + "step": 3587 + }, + { + "epoch": 1.09, + "learning_rate": 0.00026852325185635354, + "loss": 0.1272, + "step": 3588 + }, + { + "epoch": 1.09, + "learning_rate": 0.00026850553029037047, + "loss": 0.0997, + "step": 3589 + }, + { + "epoch": 1.09, + "learning_rate": 0.0002684878043222545, + "loss": 0.136, + "step": 3590 + }, + { + "epoch": 1.09, + "learning_rate": 0.000268470073952664, + "loss": 0.0671, + "step": 3591 + }, + { + "epoch": 1.09, + "learning_rate": 0.0002684523391822577, + "loss": 0.1119, + "step": 3592 + }, + { + "epoch": 1.09, + "learning_rate": 0.00026843460001169437, + "loss": 0.1314, + "step": 3593 + }, + { + "epoch": 1.09, + "learning_rate": 0.00026841685644163294, + "loss": 0.2294, + "step": 3594 + }, + { + "epoch": 1.09, + "learning_rate": 0.0002683991084727326, + "loss": 0.1502, + "step": 3595 + }, + { + "epoch": 1.09, + "learning_rate": 0.0002683813561056525, + "loss": 0.1902, + "step": 3596 + }, + { + "epoch": 1.09, + "learning_rate": 0.0002683635993410522, + "loss": 0.1619, + "step": 3597 + }, + { + "epoch": 1.09, + "learning_rate": 0.0002683458381795913, + "loss": 0.2618, + "step": 3598 + }, + { + "epoch": 1.09, + "learning_rate": 0.00026832807262192954, + "loss": 0.1426, + "step": 3599 + }, + { + "epoch": 1.09, + "learning_rate": 0.0002683103026687269, + "loss": 0.1227, + "step": 3600 + }, + { + "epoch": 1.09, + "learning_rate": 0.0002682925283206434, + "loss": 0.3051, + "step": 3601 + }, + { + "epoch": 1.09, + "learning_rate": 0.00026827474957833943, + "loss": 0.1897, + "step": 3602 + }, + { + "epoch": 1.09, + "learning_rate": 0.00026825696644247534, + "loss": 0.2249, + "step": 3603 + }, + { + "epoch": 1.09, + "learning_rate": 0.00026823917891371173, + "loss": 0.0778, + "step": 3604 + }, + { + "epoch": 1.09, + "learning_rate": 0.0002682213869927093, + "loss": 0.151, + "step": 3605 + }, + { + "epoch": 1.09, + "learning_rate": 0.00026820359068012906, + "loss": 0.173, + "step": 3606 + }, + { + "epoch": 1.1, + "learning_rate": 0.0002681857899766321, + "loss": 0.1524, + "step": 3607 + }, + { + "epoch": 1.1, + "learning_rate": 0.0002681679848828796, + "loss": 0.2961, + "step": 3608 + }, + { + "epoch": 1.1, + "learning_rate": 0.00026815017539953294, + "loss": 0.24, + "step": 3609 + }, + { + "epoch": 1.1, + "learning_rate": 0.00026813236152725375, + "loss": 0.2898, + "step": 3610 + }, + { + "epoch": 1.1, + "learning_rate": 0.0002681145432667038, + "loss": 0.1952, + "step": 3611 + }, + { + "epoch": 1.1, + "learning_rate": 0.00026809672061854493, + "loss": 0.1728, + "step": 3612 + }, + { + "epoch": 1.1, + "learning_rate": 0.0002680788935834391, + "loss": 0.1653, + "step": 3613 + }, + { + "epoch": 1.1, + "learning_rate": 0.0002680610621620487, + "loss": 0.2511, + "step": 3614 + }, + { + "epoch": 1.1, + "learning_rate": 0.0002680432263550361, + "loss": 0.2217, + "step": 3615 + }, + { + "epoch": 1.1, + "learning_rate": 0.0002680253861630638, + "loss": 0.3184, + "step": 3616 + }, + { + "epoch": 1.1, + "learning_rate": 0.0002680075415867944, + "loss": 0.3028, + "step": 3617 + }, + { + "epoch": 1.1, + "learning_rate": 0.000267989692626891, + "loss": 0.2388, + "step": 3618 + }, + { + "epoch": 1.1, + "learning_rate": 0.00026797183928401646, + "loss": 0.0988, + "step": 3619 + }, + { + "epoch": 1.1, + "learning_rate": 0.0002679539815588341, + "loss": 0.2091, + "step": 3620 + }, + { + "epoch": 1.1, + "learning_rate": 0.0002679361194520072, + "loss": 0.0948, + "step": 3621 + }, + { + "epoch": 1.1, + "learning_rate": 0.00026791825296419925, + "loss": 0.1498, + "step": 3622 + }, + { + "epoch": 1.1, + "learning_rate": 0.000267900382096074, + "loss": 0.408, + "step": 3623 + }, + { + "epoch": 1.1, + "learning_rate": 0.00026788250684829537, + "loss": 0.1844, + "step": 3624 + }, + { + "epoch": 1.1, + "learning_rate": 0.00026786462722152725, + "loss": 0.2828, + "step": 3625 + }, + { + "epoch": 1.1, + "learning_rate": 0.0002678467432164339, + "loss": 0.2168, + "step": 3626 + }, + { + "epoch": 1.1, + "learning_rate": 0.00026782885483367955, + "loss": 0.243, + "step": 3627 + }, + { + "epoch": 1.1, + "learning_rate": 0.0002678109620739288, + "loss": 0.3528, + "step": 3628 + }, + { + "epoch": 1.1, + "learning_rate": 0.00026779306493784634, + "loss": 0.2141, + "step": 3629 + }, + { + "epoch": 1.1, + "learning_rate": 0.0002677751634260969, + "loss": 0.1311, + "step": 3630 + }, + { + "epoch": 1.1, + "learning_rate": 0.0002677572575393455, + "loss": 0.1363, + "step": 3631 + }, + { + "epoch": 1.1, + "learning_rate": 0.00026773934727825733, + "loss": 0.172, + "step": 3632 + }, + { + "epoch": 1.1, + "learning_rate": 0.0002677214326434977, + "loss": 0.0915, + "step": 3633 + }, + { + "epoch": 1.1, + "learning_rate": 0.00026770351363573203, + "loss": 0.1443, + "step": 3634 + }, + { + "epoch": 1.1, + "learning_rate": 0.000267685590255626, + "loss": 0.0763, + "step": 3635 + }, + { + "epoch": 1.1, + "learning_rate": 0.0002676676625038454, + "loss": 0.1919, + "step": 3636 + }, + { + "epoch": 1.1, + "learning_rate": 0.00026764973038105616, + "loss": 0.1521, + "step": 3637 + }, + { + "epoch": 1.1, + "learning_rate": 0.0002676317938879245, + "loss": 0.2586, + "step": 3638 + }, + { + "epoch": 1.1, + "learning_rate": 0.00026761385302511657, + "loss": 0.0773, + "step": 3639 + }, + { + "epoch": 1.11, + "learning_rate": 0.000267595907793299, + "loss": 0.1248, + "step": 3640 + }, + { + "epoch": 1.11, + "learning_rate": 0.00026757795819313824, + "loss": 0.1474, + "step": 3641 + }, + { + "epoch": 1.11, + "learning_rate": 0.0002675600042253011, + "loss": 0.2726, + "step": 3642 + }, + { + "epoch": 1.11, + "learning_rate": 0.0002675420458904546, + "loss": 0.0246, + "step": 3643 + }, + { + "epoch": 1.11, + "learning_rate": 0.0002675240831892658, + "loss": 0.1317, + "step": 3644 + }, + { + "epoch": 1.11, + "learning_rate": 0.0002675061161224019, + "loss": 0.1106, + "step": 3645 + }, + { + "epoch": 1.11, + "learning_rate": 0.00026748814469053036, + "loss": 0.2021, + "step": 3646 + }, + { + "epoch": 1.11, + "learning_rate": 0.00026747016889431884, + "loss": 0.1946, + "step": 3647 + }, + { + "epoch": 1.11, + "learning_rate": 0.000267452188734435, + "loss": 0.0935, + "step": 3648 + }, + { + "epoch": 1.11, + "learning_rate": 0.00026743420421154675, + "loss": 0.1449, + "step": 3649 + }, + { + "epoch": 1.11, + "learning_rate": 0.0002674162153263222, + "loss": 0.0623, + "step": 3650 + }, + { + "epoch": 1.11, + "learning_rate": 0.0002673982220794295, + "loss": 0.0668, + "step": 3651 + }, + { + "epoch": 1.11, + "learning_rate": 0.0002673802244715372, + "loss": 0.2031, + "step": 3652 + }, + { + "epoch": 1.11, + "learning_rate": 0.0002673622225033138, + "loss": 0.221, + "step": 3653 + }, + { + "epoch": 1.11, + "learning_rate": 0.0002673442161754279, + "loss": 0.1454, + "step": 3654 + }, + { + "epoch": 1.11, + "learning_rate": 0.00026732620548854856, + "loss": 0.2261, + "step": 3655 + }, + { + "epoch": 1.11, + "learning_rate": 0.00026730819044334475, + "loss": 0.3088, + "step": 3656 + }, + { + "epoch": 1.11, + "learning_rate": 0.00026729017104048565, + "loss": 0.0615, + "step": 3657 + }, + { + "epoch": 1.11, + "learning_rate": 0.0002672721472806406, + "loss": 0.1524, + "step": 3658 + }, + { + "epoch": 1.11, + "learning_rate": 0.0002672541191644792, + "loss": 0.1791, + "step": 3659 + }, + { + "epoch": 1.11, + "learning_rate": 0.0002672360866926711, + "loss": 0.1723, + "step": 3660 + }, + { + "epoch": 1.11, + "learning_rate": 0.0002672180498658862, + "loss": 0.2944, + "step": 3661 + }, + { + "epoch": 1.11, + "learning_rate": 0.00026720000868479446, + "loss": 0.1147, + "step": 3662 + }, + { + "epoch": 1.11, + "learning_rate": 0.0002671819631500661, + "loss": 0.0647, + "step": 3663 + }, + { + "epoch": 1.11, + "learning_rate": 0.0002671639132623715, + "loss": 0.2812, + "step": 3664 + }, + { + "epoch": 1.11, + "learning_rate": 0.00026714585902238105, + "loss": 0.2413, + "step": 3665 + }, + { + "epoch": 1.11, + "learning_rate": 0.00026712780043076543, + "loss": 0.0039, + "step": 3666 + }, + { + "epoch": 1.11, + "learning_rate": 0.0002671097374881956, + "loss": 0.1738, + "step": 3667 + }, + { + "epoch": 1.11, + "learning_rate": 0.0002670916701953423, + "loss": 0.2237, + "step": 3668 + }, + { + "epoch": 1.11, + "learning_rate": 0.0002670735985528769, + "loss": 0.2299, + "step": 3669 + }, + { + "epoch": 1.11, + "learning_rate": 0.0002670555225614707, + "loss": 0.1246, + "step": 3670 + }, + { + "epoch": 1.11, + "learning_rate": 0.00026703744222179496, + "loss": 0.1813, + "step": 3671 + }, + { + "epoch": 1.11, + "learning_rate": 0.00026701935753452153, + "loss": 0.1145, + "step": 3672 + }, + { + "epoch": 1.12, + "learning_rate": 0.00026700126850032213, + "loss": 0.138, + "step": 3673 + }, + { + "epoch": 1.12, + "learning_rate": 0.0002669831751198687, + "loss": 0.1579, + "step": 3674 + }, + { + "epoch": 1.12, + "learning_rate": 0.00026696507739383334, + "loss": 0.1209, + "step": 3675 + }, + { + "epoch": 1.12, + "learning_rate": 0.00026694697532288837, + "loss": 0.1882, + "step": 3676 + }, + { + "epoch": 1.12, + "learning_rate": 0.0002669288689077062, + "loss": 0.2058, + "step": 3677 + }, + { + "epoch": 1.12, + "learning_rate": 0.00026691075814895944, + "loss": 0.1966, + "step": 3678 + }, + { + "epoch": 1.12, + "learning_rate": 0.00026689264304732087, + "loss": 0.1074, + "step": 3679 + }, + { + "epoch": 1.12, + "learning_rate": 0.0002668745236034634, + "loss": 0.1638, + "step": 3680 + }, + { + "epoch": 1.12, + "learning_rate": 0.0002668563998180601, + "loss": 0.2322, + "step": 3681 + }, + { + "epoch": 1.12, + "learning_rate": 0.00026683827169178423, + "loss": 0.1967, + "step": 3682 + }, + { + "epoch": 1.12, + "learning_rate": 0.0002668201392253092, + "loss": 0.2587, + "step": 3683 + }, + { + "epoch": 1.12, + "learning_rate": 0.0002668020024193085, + "loss": 0.2075, + "step": 3684 + }, + { + "epoch": 1.12, + "learning_rate": 0.00026678386127445603, + "loss": 0.0564, + "step": 3685 + }, + { + "epoch": 1.12, + "learning_rate": 0.0002667657157914255, + "loss": 0.1575, + "step": 3686 + }, + { + "epoch": 1.12, + "learning_rate": 0.0002667475659708911, + "loss": 0.1333, + "step": 3687 + }, + { + "epoch": 1.12, + "learning_rate": 0.0002667294118135269, + "loss": 0.2431, + "step": 3688 + }, + { + "epoch": 1.12, + "learning_rate": 0.0002667112533200075, + "loss": 0.166, + "step": 3689 + }, + { + "epoch": 1.12, + "learning_rate": 0.00026669309049100713, + "loss": 0.0729, + "step": 3690 + }, + { + "epoch": 1.12, + "learning_rate": 0.0002666749233272008, + "loss": 0.2378, + "step": 3691 + }, + { + "epoch": 1.12, + "learning_rate": 0.0002666567518292631, + "loss": 0.1181, + "step": 3692 + }, + { + "epoch": 1.12, + "learning_rate": 0.00026663857599786917, + "loss": 0.1184, + "step": 3693 + }, + { + "epoch": 1.12, + "learning_rate": 0.00026662039583369424, + "loss": 0.103, + "step": 3694 + }, + { + "epoch": 1.12, + "learning_rate": 0.00026660221133741356, + "loss": 0.126, + "step": 3695 + }, + { + "epoch": 1.12, + "learning_rate": 0.00026658402250970265, + "loss": 0.0791, + "step": 3696 + }, + { + "epoch": 1.12, + "learning_rate": 0.00026656582935123723, + "loss": 0.2097, + "step": 3697 + }, + { + "epoch": 1.12, + "learning_rate": 0.00026654763186269304, + "loss": 0.0891, + "step": 3698 + }, + { + "epoch": 1.12, + "learning_rate": 0.0002665294300447461, + "loss": 0.1146, + "step": 3699 + }, + { + "epoch": 1.12, + "learning_rate": 0.00026651122389807254, + "loss": 0.2203, + "step": 3700 + }, + { + "epoch": 1.12, + "learning_rate": 0.0002664930134233487, + "loss": 0.2225, + "step": 3701 + }, + { + "epoch": 1.12, + "learning_rate": 0.00026647479862125105, + "loss": 0.2219, + "step": 3702 + }, + { + "epoch": 1.12, + "learning_rate": 0.0002664565794924562, + "loss": 0.1283, + "step": 3703 + }, + { + "epoch": 1.12, + "learning_rate": 0.00026643835603764086, + "loss": 0.2195, + "step": 3704 + }, + { + "epoch": 1.12, + "learning_rate": 0.00026642012825748207, + "loss": 0.0967, + "step": 3705 + }, + { + "epoch": 1.13, + "learning_rate": 0.00026640189615265695, + "loss": 0.1436, + "step": 3706 + }, + { + "epoch": 1.13, + "learning_rate": 0.00026638365972384264, + "loss": 0.0532, + "step": 3707 + }, + { + "epoch": 1.13, + "learning_rate": 0.00026636541897171676, + "loss": 0.0676, + "step": 3708 + }, + { + "epoch": 1.13, + "learning_rate": 0.0002663471738969568, + "loss": 0.1536, + "step": 3709 + }, + { + "epoch": 1.13, + "learning_rate": 0.0002663289245002405, + "loss": 0.1136, + "step": 3710 + }, + { + "epoch": 1.13, + "learning_rate": 0.00026631067078224574, + "loss": 0.2113, + "step": 3711 + }, + { + "epoch": 1.13, + "learning_rate": 0.0002662924127436507, + "loss": 0.3893, + "step": 3712 + }, + { + "epoch": 1.13, + "learning_rate": 0.0002662741503851335, + "loss": 0.1268, + "step": 3713 + }, + { + "epoch": 1.13, + "learning_rate": 0.0002662558837073726, + "loss": 0.2048, + "step": 3714 + }, + { + "epoch": 1.13, + "learning_rate": 0.00026623761271104656, + "loss": 0.1537, + "step": 3715 + }, + { + "epoch": 1.13, + "learning_rate": 0.00026621933739683404, + "loss": 0.2116, + "step": 3716 + }, + { + "epoch": 1.13, + "learning_rate": 0.00026620105776541396, + "loss": 0.1494, + "step": 3717 + }, + { + "epoch": 1.13, + "learning_rate": 0.00026618277381746535, + "loss": 0.1827, + "step": 3718 + }, + { + "epoch": 1.13, + "learning_rate": 0.0002661644855536674, + "loss": 0.1376, + "step": 3719 + }, + { + "epoch": 1.13, + "learning_rate": 0.0002661461929746994, + "loss": 0.2105, + "step": 3720 + }, + { + "epoch": 1.13, + "learning_rate": 0.000266127896081241, + "loss": 0.1402, + "step": 3721 + }, + { + "epoch": 1.13, + "learning_rate": 0.0002661095948739718, + "loss": 0.2192, + "step": 3722 + }, + { + "epoch": 1.13, + "learning_rate": 0.00026609128935357163, + "loss": 0.2155, + "step": 3723 + }, + { + "epoch": 1.13, + "learning_rate": 0.00026607297952072046, + "loss": 0.1901, + "step": 3724 + }, + { + "epoch": 1.13, + "learning_rate": 0.0002660546653760985, + "loss": 0.1281, + "step": 3725 + }, + { + "epoch": 1.13, + "learning_rate": 0.00026603634692038607, + "loss": 0.2691, + "step": 3726 + }, + { + "epoch": 1.13, + "learning_rate": 0.00026601802415426363, + "loss": 0.159, + "step": 3727 + }, + { + "epoch": 1.13, + "learning_rate": 0.00026599969707841176, + "loss": 0.0159, + "step": 3728 + }, + { + "epoch": 1.13, + "learning_rate": 0.00026598136569351143, + "loss": 0.1433, + "step": 3729 + }, + { + "epoch": 1.13, + "learning_rate": 0.00026596303000024337, + "loss": 0.2711, + "step": 3730 + }, + { + "epoch": 1.13, + "learning_rate": 0.00026594468999928877, + "loss": 0.1032, + "step": 3731 + }, + { + "epoch": 1.13, + "learning_rate": 0.000265926345691329, + "loss": 0.1518, + "step": 3732 + }, + { + "epoch": 1.13, + "learning_rate": 0.00026590799707704545, + "loss": 0.1114, + "step": 3733 + }, + { + "epoch": 1.13, + "learning_rate": 0.0002658896441571197, + "loss": 0.1719, + "step": 3734 + }, + { + "epoch": 1.13, + "learning_rate": 0.0002658712869322335, + "loss": 0.2186, + "step": 3735 + }, + { + "epoch": 1.13, + "learning_rate": 0.0002658529254030688, + "loss": 0.178, + "step": 3736 + }, + { + "epoch": 1.13, + "learning_rate": 0.0002658345595703077, + "loss": 0.21, + "step": 3737 + }, + { + "epoch": 1.13, + "learning_rate": 0.0002658161894346323, + "loss": 0.1566, + "step": 3738 + }, + { + "epoch": 1.14, + "learning_rate": 0.0002657978149967251, + "loss": 0.1673, + "step": 3739 + }, + { + "epoch": 1.14, + "learning_rate": 0.0002657794362572687, + "loss": 0.2138, + "step": 3740 + }, + { + "epoch": 1.14, + "learning_rate": 0.0002657610532169457, + "loss": 0.0823, + "step": 3741 + }, + { + "epoch": 1.14, + "learning_rate": 0.0002657426658764391, + "loss": 0.1131, + "step": 3742 + }, + { + "epoch": 1.14, + "learning_rate": 0.00026572427423643185, + "loss": 0.2023, + "step": 3743 + }, + { + "epoch": 1.14, + "learning_rate": 0.00026570587829760715, + "loss": 0.1865, + "step": 3744 + }, + { + "epoch": 1.14, + "learning_rate": 0.00026568747806064843, + "loss": 0.1588, + "step": 3745 + }, + { + "epoch": 1.14, + "learning_rate": 0.00026566907352623914, + "loss": 0.1416, + "step": 3746 + }, + { + "epoch": 1.14, + "learning_rate": 0.0002656506646950629, + "loss": 0.0707, + "step": 3747 + }, + { + "epoch": 1.14, + "learning_rate": 0.00026563225156780366, + "loss": 0.1928, + "step": 3748 + }, + { + "epoch": 1.14, + "learning_rate": 0.00026561383414514536, + "loss": 0.3247, + "step": 3749 + }, + { + "epoch": 1.14, + "learning_rate": 0.00026559541242777214, + "loss": 0.1617, + "step": 3750 + }, + { + "epoch": 1.14, + "learning_rate": 0.00026557698641636835, + "loss": 0.2316, + "step": 3751 + }, + { + "epoch": 1.14, + "learning_rate": 0.00026555855611161845, + "loss": 0.1155, + "step": 3752 + }, + { + "epoch": 1.14, + "learning_rate": 0.0002655401215142071, + "loss": 0.1152, + "step": 3753 + }, + { + "epoch": 1.14, + "learning_rate": 0.00026552168262481893, + "loss": 0.1809, + "step": 3754 + }, + { + "epoch": 1.14, + "learning_rate": 0.0002655032394441391, + "loss": 0.1559, + "step": 3755 + }, + { + "epoch": 1.14, + "learning_rate": 0.00026548479197285266, + "loss": 0.1666, + "step": 3756 + }, + { + "epoch": 1.14, + "learning_rate": 0.0002654663402116448, + "loss": 0.1511, + "step": 3757 + }, + { + "epoch": 1.14, + "learning_rate": 0.000265447884161201, + "loss": 0.1595, + "step": 3758 + }, + { + "epoch": 1.14, + "learning_rate": 0.0002654294238222069, + "loss": 0.2037, + "step": 3759 + }, + { + "epoch": 1.14, + "learning_rate": 0.00026541095919534814, + "loss": 0.2073, + "step": 3760 + }, + { + "epoch": 1.14, + "learning_rate": 0.0002653924902813107, + "loss": 0.0684, + "step": 3761 + }, + { + "epoch": 1.14, + "learning_rate": 0.00026537401708078066, + "loss": 0.1792, + "step": 3762 + }, + { + "epoch": 1.14, + "learning_rate": 0.0002653555395944442, + "loss": 0.1384, + "step": 3763 + }, + { + "epoch": 1.14, + "learning_rate": 0.00026533705782298776, + "loss": 0.1668, + "step": 3764 + }, + { + "epoch": 1.14, + "learning_rate": 0.00026531857176709776, + "loss": 0.1861, + "step": 3765 + }, + { + "epoch": 1.14, + "learning_rate": 0.00026530008142746107, + "loss": 0.2376, + "step": 3766 + }, + { + "epoch": 1.14, + "learning_rate": 0.00026528158680476447, + "loss": 0.2099, + "step": 3767 + }, + { + "epoch": 1.14, + "learning_rate": 0.0002652630878996949, + "loss": 0.2086, + "step": 3768 + }, + { + "epoch": 1.14, + "learning_rate": 0.0002652445847129397, + "loss": 0.2454, + "step": 3769 + }, + { + "epoch": 1.14, + "learning_rate": 0.0002652260772451861, + "loss": 0.1789, + "step": 3770 + }, + { + "epoch": 1.14, + "learning_rate": 0.0002652075654971216, + "loss": 0.2834, + "step": 3771 + }, + { + "epoch": 1.15, + "learning_rate": 0.00026518904946943396, + "loss": 0.1846, + "step": 3772 + }, + { + "epoch": 1.15, + "learning_rate": 0.00026517052916281086, + "loss": 0.1971, + "step": 3773 + }, + { + "epoch": 1.15, + "learning_rate": 0.00026515200457794035, + "loss": 0.2402, + "step": 3774 + }, + { + "epoch": 1.15, + "learning_rate": 0.0002651334757155106, + "loss": 0.251, + "step": 3775 + }, + { + "epoch": 1.15, + "learning_rate": 0.00026511494257620973, + "loss": 0.0696, + "step": 3776 + }, + { + "epoch": 1.15, + "learning_rate": 0.0002650964051607264, + "loss": 0.1461, + "step": 3777 + }, + { + "epoch": 1.15, + "learning_rate": 0.0002650778634697491, + "loss": 0.2822, + "step": 3778 + }, + { + "epoch": 1.15, + "learning_rate": 0.0002650593175039667, + "loss": 0.2344, + "step": 3779 + }, + { + "epoch": 1.15, + "learning_rate": 0.000265040767264068, + "loss": 0.1469, + "step": 3780 + }, + { + "epoch": 1.15, + "learning_rate": 0.0002650222127507422, + "loss": 0.238, + "step": 3781 + }, + { + "epoch": 1.15, + "learning_rate": 0.00026500365396467845, + "loss": 0.0659, + "step": 3782 + }, + { + "epoch": 1.15, + "learning_rate": 0.0002649850909065662, + "loss": 0.1172, + "step": 3783 + }, + { + "epoch": 1.15, + "learning_rate": 0.00026496652357709503, + "loss": 0.1031, + "step": 3784 + }, + { + "epoch": 1.15, + "learning_rate": 0.0002649479519769547, + "loss": 0.1643, + "step": 3785 + }, + { + "epoch": 1.15, + "learning_rate": 0.000264929376106835, + "loss": 0.3211, + "step": 3786 + }, + { + "epoch": 1.15, + "learning_rate": 0.00026491079596742596, + "loss": 0.2551, + "step": 3787 + }, + { + "epoch": 1.15, + "learning_rate": 0.00026489221155941785, + "loss": 0.2387, + "step": 3788 + }, + { + "epoch": 1.15, + "learning_rate": 0.00026487362288350096, + "loss": 0.187, + "step": 3789 + }, + { + "epoch": 1.15, + "learning_rate": 0.0002648550299403659, + "loss": 0.2555, + "step": 3790 + }, + { + "epoch": 1.15, + "learning_rate": 0.0002648364327307033, + "loss": 0.1631, + "step": 3791 + }, + { + "epoch": 1.15, + "learning_rate": 0.000264817831255204, + "loss": 0.2536, + "step": 3792 + }, + { + "epoch": 1.15, + "learning_rate": 0.0002647992255145589, + "loss": 0.0432, + "step": 3793 + }, + { + "epoch": 1.15, + "learning_rate": 0.0002647806155094592, + "loss": 0.3191, + "step": 3794 + }, + { + "epoch": 1.15, + "learning_rate": 0.0002647620012405963, + "loss": 0.2235, + "step": 3795 + }, + { + "epoch": 1.15, + "learning_rate": 0.00026474338270866154, + "loss": 0.1158, + "step": 3796 + }, + { + "epoch": 1.15, + "learning_rate": 0.0002647247599143466, + "loss": 0.1545, + "step": 3797 + }, + { + "epoch": 1.15, + "learning_rate": 0.0002647061328583433, + "loss": 0.1291, + "step": 3798 + }, + { + "epoch": 1.15, + "learning_rate": 0.0002646875015413435, + "loss": 0.2717, + "step": 3799 + }, + { + "epoch": 1.15, + "learning_rate": 0.0002646688659640393, + "loss": 0.2376, + "step": 3800 + }, + { + "epoch": 1.15, + "learning_rate": 0.00026465022612712306, + "loss": 0.1332, + "step": 3801 + }, + { + "epoch": 1.15, + "learning_rate": 0.00026463158203128713, + "loss": 0.3079, + "step": 3802 + }, + { + "epoch": 1.15, + "learning_rate": 0.00026461293367722403, + "loss": 0.0968, + "step": 3803 + }, + { + "epoch": 1.16, + "learning_rate": 0.0002645942810656266, + "loss": 0.195, + "step": 3804 + }, + { + "epoch": 1.16, + "learning_rate": 0.0002645756241971876, + "loss": 0.1677, + "step": 3805 + }, + { + "epoch": 1.16, + "learning_rate": 0.0002645569630726002, + "loss": 0.1499, + "step": 3806 + }, + { + "epoch": 1.16, + "learning_rate": 0.00026453829769255754, + "loss": 0.0622, + "step": 3807 + }, + { + "epoch": 1.16, + "learning_rate": 0.000264519628057753, + "loss": 0.1424, + "step": 3808 + }, + { + "epoch": 1.16, + "learning_rate": 0.0002645009541688801, + "loss": 0.2397, + "step": 3809 + }, + { + "epoch": 1.16, + "learning_rate": 0.0002644822760266326, + "loss": 0.1553, + "step": 3810 + }, + { + "epoch": 1.16, + "learning_rate": 0.00026446359363170417, + "loss": 0.1589, + "step": 3811 + }, + { + "epoch": 1.16, + "learning_rate": 0.00026444490698478894, + "loss": 0.0727, + "step": 3812 + }, + { + "epoch": 1.16, + "learning_rate": 0.00026442621608658103, + "loss": 0.2694, + "step": 3813 + }, + { + "epoch": 1.16, + "learning_rate": 0.0002644075209377747, + "loss": 0.248, + "step": 3814 + }, + { + "epoch": 1.16, + "learning_rate": 0.00026438882153906454, + "loss": 0.2028, + "step": 3815 + }, + { + "epoch": 1.16, + "learning_rate": 0.000264370117891145, + "loss": 0.256, + "step": 3816 + }, + { + "epoch": 1.16, + "learning_rate": 0.00026435140999471105, + "loss": 0.17, + "step": 3817 + }, + { + "epoch": 1.16, + "learning_rate": 0.00026433269785045753, + "loss": 0.1803, + "step": 3818 + }, + { + "epoch": 1.16, + "learning_rate": 0.00026431398145907955, + "loss": 0.1311, + "step": 3819 + }, + { + "epoch": 1.16, + "learning_rate": 0.0002642952608212724, + "loss": 0.2244, + "step": 3820 + }, + { + "epoch": 1.16, + "learning_rate": 0.0002642765359377315, + "loss": 0.1509, + "step": 3821 + }, + { + "epoch": 1.16, + "learning_rate": 0.0002642578068091523, + "loss": 0.165, + "step": 3822 + }, + { + "epoch": 1.16, + "learning_rate": 0.0002642390734362307, + "loss": 0.1023, + "step": 3823 + }, + { + "epoch": 1.16, + "learning_rate": 0.00026422033581966257, + "loss": 0.1476, + "step": 3824 + }, + { + "epoch": 1.16, + "learning_rate": 0.00026420159396014386, + "loss": 0.2295, + "step": 3825 + }, + { + "epoch": 1.16, + "learning_rate": 0.00026418284785837087, + "loss": 0.1383, + "step": 3826 + }, + { + "epoch": 1.16, + "learning_rate": 0.0002641640975150399, + "loss": 0.1182, + "step": 3827 + }, + { + "epoch": 1.16, + "learning_rate": 0.00026414534293084743, + "loss": 0.1107, + "step": 3828 + }, + { + "epoch": 1.16, + "learning_rate": 0.0002641265841064903, + "loss": 0.1112, + "step": 3829 + }, + { + "epoch": 1.16, + "learning_rate": 0.0002641078210426652, + "loss": 0.163, + "step": 3830 + }, + { + "epoch": 1.16, + "learning_rate": 0.0002640890537400691, + "loss": 0.1887, + "step": 3831 + }, + { + "epoch": 1.16, + "learning_rate": 0.0002640702821993993, + "loss": 0.1598, + "step": 3832 + }, + { + "epoch": 1.16, + "learning_rate": 0.000264051506421353, + "loss": 0.2531, + "step": 3833 + }, + { + "epoch": 1.16, + "learning_rate": 0.00026403272640662765, + "loss": 0.2543, + "step": 3834 + }, + { + "epoch": 1.16, + "learning_rate": 0.000264013942155921, + "loss": 0.2386, + "step": 3835 + }, + { + "epoch": 1.16, + "learning_rate": 0.00026399515366993067, + "loss": 0.2864, + "step": 3836 + }, + { + "epoch": 1.17, + "learning_rate": 0.00026397636094935474, + "loss": 0.2927, + "step": 3837 + }, + { + "epoch": 1.17, + "learning_rate": 0.0002639575639948912, + "loss": 0.0652, + "step": 3838 + }, + { + "epoch": 1.17, + "learning_rate": 0.0002639387628072383, + "loss": 0.2077, + "step": 3839 + }, + { + "epoch": 1.17, + "learning_rate": 0.0002639199573870945, + "loss": 0.2993, + "step": 3840 + }, + { + "epoch": 1.17, + "learning_rate": 0.0002639011477351584, + "loss": 0.1745, + "step": 3841 + }, + { + "epoch": 1.17, + "learning_rate": 0.00026388233385212856, + "loss": 0.1296, + "step": 3842 + }, + { + "epoch": 1.17, + "learning_rate": 0.00026386351573870405, + "loss": 0.2109, + "step": 3843 + }, + { + "epoch": 1.17, + "learning_rate": 0.00026384469339558385, + "loss": 0.2261, + "step": 3844 + }, + { + "epoch": 1.17, + "learning_rate": 0.00026382586682346715, + "loss": 0.1412, + "step": 3845 + }, + { + "epoch": 1.17, + "learning_rate": 0.0002638070360230532, + "loss": 0.1514, + "step": 3846 + }, + { + "epoch": 1.17, + "learning_rate": 0.0002637882009950417, + "loss": 0.1814, + "step": 3847 + }, + { + "epoch": 1.17, + "learning_rate": 0.00026376936174013217, + "loss": 0.1747, + "step": 3848 + }, + { + "epoch": 1.17, + "learning_rate": 0.00026375051825902444, + "loss": 0.1733, + "step": 3849 + }, + { + "epoch": 1.17, + "learning_rate": 0.00026373167055241856, + "loss": 0.2191, + "step": 3850 + }, + { + "epoch": 1.17, + "learning_rate": 0.00026371281862101466, + "loss": 0.2079, + "step": 3851 + }, + { + "epoch": 1.17, + "learning_rate": 0.000263693962465513, + "loss": 0.1111, + "step": 3852 + }, + { + "epoch": 1.17, + "learning_rate": 0.000263675102086614, + "loss": 0.1886, + "step": 3853 + }, + { + "epoch": 1.17, + "learning_rate": 0.00026365623748501833, + "loss": 0.1646, + "step": 3854 + }, + { + "epoch": 1.17, + "learning_rate": 0.00026363736866142674, + "loss": 0.1133, + "step": 3855 + }, + { + "epoch": 1.17, + "learning_rate": 0.0002636184956165401, + "loss": 0.1718, + "step": 3856 + }, + { + "epoch": 1.17, + "learning_rate": 0.0002635996183510596, + "loss": 0.1357, + "step": 3857 + }, + { + "epoch": 1.17, + "learning_rate": 0.0002635807368656864, + "loss": 0.2566, + "step": 3858 + }, + { + "epoch": 1.17, + "learning_rate": 0.00026356185116112183, + "loss": 0.2485, + "step": 3859 + }, + { + "epoch": 1.17, + "learning_rate": 0.00026354296123806757, + "loss": 0.1726, + "step": 3860 + }, + { + "epoch": 1.17, + "learning_rate": 0.00026352406709722524, + "loss": 0.1269, + "step": 3861 + }, + { + "epoch": 1.17, + "learning_rate": 0.0002635051687392967, + "loss": 0.0873, + "step": 3862 + }, + { + "epoch": 1.17, + "learning_rate": 0.00026348626616498405, + "loss": 0.1873, + "step": 3863 + }, + { + "epoch": 1.17, + "learning_rate": 0.0002634673593749894, + "loss": 0.1285, + "step": 3864 + }, + { + "epoch": 1.17, + "learning_rate": 0.00026344844837001506, + "loss": 0.0538, + "step": 3865 + }, + { + "epoch": 1.17, + "learning_rate": 0.00026342953315076353, + "loss": 0.0833, + "step": 3866 + }, + { + "epoch": 1.17, + "learning_rate": 0.0002634106137179375, + "loss": 0.1665, + "step": 3867 + }, + { + "epoch": 1.17, + "learning_rate": 0.0002633916900722397, + "loss": 0.1998, + "step": 3868 + }, + { + "epoch": 1.17, + "learning_rate": 0.0002633727622143732, + "loss": 0.1203, + "step": 3869 + }, + { + "epoch": 1.18, + "learning_rate": 0.000263353830145041, + "loss": 0.1223, + "step": 3870 + }, + { + "epoch": 1.18, + "learning_rate": 0.00026333489386494647, + "loss": 0.1123, + "step": 3871 + }, + { + "epoch": 1.18, + "learning_rate": 0.000263315953374793, + "loss": 0.215, + "step": 3872 + }, + { + "epoch": 1.18, + "learning_rate": 0.0002632970086752841, + "loss": 0.2643, + "step": 3873 + }, + { + "epoch": 1.18, + "learning_rate": 0.0002632780597671235, + "loss": 0.1208, + "step": 3874 + }, + { + "epoch": 1.18, + "learning_rate": 0.0002632591066510153, + "loss": 0.1534, + "step": 3875 + }, + { + "epoch": 1.18, + "learning_rate": 0.00026324014932766333, + "loss": 0.1991, + "step": 3876 + }, + { + "epoch": 1.18, + "learning_rate": 0.0002632211877977719, + "loss": 0.1587, + "step": 3877 + }, + { + "epoch": 1.18, + "learning_rate": 0.0002632022220620454, + "loss": 0.2054, + "step": 3878 + }, + { + "epoch": 1.18, + "learning_rate": 0.0002631832521211883, + "loss": 0.1169, + "step": 3879 + }, + { + "epoch": 1.18, + "learning_rate": 0.0002631642779759053, + "loss": 0.2059, + "step": 3880 + }, + { + "epoch": 1.18, + "learning_rate": 0.00026314529962690115, + "loss": 0.1549, + "step": 3881 + }, + { + "epoch": 1.18, + "learning_rate": 0.000263126317074881, + "loss": 0.1585, + "step": 3882 + }, + { + "epoch": 1.18, + "learning_rate": 0.0002631073303205499, + "loss": 0.1481, + "step": 3883 + }, + { + "epoch": 1.18, + "learning_rate": 0.0002630883393646131, + "loss": 0.1674, + "step": 3884 + }, + { + "epoch": 1.18, + "learning_rate": 0.0002630693442077762, + "loss": 0.158, + "step": 3885 + }, + { + "epoch": 1.18, + "learning_rate": 0.00026305034485074466, + "loss": 0.2185, + "step": 3886 + }, + { + "epoch": 1.18, + "learning_rate": 0.00026303134129422443, + "loss": 0.1097, + "step": 3887 + }, + { + "epoch": 1.18, + "learning_rate": 0.0002630123335389212, + "loss": 0.1056, + "step": 3888 + }, + { + "epoch": 1.18, + "learning_rate": 0.0002629933215855413, + "loss": 0.1212, + "step": 3889 + }, + { + "epoch": 1.18, + "learning_rate": 0.0002629743054347908, + "loss": 0.1111, + "step": 3890 + }, + { + "epoch": 1.18, + "learning_rate": 0.00026295528508737617, + "loss": 0.0915, + "step": 3891 + }, + { + "epoch": 1.18, + "learning_rate": 0.0002629362605440039, + "loss": 0.1216, + "step": 3892 + }, + { + "epoch": 1.18, + "learning_rate": 0.0002629172318053808, + "loss": 0.2944, + "step": 3893 + }, + { + "epoch": 1.18, + "learning_rate": 0.00026289819887221364, + "loss": 0.2602, + "step": 3894 + }, + { + "epoch": 1.18, + "learning_rate": 0.00026287916174520945, + "loss": 0.2067, + "step": 3895 + }, + { + "epoch": 1.18, + "learning_rate": 0.00026286012042507536, + "loss": 0.2782, + "step": 3896 + }, + { + "epoch": 1.18, + "learning_rate": 0.0002628410749125188, + "loss": 0.0836, + "step": 3897 + }, + { + "epoch": 1.18, + "learning_rate": 0.00026282202520824723, + "loss": 0.1202, + "step": 3898 + }, + { + "epoch": 1.18, + "learning_rate": 0.0002628029713129683, + "loss": 0.0783, + "step": 3899 + }, + { + "epoch": 1.18, + "learning_rate": 0.0002627839132273897, + "loss": 0.1917, + "step": 3900 + }, + { + "epoch": 1.18, + "learning_rate": 0.0002627648509522195, + "loss": 0.1116, + "step": 3901 + }, + { + "epoch": 1.18, + "learning_rate": 0.00026274578448816577, + "loss": 0.1813, + "step": 3902 + }, + { + "epoch": 1.19, + "learning_rate": 0.0002627267138359368, + "loss": 0.1743, + "step": 3903 + }, + { + "epoch": 1.19, + "learning_rate": 0.00026270763899624095, + "loss": 0.1523, + "step": 3904 + }, + { + "epoch": 1.19, + "learning_rate": 0.00026268855996978676, + "loss": 0.2509, + "step": 3905 + }, + { + "epoch": 1.19, + "learning_rate": 0.0002626694767572831, + "loss": 0.2704, + "step": 3906 + }, + { + "epoch": 1.19, + "learning_rate": 0.0002626503893594388, + "loss": 0.1339, + "step": 3907 + }, + { + "epoch": 1.19, + "learning_rate": 0.0002626312977769628, + "loss": 0.088, + "step": 3908 + }, + { + "epoch": 1.19, + "learning_rate": 0.00026261220201056443, + "loss": 0.2307, + "step": 3909 + }, + { + "epoch": 1.19, + "learning_rate": 0.000262593102060953, + "loss": 0.219, + "step": 3910 + }, + { + "epoch": 1.19, + "learning_rate": 0.000262573997928838, + "loss": 0.2331, + "step": 3911 + }, + { + "epoch": 1.19, + "learning_rate": 0.00026255488961492905, + "loss": 0.2133, + "step": 3912 + }, + { + "epoch": 1.19, + "learning_rate": 0.000262535777119936, + "loss": 0.3963, + "step": 3913 + }, + { + "epoch": 1.19, + "learning_rate": 0.0002625166604445689, + "loss": 0.0361, + "step": 3914 + }, + { + "epoch": 1.19, + "learning_rate": 0.0002624975395895378, + "loss": 0.1547, + "step": 3915 + }, + { + "epoch": 1.19, + "learning_rate": 0.000262478414555553, + "loss": 0.0771, + "step": 3916 + }, + { + "epoch": 1.19, + "learning_rate": 0.00026245928534332497, + "loss": 0.1299, + "step": 3917 + }, + { + "epoch": 1.19, + "learning_rate": 0.0002624401519535643, + "loss": 0.1175, + "step": 3918 + }, + { + "epoch": 1.19, + "learning_rate": 0.0002624210143869816, + "loss": 0.2305, + "step": 3919 + }, + { + "epoch": 1.19, + "learning_rate": 0.0002624018726442879, + "loss": 0.0791, + "step": 3920 + }, + { + "epoch": 1.19, + "learning_rate": 0.0002623827267261943, + "loss": 0.118, + "step": 3921 + }, + { + "epoch": 1.19, + "learning_rate": 0.00026236357663341197, + "loss": 0.2312, + "step": 3922 + }, + { + "epoch": 1.19, + "learning_rate": 0.00026234442236665225, + "loss": 0.3259, + "step": 3923 + }, + { + "epoch": 1.19, + "learning_rate": 0.00026232526392662664, + "loss": 0.1091, + "step": 3924 + }, + { + "epoch": 1.19, + "learning_rate": 0.00026230610131404696, + "loss": 0.1878, + "step": 3925 + }, + { + "epoch": 1.19, + "learning_rate": 0.00026228693452962494, + "loss": 0.2935, + "step": 3926 + }, + { + "epoch": 1.19, + "learning_rate": 0.0002622677635740725, + "loss": 0.2647, + "step": 3927 + }, + { + "epoch": 1.19, + "learning_rate": 0.0002622485884481019, + "loss": 0.2307, + "step": 3928 + }, + { + "epoch": 1.19, + "learning_rate": 0.0002622294091524254, + "loss": 0.2233, + "step": 3929 + }, + { + "epoch": 1.19, + "learning_rate": 0.0002622102256877555, + "loss": 0.0594, + "step": 3930 + }, + { + "epoch": 1.19, + "learning_rate": 0.0002621910380548048, + "loss": 0.1366, + "step": 3931 + }, + { + "epoch": 1.19, + "learning_rate": 0.00026217184625428595, + "loss": 0.349, + "step": 3932 + }, + { + "epoch": 1.19, + "learning_rate": 0.00026215265028691197, + "loss": 0.1407, + "step": 3933 + }, + { + "epoch": 1.19, + "learning_rate": 0.00026213345015339596, + "loss": 0.1181, + "step": 3934 + }, + { + "epoch": 1.19, + "learning_rate": 0.0002621142458544511, + "loss": 0.0668, + "step": 3935 + }, + { + "epoch": 1.2, + "learning_rate": 0.0002620950373907907, + "loss": 0.164, + "step": 3936 + }, + { + "epoch": 1.2, + "learning_rate": 0.00026207582476312847, + "loss": 0.2102, + "step": 3937 + }, + { + "epoch": 1.2, + "learning_rate": 0.00026205660797217796, + "loss": 0.3387, + "step": 3938 + }, + { + "epoch": 1.2, + "learning_rate": 0.0002620373870186531, + "loss": 0.1672, + "step": 3939 + }, + { + "epoch": 1.2, + "learning_rate": 0.0002620181619032678, + "loss": 0.2108, + "step": 3940 + }, + { + "epoch": 1.2, + "learning_rate": 0.0002619989326267363, + "loss": 0.2563, + "step": 3941 + }, + { + "epoch": 1.2, + "learning_rate": 0.0002619796991897729, + "loss": 0.0993, + "step": 3942 + }, + { + "epoch": 1.2, + "learning_rate": 0.0002619604615930921, + "loss": 0.2964, + "step": 3943 + }, + { + "epoch": 1.2, + "learning_rate": 0.0002619412198374084, + "loss": 0.0285, + "step": 3944 + }, + { + "epoch": 1.2, + "learning_rate": 0.00026192197392343666, + "loss": 0.1662, + "step": 3945 + }, + { + "epoch": 1.2, + "learning_rate": 0.00026190272385189184, + "loss": 0.2739, + "step": 3946 + }, + { + "epoch": 1.2, + "learning_rate": 0.00026188346962348894, + "loss": 0.0462, + "step": 3947 + }, + { + "epoch": 1.2, + "learning_rate": 0.0002618642112389433, + "loss": 0.1726, + "step": 3948 + }, + { + "epoch": 1.2, + "learning_rate": 0.0002618449486989702, + "loss": 0.0405, + "step": 3949 + }, + { + "epoch": 1.2, + "learning_rate": 0.0002618256820042852, + "loss": 0.1674, + "step": 3950 + }, + { + "epoch": 1.2, + "learning_rate": 0.0002618064111556041, + "loss": 0.1846, + "step": 3951 + }, + { + "epoch": 1.2, + "learning_rate": 0.00026178713615364265, + "loss": 0.206, + "step": 3952 + }, + { + "epoch": 1.2, + "learning_rate": 0.00026176785699911695, + "loss": 0.1263, + "step": 3953 + }, + { + "epoch": 1.2, + "learning_rate": 0.00026174857369274307, + "loss": 0.1085, + "step": 3954 + }, + { + "epoch": 1.2, + "learning_rate": 0.00026172928623523743, + "loss": 0.1195, + "step": 3955 + }, + { + "epoch": 1.2, + "learning_rate": 0.00026170999462731643, + "loss": 0.2139, + "step": 3956 + }, + { + "epoch": 1.2, + "learning_rate": 0.00026169069886969673, + "loss": 0.1215, + "step": 3957 + }, + { + "epoch": 1.2, + "learning_rate": 0.0002616713989630951, + "loss": 0.1107, + "step": 3958 + }, + { + "epoch": 1.2, + "learning_rate": 0.00026165209490822843, + "loss": 0.3585, + "step": 3959 + }, + { + "epoch": 1.2, + "learning_rate": 0.00026163278670581386, + "loss": 0.1092, + "step": 3960 + }, + { + "epoch": 1.2, + "learning_rate": 0.0002616134743565687, + "loss": 0.1964, + "step": 3961 + }, + { + "epoch": 1.2, + "learning_rate": 0.0002615941578612102, + "loss": 0.1071, + "step": 3962 + }, + { + "epoch": 1.2, + "learning_rate": 0.000261574837220456, + "loss": 0.1764, + "step": 3963 + }, + { + "epoch": 1.2, + "learning_rate": 0.00026155551243502373, + "loss": 0.1171, + "step": 3964 + }, + { + "epoch": 1.2, + "learning_rate": 0.00026153618350563136, + "loss": 0.2148, + "step": 3965 + }, + { + "epoch": 1.2, + "learning_rate": 0.00026151685043299686, + "loss": 0.1306, + "step": 3966 + }, + { + "epoch": 1.2, + "learning_rate": 0.00026149751321783836, + "loss": 0.1231, + "step": 3967 + }, + { + "epoch": 1.2, + "learning_rate": 0.00026147817186087427, + "loss": 0.0769, + "step": 3968 + }, + { + "epoch": 1.21, + "learning_rate": 0.0002614588263628229, + "loss": 0.2408, + "step": 3969 + }, + { + "epoch": 1.21, + "learning_rate": 0.00026143947672440304, + "loss": 0.1613, + "step": 3970 + }, + { + "epoch": 1.21, + "learning_rate": 0.0002614201229463334, + "loss": 0.3067, + "step": 3971 + }, + { + "epoch": 1.21, + "learning_rate": 0.00026140076502933297, + "loss": 0.1379, + "step": 3972 + }, + { + "epoch": 1.21, + "learning_rate": 0.0002613814029741208, + "loss": 0.2535, + "step": 3973 + }, + { + "epoch": 1.21, + "learning_rate": 0.0002613620367814161, + "loss": 0.2004, + "step": 3974 + }, + { + "epoch": 1.21, + "learning_rate": 0.0002613426664519383, + "loss": 0.1115, + "step": 3975 + }, + { + "epoch": 1.21, + "learning_rate": 0.0002613232919864069, + "loss": 0.0887, + "step": 3976 + }, + { + "epoch": 1.21, + "learning_rate": 0.00026130391338554174, + "loss": 0.2267, + "step": 3977 + }, + { + "epoch": 1.21, + "learning_rate": 0.00026128453065006255, + "loss": 0.0274, + "step": 3978 + }, + { + "epoch": 1.21, + "learning_rate": 0.0002612651437806894, + "loss": 0.2667, + "step": 3979 + }, + { + "epoch": 1.21, + "learning_rate": 0.00026124575277814246, + "loss": 0.1268, + "step": 3980 + }, + { + "epoch": 1.21, + "learning_rate": 0.00026122635764314197, + "loss": 0.0758, + "step": 3981 + }, + { + "epoch": 1.21, + "learning_rate": 0.00026120695837640855, + "loss": 0.1685, + "step": 3982 + }, + { + "epoch": 1.21, + "learning_rate": 0.00026118755497866265, + "loss": 0.0902, + "step": 3983 + }, + { + "epoch": 1.21, + "learning_rate": 0.00026116814745062516, + "loss": 0.1521, + "step": 3984 + }, + { + "epoch": 1.21, + "learning_rate": 0.000261148735793017, + "loss": 0.1715, + "step": 3985 + }, + { + "epoch": 1.21, + "learning_rate": 0.00026112932000655926, + "loss": 0.3303, + "step": 3986 + }, + { + "epoch": 1.21, + "learning_rate": 0.00026110990009197316, + "loss": 0.1132, + "step": 3987 + }, + { + "epoch": 1.21, + "learning_rate": 0.0002610904760499801, + "loss": 0.0971, + "step": 3988 + }, + { + "epoch": 1.21, + "learning_rate": 0.0002610710478813016, + "loss": 0.1677, + "step": 3989 + }, + { + "epoch": 1.21, + "learning_rate": 0.00026105161558665944, + "loss": 0.1902, + "step": 3990 + }, + { + "epoch": 1.21, + "learning_rate": 0.00026103217916677535, + "loss": 0.2944, + "step": 3991 + }, + { + "epoch": 1.21, + "learning_rate": 0.00026101273862237146, + "loss": 0.1923, + "step": 3992 + }, + { + "epoch": 1.21, + "learning_rate": 0.0002609932939541698, + "loss": 0.0968, + "step": 3993 + }, + { + "epoch": 1.21, + "learning_rate": 0.00026097384516289286, + "loss": 0.0665, + "step": 3994 + }, + { + "epoch": 1.21, + "learning_rate": 0.0002609543922492629, + "loss": 0.249, + "step": 3995 + }, + { + "epoch": 1.21, + "learning_rate": 0.0002609349352140027, + "loss": 0.1739, + "step": 3996 + }, + { + "epoch": 1.21, + "learning_rate": 0.000260915474057835, + "loss": 0.1958, + "step": 3997 + }, + { + "epoch": 1.21, + "learning_rate": 0.0002608960087814826, + "loss": 0.1598, + "step": 3998 + }, + { + "epoch": 1.21, + "learning_rate": 0.0002608765393856687, + "loss": 0.0974, + "step": 3999 + }, + { + "epoch": 1.21, + "learning_rate": 0.0002608570658711166, + "loss": 0.2957, + "step": 4000 + }, + { + "epoch": 1.21, + "learning_rate": 0.0002608375882385495, + "loss": 0.0935, + "step": 4001 + }, + { + "epoch": 1.22, + "learning_rate": 0.00026081810648869103, + "loss": 0.2236, + "step": 4002 + }, + { + "epoch": 1.22, + "learning_rate": 0.00026079862062226486, + "loss": 0.153, + "step": 4003 + }, + { + "epoch": 1.22, + "learning_rate": 0.0002607791306399949, + "loss": 0.2576, + "step": 4004 + }, + { + "epoch": 1.22, + "learning_rate": 0.00026075963654260503, + "loss": 0.1763, + "step": 4005 + }, + { + "epoch": 1.22, + "learning_rate": 0.0002607401383308195, + "loss": 0.1344, + "step": 4006 + }, + { + "epoch": 1.22, + "learning_rate": 0.0002607206360053626, + "loss": 0.2049, + "step": 4007 + }, + { + "epoch": 1.22, + "learning_rate": 0.00026070112956695873, + "loss": 0.2285, + "step": 4008 + }, + { + "epoch": 1.22, + "learning_rate": 0.0002606816190163325, + "loss": 0.0866, + "step": 4009 + }, + { + "epoch": 1.22, + "learning_rate": 0.00026066210435420875, + "loss": 0.1844, + "step": 4010 + }, + { + "epoch": 1.22, + "learning_rate": 0.0002606425855813123, + "loss": 0.046, + "step": 4011 + }, + { + "epoch": 1.22, + "learning_rate": 0.0002606230626983683, + "loss": 0.1398, + "step": 4012 + }, + { + "epoch": 1.22, + "learning_rate": 0.0002606035357061018, + "loss": 0.2996, + "step": 4013 + }, + { + "epoch": 1.22, + "learning_rate": 0.0002605840046052384, + "loss": 0.2093, + "step": 4014 + }, + { + "epoch": 1.22, + "learning_rate": 0.00026056446939650346, + "loss": 0.0018, + "step": 4015 + }, + { + "epoch": 1.22, + "learning_rate": 0.0002605449300806228, + "loss": 0.1312, + "step": 4016 + }, + { + "epoch": 1.22, + "learning_rate": 0.00026052538665832206, + "loss": 0.1281, + "step": 4017 + }, + { + "epoch": 1.22, + "learning_rate": 0.00026050583913032736, + "loss": 0.2583, + "step": 4018 + }, + { + "epoch": 1.22, + "learning_rate": 0.00026048628749736477, + "loss": 0.1789, + "step": 4019 + }, + { + "epoch": 1.22, + "learning_rate": 0.00026046673176016065, + "loss": 0.191, + "step": 4020 + }, + { + "epoch": 1.22, + "learning_rate": 0.0002604471719194413, + "loss": 0.1299, + "step": 4021 + }, + { + "epoch": 1.22, + "learning_rate": 0.00026042760797593346, + "loss": 0.1871, + "step": 4022 + }, + { + "epoch": 1.22, + "learning_rate": 0.0002604080399303638, + "loss": 0.0448, + "step": 4023 + }, + { + "epoch": 1.22, + "learning_rate": 0.0002603884677834592, + "loss": 0.1128, + "step": 4024 + }, + { + "epoch": 1.22, + "learning_rate": 0.00026036889153594676, + "loss": 0.2422, + "step": 4025 + }, + { + "epoch": 1.22, + "learning_rate": 0.00026034931118855366, + "loss": 0.1204, + "step": 4026 + }, + { + "epoch": 1.22, + "learning_rate": 0.00026032972674200724, + "loss": 0.1888, + "step": 4027 + }, + { + "epoch": 1.22, + "learning_rate": 0.00026031013819703496, + "loss": 0.1291, + "step": 4028 + }, + { + "epoch": 1.22, + "learning_rate": 0.0002602905455543646, + "loss": 0.1568, + "step": 4029 + }, + { + "epoch": 1.22, + "learning_rate": 0.0002602709488147239, + "loss": 0.375, + "step": 4030 + }, + { + "epoch": 1.22, + "learning_rate": 0.00026025134797884076, + "loss": 0.1759, + "step": 4031 + }, + { + "epoch": 1.22, + "learning_rate": 0.0002602317430474434, + "loss": 0.2703, + "step": 4032 + }, + { + "epoch": 1.22, + "learning_rate": 0.00026021213402125997, + "loss": 0.2689, + "step": 4033 + }, + { + "epoch": 1.22, + "learning_rate": 0.000260192520901019, + "loss": 0.1667, + "step": 4034 + }, + { + "epoch": 1.23, + "learning_rate": 0.00026017290368744907, + "loss": 0.2239, + "step": 4035 + }, + { + "epoch": 1.23, + "learning_rate": 0.00026015328238127873, + "loss": 0.278, + "step": 4036 + }, + { + "epoch": 1.23, + "learning_rate": 0.0002601336569832371, + "loss": 0.1687, + "step": 4037 + }, + { + "epoch": 1.23, + "learning_rate": 0.00026011402749405297, + "loss": 0.1662, + "step": 4038 + }, + { + "epoch": 1.23, + "learning_rate": 0.0002600943939144557, + "loss": 0.2879, + "step": 4039 + }, + { + "epoch": 1.23, + "learning_rate": 0.0002600747562451745, + "loss": 0.1574, + "step": 4040 + }, + { + "epoch": 1.23, + "learning_rate": 0.0002600551144869389, + "loss": 0.3363, + "step": 4041 + }, + { + "epoch": 1.23, + "learning_rate": 0.00026003546864047857, + "loss": 0.1504, + "step": 4042 + }, + { + "epoch": 1.23, + "learning_rate": 0.0002600158187065232, + "loss": 0.185, + "step": 4043 + }, + { + "epoch": 1.23, + "learning_rate": 0.00025999616468580283, + "loss": 0.1259, + "step": 4044 + }, + { + "epoch": 1.23, + "learning_rate": 0.00025997650657904746, + "loss": 0.2887, + "step": 4045 + }, + { + "epoch": 1.23, + "learning_rate": 0.0002599568443869874, + "loss": 0.1495, + "step": 4046 + }, + { + "epoch": 1.23, + "learning_rate": 0.00025993717811035303, + "loss": 0.1491, + "step": 4047 + }, + { + "epoch": 1.23, + "learning_rate": 0.00025991750774987483, + "loss": 0.1101, + "step": 4048 + }, + { + "epoch": 1.23, + "learning_rate": 0.0002598978333062836, + "loss": 0.0524, + "step": 4049 + }, + { + "epoch": 1.23, + "learning_rate": 0.00025987815478031016, + "loss": 0.3387, + "step": 4050 + }, + { + "epoch": 1.23, + "learning_rate": 0.0002598584721726854, + "loss": 0.0715, + "step": 4051 + }, + { + "epoch": 1.23, + "learning_rate": 0.0002598387854841406, + "loss": 0.1726, + "step": 4052 + }, + { + "epoch": 1.23, + "learning_rate": 0.00025981909471540707, + "loss": 0.2841, + "step": 4053 + }, + { + "epoch": 1.23, + "learning_rate": 0.0002597993998672161, + "loss": 0.1178, + "step": 4054 + }, + { + "epoch": 1.23, + "learning_rate": 0.0002597797009402995, + "loss": 0.2284, + "step": 4055 + }, + { + "epoch": 1.23, + "learning_rate": 0.0002597599979353889, + "loss": 0.2448, + "step": 4056 + }, + { + "epoch": 1.23, + "learning_rate": 0.00025974029085321624, + "loss": 0.2167, + "step": 4057 + }, + { + "epoch": 1.23, + "learning_rate": 0.0002597205796945136, + "loss": 0.287, + "step": 4058 + }, + { + "epoch": 1.23, + "learning_rate": 0.0002597008644600132, + "loss": 0.2035, + "step": 4059 + }, + { + "epoch": 1.23, + "learning_rate": 0.0002596811451504474, + "loss": 0.2791, + "step": 4060 + }, + { + "epoch": 1.23, + "learning_rate": 0.0002596614217665486, + "loss": 0.1181, + "step": 4061 + }, + { + "epoch": 1.23, + "learning_rate": 0.0002596416943090496, + "loss": 0.2179, + "step": 4062 + }, + { + "epoch": 1.23, + "learning_rate": 0.0002596219627786832, + "loss": 0.1001, + "step": 4063 + }, + { + "epoch": 1.23, + "learning_rate": 0.0002596022271761823, + "loss": 0.0858, + "step": 4064 + }, + { + "epoch": 1.23, + "learning_rate": 0.00025958248750228014, + "loss": 0.2815, + "step": 4065 + }, + { + "epoch": 1.23, + "learning_rate": 0.0002595627437577099, + "loss": 0.0753, + "step": 4066 + }, + { + "epoch": 1.23, + "learning_rate": 0.000259542995943205, + "loss": 0.1669, + "step": 4067 + }, + { + "epoch": 1.24, + "learning_rate": 0.000259523244059499, + "loss": 0.2817, + "step": 4068 + }, + { + "epoch": 1.24, + "learning_rate": 0.0002595034881073257, + "loss": 0.1117, + "step": 4069 + }, + { + "epoch": 1.24, + "learning_rate": 0.00025948372808741894, + "loss": 0.1743, + "step": 4070 + }, + { + "epoch": 1.24, + "learning_rate": 0.0002594639640005127, + "loss": 0.2263, + "step": 4071 + }, + { + "epoch": 1.24, + "learning_rate": 0.0002594441958473412, + "loss": 0.1546, + "step": 4072 + }, + { + "epoch": 1.24, + "learning_rate": 0.00025942442362863886, + "loss": 0.084, + "step": 4073 + }, + { + "epoch": 1.24, + "learning_rate": 0.00025940464734514, + "loss": 0.0801, + "step": 4074 + }, + { + "epoch": 1.24, + "learning_rate": 0.0002593848669975793, + "loss": 0.3001, + "step": 4075 + }, + { + "epoch": 1.24, + "learning_rate": 0.00025936508258669156, + "loss": 0.2175, + "step": 4076 + }, + { + "epoch": 1.24, + "learning_rate": 0.0002593452941132117, + "loss": 0.3239, + "step": 4077 + }, + { + "epoch": 1.24, + "learning_rate": 0.00025932550157787486, + "loss": 0.2916, + "step": 4078 + }, + { + "epoch": 1.24, + "learning_rate": 0.0002593057049814162, + "loss": 0.2788, + "step": 4079 + }, + { + "epoch": 1.24, + "learning_rate": 0.0002592859043245711, + "loss": 0.2021, + "step": 4080 + }, + { + "epoch": 1.24, + "learning_rate": 0.00025926609960807523, + "loss": 0.1741, + "step": 4081 + }, + { + "epoch": 1.24, + "learning_rate": 0.00025924629083266414, + "loss": 0.0865, + "step": 4082 + }, + { + "epoch": 1.24, + "learning_rate": 0.0002592264779990737, + "loss": 0.1489, + "step": 4083 + }, + { + "epoch": 1.24, + "learning_rate": 0.0002592066611080399, + "loss": 0.0856, + "step": 4084 + }, + { + "epoch": 1.24, + "learning_rate": 0.00025918684016029887, + "loss": 0.139, + "step": 4085 + }, + { + "epoch": 1.24, + "learning_rate": 0.00025916701515658696, + "loss": 0.1729, + "step": 4086 + }, + { + "epoch": 1.24, + "learning_rate": 0.00025914718609764054, + "loss": 0.1573, + "step": 4087 + }, + { + "epoch": 1.24, + "learning_rate": 0.0002591273529841962, + "loss": 0.2619, + "step": 4088 + }, + { + "epoch": 1.24, + "learning_rate": 0.00025910751581699075, + "loss": 0.2102, + "step": 4089 + }, + { + "epoch": 1.24, + "learning_rate": 0.00025908767459676107, + "loss": 0.1005, + "step": 4090 + }, + { + "epoch": 1.24, + "learning_rate": 0.0002590678293242441, + "loss": 0.1685, + "step": 4091 + }, + { + "epoch": 1.24, + "learning_rate": 0.00025904798000017713, + "loss": 0.14, + "step": 4092 + }, + { + "epoch": 1.24, + "learning_rate": 0.0002590281266252975, + "loss": 0.2498, + "step": 4093 + }, + { + "epoch": 1.24, + "learning_rate": 0.0002590082692003427, + "loss": 0.1318, + "step": 4094 + }, + { + "epoch": 1.24, + "learning_rate": 0.00025898840772605033, + "loss": 0.1808, + "step": 4095 + }, + { + "epoch": 1.24, + "learning_rate": 0.0002589685422031583, + "loss": 0.0863, + "step": 4096 + }, + { + "epoch": 1.24, + "learning_rate": 0.0002589486726324044, + "loss": 0.0017, + "step": 4097 + }, + { + "epoch": 1.24, + "learning_rate": 0.0002589287990145268, + "loss": 0.1309, + "step": 4098 + }, + { + "epoch": 1.24, + "learning_rate": 0.0002589089213502638, + "loss": 0.3018, + "step": 4099 + }, + { + "epoch": 1.24, + "learning_rate": 0.00025888903964035366, + "loss": 0.1494, + "step": 4100 + }, + { + "epoch": 1.25, + "learning_rate": 0.00025886915388553507, + "loss": 0.1619, + "step": 4101 + }, + { + "epoch": 1.25, + "learning_rate": 0.00025884926408654665, + "loss": 0.1953, + "step": 4102 + }, + { + "epoch": 1.25, + "learning_rate": 0.00025882937024412727, + "loss": 0.1316, + "step": 4103 + }, + { + "epoch": 1.25, + "learning_rate": 0.0002588094723590159, + "loss": 0.1293, + "step": 4104 + }, + { + "epoch": 1.25, + "learning_rate": 0.0002587895704319518, + "loss": 0.1175, + "step": 4105 + }, + { + "epoch": 1.25, + "learning_rate": 0.00025876966446367413, + "loss": 0.1413, + "step": 4106 + }, + { + "epoch": 1.25, + "learning_rate": 0.0002587497544549224, + "loss": 0.2212, + "step": 4107 + }, + { + "epoch": 1.25, + "learning_rate": 0.00025872984040643614, + "loss": 0.2501, + "step": 4108 + }, + { + "epoch": 1.25, + "learning_rate": 0.0002587099223189552, + "loss": 0.0461, + "step": 4109 + }, + { + "epoch": 1.25, + "learning_rate": 0.0002586900001932194, + "loss": 0.2056, + "step": 4110 + }, + { + "epoch": 1.25, + "learning_rate": 0.0002586700740299689, + "loss": 0.1539, + "step": 4111 + }, + { + "epoch": 1.25, + "learning_rate": 0.0002586501438299438, + "loss": 0.1334, + "step": 4112 + }, + { + "epoch": 1.25, + "learning_rate": 0.00025863020959388444, + "loss": 0.2087, + "step": 4113 + }, + { + "epoch": 1.25, + "learning_rate": 0.0002586102713225314, + "loss": 0.1274, + "step": 4114 + }, + { + "epoch": 1.25, + "learning_rate": 0.0002585903290166252, + "loss": 0.2935, + "step": 4115 + }, + { + "epoch": 1.25, + "learning_rate": 0.0002585703826769067, + "loss": 0.0552, + "step": 4116 + }, + { + "epoch": 1.25, + "learning_rate": 0.000258550432304117, + "loss": 0.0206, + "step": 4117 + }, + { + "epoch": 1.25, + "learning_rate": 0.00025853047789899694, + "loss": 0.2062, + "step": 4118 + }, + { + "epoch": 1.25, + "learning_rate": 0.00025851051946228795, + "loss": 0.0972, + "step": 4119 + }, + { + "epoch": 1.25, + "learning_rate": 0.0002584905569947313, + "loss": 0.2999, + "step": 4120 + }, + { + "epoch": 1.25, + "learning_rate": 0.0002584705904970687, + "loss": 0.1394, + "step": 4121 + }, + { + "epoch": 1.25, + "learning_rate": 0.0002584506199700417, + "loss": 0.0736, + "step": 4122 + }, + { + "epoch": 1.25, + "learning_rate": 0.00025843064541439216, + "loss": 0.1385, + "step": 4123 + }, + { + "epoch": 1.25, + "learning_rate": 0.00025841066683086216, + "loss": 0.1611, + "step": 4124 + }, + { + "epoch": 1.25, + "learning_rate": 0.0002583906842201938, + "loss": 0.1, + "step": 4125 + }, + { + "epoch": 1.25, + "learning_rate": 0.00025837069758312934, + "loss": 0.1623, + "step": 4126 + }, + { + "epoch": 1.25, + "learning_rate": 0.00025835070692041133, + "loss": 0.4328, + "step": 4127 + }, + { + "epoch": 1.25, + "learning_rate": 0.00025833071223278217, + "loss": 0.1126, + "step": 4128 + }, + { + "epoch": 1.25, + "learning_rate": 0.00025831071352098485, + "loss": 0.2002, + "step": 4129 + }, + { + "epoch": 1.25, + "learning_rate": 0.0002582907107857621, + "loss": 0.1307, + "step": 4130 + }, + { + "epoch": 1.25, + "learning_rate": 0.000258270704027857, + "loss": 0.2638, + "step": 4131 + }, + { + "epoch": 1.25, + "learning_rate": 0.0002582506932480127, + "loss": 0.2019, + "step": 4132 + }, + { + "epoch": 1.25, + "learning_rate": 0.0002582306784469727, + "loss": 0.105, + "step": 4133 + }, + { + "epoch": 1.26, + "learning_rate": 0.0002582106596254803, + "loss": 0.2216, + "step": 4134 + }, + { + "epoch": 1.26, + "learning_rate": 0.0002581906367842792, + "loss": 0.1634, + "step": 4135 + }, + { + "epoch": 1.26, + "learning_rate": 0.0002581706099241132, + "loss": 0.1515, + "step": 4136 + }, + { + "epoch": 1.26, + "learning_rate": 0.0002581505790457263, + "loss": 0.2385, + "step": 4137 + }, + { + "epoch": 1.26, + "learning_rate": 0.00025813054414986256, + "loss": 0.2257, + "step": 4138 + }, + { + "epoch": 1.26, + "learning_rate": 0.0002581105052372662, + "loss": 0.2508, + "step": 4139 + }, + { + "epoch": 1.26, + "learning_rate": 0.0002580904623086815, + "loss": 0.0496, + "step": 4140 + }, + { + "epoch": 1.26, + "learning_rate": 0.0002580704153648532, + "loss": 0.2543, + "step": 4141 + }, + { + "epoch": 1.26, + "learning_rate": 0.0002580503644065258, + "loss": 0.2127, + "step": 4142 + }, + { + "epoch": 1.26, + "learning_rate": 0.0002580303094344443, + "loss": 0.1743, + "step": 4143 + }, + { + "epoch": 1.26, + "learning_rate": 0.0002580102504493535, + "loss": 0.0996, + "step": 4144 + }, + { + "epoch": 1.26, + "learning_rate": 0.00025799018745199874, + "loss": 0.2026, + "step": 4145 + }, + { + "epoch": 1.26, + "learning_rate": 0.00025797012044312514, + "loss": 0.1117, + "step": 4146 + }, + { + "epoch": 1.26, + "learning_rate": 0.00025795004942347823, + "loss": 0.2989, + "step": 4147 + }, + { + "epoch": 1.26, + "learning_rate": 0.0002579299743938035, + "loss": 0.1531, + "step": 4148 + }, + { + "epoch": 1.26, + "learning_rate": 0.00025790989535484676, + "loss": 0.129, + "step": 4149 + }, + { + "epoch": 1.26, + "learning_rate": 0.0002578898123073538, + "loss": 0.1744, + "step": 4150 + }, + { + "epoch": 1.26, + "learning_rate": 0.00025786972525207073, + "loss": 0.1659, + "step": 4151 + }, + { + "epoch": 1.26, + "learning_rate": 0.0002578496341897437, + "loss": 0.1604, + "step": 4152 + }, + { + "epoch": 1.26, + "learning_rate": 0.00025782953912111895, + "loss": 0.2314, + "step": 4153 + }, + { + "epoch": 1.26, + "learning_rate": 0.0002578094400469431, + "loss": 0.1702, + "step": 4154 + }, + { + "epoch": 1.26, + "learning_rate": 0.0002577893369679627, + "loss": 0.2042, + "step": 4155 + }, + { + "epoch": 1.26, + "learning_rate": 0.0002577692298849245, + "loss": 0.1269, + "step": 4156 + }, + { + "epoch": 1.26, + "learning_rate": 0.00025774911879857546, + "loss": 0.2521, + "step": 4157 + }, + { + "epoch": 1.26, + "learning_rate": 0.0002577290037096626, + "loss": 0.1738, + "step": 4158 + }, + { + "epoch": 1.26, + "learning_rate": 0.0002577088846189332, + "loss": 0.1156, + "step": 4159 + }, + { + "epoch": 1.26, + "learning_rate": 0.0002576887615271346, + "loss": 0.2492, + "step": 4160 + }, + { + "epoch": 1.26, + "learning_rate": 0.0002576686344350143, + "loss": 0.1232, + "step": 4161 + }, + { + "epoch": 1.26, + "learning_rate": 0.0002576485033433199, + "loss": 0.2616, + "step": 4162 + }, + { + "epoch": 1.26, + "learning_rate": 0.00025762836825279933, + "loss": 0.1588, + "step": 4163 + }, + { + "epoch": 1.26, + "learning_rate": 0.0002576082291642005, + "loss": 0.1229, + "step": 4164 + }, + { + "epoch": 1.26, + "learning_rate": 0.0002575880860782715, + "loss": 0.068, + "step": 4165 + }, + { + "epoch": 1.26, + "learning_rate": 0.00025756793899576065, + "loss": 0.2273, + "step": 4166 + }, + { + "epoch": 1.27, + "learning_rate": 0.00025754778791741627, + "loss": 0.3106, + "step": 4167 + }, + { + "epoch": 1.27, + "learning_rate": 0.0002575276328439869, + "loss": 0.3169, + "step": 4168 + }, + { + "epoch": 1.27, + "learning_rate": 0.00025750747377622135, + "loss": 0.1428, + "step": 4169 + }, + { + "epoch": 1.27, + "learning_rate": 0.0002574873107148684, + "loss": 0.1609, + "step": 4170 + }, + { + "epoch": 1.27, + "learning_rate": 0.00025746714366067704, + "loss": 0.3146, + "step": 4171 + }, + { + "epoch": 1.27, + "learning_rate": 0.00025744697261439645, + "loss": 0.2274, + "step": 4172 + }, + { + "epoch": 1.27, + "learning_rate": 0.00025742679757677594, + "loss": 0.0857, + "step": 4173 + }, + { + "epoch": 1.27, + "learning_rate": 0.0002574066185485649, + "loss": 0.1218, + "step": 4174 + }, + { + "epoch": 1.27, + "learning_rate": 0.000257386435530513, + "loss": 0.2525, + "step": 4175 + }, + { + "epoch": 1.27, + "learning_rate": 0.0002573662485233699, + "loss": 0.1785, + "step": 4176 + }, + { + "epoch": 1.27, + "learning_rate": 0.00025734605752788544, + "loss": 0.1079, + "step": 4177 + }, + { + "epoch": 1.27, + "learning_rate": 0.0002573258625448098, + "loss": 0.1846, + "step": 4178 + }, + { + "epoch": 1.27, + "learning_rate": 0.0002573056635748931, + "loss": 0.0668, + "step": 4179 + }, + { + "epoch": 1.27, + "learning_rate": 0.0002572854606188857, + "loss": 0.2226, + "step": 4180 + }, + { + "epoch": 1.27, + "learning_rate": 0.00025726525367753797, + "loss": 0.1816, + "step": 4181 + }, + { + "epoch": 1.27, + "learning_rate": 0.0002572450427516007, + "loss": 0.0426, + "step": 4182 + }, + { + "epoch": 1.27, + "learning_rate": 0.00025722482784182446, + "loss": 0.1138, + "step": 4183 + }, + { + "epoch": 1.27, + "learning_rate": 0.00025720460894896046, + "loss": 0.1624, + "step": 4184 + }, + { + "epoch": 1.27, + "learning_rate": 0.0002571843860737595, + "loss": 0.1852, + "step": 4185 + }, + { + "epoch": 1.27, + "learning_rate": 0.00025716415921697293, + "loss": 0.3743, + "step": 4186 + }, + { + "epoch": 1.27, + "learning_rate": 0.0002571439283793521, + "loss": 0.2182, + "step": 4187 + }, + { + "epoch": 1.27, + "learning_rate": 0.0002571236935616486, + "loss": 0.1438, + "step": 4188 + }, + { + "epoch": 1.27, + "learning_rate": 0.00025710345476461394, + "loss": 0.1669, + "step": 4189 + }, + { + "epoch": 1.27, + "learning_rate": 0.00025708321198900005, + "loss": 0.1701, + "step": 4190 + }, + { + "epoch": 1.27, + "learning_rate": 0.0002570629652355588, + "loss": 0.1603, + "step": 4191 + }, + { + "epoch": 1.27, + "learning_rate": 0.00025704271450504245, + "loss": 0.1338, + "step": 4192 + }, + { + "epoch": 1.27, + "learning_rate": 0.0002570224597982031, + "loss": 0.1395, + "step": 4193 + }, + { + "epoch": 1.27, + "learning_rate": 0.0002570022011157932, + "loss": 0.0354, + "step": 4194 + }, + { + "epoch": 1.27, + "learning_rate": 0.0002569819384585653, + "loss": 0.1264, + "step": 4195 + }, + { + "epoch": 1.27, + "learning_rate": 0.00025696167182727214, + "loss": 0.2002, + "step": 4196 + }, + { + "epoch": 1.27, + "learning_rate": 0.00025694140122266653, + "loss": 0.1809, + "step": 4197 + }, + { + "epoch": 1.27, + "learning_rate": 0.0002569211266455015, + "loss": 0.2513, + "step": 4198 + }, + { + "epoch": 1.27, + "learning_rate": 0.0002569008480965301, + "loss": 0.2597, + "step": 4199 + }, + { + "epoch": 1.28, + "learning_rate": 0.0002568805655765057, + "loss": 0.2595, + "step": 4200 + }, + { + "epoch": 1.28, + "learning_rate": 0.0002568602790861817, + "loss": 0.2388, + "step": 4201 + }, + { + "epoch": 1.28, + "learning_rate": 0.0002568399886263117, + "loss": 0.1399, + "step": 4202 + }, + { + "epoch": 1.28, + "learning_rate": 0.0002568196941976495, + "loss": 0.1252, + "step": 4203 + }, + { + "epoch": 1.28, + "learning_rate": 0.0002567993958009488, + "loss": 0.279, + "step": 4204 + }, + { + "epoch": 1.28, + "learning_rate": 0.0002567790934369638, + "loss": 0.2732, + "step": 4205 + }, + { + "epoch": 1.28, + "learning_rate": 0.00025675878710644854, + "loss": 0.2023, + "step": 4206 + }, + { + "epoch": 1.28, + "learning_rate": 0.0002567384768101575, + "loss": 0.1929, + "step": 4207 + }, + { + "epoch": 1.28, + "learning_rate": 0.000256718162548845, + "loss": 0.2213, + "step": 4208 + }, + { + "epoch": 1.28, + "learning_rate": 0.0002566978443232657, + "loss": 0.2138, + "step": 4209 + }, + { + "epoch": 1.28, + "learning_rate": 0.00025667752213417435, + "loss": 0.2451, + "step": 4210 + }, + { + "epoch": 1.28, + "learning_rate": 0.00025665719598232594, + "loss": 0.2367, + "step": 4211 + }, + { + "epoch": 1.28, + "learning_rate": 0.00025663686586847547, + "loss": 0.119, + "step": 4212 + }, + { + "epoch": 1.28, + "learning_rate": 0.0002566165317933781, + "loss": 0.2256, + "step": 4213 + }, + { + "epoch": 1.28, + "learning_rate": 0.0002565961937577892, + "loss": 0.1422, + "step": 4214 + }, + { + "epoch": 1.28, + "learning_rate": 0.00025657585176246437, + "loss": 0.0643, + "step": 4215 + }, + { + "epoch": 1.28, + "learning_rate": 0.00025655550580815914, + "loss": 0.2553, + "step": 4216 + }, + { + "epoch": 1.28, + "learning_rate": 0.0002565351558956293, + "loss": 0.1921, + "step": 4217 + }, + { + "epoch": 1.28, + "learning_rate": 0.0002565148020256309, + "loss": 0.2913, + "step": 4218 + }, + { + "epoch": 1.28, + "learning_rate": 0.00025649444419891996, + "loss": 0.2056, + "step": 4219 + }, + { + "epoch": 1.28, + "learning_rate": 0.00025647408241625267, + "loss": 0.1966, + "step": 4220 + }, + { + "epoch": 1.28, + "learning_rate": 0.0002564537166783855, + "loss": 0.2246, + "step": 4221 + }, + { + "epoch": 1.28, + "learning_rate": 0.0002564333469860749, + "loss": 0.2523, + "step": 4222 + }, + { + "epoch": 1.28, + "learning_rate": 0.0002564129733400775, + "loss": 0.0024, + "step": 4223 + }, + { + "epoch": 1.28, + "learning_rate": 0.0002563925957411503, + "loss": 0.2817, + "step": 4224 + }, + { + "epoch": 1.28, + "learning_rate": 0.0002563722141900501, + "loss": 0.1895, + "step": 4225 + }, + { + "epoch": 1.28, + "learning_rate": 0.0002563518286875341, + "loss": 0.1255, + "step": 4226 + }, + { + "epoch": 1.28, + "learning_rate": 0.00025633143923435955, + "loss": 0.1655, + "step": 4227 + }, + { + "epoch": 1.28, + "learning_rate": 0.0002563110458312838, + "loss": 0.1441, + "step": 4228 + }, + { + "epoch": 1.28, + "learning_rate": 0.0002562906484790645, + "loss": 0.2063, + "step": 4229 + }, + { + "epoch": 1.28, + "learning_rate": 0.0002562702471784593, + "loss": 0.2122, + "step": 4230 + }, + { + "epoch": 1.28, + "learning_rate": 0.000256249841930226, + "loss": 0.2318, + "step": 4231 + }, + { + "epoch": 1.28, + "learning_rate": 0.0002562294327351227, + "loss": 0.1752, + "step": 4232 + }, + { + "epoch": 1.29, + "learning_rate": 0.00025620901959390747, + "loss": 0.3459, + "step": 4233 + }, + { + "epoch": 1.29, + "learning_rate": 0.00025618860250733864, + "loss": 0.2903, + "step": 4234 + }, + { + "epoch": 1.29, + "learning_rate": 0.0002561681814761746, + "loss": 0.2404, + "step": 4235 + }, + { + "epoch": 1.29, + "learning_rate": 0.00025614775650117396, + "loss": 0.1463, + "step": 4236 + }, + { + "epoch": 1.29, + "learning_rate": 0.00025612732758309545, + "loss": 0.2625, + "step": 4237 + }, + { + "epoch": 1.29, + "learning_rate": 0.0002561068947226979, + "loss": 0.3019, + "step": 4238 + }, + { + "epoch": 1.29, + "learning_rate": 0.00025608645792074043, + "loss": 0.1873, + "step": 4239 + }, + { + "epoch": 1.29, + "learning_rate": 0.00025606601717798207, + "loss": 0.1612, + "step": 4240 + }, + { + "epoch": 1.29, + "learning_rate": 0.0002560455724951823, + "loss": 0.2073, + "step": 4241 + }, + { + "epoch": 1.29, + "learning_rate": 0.0002560251238731004, + "loss": 0.1716, + "step": 4242 + }, + { + "epoch": 1.29, + "learning_rate": 0.0002560046713124961, + "loss": 0.008, + "step": 4243 + }, + { + "epoch": 1.29, + "learning_rate": 0.00025598421481412917, + "loss": 0.1354, + "step": 4244 + }, + { + "epoch": 1.29, + "learning_rate": 0.0002559637543787594, + "loss": 0.2173, + "step": 4245 + }, + { + "epoch": 1.29, + "learning_rate": 0.00025594329000714684, + "loss": 0.2582, + "step": 4246 + }, + { + "epoch": 1.29, + "learning_rate": 0.00025592282170005176, + "loss": 0.1655, + "step": 4247 + }, + { + "epoch": 1.29, + "learning_rate": 0.00025590234945823456, + "loss": 0.1738, + "step": 4248 + }, + { + "epoch": 1.29, + "learning_rate": 0.0002558818732824556, + "loss": 0.1009, + "step": 4249 + }, + { + "epoch": 1.29, + "learning_rate": 0.00025586139317347546, + "loss": 0.2141, + "step": 4250 + }, + { + "epoch": 1.29, + "learning_rate": 0.00025584090913205503, + "loss": 0.1251, + "step": 4251 + }, + { + "epoch": 1.29, + "learning_rate": 0.00025582042115895523, + "loss": 0.1487, + "step": 4252 + }, + { + "epoch": 1.29, + "learning_rate": 0.0002557999292549371, + "loss": 0.0953, + "step": 4253 + }, + { + "epoch": 1.29, + "learning_rate": 0.0002557794334207619, + "loss": 0.2014, + "step": 4254 + }, + { + "epoch": 1.29, + "learning_rate": 0.00025575893365719087, + "loss": 0.2347, + "step": 4255 + }, + { + "epoch": 1.29, + "learning_rate": 0.0002557384299649856, + "loss": 0.1739, + "step": 4256 + }, + { + "epoch": 1.29, + "learning_rate": 0.00025571792234490773, + "loss": 0.2156, + "step": 4257 + }, + { + "epoch": 1.29, + "learning_rate": 0.00025569741079771907, + "loss": 0.095, + "step": 4258 + }, + { + "epoch": 1.29, + "learning_rate": 0.0002556768953241816, + "loss": 0.04, + "step": 4259 + }, + { + "epoch": 1.29, + "learning_rate": 0.0002556563759250573, + "loss": 0.3647, + "step": 4260 + }, + { + "epoch": 1.29, + "learning_rate": 0.00025563585260110846, + "loss": 0.0762, + "step": 4261 + }, + { + "epoch": 1.29, + "learning_rate": 0.0002556153253530975, + "loss": 0.2275, + "step": 4262 + }, + { + "epoch": 1.29, + "learning_rate": 0.0002555947941817869, + "loss": 0.2045, + "step": 4263 + }, + { + "epoch": 1.29, + "learning_rate": 0.0002555742590879394, + "loss": 0.1859, + "step": 4264 + }, + { + "epoch": 1.29, + "learning_rate": 0.00025555372007231774, + "loss": 0.2858, + "step": 4265 + }, + { + "epoch": 1.3, + "learning_rate": 0.00025553317713568487, + "loss": 0.0979, + "step": 4266 + }, + { + "epoch": 1.3, + "learning_rate": 0.00025551263027880403, + "loss": 0.255, + "step": 4267 + }, + { + "epoch": 1.3, + "learning_rate": 0.00025549207950243834, + "loss": 0.2457, + "step": 4268 + }, + { + "epoch": 1.3, + "learning_rate": 0.00025547152480735123, + "loss": 0.0759, + "step": 4269 + }, + { + "epoch": 1.3, + "learning_rate": 0.0002554509661943063, + "loss": 0.121, + "step": 4270 + }, + { + "epoch": 1.3, + "learning_rate": 0.0002554304036640672, + "loss": 0.1347, + "step": 4271 + }, + { + "epoch": 1.3, + "learning_rate": 0.00025540983721739774, + "loss": 0.2941, + "step": 4272 + }, + { + "epoch": 1.3, + "learning_rate": 0.000255389266855062, + "loss": 0.2435, + "step": 4273 + }, + { + "epoch": 1.3, + "learning_rate": 0.000255368692577824, + "loss": 0.1708, + "step": 4274 + }, + { + "epoch": 1.3, + "learning_rate": 0.0002553481143864481, + "loss": 0.089, + "step": 4275 + }, + { + "epoch": 1.3, + "learning_rate": 0.00025532753228169866, + "loss": 0.1432, + "step": 4276 + }, + { + "epoch": 1.3, + "learning_rate": 0.0002553069462643403, + "loss": 0.1267, + "step": 4277 + }, + { + "epoch": 1.3, + "learning_rate": 0.00025528635633513763, + "loss": 0.1915, + "step": 4278 + }, + { + "epoch": 1.3, + "learning_rate": 0.0002552657624948556, + "loss": 0.0586, + "step": 4279 + }, + { + "epoch": 1.3, + "learning_rate": 0.00025524516474425917, + "loss": 0.1039, + "step": 4280 + }, + { + "epoch": 1.3, + "learning_rate": 0.00025522456308411354, + "loss": 0.2094, + "step": 4281 + }, + { + "epoch": 1.3, + "learning_rate": 0.00025520395751518397, + "loss": 0.1204, + "step": 4282 + }, + { + "epoch": 1.3, + "learning_rate": 0.00025518334803823584, + "loss": 0.137, + "step": 4283 + }, + { + "epoch": 1.3, + "learning_rate": 0.00025516273465403473, + "loss": 0.2003, + "step": 4284 + }, + { + "epoch": 1.3, + "learning_rate": 0.0002551421173633465, + "loss": 0.2184, + "step": 4285 + }, + { + "epoch": 1.3, + "learning_rate": 0.00025512149616693694, + "loss": 0.2458, + "step": 4286 + }, + { + "epoch": 1.3, + "learning_rate": 0.000255100871065572, + "loss": 0.2783, + "step": 4287 + }, + { + "epoch": 1.3, + "learning_rate": 0.00025508024206001795, + "loss": 0.2701, + "step": 4288 + }, + { + "epoch": 1.3, + "learning_rate": 0.00025505960915104105, + "loss": 0.104, + "step": 4289 + }, + { + "epoch": 1.3, + "learning_rate": 0.0002550389723394078, + "loss": 0.2904, + "step": 4290 + }, + { + "epoch": 1.3, + "learning_rate": 0.00025501833162588466, + "loss": 0.3025, + "step": 4291 + }, + { + "epoch": 1.3, + "learning_rate": 0.0002549976870112385, + "loss": 0.155, + "step": 4292 + }, + { + "epoch": 1.3, + "learning_rate": 0.0002549770384962362, + "loss": 0.2118, + "step": 4293 + }, + { + "epoch": 1.3, + "learning_rate": 0.0002549563860816447, + "loss": 0.0679, + "step": 4294 + }, + { + "epoch": 1.3, + "learning_rate": 0.0002549357297682313, + "loss": 0.2418, + "step": 4295 + }, + { + "epoch": 1.3, + "learning_rate": 0.0002549150695567632, + "loss": 0.245, + "step": 4296 + }, + { + "epoch": 1.3, + "learning_rate": 0.000254894405448008, + "loss": 0.1558, + "step": 4297 + }, + { + "epoch": 1.3, + "learning_rate": 0.0002548737374427332, + "loss": 0.1318, + "step": 4298 + }, + { + "epoch": 1.31, + "learning_rate": 0.0002548530655417066, + "loss": 0.1733, + "step": 4299 + }, + { + "epoch": 1.31, + "learning_rate": 0.0002548323897456961, + "loss": 0.3409, + "step": 4300 + }, + { + "epoch": 1.31, + "learning_rate": 0.00025481171005546974, + "loss": 0.3033, + "step": 4301 + }, + { + "epoch": 1.31, + "learning_rate": 0.00025479102647179566, + "loss": 0.1971, + "step": 4302 + }, + { + "epoch": 1.31, + "learning_rate": 0.00025477033899544226, + "loss": 0.1816, + "step": 4303 + }, + { + "epoch": 1.31, + "learning_rate": 0.00025474964762717804, + "loss": 0.2536, + "step": 4304 + }, + { + "epoch": 1.31, + "learning_rate": 0.0002547289523677716, + "loss": 0.1259, + "step": 4305 + }, + { + "epoch": 1.31, + "learning_rate": 0.0002547082532179917, + "loss": 0.183, + "step": 4306 + }, + { + "epoch": 1.31, + "learning_rate": 0.0002546875501786072, + "loss": 0.192, + "step": 4307 + }, + { + "epoch": 1.31, + "learning_rate": 0.0002546668432503872, + "loss": 0.0433, + "step": 4308 + }, + { + "epoch": 1.31, + "learning_rate": 0.0002546461324341009, + "loss": 0.2677, + "step": 4309 + }, + { + "epoch": 1.31, + "learning_rate": 0.00025462541773051773, + "loss": 0.1748, + "step": 4310 + }, + { + "epoch": 1.31, + "learning_rate": 0.00025460469914040704, + "loss": 0.2153, + "step": 4311 + }, + { + "epoch": 1.31, + "learning_rate": 0.00025458397666453853, + "loss": 0.2379, + "step": 4312 + }, + { + "epoch": 1.31, + "learning_rate": 0.000254563250303682, + "loss": 0.2173, + "step": 4313 + }, + { + "epoch": 1.31, + "learning_rate": 0.0002545425200586073, + "loss": 0.2508, + "step": 4314 + }, + { + "epoch": 1.31, + "learning_rate": 0.0002545217859300846, + "loss": 0.2203, + "step": 4315 + }, + { + "epoch": 1.31, + "learning_rate": 0.00025450104791888405, + "loss": 0.2244, + "step": 4316 + }, + { + "epoch": 1.31, + "learning_rate": 0.00025448030602577597, + "loss": 0.0922, + "step": 4317 + }, + { + "epoch": 1.31, + "learning_rate": 0.00025445956025153094, + "loss": 0.1845, + "step": 4318 + }, + { + "epoch": 1.31, + "learning_rate": 0.0002544388105969196, + "loss": 0.0785, + "step": 4319 + }, + { + "epoch": 1.31, + "learning_rate": 0.0002544180570627127, + "loss": 0.1685, + "step": 4320 + }, + { + "epoch": 1.31, + "learning_rate": 0.00025439729964968116, + "loss": 0.1556, + "step": 4321 + }, + { + "epoch": 1.31, + "learning_rate": 0.0002543765383585961, + "loss": 0.11, + "step": 4322 + }, + { + "epoch": 1.31, + "learning_rate": 0.00025435577319022875, + "loss": 0.2852, + "step": 4323 + }, + { + "epoch": 1.31, + "learning_rate": 0.00025433500414535036, + "loss": 0.1023, + "step": 4324 + }, + { + "epoch": 1.31, + "learning_rate": 0.0002543142312247326, + "loss": 0.1445, + "step": 4325 + }, + { + "epoch": 1.31, + "learning_rate": 0.00025429345442914704, + "loss": 0.0697, + "step": 4326 + }, + { + "epoch": 1.31, + "learning_rate": 0.00025427267375936546, + "loss": 0.0863, + "step": 4327 + }, + { + "epoch": 1.31, + "learning_rate": 0.00025425188921615984, + "loss": 0.2336, + "step": 4328 + }, + { + "epoch": 1.31, + "learning_rate": 0.00025423110080030224, + "loss": 0.1306, + "step": 4329 + }, + { + "epoch": 1.31, + "learning_rate": 0.00025421030851256494, + "loss": 0.1255, + "step": 4330 + }, + { + "epoch": 1.32, + "learning_rate": 0.00025418951235372026, + "loss": 0.0936, + "step": 4331 + }, + { + "epoch": 1.32, + "learning_rate": 0.0002541687123245407, + "loss": 0.2132, + "step": 4332 + }, + { + "epoch": 1.32, + "learning_rate": 0.0002541479084257989, + "loss": 0.0508, + "step": 4333 + }, + { + "epoch": 1.32, + "learning_rate": 0.0002541271006582678, + "loss": 0.0679, + "step": 4334 + }, + { + "epoch": 1.32, + "learning_rate": 0.00025410628902272024, + "loss": 0.2652, + "step": 4335 + }, + { + "epoch": 1.32, + "learning_rate": 0.00025408547351992935, + "loss": 0.2264, + "step": 4336 + }, + { + "epoch": 1.32, + "learning_rate": 0.0002540646541506683, + "loss": 0.1389, + "step": 4337 + }, + { + "epoch": 1.32, + "learning_rate": 0.00025404383091571055, + "loss": 0.1385, + "step": 4338 + }, + { + "epoch": 1.32, + "learning_rate": 0.0002540230038158296, + "loss": 0.1618, + "step": 4339 + }, + { + "epoch": 1.32, + "learning_rate": 0.0002540021728517991, + "loss": 0.3054, + "step": 4340 + }, + { + "epoch": 1.32, + "learning_rate": 0.0002539813380243928, + "loss": 0.0295, + "step": 4341 + }, + { + "epoch": 1.32, + "learning_rate": 0.0002539604993343848, + "loss": 0.2409, + "step": 4342 + }, + { + "epoch": 1.32, + "learning_rate": 0.00025393965678254904, + "loss": 0.2007, + "step": 4343 + }, + { + "epoch": 1.32, + "learning_rate": 0.0002539188103696599, + "loss": 0.0768, + "step": 4344 + }, + { + "epoch": 1.32, + "learning_rate": 0.0002538979600964916, + "loss": 0.1539, + "step": 4345 + }, + { + "epoch": 1.32, + "learning_rate": 0.00025387710596381884, + "loss": 0.354, + "step": 4346 + }, + { + "epoch": 1.32, + "learning_rate": 0.00025385624797241615, + "loss": 0.1321, + "step": 4347 + }, + { + "epoch": 1.32, + "learning_rate": 0.0002538353861230585, + "loss": 0.1979, + "step": 4348 + }, + { + "epoch": 1.32, + "learning_rate": 0.0002538145204165207, + "loss": 0.3339, + "step": 4349 + }, + { + "epoch": 1.32, + "learning_rate": 0.00025379365085357783, + "loss": 0.2354, + "step": 4350 + }, + { + "epoch": 1.32, + "learning_rate": 0.00025377277743500535, + "loss": 0.1243, + "step": 4351 + }, + { + "epoch": 1.32, + "learning_rate": 0.00025375190016157834, + "loss": 0.1598, + "step": 4352 + }, + { + "epoch": 1.32, + "learning_rate": 0.0002537310190340726, + "loss": 0.157, + "step": 4353 + }, + { + "epoch": 1.32, + "learning_rate": 0.00025371013405326364, + "loss": 0.1468, + "step": 4354 + }, + { + "epoch": 1.32, + "learning_rate": 0.00025368924521992735, + "loss": 0.2653, + "step": 4355 + }, + { + "epoch": 1.32, + "learning_rate": 0.00025366835253483967, + "loss": 0.106, + "step": 4356 + }, + { + "epoch": 1.32, + "learning_rate": 0.0002536474559987767, + "loss": 0.3201, + "step": 4357 + }, + { + "epoch": 1.32, + "learning_rate": 0.0002536265556125146, + "loss": 0.159, + "step": 4358 + }, + { + "epoch": 1.32, + "learning_rate": 0.0002536056513768299, + "loss": 0.1555, + "step": 4359 + }, + { + "epoch": 1.32, + "learning_rate": 0.0002535847432924991, + "loss": 0.2708, + "step": 4360 + }, + { + "epoch": 1.32, + "learning_rate": 0.00025356383136029885, + "loss": 0.2348, + "step": 4361 + }, + { + "epoch": 1.32, + "learning_rate": 0.00025354291558100597, + "loss": 0.1267, + "step": 4362 + }, + { + "epoch": 1.32, + "learning_rate": 0.00025352199595539735, + "loss": 0.1971, + "step": 4363 + }, + { + "epoch": 1.33, + "learning_rate": 0.00025350107248425026, + "loss": 0.1252, + "step": 4364 + }, + { + "epoch": 1.33, + "learning_rate": 0.0002534801451683417, + "loss": 0.1865, + "step": 4365 + }, + { + "epoch": 1.33, + "learning_rate": 0.00025345921400844935, + "loss": 0.2762, + "step": 4366 + }, + { + "epoch": 1.33, + "learning_rate": 0.00025343827900535043, + "loss": 0.2051, + "step": 4367 + }, + { + "epoch": 1.33, + "learning_rate": 0.0002534173401598229, + "loss": 0.1932, + "step": 4368 + }, + { + "epoch": 1.33, + "learning_rate": 0.0002533963974726444, + "loss": 0.1648, + "step": 4369 + }, + { + "epoch": 1.33, + "learning_rate": 0.00025337545094459294, + "loss": 0.051, + "step": 4370 + }, + { + "epoch": 1.33, + "learning_rate": 0.0002533545005764467, + "loss": 0.2234, + "step": 4371 + }, + { + "epoch": 1.33, + "learning_rate": 0.00025333354636898376, + "loss": 0.1073, + "step": 4372 + }, + { + "epoch": 1.33, + "learning_rate": 0.0002533125883229826, + "loss": 0.2127, + "step": 4373 + }, + { + "epoch": 1.33, + "learning_rate": 0.0002532916264392218, + "loss": 0.2292, + "step": 4374 + }, + { + "epoch": 1.33, + "learning_rate": 0.0002532706607184799, + "loss": 0.1945, + "step": 4375 + }, + { + "epoch": 1.33, + "learning_rate": 0.00025324969116153587, + "loss": 0.1662, + "step": 4376 + }, + { + "epoch": 1.33, + "learning_rate": 0.00025322871776916857, + "loss": 0.1819, + "step": 4377 + }, + { + "epoch": 1.33, + "learning_rate": 0.0002532077405421571, + "loss": 0.1208, + "step": 4378 + }, + { + "epoch": 1.33, + "learning_rate": 0.00025318675948128073, + "loss": 0.2181, + "step": 4379 + }, + { + "epoch": 1.33, + "learning_rate": 0.00025316577458731884, + "loss": 0.1249, + "step": 4380 + }, + { + "epoch": 1.33, + "learning_rate": 0.00025314478586105094, + "loss": 0.0371, + "step": 4381 + }, + { + "epoch": 1.33, + "learning_rate": 0.0002531237933032567, + "loss": 0.2304, + "step": 4382 + }, + { + "epoch": 1.33, + "learning_rate": 0.00025310279691471597, + "loss": 0.1941, + "step": 4383 + }, + { + "epoch": 1.33, + "learning_rate": 0.0002530817966962088, + "loss": 0.1845, + "step": 4384 + }, + { + "epoch": 1.33, + "learning_rate": 0.00025306079264851496, + "loss": 0.2078, + "step": 4385 + }, + { + "epoch": 1.33, + "learning_rate": 0.000253039784772415, + "loss": 0.1926, + "step": 4386 + }, + { + "epoch": 1.33, + "learning_rate": 0.00025301877306868923, + "loss": 0.1454, + "step": 4387 + }, + { + "epoch": 1.33, + "learning_rate": 0.0002529977575381181, + "loss": 0.1107, + "step": 4388 + }, + { + "epoch": 1.33, + "learning_rate": 0.0002529767381814823, + "loss": 0.2026, + "step": 4389 + }, + { + "epoch": 1.33, + "learning_rate": 0.00025295571499956267, + "loss": 0.2475, + "step": 4390 + }, + { + "epoch": 1.33, + "learning_rate": 0.0002529346879931401, + "loss": 0.2163, + "step": 4391 + }, + { + "epoch": 1.33, + "learning_rate": 0.0002529136571629958, + "loss": 0.1569, + "step": 4392 + }, + { + "epoch": 1.33, + "learning_rate": 0.00025289262250991086, + "loss": 0.2753, + "step": 4393 + }, + { + "epoch": 1.33, + "learning_rate": 0.0002528715840346668, + "loss": 0.2022, + "step": 4394 + }, + { + "epoch": 1.33, + "learning_rate": 0.000252850541738045, + "loss": 0.0909, + "step": 4395 + }, + { + "epoch": 1.33, + "learning_rate": 0.0002528294956208272, + "loss": 0.2593, + "step": 4396 + }, + { + "epoch": 1.34, + "learning_rate": 0.0002528084456837952, + "loss": 0.1326, + "step": 4397 + }, + { + "epoch": 1.34, + "learning_rate": 0.0002527873919277309, + "loss": 0.1894, + "step": 4398 + }, + { + "epoch": 1.34, + "learning_rate": 0.00025276633435341645, + "loss": 0.1443, + "step": 4399 + }, + { + "epoch": 1.34, + "learning_rate": 0.000252745272961634, + "loss": 0.1819, + "step": 4400 + }, + { + "epoch": 1.34, + "learning_rate": 0.000252724207753166, + "loss": 0.1046, + "step": 4401 + }, + { + "epoch": 1.34, + "learning_rate": 0.00025270313872879485, + "loss": 0.1032, + "step": 4402 + }, + { + "epoch": 1.34, + "learning_rate": 0.0002526820658893033, + "loss": 0.1733, + "step": 4403 + }, + { + "epoch": 1.34, + "learning_rate": 0.0002526609892354741, + "loss": 0.2367, + "step": 4404 + }, + { + "epoch": 1.34, + "learning_rate": 0.0002526399087680902, + "loss": 0.1463, + "step": 4405 + }, + { + "epoch": 1.34, + "learning_rate": 0.0002526188244879347, + "loss": 0.175, + "step": 4406 + }, + { + "epoch": 1.34, + "learning_rate": 0.00025259773639579083, + "loss": 0.231, + "step": 4407 + }, + { + "epoch": 1.34, + "learning_rate": 0.00025257664449244185, + "loss": 0.2179, + "step": 4408 + }, + { + "epoch": 1.34, + "learning_rate": 0.0002525555487786714, + "loss": 0.1904, + "step": 4409 + }, + { + "epoch": 1.34, + "learning_rate": 0.00025253444925526304, + "loss": 0.2491, + "step": 4410 + }, + { + "epoch": 1.34, + "learning_rate": 0.00025251334592300053, + "loss": 0.195, + "step": 4411 + }, + { + "epoch": 1.34, + "learning_rate": 0.00025249223878266784, + "loss": 0.1501, + "step": 4412 + }, + { + "epoch": 1.34, + "learning_rate": 0.00025247112783504907, + "loss": 0.19, + "step": 4413 + }, + { + "epoch": 1.34, + "learning_rate": 0.00025245001308092836, + "loss": 0.161, + "step": 4414 + }, + { + "epoch": 1.34, + "learning_rate": 0.00025242889452109014, + "loss": 0.1791, + "step": 4415 + }, + { + "epoch": 1.34, + "learning_rate": 0.0002524077721563188, + "loss": 0.2351, + "step": 4416 + }, + { + "epoch": 1.34, + "learning_rate": 0.0002523866459873991, + "loss": 0.1969, + "step": 4417 + }, + { + "epoch": 1.34, + "learning_rate": 0.0002523655160151157, + "loss": 0.2822, + "step": 4418 + }, + { + "epoch": 1.34, + "learning_rate": 0.00025234438224025355, + "loss": 0.1521, + "step": 4419 + }, + { + "epoch": 1.34, + "learning_rate": 0.00025232324466359775, + "loss": 0.1844, + "step": 4420 + }, + { + "epoch": 1.34, + "learning_rate": 0.0002523021032859335, + "loss": 0.1376, + "step": 4421 + }, + { + "epoch": 1.34, + "learning_rate": 0.0002522809581080461, + "loss": 0.1478, + "step": 4422 + }, + { + "epoch": 1.34, + "learning_rate": 0.000252259809130721, + "loss": 0.2387, + "step": 4423 + }, + { + "epoch": 1.34, + "learning_rate": 0.0002522386563547439, + "loss": 0.1397, + "step": 4424 + }, + { + "epoch": 1.34, + "learning_rate": 0.00025221749978090053, + "loss": 0.0633, + "step": 4425 + }, + { + "epoch": 1.34, + "learning_rate": 0.00025219633940997674, + "loss": 0.2911, + "step": 4426 + }, + { + "epoch": 1.34, + "learning_rate": 0.0002521751752427587, + "loss": 0.2087, + "step": 4427 + }, + { + "epoch": 1.34, + "learning_rate": 0.00025215400728003246, + "loss": 0.172, + "step": 4428 + }, + { + "epoch": 1.34, + "learning_rate": 0.00025213283552258446, + "loss": 0.0438, + "step": 4429 + }, + { + "epoch": 1.35, + "learning_rate": 0.00025211165997120113, + "loss": 0.2135, + "step": 4430 + }, + { + "epoch": 1.35, + "learning_rate": 0.00025209048062666906, + "loss": 0.3009, + "step": 4431 + }, + { + "epoch": 1.35, + "learning_rate": 0.000252069297489775, + "loss": 0.1806, + "step": 4432 + }, + { + "epoch": 1.35, + "learning_rate": 0.0002520481105613059, + "loss": 0.1209, + "step": 4433 + }, + { + "epoch": 1.35, + "learning_rate": 0.0002520269198420487, + "loss": 0.2764, + "step": 4434 + }, + { + "epoch": 1.35, + "learning_rate": 0.0002520057253327906, + "loss": 0.1753, + "step": 4435 + }, + { + "epoch": 1.35, + "learning_rate": 0.00025198452703431893, + "loss": 0.1249, + "step": 4436 + }, + { + "epoch": 1.35, + "learning_rate": 0.00025196332494742125, + "loss": 0.3128, + "step": 4437 + }, + { + "epoch": 1.35, + "learning_rate": 0.0002519421190728849, + "loss": 0.1852, + "step": 4438 + }, + { + "epoch": 1.35, + "learning_rate": 0.0002519209094114979, + "loss": 0.2082, + "step": 4439 + }, + { + "epoch": 1.35, + "learning_rate": 0.000251899695964048, + "loss": 0.0786, + "step": 4440 + }, + { + "epoch": 1.35, + "learning_rate": 0.0002518784787313231, + "loss": 0.2009, + "step": 4441 + }, + { + "epoch": 1.35, + "learning_rate": 0.0002518572577141115, + "loss": 0.1996, + "step": 4442 + }, + { + "epoch": 1.35, + "learning_rate": 0.00025183603291320153, + "loss": 0.2742, + "step": 4443 + }, + { + "epoch": 1.35, + "learning_rate": 0.0002518148043293815, + "loss": 0.1295, + "step": 4444 + }, + { + "epoch": 1.35, + "learning_rate": 0.0002517935719634402, + "loss": 0.1328, + "step": 4445 + }, + { + "epoch": 1.35, + "learning_rate": 0.00025177233581616604, + "loss": 0.0894, + "step": 4446 + }, + { + "epoch": 1.35, + "learning_rate": 0.00025175109588834813, + "loss": 0.2079, + "step": 4447 + }, + { + "epoch": 1.35, + "learning_rate": 0.0002517298521807754, + "loss": 0.2586, + "step": 4448 + }, + { + "epoch": 1.35, + "learning_rate": 0.0002517086046942369, + "loss": 0.1332, + "step": 4449 + }, + { + "epoch": 1.35, + "learning_rate": 0.0002516873534295221, + "loss": 0.0649, + "step": 4450 + }, + { + "epoch": 1.35, + "learning_rate": 0.0002516660983874203, + "loss": 0.1993, + "step": 4451 + }, + { + "epoch": 1.35, + "learning_rate": 0.000251644839568721, + "loss": 0.1813, + "step": 4452 + }, + { + "epoch": 1.35, + "learning_rate": 0.0002516235769742141, + "loss": 0.0845, + "step": 4453 + }, + { + "epoch": 1.35, + "learning_rate": 0.0002516023106046892, + "loss": 0.2498, + "step": 4454 + }, + { + "epoch": 1.35, + "learning_rate": 0.00025158104046093643, + "loss": 0.1021, + "step": 4455 + }, + { + "epoch": 1.35, + "learning_rate": 0.0002515597665437459, + "loss": 0.2819, + "step": 4456 + }, + { + "epoch": 1.35, + "learning_rate": 0.0002515384888539079, + "loss": 0.2384, + "step": 4457 + }, + { + "epoch": 1.35, + "learning_rate": 0.0002515172073922128, + "loss": 0.1955, + "step": 4458 + }, + { + "epoch": 1.35, + "learning_rate": 0.00025149592215945114, + "loss": 0.2392, + "step": 4459 + }, + { + "epoch": 1.35, + "learning_rate": 0.0002514746331564136, + "loss": 0.1652, + "step": 4460 + }, + { + "epoch": 1.35, + "learning_rate": 0.000251453340383891, + "loss": 0.2183, + "step": 4461 + }, + { + "epoch": 1.35, + "learning_rate": 0.0002514320438426743, + "loss": 0.1652, + "step": 4462 + }, + { + "epoch": 1.36, + "learning_rate": 0.00025141074353355464, + "loss": 0.1744, + "step": 4463 + }, + { + "epoch": 1.36, + "learning_rate": 0.0002513894394573232, + "loss": 0.1655, + "step": 4464 + }, + { + "epoch": 1.36, + "learning_rate": 0.00025136813161477146, + "loss": 0.0833, + "step": 4465 + }, + { + "epoch": 1.36, + "learning_rate": 0.0002513468200066909, + "loss": 0.1104, + "step": 4466 + }, + { + "epoch": 1.36, + "learning_rate": 0.00025132550463387315, + "loss": 0.1719, + "step": 4467 + }, + { + "epoch": 1.36, + "learning_rate": 0.00025130418549711007, + "loss": 0.1918, + "step": 4468 + }, + { + "epoch": 1.36, + "learning_rate": 0.0002512828625971935, + "loss": 0.1336, + "step": 4469 + }, + { + "epoch": 1.36, + "learning_rate": 0.0002512615359349156, + "loss": 0.0794, + "step": 4470 + }, + { + "epoch": 1.36, + "learning_rate": 0.0002512402055110686, + "loss": 0.0685, + "step": 4471 + }, + { + "epoch": 1.36, + "learning_rate": 0.00025121887132644485, + "loss": 0.165, + "step": 4472 + }, + { + "epoch": 1.36, + "learning_rate": 0.0002511975333818369, + "loss": 0.2185, + "step": 4473 + }, + { + "epoch": 1.36, + "learning_rate": 0.0002511761916780373, + "loss": 0.1996, + "step": 4474 + }, + { + "epoch": 1.36, + "learning_rate": 0.0002511548462158389, + "loss": 0.0571, + "step": 4475 + }, + { + "epoch": 1.36, + "learning_rate": 0.00025113349699603456, + "loss": 0.2124, + "step": 4476 + }, + { + "epoch": 1.36, + "learning_rate": 0.00025111214401941744, + "loss": 0.1197, + "step": 4477 + }, + { + "epoch": 1.36, + "learning_rate": 0.00025109078728678067, + "loss": 0.1872, + "step": 4478 + }, + { + "epoch": 1.36, + "learning_rate": 0.00025106942679891753, + "loss": 0.3016, + "step": 4479 + }, + { + "epoch": 1.36, + "learning_rate": 0.0002510480625566216, + "loss": 0.2502, + "step": 4480 + }, + { + "epoch": 1.36, + "learning_rate": 0.00025102669456068653, + "loss": 0.1183, + "step": 4481 + }, + { + "epoch": 1.36, + "learning_rate": 0.0002510053228119059, + "loss": 0.1569, + "step": 4482 + }, + { + "epoch": 1.36, + "learning_rate": 0.0002509839473110738, + "loss": 0.2077, + "step": 4483 + }, + { + "epoch": 1.36, + "learning_rate": 0.00025096256805898416, + "loss": 0.1041, + "step": 4484 + }, + { + "epoch": 1.36, + "learning_rate": 0.00025094118505643125, + "loss": 0.2553, + "step": 4485 + }, + { + "epoch": 1.36, + "learning_rate": 0.00025091979830420925, + "loss": 0.1364, + "step": 4486 + }, + { + "epoch": 1.36, + "learning_rate": 0.0002508984078031127, + "loss": 0.1936, + "step": 4487 + }, + { + "epoch": 1.36, + "learning_rate": 0.00025087701355393623, + "loss": 0.2376, + "step": 4488 + }, + { + "epoch": 1.36, + "learning_rate": 0.0002508556155574745, + "loss": 0.1741, + "step": 4489 + }, + { + "epoch": 1.36, + "learning_rate": 0.00025083421381452237, + "loss": 0.3077, + "step": 4490 + }, + { + "epoch": 1.36, + "learning_rate": 0.00025081280832587493, + "loss": 0.2122, + "step": 4491 + }, + { + "epoch": 1.36, + "learning_rate": 0.00025079139909232727, + "loss": 0.2329, + "step": 4492 + }, + { + "epoch": 1.36, + "learning_rate": 0.0002507699861146747, + "loss": 0.0873, + "step": 4493 + }, + { + "epoch": 1.36, + "learning_rate": 0.0002507485693937127, + "loss": 0.2268, + "step": 4494 + }, + { + "epoch": 1.36, + "learning_rate": 0.00025072714893023675, + "loss": 0.3365, + "step": 4495 + }, + { + "epoch": 1.37, + "learning_rate": 0.00025070572472504256, + "loss": 0.1961, + "step": 4496 + }, + { + "epoch": 1.37, + "learning_rate": 0.0002506842967789261, + "loss": 0.1364, + "step": 4497 + }, + { + "epoch": 1.37, + "learning_rate": 0.00025066286509268316, + "loss": 0.0663, + "step": 4498 + }, + { + "epoch": 1.37, + "learning_rate": 0.00025064142966711007, + "loss": 0.1246, + "step": 4499 + }, + { + "epoch": 1.37, + "learning_rate": 0.00025061999050300296, + "loss": 0.3263, + "step": 4500 + }, + { + "epoch": 1.37, + "learning_rate": 0.0002505985476011583, + "loss": 0.1275, + "step": 4501 + }, + { + "epoch": 1.37, + "learning_rate": 0.00025057710096237256, + "loss": 0.2249, + "step": 4502 + }, + { + "epoch": 1.37, + "learning_rate": 0.00025055565058744244, + "loss": 0.2208, + "step": 4503 + }, + { + "epoch": 1.37, + "learning_rate": 0.00025053419647716484, + "loss": 0.2069, + "step": 4504 + }, + { + "epoch": 1.37, + "learning_rate": 0.0002505127386323366, + "loss": 0.0964, + "step": 4505 + }, + { + "epoch": 1.37, + "learning_rate": 0.0002504912770537549, + "loss": 0.1233, + "step": 4506 + }, + { + "epoch": 1.37, + "learning_rate": 0.0002504698117422169, + "loss": 0.0977, + "step": 4507 + }, + { + "epoch": 1.37, + "learning_rate": 0.00025044834269852007, + "loss": 0.156, + "step": 4508 + }, + { + "epoch": 1.37, + "learning_rate": 0.00025042686992346187, + "loss": 0.1866, + "step": 4509 + }, + { + "epoch": 1.37, + "learning_rate": 0.00025040539341783996, + "loss": 0.1176, + "step": 4510 + }, + { + "epoch": 1.37, + "learning_rate": 0.00025038391318245215, + "loss": 0.1821, + "step": 4511 + }, + { + "epoch": 1.37, + "learning_rate": 0.00025036242921809625, + "loss": 0.1511, + "step": 4512 + }, + { + "epoch": 1.37, + "learning_rate": 0.00025034094152557044, + "loss": 0.2218, + "step": 4513 + }, + { + "epoch": 1.37, + "learning_rate": 0.00025031945010567293, + "loss": 0.2324, + "step": 4514 + }, + { + "epoch": 1.37, + "learning_rate": 0.000250297954959202, + "loss": 0.1676, + "step": 4515 + }, + { + "epoch": 1.37, + "learning_rate": 0.0002502764560869562, + "loss": 0.1255, + "step": 4516 + }, + { + "epoch": 1.37, + "learning_rate": 0.0002502549534897341, + "loss": 0.1794, + "step": 4517 + }, + { + "epoch": 1.37, + "learning_rate": 0.00025023344716833447, + "loss": 0.2009, + "step": 4518 + }, + { + "epoch": 1.37, + "learning_rate": 0.00025021193712355624, + "loss": 0.2092, + "step": 4519 + }, + { + "epoch": 1.37, + "learning_rate": 0.00025019042335619837, + "loss": 0.1305, + "step": 4520 + }, + { + "epoch": 1.37, + "learning_rate": 0.00025016890586706007, + "loss": 0.2708, + "step": 4521 + }, + { + "epoch": 1.37, + "learning_rate": 0.0002501473846569407, + "loss": 0.0325, + "step": 4522 + }, + { + "epoch": 1.37, + "learning_rate": 0.0002501258597266397, + "loss": 0.2157, + "step": 4523 + }, + { + "epoch": 1.37, + "learning_rate": 0.0002501043310769565, + "loss": 0.1326, + "step": 4524 + }, + { + "epoch": 1.37, + "learning_rate": 0.00025008279870869103, + "loss": 0.2474, + "step": 4525 + }, + { + "epoch": 1.37, + "learning_rate": 0.00025006126262264313, + "loss": 0.1245, + "step": 4526 + }, + { + "epoch": 1.37, + "learning_rate": 0.0002500397228196127, + "loss": 0.0861, + "step": 4527 + }, + { + "epoch": 1.37, + "learning_rate": 0.00025001817930039996, + "loss": 0.2007, + "step": 4528 + }, + { + "epoch": 1.38, + "learning_rate": 0.0002499966320658051, + "loss": 0.1794, + "step": 4529 + }, + { + "epoch": 1.38, + "learning_rate": 0.0002499750811166286, + "loss": 0.1525, + "step": 4530 + }, + { + "epoch": 1.38, + "learning_rate": 0.0002499535264536711, + "loss": 0.1003, + "step": 4531 + }, + { + "epoch": 1.38, + "learning_rate": 0.0002499319680777331, + "loss": 0.1705, + "step": 4532 + }, + { + "epoch": 1.38, + "learning_rate": 0.00024991040598961557, + "loss": 0.0757, + "step": 4533 + }, + { + "epoch": 1.38, + "learning_rate": 0.0002498888401901195, + "loss": 0.1418, + "step": 4534 + }, + { + "epoch": 1.38, + "learning_rate": 0.00024986727068004584, + "loss": 0.231, + "step": 4535 + }, + { + "epoch": 1.38, + "learning_rate": 0.000249845697460196, + "loss": 0.0883, + "step": 4536 + }, + { + "epoch": 1.38, + "learning_rate": 0.0002498241205313713, + "loss": 0.0655, + "step": 4537 + }, + { + "epoch": 1.38, + "learning_rate": 0.0002498025398943732, + "loss": 0.2328, + "step": 4538 + }, + { + "epoch": 1.38, + "learning_rate": 0.0002497809555500035, + "loss": 0.106, + "step": 4539 + }, + { + "epoch": 1.38, + "learning_rate": 0.00024975936749906385, + "loss": 0.0983, + "step": 4540 + }, + { + "epoch": 1.38, + "learning_rate": 0.0002497377757423562, + "loss": 0.1577, + "step": 4541 + }, + { + "epoch": 1.38, + "learning_rate": 0.0002497161802806827, + "loss": 0.2543, + "step": 4542 + }, + { + "epoch": 1.38, + "learning_rate": 0.00024969458111484555, + "loss": 0.2453, + "step": 4543 + }, + { + "epoch": 1.38, + "learning_rate": 0.000249672978245647, + "loss": 0.1925, + "step": 4544 + }, + { + "epoch": 1.38, + "learning_rate": 0.0002496513716738896, + "loss": 0.0977, + "step": 4545 + }, + { + "epoch": 1.38, + "learning_rate": 0.000249629761400376, + "loss": 0.2412, + "step": 4546 + }, + { + "epoch": 1.38, + "learning_rate": 0.00024960814742590895, + "loss": 0.0896, + "step": 4547 + }, + { + "epoch": 1.38, + "learning_rate": 0.0002495865297512913, + "loss": 0.1392, + "step": 4548 + }, + { + "epoch": 1.38, + "learning_rate": 0.00024956490837732613, + "loss": 0.1788, + "step": 4549 + }, + { + "epoch": 1.38, + "learning_rate": 0.0002495432833048165, + "loss": 0.3039, + "step": 4550 + }, + { + "epoch": 1.38, + "learning_rate": 0.00024952165453456587, + "loss": 0.1599, + "step": 4551 + }, + { + "epoch": 1.38, + "learning_rate": 0.0002495000220673776, + "loss": 0.0631, + "step": 4552 + }, + { + "epoch": 1.38, + "learning_rate": 0.0002494783859040553, + "loss": 0.2225, + "step": 4553 + }, + { + "epoch": 1.38, + "learning_rate": 0.0002494567460454027, + "loss": 0.1561, + "step": 4554 + }, + { + "epoch": 1.38, + "learning_rate": 0.00024943510249222364, + "loss": 0.2807, + "step": 4555 + }, + { + "epoch": 1.38, + "learning_rate": 0.00024941345524532205, + "loss": 0.0758, + "step": 4556 + }, + { + "epoch": 1.38, + "learning_rate": 0.00024939180430550215, + "loss": 0.1559, + "step": 4557 + }, + { + "epoch": 1.38, + "learning_rate": 0.00024937014967356825, + "loss": 0.1936, + "step": 4558 + }, + { + "epoch": 1.38, + "learning_rate": 0.0002493484913503246, + "loss": 0.1285, + "step": 4559 + }, + { + "epoch": 1.38, + "learning_rate": 0.0002493268293365759, + "loss": 0.1334, + "step": 4560 + }, + { + "epoch": 1.38, + "learning_rate": 0.0002493051636331267, + "loss": 0.3719, + "step": 4561 + }, + { + "epoch": 1.39, + "learning_rate": 0.000249283494240782, + "loss": 0.107, + "step": 4562 + }, + { + "epoch": 1.39, + "learning_rate": 0.0002492618211603465, + "loss": 0.1071, + "step": 4563 + }, + { + "epoch": 1.39, + "learning_rate": 0.0002492401443926255, + "loss": 0.1779, + "step": 4564 + }, + { + "epoch": 1.39, + "learning_rate": 0.0002492184639384241, + "loss": 0.2345, + "step": 4565 + }, + { + "epoch": 1.39, + "learning_rate": 0.00024919677979854776, + "loss": 0.1669, + "step": 4566 + }, + { + "epoch": 1.39, + "learning_rate": 0.00024917509197380197, + "loss": 0.0824, + "step": 4567 + }, + { + "epoch": 1.39, + "learning_rate": 0.0002491534004649923, + "loss": 0.1376, + "step": 4568 + }, + { + "epoch": 1.39, + "learning_rate": 0.00024913170527292457, + "loss": 0.3177, + "step": 4569 + }, + { + "epoch": 1.39, + "learning_rate": 0.0002491100063984047, + "loss": 0.2267, + "step": 4570 + }, + { + "epoch": 1.39, + "learning_rate": 0.0002490883038422387, + "loss": 0.1583, + "step": 4571 + }, + { + "epoch": 1.39, + "learning_rate": 0.00024906659760523287, + "loss": 0.1015, + "step": 4572 + }, + { + "epoch": 1.39, + "learning_rate": 0.0002490448876881934, + "loss": 0.2095, + "step": 4573 + }, + { + "epoch": 1.39, + "learning_rate": 0.00024902317409192676, + "loss": 0.2244, + "step": 4574 + }, + { + "epoch": 1.39, + "learning_rate": 0.00024900145681723963, + "loss": 0.2509, + "step": 4575 + }, + { + "epoch": 1.39, + "learning_rate": 0.0002489797358649386, + "loss": 0.1037, + "step": 4576 + }, + { + "epoch": 1.39, + "learning_rate": 0.0002489580112358308, + "loss": 0.2451, + "step": 4577 + }, + { + "epoch": 1.39, + "learning_rate": 0.00024893628293072296, + "loss": 0.2685, + "step": 4578 + }, + { + "epoch": 1.39, + "learning_rate": 0.00024891455095042236, + "loss": 0.247, + "step": 4579 + }, + { + "epoch": 1.39, + "learning_rate": 0.0002488928152957364, + "loss": 0.1864, + "step": 4580 + }, + { + "epoch": 1.39, + "learning_rate": 0.0002488710759674721, + "loss": 0.2308, + "step": 4581 + }, + { + "epoch": 1.39, + "learning_rate": 0.00024884933296643745, + "loss": 0.1983, + "step": 4582 + }, + { + "epoch": 1.39, + "learning_rate": 0.00024882758629343987, + "loss": 0.1911, + "step": 4583 + }, + { + "epoch": 1.39, + "learning_rate": 0.00024880583594928733, + "loss": 0.2323, + "step": 4584 + }, + { + "epoch": 1.39, + "learning_rate": 0.00024878408193478767, + "loss": 0.0357, + "step": 4585 + }, + { + "epoch": 1.39, + "learning_rate": 0.0002487623242507491, + "loss": 0.1101, + "step": 4586 + }, + { + "epoch": 1.39, + "learning_rate": 0.00024874056289797975, + "loss": 0.1499, + "step": 4587 + }, + { + "epoch": 1.39, + "learning_rate": 0.00024871879787728804, + "loss": 0.3232, + "step": 4588 + }, + { + "epoch": 1.39, + "learning_rate": 0.0002486970291894825, + "loss": 0.24, + "step": 4589 + }, + { + "epoch": 1.39, + "learning_rate": 0.0002486752568353718, + "loss": 0.2443, + "step": 4590 + }, + { + "epoch": 1.39, + "learning_rate": 0.0002486534808157646, + "loss": 0.1499, + "step": 4591 + }, + { + "epoch": 1.39, + "learning_rate": 0.0002486317011314699, + "loss": 0.0788, + "step": 4592 + }, + { + "epoch": 1.39, + "learning_rate": 0.0002486099177832968, + "loss": 0.1831, + "step": 4593 + }, + { + "epoch": 1.39, + "learning_rate": 0.00024858813077205437, + "loss": 0.0793, + "step": 4594 + }, + { + "epoch": 1.4, + "learning_rate": 0.000248566340098552, + "loss": 0.1333, + "step": 4595 + }, + { + "epoch": 1.4, + "learning_rate": 0.00024854454576359915, + "loss": 0.3416, + "step": 4596 + }, + { + "epoch": 1.4, + "learning_rate": 0.0002485227477680054, + "loss": 0.0712, + "step": 4597 + }, + { + "epoch": 1.4, + "learning_rate": 0.00024850094611258045, + "loss": 0.2153, + "step": 4598 + }, + { + "epoch": 1.4, + "learning_rate": 0.00024847914079813424, + "loss": 0.1488, + "step": 4599 + }, + { + "epoch": 1.4, + "learning_rate": 0.00024845733182547676, + "loss": 0.2401, + "step": 4600 + }, + { + "epoch": 1.4, + "learning_rate": 0.0002484355191954181, + "loss": 0.2089, + "step": 4601 + }, + { + "epoch": 1.4, + "learning_rate": 0.0002484137029087685, + "loss": 0.3656, + "step": 4602 + }, + { + "epoch": 1.4, + "learning_rate": 0.00024839188296633854, + "loss": 0.1557, + "step": 4603 + }, + { + "epoch": 1.4, + "learning_rate": 0.00024837005936893857, + "loss": 0.0458, + "step": 4604 + }, + { + "epoch": 1.4, + "learning_rate": 0.0002483482321173794, + "loss": 0.281, + "step": 4605 + }, + { + "epoch": 1.4, + "learning_rate": 0.0002483264012124718, + "loss": 0.1865, + "step": 4606 + }, + { + "epoch": 1.4, + "learning_rate": 0.00024830456665502675, + "loss": 0.1382, + "step": 4607 + }, + { + "epoch": 1.4, + "learning_rate": 0.00024828272844585535, + "loss": 0.2776, + "step": 4608 + }, + { + "epoch": 1.4, + "learning_rate": 0.00024826088658576876, + "loss": 0.2243, + "step": 4609 + }, + { + "epoch": 1.4, + "learning_rate": 0.0002482390410755784, + "loss": 0.1688, + "step": 4610 + }, + { + "epoch": 1.4, + "learning_rate": 0.00024821719191609577, + "loss": 0.1907, + "step": 4611 + }, + { + "epoch": 1.4, + "learning_rate": 0.0002481953391081324, + "loss": 0.1841, + "step": 4612 + }, + { + "epoch": 1.4, + "learning_rate": 0.0002481734826525002, + "loss": 0.2593, + "step": 4613 + }, + { + "epoch": 1.4, + "learning_rate": 0.00024815162255001095, + "loss": 0.2483, + "step": 4614 + }, + { + "epoch": 1.4, + "learning_rate": 0.0002481297588014769, + "loss": 0.2913, + "step": 4615 + }, + { + "epoch": 1.4, + "learning_rate": 0.0002481078914077099, + "loss": 0.0452, + "step": 4616 + }, + { + "epoch": 1.4, + "learning_rate": 0.00024808602036952254, + "loss": 0.1833, + "step": 4617 + }, + { + "epoch": 1.4, + "learning_rate": 0.0002480641456877271, + "loss": 0.242, + "step": 4618 + }, + { + "epoch": 1.4, + "learning_rate": 0.00024804226736313626, + "loss": 0.1588, + "step": 4619 + }, + { + "epoch": 1.4, + "learning_rate": 0.00024802038539656265, + "loss": 0.2175, + "step": 4620 + }, + { + "epoch": 1.4, + "learning_rate": 0.00024799849978881925, + "loss": 0.2072, + "step": 4621 + }, + { + "epoch": 1.4, + "learning_rate": 0.00024797661054071885, + "loss": 0.3147, + "step": 4622 + }, + { + "epoch": 1.4, + "learning_rate": 0.00024795471765307476, + "loss": 0.2008, + "step": 4623 + }, + { + "epoch": 1.4, + "learning_rate": 0.0002479328211267001, + "loss": 0.0743, + "step": 4624 + }, + { + "epoch": 1.4, + "learning_rate": 0.0002479109209624084, + "loss": 0.1957, + "step": 4625 + }, + { + "epoch": 1.4, + "learning_rate": 0.000247889017161013, + "loss": 0.1676, + "step": 4626 + }, + { + "epoch": 1.4, + "learning_rate": 0.00024786710972332773, + "loss": 0.0548, + "step": 4627 + }, + { + "epoch": 1.41, + "learning_rate": 0.00024784519865016637, + "loss": 0.1944, + "step": 4628 + }, + { + "epoch": 1.41, + "learning_rate": 0.00024782328394234277, + "loss": 0.1307, + "step": 4629 + }, + { + "epoch": 1.41, + "learning_rate": 0.000247801365600671, + "loss": 0.2305, + "step": 4630 + }, + { + "epoch": 1.41, + "learning_rate": 0.00024777944362596534, + "loss": 0.1908, + "step": 4631 + }, + { + "epoch": 1.41, + "learning_rate": 0.00024775751801904, + "loss": 0.2267, + "step": 4632 + }, + { + "epoch": 1.41, + "learning_rate": 0.0002477355887807097, + "loss": 0.1953, + "step": 4633 + }, + { + "epoch": 1.41, + "learning_rate": 0.0002477136559117888, + "loss": 0.1556, + "step": 4634 + }, + { + "epoch": 1.41, + "learning_rate": 0.0002476917194130921, + "loss": 0.1459, + "step": 4635 + }, + { + "epoch": 1.41, + "learning_rate": 0.0002476697792854345, + "loss": 0.211, + "step": 4636 + }, + { + "epoch": 1.41, + "learning_rate": 0.0002476478355296311, + "loss": 0.1096, + "step": 4637 + }, + { + "epoch": 1.41, + "learning_rate": 0.00024762588814649685, + "loss": 0.269, + "step": 4638 + }, + { + "epoch": 1.41, + "learning_rate": 0.0002476039371368472, + "loss": 0.1464, + "step": 4639 + }, + { + "epoch": 1.41, + "learning_rate": 0.0002475819825014975, + "loss": 0.1802, + "step": 4640 + }, + { + "epoch": 1.41, + "learning_rate": 0.0002475600242412633, + "loss": 0.1766, + "step": 4641 + }, + { + "epoch": 1.41, + "learning_rate": 0.0002475380623569603, + "loss": 0.1388, + "step": 4642 + }, + { + "epoch": 1.41, + "learning_rate": 0.0002475160968494043, + "loss": 0.2625, + "step": 4643 + }, + { + "epoch": 1.41, + "learning_rate": 0.0002474941277194113, + "loss": 0.0437, + "step": 4644 + }, + { + "epoch": 1.41, + "learning_rate": 0.0002474721549677973, + "loss": 0.1918, + "step": 4645 + }, + { + "epoch": 1.41, + "learning_rate": 0.0002474501785953786, + "loss": 0.0012, + "step": 4646 + }, + { + "epoch": 1.41, + "learning_rate": 0.0002474281986029716, + "loss": 0.3198, + "step": 4647 + }, + { + "epoch": 1.41, + "learning_rate": 0.0002474062149913926, + "loss": 0.3029, + "step": 4648 + }, + { + "epoch": 1.41, + "learning_rate": 0.0002473842277614584, + "loss": 0.3896, + "step": 4649 + }, + { + "epoch": 1.41, + "learning_rate": 0.00024736223691398577, + "loss": 0.2223, + "step": 4650 + }, + { + "epoch": 1.41, + "learning_rate": 0.0002473402424497915, + "loss": 0.2823, + "step": 4651 + }, + { + "epoch": 1.41, + "learning_rate": 0.00024731824436969273, + "loss": 0.165, + "step": 4652 + }, + { + "epoch": 1.41, + "learning_rate": 0.0002472962426745065, + "loss": 0.1656, + "step": 4653 + }, + { + "epoch": 1.41, + "learning_rate": 0.0002472742373650502, + "loss": 0.2319, + "step": 4654 + }, + { + "epoch": 1.41, + "learning_rate": 0.00024725222844214123, + "loss": 0.1988, + "step": 4655 + }, + { + "epoch": 1.41, + "learning_rate": 0.0002472302159065972, + "loss": 0.1534, + "step": 4656 + }, + { + "epoch": 1.41, + "learning_rate": 0.0002472081997592357, + "loss": 0.145, + "step": 4657 + }, + { + "epoch": 1.41, + "learning_rate": 0.0002471861800008747, + "loss": 0.2027, + "step": 4658 + }, + { + "epoch": 1.41, + "learning_rate": 0.00024716415663233207, + "loss": 0.1636, + "step": 4659 + }, + { + "epoch": 1.41, + "learning_rate": 0.000247142129654426, + "loss": 0.2753, + "step": 4660 + }, + { + "epoch": 1.42, + "learning_rate": 0.0002471200990679746, + "loss": 0.2453, + "step": 4661 + }, + { + "epoch": 1.42, + "learning_rate": 0.0002470980648737964, + "loss": 0.1659, + "step": 4662 + }, + { + "epoch": 1.42, + "learning_rate": 0.00024707602707270984, + "loss": 0.0909, + "step": 4663 + }, + { + "epoch": 1.42, + "learning_rate": 0.00024705398566553347, + "loss": 0.1746, + "step": 4664 + }, + { + "epoch": 1.42, + "learning_rate": 0.00024703194065308617, + "loss": 0.0105, + "step": 4665 + }, + { + "epoch": 1.42, + "learning_rate": 0.00024700989203618685, + "loss": 0.2931, + "step": 4666 + }, + { + "epoch": 1.42, + "learning_rate": 0.00024698783981565446, + "loss": 0.1265, + "step": 4667 + }, + { + "epoch": 1.42, + "learning_rate": 0.0002469657839923083, + "loss": 0.1212, + "step": 4668 + }, + { + "epoch": 1.42, + "learning_rate": 0.0002469437245669676, + "loss": 0.0003, + "step": 4669 + }, + { + "epoch": 1.42, + "learning_rate": 0.0002469216615404518, + "loss": 0.1713, + "step": 4670 + }, + { + "epoch": 1.42, + "learning_rate": 0.0002468995949135805, + "loss": 0.201, + "step": 4671 + }, + { + "epoch": 1.42, + "learning_rate": 0.00024687752468717343, + "loss": 0.2364, + "step": 4672 + }, + { + "epoch": 1.42, + "learning_rate": 0.00024685545086205037, + "loss": 0.3092, + "step": 4673 + }, + { + "epoch": 1.42, + "learning_rate": 0.00024683337343903134, + "loss": 0.2201, + "step": 4674 + }, + { + "epoch": 1.42, + "learning_rate": 0.0002468112924189365, + "loss": 0.2696, + "step": 4675 + }, + { + "epoch": 1.42, + "learning_rate": 0.000246789207802586, + "loss": 0.0685, + "step": 4676 + }, + { + "epoch": 1.42, + "learning_rate": 0.0002467671195908002, + "loss": 0.2495, + "step": 4677 + }, + { + "epoch": 1.42, + "learning_rate": 0.00024674502778439976, + "loss": 0.1893, + "step": 4678 + }, + { + "epoch": 1.42, + "learning_rate": 0.0002467229323842052, + "loss": 0.2198, + "step": 4679 + }, + { + "epoch": 1.42, + "learning_rate": 0.00024670083339103737, + "loss": 0.2061, + "step": 4680 + }, + { + "epoch": 1.42, + "learning_rate": 0.00024667873080571717, + "loss": 0.2331, + "step": 4681 + }, + { + "epoch": 1.42, + "learning_rate": 0.0002466566246290656, + "loss": 0.1093, + "step": 4682 + }, + { + "epoch": 1.42, + "learning_rate": 0.00024663451486190384, + "loss": 0.2158, + "step": 4683 + }, + { + "epoch": 1.42, + "learning_rate": 0.0002466124015050533, + "loss": 0.0633, + "step": 4684 + }, + { + "epoch": 1.42, + "learning_rate": 0.0002465902845593353, + "loss": 0.2031, + "step": 4685 + }, + { + "epoch": 1.42, + "learning_rate": 0.0002465681640255715, + "loss": 0.0305, + "step": 4686 + }, + { + "epoch": 1.42, + "learning_rate": 0.0002465460399045836, + "loss": 0.2408, + "step": 4687 + }, + { + "epoch": 1.42, + "learning_rate": 0.0002465239121971934, + "loss": 0.2876, + "step": 4688 + }, + { + "epoch": 1.42, + "learning_rate": 0.00024650178090422295, + "loss": 0.2108, + "step": 4689 + }, + { + "epoch": 1.42, + "learning_rate": 0.0002464796460264943, + "loss": 0.1216, + "step": 4690 + }, + { + "epoch": 1.42, + "learning_rate": 0.00024645750756482974, + "loss": 0.2549, + "step": 4691 + }, + { + "epoch": 1.42, + "learning_rate": 0.0002464353655200516, + "loss": 0.2445, + "step": 4692 + }, + { + "epoch": 1.42, + "learning_rate": 0.00024641321989298247, + "loss": 0.1324, + "step": 4693 + }, + { + "epoch": 1.43, + "learning_rate": 0.000246391070684445, + "loss": 0.3039, + "step": 4694 + }, + { + "epoch": 1.43, + "learning_rate": 0.0002463689178952618, + "loss": 0.2079, + "step": 4695 + }, + { + "epoch": 1.43, + "learning_rate": 0.000246346761526256, + "loss": 0.0796, + "step": 4696 + }, + { + "epoch": 1.43, + "learning_rate": 0.00024632460157825045, + "loss": 0.1637, + "step": 4697 + }, + { + "epoch": 1.43, + "learning_rate": 0.00024630243805206847, + "loss": 0.0894, + "step": 4698 + }, + { + "epoch": 1.43, + "learning_rate": 0.00024628027094853334, + "loss": 0.105, + "step": 4699 + }, + { + "epoch": 1.43, + "learning_rate": 0.0002462581002684685, + "loss": 0.2078, + "step": 4700 + }, + { + "epoch": 1.43, + "learning_rate": 0.00024623592601269743, + "loss": 0.2091, + "step": 4701 + }, + { + "epoch": 1.43, + "learning_rate": 0.000246213748182044, + "loss": 0.1198, + "step": 4702 + }, + { + "epoch": 1.43, + "learning_rate": 0.0002461915667773319, + "loss": 0.1615, + "step": 4703 + }, + { + "epoch": 1.43, + "learning_rate": 0.0002461693817993852, + "loss": 0.2517, + "step": 4704 + }, + { + "epoch": 1.43, + "learning_rate": 0.000246147193249028, + "loss": 0.2487, + "step": 4705 + }, + { + "epoch": 1.43, + "learning_rate": 0.0002461250011270845, + "loss": 0.2083, + "step": 4706 + }, + { + "epoch": 1.43, + "learning_rate": 0.00024610280543437913, + "loss": 0.0739, + "step": 4707 + }, + { + "epoch": 1.43, + "learning_rate": 0.0002460806061717363, + "loss": 0.229, + "step": 4708 + }, + { + "epoch": 1.43, + "learning_rate": 0.0002460584033399808, + "loss": 0.2723, + "step": 4709 + }, + { + "epoch": 1.43, + "learning_rate": 0.0002460361969399372, + "loss": 0.2879, + "step": 4710 + }, + { + "epoch": 1.43, + "learning_rate": 0.00024601398697243055, + "loss": 0.1714, + "step": 4711 + }, + { + "epoch": 1.43, + "learning_rate": 0.00024599177343828584, + "loss": 0.184, + "step": 4712 + }, + { + "epoch": 1.43, + "learning_rate": 0.0002459695563383282, + "loss": 0.297, + "step": 4713 + }, + { + "epoch": 1.43, + "learning_rate": 0.000245947335673383, + "loss": 0.1436, + "step": 4714 + }, + { + "epoch": 1.43, + "learning_rate": 0.00024592511144427566, + "loss": 0.1914, + "step": 4715 + }, + { + "epoch": 1.43, + "learning_rate": 0.0002459028836518317, + "loss": 0.1877, + "step": 4716 + }, + { + "epoch": 1.43, + "learning_rate": 0.0002458806522968769, + "loss": 0.1877, + "step": 4717 + }, + { + "epoch": 1.43, + "learning_rate": 0.000245858417380237, + "loss": 0.2509, + "step": 4718 + }, + { + "epoch": 1.43, + "learning_rate": 0.00024583617890273804, + "loss": 0.1632, + "step": 4719 + }, + { + "epoch": 1.43, + "learning_rate": 0.00024581393686520596, + "loss": 0.1159, + "step": 4720 + }, + { + "epoch": 1.43, + "learning_rate": 0.00024579169126846714, + "loss": 0.1338, + "step": 4721 + }, + { + "epoch": 1.43, + "learning_rate": 0.00024576944211334795, + "loss": 0.1281, + "step": 4722 + }, + { + "epoch": 1.43, + "learning_rate": 0.0002457471894006748, + "loss": 0.1804, + "step": 4723 + }, + { + "epoch": 1.43, + "learning_rate": 0.00024572493313127435, + "loss": 0.2675, + "step": 4724 + }, + { + "epoch": 1.43, + "learning_rate": 0.00024570267330597335, + "loss": 0.212, + "step": 4725 + }, + { + "epoch": 1.43, + "learning_rate": 0.00024568040992559864, + "loss": 0.1415, + "step": 4726 + }, + { + "epoch": 1.44, + "learning_rate": 0.00024565814299097735, + "loss": 0.1386, + "step": 4727 + }, + { + "epoch": 1.44, + "learning_rate": 0.0002456358725029365, + "loss": 0.2778, + "step": 4728 + }, + { + "epoch": 1.44, + "learning_rate": 0.0002456135984623034, + "loss": 0.1808, + "step": 4729 + }, + { + "epoch": 1.44, + "learning_rate": 0.0002455913208699056, + "loss": 0.0621, + "step": 4730 + }, + { + "epoch": 1.44, + "learning_rate": 0.0002455690397265705, + "loss": 0.2178, + "step": 4731 + }, + { + "epoch": 1.44, + "learning_rate": 0.00024554675503312583, + "loss": 0.2311, + "step": 4732 + }, + { + "epoch": 1.44, + "learning_rate": 0.0002455244667903994, + "loss": 0.0812, + "step": 4733 + }, + { + "epoch": 1.44, + "learning_rate": 0.0002455021749992192, + "loss": 0.1104, + "step": 4734 + }, + { + "epoch": 1.44, + "learning_rate": 0.0002454798796604132, + "loss": 0.1513, + "step": 4735 + }, + { + "epoch": 1.44, + "learning_rate": 0.0002454575807748097, + "loss": 0.3219, + "step": 4736 + }, + { + "epoch": 1.44, + "learning_rate": 0.0002454352783432369, + "loss": 0.1014, + "step": 4737 + }, + { + "epoch": 1.44, + "learning_rate": 0.0002454129723665235, + "loss": 0.218, + "step": 4738 + }, + { + "epoch": 1.44, + "learning_rate": 0.0002453906628454979, + "loss": 0.0814, + "step": 4739 + }, + { + "epoch": 1.44, + "learning_rate": 0.00024536834978098894, + "loss": 0.1312, + "step": 4740 + }, + { + "epoch": 1.44, + "learning_rate": 0.0002453460331738254, + "loss": 0.1668, + "step": 4741 + }, + { + "epoch": 1.44, + "learning_rate": 0.0002453237130248364, + "loss": 0.2353, + "step": 4742 + }, + { + "epoch": 1.44, + "learning_rate": 0.000245301389334851, + "loss": 0.1795, + "step": 4743 + }, + { + "epoch": 1.44, + "learning_rate": 0.0002452790621046984, + "loss": 0.1822, + "step": 4744 + }, + { + "epoch": 1.44, + "learning_rate": 0.00024525673133520806, + "loss": 0.2078, + "step": 4745 + }, + { + "epoch": 1.44, + "learning_rate": 0.00024523439702720944, + "loss": 0.2235, + "step": 4746 + }, + { + "epoch": 1.44, + "learning_rate": 0.0002452120591815323, + "loss": 0.1947, + "step": 4747 + }, + { + "epoch": 1.44, + "learning_rate": 0.00024518971779900636, + "loss": 0.0717, + "step": 4748 + }, + { + "epoch": 1.44, + "learning_rate": 0.00024516737288046155, + "loss": 0.1344, + "step": 4749 + }, + { + "epoch": 1.44, + "learning_rate": 0.00024514502442672795, + "loss": 0.1137, + "step": 4750 + }, + { + "epoch": 1.44, + "learning_rate": 0.00024512267243863566, + "loss": 0.3489, + "step": 4751 + }, + { + "epoch": 1.44, + "learning_rate": 0.00024510031691701503, + "loss": 0.1912, + "step": 4752 + }, + { + "epoch": 1.44, + "learning_rate": 0.0002450779578626965, + "loss": 0.1609, + "step": 4753 + }, + { + "epoch": 1.44, + "learning_rate": 0.0002450555952765107, + "loss": 0.1677, + "step": 4754 + }, + { + "epoch": 1.44, + "learning_rate": 0.0002450332291592882, + "loss": 0.1153, + "step": 4755 + }, + { + "epoch": 1.44, + "learning_rate": 0.00024501085951186, + "loss": 0.1126, + "step": 4756 + }, + { + "epoch": 1.44, + "learning_rate": 0.00024498848633505694, + "loss": 0.08, + "step": 4757 + }, + { + "epoch": 1.44, + "learning_rate": 0.0002449661096297102, + "loss": 0.2, + "step": 4758 + }, + { + "epoch": 1.44, + "learning_rate": 0.0002449437293966509, + "loss": 0.1255, + "step": 4759 + }, + { + "epoch": 1.45, + "learning_rate": 0.0002449213456367106, + "loss": 0.0787, + "step": 4760 + }, + { + "epoch": 1.45, + "learning_rate": 0.0002448989583507205, + "loss": 0.1561, + "step": 4761 + }, + { + "epoch": 1.45, + "learning_rate": 0.0002448765675395125, + "loss": 0.1256, + "step": 4762 + }, + { + "epoch": 1.45, + "learning_rate": 0.00024485417320391825, + "loss": 0.1889, + "step": 4763 + }, + { + "epoch": 1.45, + "learning_rate": 0.00024483177534476955, + "loss": 0.1415, + "step": 4764 + }, + { + "epoch": 1.45, + "learning_rate": 0.00024480937396289854, + "loss": 0.2955, + "step": 4765 + }, + { + "epoch": 1.45, + "learning_rate": 0.0002447869690591373, + "loss": 0.2995, + "step": 4766 + }, + { + "epoch": 1.45, + "learning_rate": 0.0002447645606343181, + "loss": 0.2161, + "step": 4767 + }, + { + "epoch": 1.45, + "learning_rate": 0.0002447421486892734, + "loss": 0.1068, + "step": 4768 + }, + { + "epoch": 1.45, + "learning_rate": 0.0002447197332248357, + "loss": 0.1811, + "step": 4769 + }, + { + "epoch": 1.45, + "learning_rate": 0.0002446973142418376, + "loss": 0.0418, + "step": 4770 + }, + { + "epoch": 1.45, + "learning_rate": 0.00024467489174111206, + "loss": 0.1224, + "step": 4771 + }, + { + "epoch": 1.45, + "learning_rate": 0.00024465246572349193, + "loss": 0.2822, + "step": 4772 + }, + { + "epoch": 1.45, + "learning_rate": 0.00024463003618981023, + "loss": 0.0779, + "step": 4773 + }, + { + "epoch": 1.45, + "learning_rate": 0.00024460760314090016, + "loss": 0.3276, + "step": 4774 + }, + { + "epoch": 1.45, + "learning_rate": 0.00024458516657759516, + "loss": 0.2812, + "step": 4775 + }, + { + "epoch": 1.45, + "learning_rate": 0.0002445627265007285, + "loss": 0.1717, + "step": 4776 + }, + { + "epoch": 1.45, + "learning_rate": 0.0002445402829111339, + "loss": 0.031, + "step": 4777 + }, + { + "epoch": 1.45, + "learning_rate": 0.000244517835809645, + "loss": 0.1756, + "step": 4778 + }, + { + "epoch": 1.45, + "learning_rate": 0.00024449538519709564, + "loss": 0.2788, + "step": 4779 + }, + { + "epoch": 1.45, + "learning_rate": 0.0002444729310743199, + "loss": 0.1941, + "step": 4780 + }, + { + "epoch": 1.45, + "learning_rate": 0.00024445047344215175, + "loss": 0.127, + "step": 4781 + }, + { + "epoch": 1.45, + "learning_rate": 0.0002444280123014255, + "loss": 0.1348, + "step": 4782 + }, + { + "epoch": 1.45, + "learning_rate": 0.00024440554765297554, + "loss": 0.1381, + "step": 4783 + }, + { + "epoch": 1.45, + "learning_rate": 0.00024438307949763627, + "loss": 0.2509, + "step": 4784 + }, + { + "epoch": 1.45, + "learning_rate": 0.00024436060783624244, + "loss": 0.1676, + "step": 4785 + }, + { + "epoch": 1.45, + "learning_rate": 0.0002443381326696287, + "loss": 0.2935, + "step": 4786 + }, + { + "epoch": 1.45, + "learning_rate": 0.00024431565399862993, + "loss": 0.1558, + "step": 4787 + }, + { + "epoch": 1.45, + "learning_rate": 0.00024429317182408125, + "loss": 0.2759, + "step": 4788 + }, + { + "epoch": 1.45, + "learning_rate": 0.00024427068614681774, + "loss": 0.1641, + "step": 4789 + }, + { + "epoch": 1.45, + "learning_rate": 0.00024424819696767467, + "loss": 0.2955, + "step": 4790 + }, + { + "epoch": 1.45, + "learning_rate": 0.0002442257042874875, + "loss": 0.0562, + "step": 4791 + }, + { + "epoch": 1.45, + "learning_rate": 0.0002442032081070917, + "loss": 0.2184, + "step": 4792 + }, + { + "epoch": 1.46, + "learning_rate": 0.000244180708427323, + "loss": 0.1941, + "step": 4793 + }, + { + "epoch": 1.46, + "learning_rate": 0.0002441582052490171, + "loss": 0.1586, + "step": 4794 + }, + { + "epoch": 1.46, + "learning_rate": 0.00024413569857301005, + "loss": 0.1463, + "step": 4795 + }, + { + "epoch": 1.46, + "learning_rate": 0.00024411318840013784, + "loss": 0.045, + "step": 4796 + }, + { + "epoch": 1.46, + "learning_rate": 0.00024409067473123662, + "loss": 0.1648, + "step": 4797 + }, + { + "epoch": 1.46, + "learning_rate": 0.0002440681575671428, + "loss": 0.1849, + "step": 4798 + }, + { + "epoch": 1.46, + "learning_rate": 0.00024404563690869278, + "loss": 0.0893, + "step": 4799 + }, + { + "epoch": 1.46, + "learning_rate": 0.00024402311275672313, + "loss": 0.188, + "step": 4800 + }, + { + "epoch": 1.46, + "learning_rate": 0.00024400058511207055, + "loss": 0.1182, + "step": 4801 + }, + { + "epoch": 1.46, + "learning_rate": 0.0002439780539755719, + "loss": 0.1262, + "step": 4802 + }, + { + "epoch": 1.46, + "learning_rate": 0.00024395551934806414, + "loss": 0.0496, + "step": 4803 + }, + { + "epoch": 1.46, + "learning_rate": 0.0002439329812303843, + "loss": 0.1644, + "step": 4804 + }, + { + "epoch": 1.46, + "learning_rate": 0.00024391043962336973, + "loss": 0.2357, + "step": 4805 + }, + { + "epoch": 1.46, + "learning_rate": 0.00024388789452785766, + "loss": 0.2399, + "step": 4806 + }, + { + "epoch": 1.46, + "learning_rate": 0.0002438653459446857, + "loss": 0.1819, + "step": 4807 + }, + { + "epoch": 1.46, + "learning_rate": 0.00024384279387469135, + "loss": 0.2445, + "step": 4808 + }, + { + "epoch": 1.46, + "learning_rate": 0.0002438202383187124, + "loss": 0.0869, + "step": 4809 + }, + { + "epoch": 1.46, + "learning_rate": 0.00024379767927758673, + "loss": 0.2523, + "step": 4810 + }, + { + "epoch": 1.46, + "learning_rate": 0.00024377511675215236, + "loss": 0.108, + "step": 4811 + }, + { + "epoch": 1.46, + "learning_rate": 0.00024375255074324736, + "loss": 0.1193, + "step": 4812 + }, + { + "epoch": 1.46, + "learning_rate": 0.00024372998125171005, + "loss": 0.0545, + "step": 4813 + }, + { + "epoch": 1.46, + "learning_rate": 0.00024370740827837881, + "loss": 0.2476, + "step": 4814 + }, + { + "epoch": 1.46, + "learning_rate": 0.0002436848318240921, + "loss": 0.243, + "step": 4815 + }, + { + "epoch": 1.46, + "learning_rate": 0.00024366225188968864, + "loss": 0.1162, + "step": 4816 + }, + { + "epoch": 1.46, + "learning_rate": 0.0002436396684760072, + "loss": 0.1327, + "step": 4817 + }, + { + "epoch": 1.46, + "learning_rate": 0.00024361708158388666, + "loss": 0.1892, + "step": 4818 + }, + { + "epoch": 1.46, + "learning_rate": 0.00024359449121416602, + "loss": 0.2925, + "step": 4819 + }, + { + "epoch": 1.46, + "learning_rate": 0.00024357189736768457, + "loss": 0.0239, + "step": 4820 + }, + { + "epoch": 1.46, + "learning_rate": 0.0002435493000452815, + "loss": 0.2373, + "step": 4821 + }, + { + "epoch": 1.46, + "learning_rate": 0.0002435266992477963, + "loss": 0.2658, + "step": 4822 + }, + { + "epoch": 1.46, + "learning_rate": 0.0002435040949760684, + "loss": 0.1807, + "step": 4823 + }, + { + "epoch": 1.46, + "learning_rate": 0.00024348148723093765, + "loss": 0.2083, + "step": 4824 + }, + { + "epoch": 1.47, + "learning_rate": 0.00024345887601324374, + "loss": 0.2167, + "step": 4825 + }, + { + "epoch": 1.47, + "learning_rate": 0.0002434362613238267, + "loss": 0.263, + "step": 4826 + }, + { + "epoch": 1.47, + "learning_rate": 0.00024341364316352647, + "loss": 0.1788, + "step": 4827 + }, + { + "epoch": 1.47, + "learning_rate": 0.0002433910215331834, + "loss": 0.28, + "step": 4828 + }, + { + "epoch": 1.47, + "learning_rate": 0.00024336839643363773, + "loss": 0.2452, + "step": 4829 + }, + { + "epoch": 1.47, + "learning_rate": 0.0002433457678657299, + "loss": 0.1462, + "step": 4830 + }, + { + "epoch": 1.47, + "learning_rate": 0.00024332313583030058, + "loss": 0.1457, + "step": 4831 + }, + { + "epoch": 1.47, + "learning_rate": 0.00024330050032819045, + "loss": 0.2081, + "step": 4832 + }, + { + "epoch": 1.47, + "learning_rate": 0.0002432778613602403, + "loss": 0.1631, + "step": 4833 + }, + { + "epoch": 1.47, + "learning_rate": 0.0002432552189272911, + "loss": 0.219, + "step": 4834 + }, + { + "epoch": 1.47, + "learning_rate": 0.00024323257303018403, + "loss": 0.1134, + "step": 4835 + }, + { + "epoch": 1.47, + "learning_rate": 0.00024320992366976023, + "loss": 0.1245, + "step": 4836 + }, + { + "epoch": 1.47, + "learning_rate": 0.00024318727084686115, + "loss": 0.0891, + "step": 4837 + }, + { + "epoch": 1.47, + "learning_rate": 0.00024316461456232817, + "loss": 0.1657, + "step": 4838 + }, + { + "epoch": 1.47, + "learning_rate": 0.00024314195481700302, + "loss": 0.2089, + "step": 4839 + }, + { + "epoch": 1.47, + "learning_rate": 0.00024311929161172733, + "loss": 0.2128, + "step": 4840 + }, + { + "epoch": 1.47, + "learning_rate": 0.0002430966249473431, + "loss": 0.2262, + "step": 4841 + }, + { + "epoch": 1.47, + "learning_rate": 0.00024307395482469222, + "loss": 0.3254, + "step": 4842 + }, + { + "epoch": 1.47, + "learning_rate": 0.00024305128124461683, + "loss": 0.1541, + "step": 4843 + }, + { + "epoch": 1.47, + "learning_rate": 0.00024302860420795925, + "loss": 0.213, + "step": 4844 + }, + { + "epoch": 1.47, + "learning_rate": 0.0002430059237155618, + "loss": 0.2919, + "step": 4845 + }, + { + "epoch": 1.47, + "learning_rate": 0.00024298323976826706, + "loss": 0.1862, + "step": 4846 + }, + { + "epoch": 1.47, + "learning_rate": 0.0002429605523669176, + "loss": 0.1482, + "step": 4847 + }, + { + "epoch": 1.47, + "learning_rate": 0.00024293786151235623, + "loss": 0.2905, + "step": 4848 + }, + { + "epoch": 1.47, + "learning_rate": 0.00024291516720542587, + "loss": 0.3043, + "step": 4849 + }, + { + "epoch": 1.47, + "learning_rate": 0.00024289246944696945, + "loss": 0.1277, + "step": 4850 + }, + { + "epoch": 1.47, + "learning_rate": 0.00024286976823783028, + "loss": 0.1878, + "step": 4851 + }, + { + "epoch": 1.47, + "learning_rate": 0.00024284706357885152, + "loss": 0.1704, + "step": 4852 + }, + { + "epoch": 1.47, + "learning_rate": 0.00024282435547087658, + "loss": 0.1191, + "step": 4853 + }, + { + "epoch": 1.47, + "learning_rate": 0.00024280164391474909, + "loss": 0.0757, + "step": 4854 + }, + { + "epoch": 1.47, + "learning_rate": 0.00024277892891131264, + "loss": 0.2662, + "step": 4855 + }, + { + "epoch": 1.47, + "learning_rate": 0.00024275621046141108, + "loss": 0.1093, + "step": 4856 + }, + { + "epoch": 1.47, + "learning_rate": 0.00024273348856588832, + "loss": 0.1464, + "step": 4857 + }, + { + "epoch": 1.48, + "learning_rate": 0.00024271076322558835, + "loss": 0.1478, + "step": 4858 + }, + { + "epoch": 1.48, + "learning_rate": 0.0002426880344413554, + "loss": 0.2105, + "step": 4859 + }, + { + "epoch": 1.48, + "learning_rate": 0.0002426653022140338, + "loss": 0.0678, + "step": 4860 + }, + { + "epoch": 1.48, + "learning_rate": 0.000242642566544468, + "loss": 0.1581, + "step": 4861 + }, + { + "epoch": 1.48, + "learning_rate": 0.00024261982743350248, + "loss": 0.0523, + "step": 4862 + }, + { + "epoch": 1.48, + "learning_rate": 0.00024259708488198197, + "loss": 0.27, + "step": 4863 + }, + { + "epoch": 1.48, + "learning_rate": 0.0002425743388907513, + "loss": 0.2369, + "step": 4864 + }, + { + "epoch": 1.48, + "learning_rate": 0.0002425515894606554, + "loss": 0.1578, + "step": 4865 + }, + { + "epoch": 1.48, + "learning_rate": 0.00024252883659253938, + "loss": 0.1166, + "step": 4866 + }, + { + "epoch": 1.48, + "learning_rate": 0.0002425060802872484, + "loss": 0.2162, + "step": 4867 + }, + { + "epoch": 1.48, + "learning_rate": 0.00024248332054562784, + "loss": 0.1946, + "step": 4868 + }, + { + "epoch": 1.48, + "learning_rate": 0.0002424605573685231, + "loss": 0.0479, + "step": 4869 + }, + { + "epoch": 1.48, + "learning_rate": 0.00024243779075677982, + "loss": 0.1106, + "step": 4870 + }, + { + "epoch": 1.48, + "learning_rate": 0.00024241502071124365, + "loss": 0.1466, + "step": 4871 + }, + { + "epoch": 1.48, + "learning_rate": 0.00024239224723276046, + "loss": 0.0656, + "step": 4872 + }, + { + "epoch": 1.48, + "learning_rate": 0.00024236947032217626, + "loss": 0.1563, + "step": 4873 + }, + { + "epoch": 1.48, + "learning_rate": 0.0002423466899803371, + "loss": 0.1884, + "step": 4874 + }, + { + "epoch": 1.48, + "learning_rate": 0.0002423239062080892, + "loss": 0.0719, + "step": 4875 + }, + { + "epoch": 1.48, + "learning_rate": 0.00024230111900627892, + "loss": 0.1252, + "step": 4876 + }, + { + "epoch": 1.48, + "learning_rate": 0.0002422783283757528, + "loss": 0.2118, + "step": 4877 + }, + { + "epoch": 1.48, + "learning_rate": 0.0002422555343173573, + "loss": 0.1459, + "step": 4878 + }, + { + "epoch": 1.48, + "learning_rate": 0.0002422327368319393, + "loss": 0.1147, + "step": 4879 + }, + { + "epoch": 1.48, + "learning_rate": 0.00024220993592034552, + "loss": 0.2229, + "step": 4880 + }, + { + "epoch": 1.48, + "learning_rate": 0.00024218713158342308, + "loss": 0.2909, + "step": 4881 + }, + { + "epoch": 1.48, + "learning_rate": 0.00024216432382201904, + "loss": 0.2582, + "step": 4882 + }, + { + "epoch": 1.48, + "learning_rate": 0.00024214151263698063, + "loss": 0.1231, + "step": 4883 + }, + { + "epoch": 1.48, + "learning_rate": 0.0002421186980291552, + "loss": 0.1089, + "step": 4884 + }, + { + "epoch": 1.48, + "learning_rate": 0.0002420958799993903, + "loss": 0.2527, + "step": 4885 + }, + { + "epoch": 1.48, + "learning_rate": 0.00024207305854853355, + "loss": 0.1949, + "step": 4886 + }, + { + "epoch": 1.48, + "learning_rate": 0.00024205023367743263, + "loss": 0.1793, + "step": 4887 + }, + { + "epoch": 1.48, + "learning_rate": 0.0002420274053869355, + "loss": 0.2496, + "step": 4888 + }, + { + "epoch": 1.48, + "learning_rate": 0.00024200457367789011, + "loss": 0.1955, + "step": 4889 + }, + { + "epoch": 1.48, + "learning_rate": 0.0002419817385511446, + "loss": 0.1759, + "step": 4890 + }, + { + "epoch": 1.49, + "learning_rate": 0.00024195890000754724, + "loss": 0.2314, + "step": 4891 + }, + { + "epoch": 1.49, + "learning_rate": 0.00024193605804794646, + "loss": 0.1863, + "step": 4892 + }, + { + "epoch": 1.49, + "learning_rate": 0.00024191321267319065, + "loss": 0.0854, + "step": 4893 + }, + { + "epoch": 1.49, + "learning_rate": 0.0002418903638841286, + "loss": 0.1258, + "step": 4894 + }, + { + "epoch": 1.49, + "learning_rate": 0.000241867511681609, + "loss": 0.0963, + "step": 4895 + }, + { + "epoch": 1.49, + "learning_rate": 0.0002418446560664806, + "loss": 0.1528, + "step": 4896 + }, + { + "epoch": 1.49, + "learning_rate": 0.00024182179703959272, + "loss": 0.2272, + "step": 4897 + }, + { + "epoch": 1.49, + "learning_rate": 0.00024179893460179428, + "loss": 0.1176, + "step": 4898 + }, + { + "epoch": 1.49, + "learning_rate": 0.0002417760687539346, + "loss": 0.0736, + "step": 4899 + }, + { + "epoch": 1.49, + "learning_rate": 0.0002417531994968631, + "loss": 0.1079, + "step": 4900 + }, + { + "epoch": 1.49, + "learning_rate": 0.00024173032683142933, + "loss": 0.202, + "step": 4901 + }, + { + "epoch": 1.49, + "learning_rate": 0.00024170745075848292, + "loss": 0.2651, + "step": 4902 + }, + { + "epoch": 1.49, + "learning_rate": 0.00024168457127887364, + "loss": 0.1272, + "step": 4903 + }, + { + "epoch": 1.49, + "learning_rate": 0.0002416616883934514, + "loss": 0.2007, + "step": 4904 + }, + { + "epoch": 1.49, + "learning_rate": 0.00024163880210306622, + "loss": 0.1778, + "step": 4905 + }, + { + "epoch": 1.49, + "learning_rate": 0.00024161591240856825, + "loss": 0.3051, + "step": 4906 + }, + { + "epoch": 1.49, + "learning_rate": 0.00024159301931080782, + "loss": 0.0878, + "step": 4907 + }, + { + "epoch": 1.49, + "learning_rate": 0.0002415701228106353, + "loss": 0.1748, + "step": 4908 + }, + { + "epoch": 1.49, + "learning_rate": 0.00024154722290890127, + "loss": 0.0855, + "step": 4909 + }, + { + "epoch": 1.49, + "learning_rate": 0.00024152431960645636, + "loss": 0.1633, + "step": 4910 + }, + { + "epoch": 1.49, + "learning_rate": 0.00024150141290415136, + "loss": 0.0559, + "step": 4911 + }, + { + "epoch": 1.49, + "learning_rate": 0.0002414785028028372, + "loss": 0.1372, + "step": 4912 + }, + { + "epoch": 1.49, + "learning_rate": 0.00024145558930336493, + "loss": 0.1148, + "step": 4913 + }, + { + "epoch": 1.49, + "learning_rate": 0.00024143267240658568, + "loss": 0.1413, + "step": 4914 + }, + { + "epoch": 1.49, + "learning_rate": 0.00024140975211335084, + "loss": 0.1947, + "step": 4915 + }, + { + "epoch": 1.49, + "learning_rate": 0.0002413868284245117, + "loss": 0.0605, + "step": 4916 + }, + { + "epoch": 1.49, + "learning_rate": 0.0002413639013409199, + "loss": 0.2568, + "step": 4917 + }, + { + "epoch": 1.49, + "learning_rate": 0.0002413409708634271, + "loss": 0.161, + "step": 4918 + }, + { + "epoch": 1.49, + "learning_rate": 0.00024131803699288504, + "loss": 0.1572, + "step": 4919 + }, + { + "epoch": 1.49, + "learning_rate": 0.00024129509973014576, + "loss": 0.2067, + "step": 4920 + }, + { + "epoch": 1.49, + "learning_rate": 0.0002412721590760612, + "loss": 0.172, + "step": 4921 + }, + { + "epoch": 1.49, + "learning_rate": 0.00024124921503148358, + "loss": 0.0394, + "step": 4922 + }, + { + "epoch": 1.49, + "learning_rate": 0.00024122626759726524, + "loss": 0.2853, + "step": 4923 + }, + { + "epoch": 1.5, + "learning_rate": 0.00024120331677425855, + "loss": 0.2808, + "step": 4924 + }, + { + "epoch": 1.5, + "learning_rate": 0.00024118036256331609, + "loss": 0.1654, + "step": 4925 + }, + { + "epoch": 1.5, + "learning_rate": 0.00024115740496529056, + "loss": 0.2078, + "step": 4926 + }, + { + "epoch": 1.5, + "learning_rate": 0.00024113444398103474, + "loss": 0.1198, + "step": 4927 + }, + { + "epoch": 1.5, + "learning_rate": 0.0002411114796114016, + "loss": 0.1519, + "step": 4928 + }, + { + "epoch": 1.5, + "learning_rate": 0.00024108851185724415, + "loss": 0.1121, + "step": 4929 + }, + { + "epoch": 1.5, + "learning_rate": 0.0002410655407194156, + "loss": 0.1119, + "step": 4930 + }, + { + "epoch": 1.5, + "learning_rate": 0.00024104256619876928, + "loss": 0.1005, + "step": 4931 + }, + { + "epoch": 1.5, + "learning_rate": 0.0002410195882961586, + "loss": 0.2238, + "step": 4932 + }, + { + "epoch": 1.5, + "learning_rate": 0.0002409966070124371, + "loss": 0.2841, + "step": 4933 + }, + { + "epoch": 1.5, + "learning_rate": 0.00024097362234845853, + "loss": 0.0564, + "step": 4934 + }, + { + "epoch": 1.5, + "learning_rate": 0.00024095063430507667, + "loss": 0.1973, + "step": 4935 + }, + { + "epoch": 1.5, + "learning_rate": 0.00024092764288314545, + "loss": 0.2603, + "step": 4936 + }, + { + "epoch": 1.5, + "learning_rate": 0.0002409046480835189, + "loss": 0.2984, + "step": 4937 + }, + { + "epoch": 1.5, + "learning_rate": 0.00024088164990705127, + "loss": 0.2175, + "step": 4938 + }, + { + "epoch": 1.5, + "learning_rate": 0.0002408586483545969, + "loss": 0.2486, + "step": 4939 + }, + { + "epoch": 1.5, + "learning_rate": 0.00024083564342701014, + "loss": 0.1566, + "step": 4940 + }, + { + "epoch": 1.5, + "learning_rate": 0.0002408126351251456, + "loss": 0.2282, + "step": 4941 + }, + { + "epoch": 1.5, + "learning_rate": 0.00024078962344985797, + "loss": 0.2482, + "step": 4942 + }, + { + "epoch": 1.5, + "learning_rate": 0.0002407666084020021, + "loss": 0.2571, + "step": 4943 + }, + { + "epoch": 1.5, + "learning_rate": 0.00024074358998243283, + "loss": 0.2135, + "step": 4944 + }, + { + "epoch": 1.5, + "learning_rate": 0.00024072056819200537, + "loss": 0.1883, + "step": 4945 + }, + { + "epoch": 1.5, + "learning_rate": 0.00024069754303157481, + "loss": 0.2808, + "step": 4946 + }, + { + "epoch": 1.5, + "learning_rate": 0.00024067451450199647, + "loss": 0.2682, + "step": 4947 + }, + { + "epoch": 1.5, + "learning_rate": 0.00024065148260412586, + "loss": 0.1381, + "step": 4948 + }, + { + "epoch": 1.5, + "learning_rate": 0.0002406284473388185, + "loss": 0.1786, + "step": 4949 + }, + { + "epoch": 1.5, + "learning_rate": 0.00024060540870693005, + "loss": 0.1161, + "step": 4950 + }, + { + "epoch": 1.5, + "learning_rate": 0.00024058236670931638, + "loss": 0.242, + "step": 4951 + }, + { + "epoch": 1.5, + "learning_rate": 0.00024055932134683345, + "loss": 0.166, + "step": 4952 + }, + { + "epoch": 1.5, + "learning_rate": 0.0002405362726203373, + "loss": 0.1391, + "step": 4953 + }, + { + "epoch": 1.5, + "learning_rate": 0.00024051322053068403, + "loss": 0.1153, + "step": 4954 + }, + { + "epoch": 1.5, + "learning_rate": 0.0002404901650787301, + "loss": 0.2139, + "step": 4955 + }, + { + "epoch": 1.5, + "learning_rate": 0.00024046710626533192, + "loss": 0.1491, + "step": 4956 + }, + { + "epoch": 1.51, + "learning_rate": 0.000240444044091346, + "loss": 0.2523, + "step": 4957 + }, + { + "epoch": 1.51, + "learning_rate": 0.00024042097855762908, + "loss": 0.0984, + "step": 4958 + }, + { + "epoch": 1.51, + "learning_rate": 0.00024039790966503795, + "loss": 0.1191, + "step": 4959 + }, + { + "epoch": 1.51, + "learning_rate": 0.0002403748374144296, + "loss": 0.2819, + "step": 4960 + }, + { + "epoch": 1.51, + "learning_rate": 0.00024035176180666105, + "loss": 0.1829, + "step": 4961 + }, + { + "epoch": 1.51, + "learning_rate": 0.00024032868284258949, + "loss": 0.2079, + "step": 4962 + }, + { + "epoch": 1.51, + "learning_rate": 0.00024030560052307225, + "loss": 0.1978, + "step": 4963 + }, + { + "epoch": 1.51, + "learning_rate": 0.00024028251484896676, + "loss": 0.214, + "step": 4964 + }, + { + "epoch": 1.51, + "learning_rate": 0.00024025942582113065, + "loss": 0.149, + "step": 4965 + }, + { + "epoch": 1.51, + "learning_rate": 0.00024023633344042147, + "loss": 0.1049, + "step": 4966 + }, + { + "epoch": 1.51, + "learning_rate": 0.0002402132377076972, + "loss": 0.1477, + "step": 4967 + }, + { + "epoch": 1.51, + "learning_rate": 0.00024019013862381568, + "loss": 0.2492, + "step": 4968 + }, + { + "epoch": 1.51, + "learning_rate": 0.0002401670361896349, + "loss": 0.2476, + "step": 4969 + }, + { + "epoch": 1.51, + "learning_rate": 0.00024014393040601323, + "loss": 0.1432, + "step": 4970 + }, + { + "epoch": 1.51, + "learning_rate": 0.00024012082127380886, + "loss": 0.2337, + "step": 4971 + }, + { + "epoch": 1.51, + "learning_rate": 0.0002400977087938803, + "loss": 0.1171, + "step": 4972 + }, + { + "epoch": 1.51, + "learning_rate": 0.00024007459296708606, + "loss": 0.1095, + "step": 4973 + }, + { + "epoch": 1.51, + "learning_rate": 0.00024005147379428475, + "loss": 0.1287, + "step": 4974 + }, + { + "epoch": 1.51, + "learning_rate": 0.00024002835127633532, + "loss": 0.1915, + "step": 4975 + }, + { + "epoch": 1.51, + "learning_rate": 0.00024000522541409668, + "loss": 0.0828, + "step": 4976 + }, + { + "epoch": 1.51, + "learning_rate": 0.0002399820962084278, + "loss": 0.1927, + "step": 4977 + }, + { + "epoch": 1.51, + "learning_rate": 0.0002399589636601879, + "loss": 0.0768, + "step": 4978 + }, + { + "epoch": 1.51, + "learning_rate": 0.0002399358277702364, + "loss": 0.172, + "step": 4979 + }, + { + "epoch": 1.51, + "learning_rate": 0.00023991268853943256, + "loss": 0.2276, + "step": 4980 + }, + { + "epoch": 1.51, + "learning_rate": 0.00023988954596863596, + "loss": 0.2071, + "step": 4981 + }, + { + "epoch": 1.51, + "learning_rate": 0.00023986640005870641, + "loss": 0.2261, + "step": 4982 + }, + { + "epoch": 1.51, + "learning_rate": 0.0002398432508105036, + "loss": 0.2018, + "step": 4983 + }, + { + "epoch": 1.51, + "learning_rate": 0.0002398200982248875, + "loss": 0.2071, + "step": 4984 + }, + { + "epoch": 1.51, + "learning_rate": 0.0002397969423027181, + "loss": 0.304, + "step": 4985 + }, + { + "epoch": 1.51, + "learning_rate": 0.00023977378304485566, + "loss": 0.1309, + "step": 4986 + }, + { + "epoch": 1.51, + "learning_rate": 0.00023975062045216046, + "loss": 0.2296, + "step": 4987 + }, + { + "epoch": 1.51, + "learning_rate": 0.00023972745452549284, + "loss": 0.1748, + "step": 4988 + }, + { + "epoch": 1.51, + "learning_rate": 0.00023970428526571346, + "loss": 0.2961, + "step": 4989 + }, + { + "epoch": 1.52, + "learning_rate": 0.0002396811126736829, + "loss": 0.2186, + "step": 4990 + }, + { + "epoch": 1.52, + "learning_rate": 0.00023965793675026205, + "loss": 0.1569, + "step": 4991 + }, + { + "epoch": 1.52, + "learning_rate": 0.00023963475749631173, + "loss": 0.0596, + "step": 4992 + }, + { + "epoch": 1.52, + "learning_rate": 0.00023961157491269303, + "loss": 0.1762, + "step": 4993 + }, + { + "epoch": 1.52, + "learning_rate": 0.00023958838900026708, + "loss": 0.0936, + "step": 4994 + }, + { + "epoch": 1.52, + "learning_rate": 0.00023956519975989524, + "loss": 0.1699, + "step": 4995 + }, + { + "epoch": 1.52, + "learning_rate": 0.00023954200719243884, + "loss": 0.2865, + "step": 4996 + }, + { + "epoch": 1.52, + "learning_rate": 0.00023951881129875946, + "loss": 0.1818, + "step": 4997 + }, + { + "epoch": 1.52, + "learning_rate": 0.00023949561207971875, + "loss": 0.2496, + "step": 4998 + }, + { + "epoch": 1.52, + "learning_rate": 0.0002394724095361785, + "loss": 0.2167, + "step": 4999 + }, + { + "epoch": 1.52, + "learning_rate": 0.00023944920366900061, + "loss": 0.0765, + "step": 5000 + }, + { + "epoch": 1.52, + "learning_rate": 0.00023942599447904713, + "loss": 0.245, + "step": 5001 + }, + { + "epoch": 1.52, + "learning_rate": 0.00023940278196718016, + "loss": 0.1438, + "step": 5002 + }, + { + "epoch": 1.52, + "learning_rate": 0.00023937956613426201, + "loss": 0.2264, + "step": 5003 + }, + { + "epoch": 1.52, + "learning_rate": 0.0002393563469811551, + "loss": 0.2475, + "step": 5004 + }, + { + "epoch": 1.52, + "learning_rate": 0.00023933312450872196, + "loss": 0.1016, + "step": 5005 + }, + { + "epoch": 1.52, + "learning_rate": 0.0002393098987178252, + "loss": 0.2269, + "step": 5006 + }, + { + "epoch": 1.52, + "learning_rate": 0.00023928666960932755, + "loss": 0.153, + "step": 5007 + }, + { + "epoch": 1.52, + "learning_rate": 0.00023926343718409202, + "loss": 0.0776, + "step": 5008 + }, + { + "epoch": 1.52, + "learning_rate": 0.00023924020144298154, + "loss": 0.1575, + "step": 5009 + }, + { + "epoch": 1.52, + "learning_rate": 0.00023921696238685925, + "loss": 0.2811, + "step": 5010 + }, + { + "epoch": 1.52, + "learning_rate": 0.00023919372001658846, + "loss": 0.2035, + "step": 5011 + }, + { + "epoch": 1.52, + "learning_rate": 0.00023917047433303254, + "loss": 0.3496, + "step": 5012 + }, + { + "epoch": 1.52, + "learning_rate": 0.00023914722533705494, + "loss": 0.2451, + "step": 5013 + }, + { + "epoch": 1.52, + "learning_rate": 0.0002391239730295194, + "loss": 0.2767, + "step": 5014 + }, + { + "epoch": 1.52, + "learning_rate": 0.00023910071741128954, + "loss": 0.1617, + "step": 5015 + }, + { + "epoch": 1.52, + "learning_rate": 0.00023907745848322938, + "loss": 0.1488, + "step": 5016 + }, + { + "epoch": 1.52, + "learning_rate": 0.00023905419624620285, + "loss": 0.2379, + "step": 5017 + }, + { + "epoch": 1.52, + "learning_rate": 0.00023903093070107404, + "loss": 0.2984, + "step": 5018 + }, + { + "epoch": 1.52, + "learning_rate": 0.00023900766184870725, + "loss": 0.1839, + "step": 5019 + }, + { + "epoch": 1.52, + "learning_rate": 0.00023898438968996685, + "loss": 0.3119, + "step": 5020 + }, + { + "epoch": 1.52, + "learning_rate": 0.00023896111422571734, + "loss": 0.2784, + "step": 5021 + }, + { + "epoch": 1.52, + "learning_rate": 0.0002389378354568233, + "loss": 0.1766, + "step": 5022 + }, + { + "epoch": 1.53, + "learning_rate": 0.00023891455338414941, + "loss": 0.238, + "step": 5023 + }, + { + "epoch": 1.53, + "learning_rate": 0.00023889126800856066, + "loss": 0.2001, + "step": 5024 + }, + { + "epoch": 1.53, + "learning_rate": 0.000238867979330922, + "loss": 0.1802, + "step": 5025 + }, + { + "epoch": 1.53, + "learning_rate": 0.00023884468735209846, + "loss": 0.2057, + "step": 5026 + }, + { + "epoch": 1.53, + "learning_rate": 0.00023882139207295536, + "loss": 0.136, + "step": 5027 + }, + { + "epoch": 1.53, + "learning_rate": 0.00023879809349435798, + "loss": 0.2949, + "step": 5028 + }, + { + "epoch": 1.53, + "learning_rate": 0.00023877479161717183, + "loss": 0.1228, + "step": 5029 + }, + { + "epoch": 1.53, + "learning_rate": 0.00023875148644226255, + "loss": 0.1453, + "step": 5030 + }, + { + "epoch": 1.53, + "learning_rate": 0.00023872817797049572, + "loss": 0.1593, + "step": 5031 + }, + { + "epoch": 1.53, + "learning_rate": 0.00023870486620273737, + "loss": 0.2175, + "step": 5032 + }, + { + "epoch": 1.53, + "learning_rate": 0.0002386815511398533, + "loss": 0.0797, + "step": 5033 + }, + { + "epoch": 1.53, + "learning_rate": 0.00023865823278270966, + "loss": 0.1547, + "step": 5034 + }, + { + "epoch": 1.53, + "learning_rate": 0.00023863491113217269, + "loss": 0.2597, + "step": 5035 + }, + { + "epoch": 1.53, + "learning_rate": 0.00023861158618910868, + "loss": 0.1262, + "step": 5036 + }, + { + "epoch": 1.53, + "learning_rate": 0.0002385882579543841, + "loss": 0.1672, + "step": 5037 + }, + { + "epoch": 1.53, + "learning_rate": 0.0002385649264288655, + "loss": 0.1981, + "step": 5038 + }, + { + "epoch": 1.53, + "learning_rate": 0.00023854159161341962, + "loss": 0.1412, + "step": 5039 + }, + { + "epoch": 1.53, + "learning_rate": 0.00023851825350891323, + "loss": 0.185, + "step": 5040 + }, + { + "epoch": 1.53, + "learning_rate": 0.0002384949121162133, + "loss": 0.2069, + "step": 5041 + }, + { + "epoch": 1.53, + "learning_rate": 0.00023847156743618693, + "loss": 0.1512, + "step": 5042 + }, + { + "epoch": 1.53, + "learning_rate": 0.00023844821946970122, + "loss": 0.2143, + "step": 5043 + }, + { + "epoch": 1.53, + "learning_rate": 0.00023842486821762357, + "loss": 0.1733, + "step": 5044 + }, + { + "epoch": 1.53, + "learning_rate": 0.00023840151368082139, + "loss": 0.1687, + "step": 5045 + }, + { + "epoch": 1.53, + "learning_rate": 0.0002383781558601622, + "loss": 0.1792, + "step": 5046 + }, + { + "epoch": 1.53, + "learning_rate": 0.00023835479475651367, + "loss": 0.1464, + "step": 5047 + }, + { + "epoch": 1.53, + "learning_rate": 0.0002383314303707436, + "loss": 0.2294, + "step": 5048 + }, + { + "epoch": 1.53, + "learning_rate": 0.0002383080627037199, + "loss": 0.0703, + "step": 5049 + }, + { + "epoch": 1.53, + "learning_rate": 0.00023828469175631072, + "loss": 0.2708, + "step": 5050 + }, + { + "epoch": 1.53, + "learning_rate": 0.00023826131752938405, + "loss": 0.1855, + "step": 5051 + }, + { + "epoch": 1.53, + "learning_rate": 0.00023823794002380828, + "loss": 0.2047, + "step": 5052 + }, + { + "epoch": 1.53, + "learning_rate": 0.00023821455924045182, + "loss": 0.1857, + "step": 5053 + }, + { + "epoch": 1.53, + "learning_rate": 0.00023819117518018317, + "loss": 0.2015, + "step": 5054 + }, + { + "epoch": 1.53, + "learning_rate": 0.00023816778784387094, + "loss": 0.1357, + "step": 5055 + }, + { + "epoch": 1.54, + "learning_rate": 0.00023814439723238398, + "loss": 0.1997, + "step": 5056 + }, + { + "epoch": 1.54, + "learning_rate": 0.00023812100334659114, + "loss": 0.1263, + "step": 5057 + }, + { + "epoch": 1.54, + "learning_rate": 0.00023809760618736142, + "loss": 0.0634, + "step": 5058 + }, + { + "epoch": 1.54, + "learning_rate": 0.000238074205755564, + "loss": 0.0923, + "step": 5059 + }, + { + "epoch": 1.54, + "learning_rate": 0.0002380508020520681, + "loss": 0.289, + "step": 5060 + }, + { + "epoch": 1.54, + "learning_rate": 0.00023802739507774307, + "loss": 0.0592, + "step": 5061 + }, + { + "epoch": 1.54, + "learning_rate": 0.0002380039848334585, + "loss": 0.2292, + "step": 5062 + }, + { + "epoch": 1.54, + "learning_rate": 0.00023798057132008393, + "loss": 0.1386, + "step": 5063 + }, + { + "epoch": 1.54, + "learning_rate": 0.00023795715453848914, + "loss": 0.0387, + "step": 5064 + }, + { + "epoch": 1.54, + "learning_rate": 0.00023793373448954401, + "loss": 0.1399, + "step": 5065 + }, + { + "epoch": 1.54, + "learning_rate": 0.00023791031117411854, + "loss": 0.1914, + "step": 5066 + }, + { + "epoch": 1.54, + "learning_rate": 0.0002378868845930828, + "loss": 0.1998, + "step": 5067 + }, + { + "epoch": 1.54, + "learning_rate": 0.00023786345474730692, + "loss": 0.1886, + "step": 5068 + }, + { + "epoch": 1.54, + "learning_rate": 0.00023784002163766143, + "loss": 0.2196, + "step": 5069 + }, + { + "epoch": 1.54, + "learning_rate": 0.00023781658526501675, + "loss": 0.2483, + "step": 5070 + }, + { + "epoch": 1.54, + "learning_rate": 0.0002377931456302434, + "loss": 0.1262, + "step": 5071 + }, + { + "epoch": 1.54, + "learning_rate": 0.00023776970273421216, + "loss": 0.1282, + "step": 5072 + }, + { + "epoch": 1.54, + "learning_rate": 0.00023774625657779387, + "loss": 0.2021, + "step": 5073 + }, + { + "epoch": 1.54, + "learning_rate": 0.0002377228071618594, + "loss": 0.1006, + "step": 5074 + }, + { + "epoch": 1.54, + "learning_rate": 0.00023769935448727996, + "loss": 0.0727, + "step": 5075 + }, + { + "epoch": 1.54, + "learning_rate": 0.0002376758985549266, + "loss": 0.2057, + "step": 5076 + }, + { + "epoch": 1.54, + "learning_rate": 0.00023765243936567078, + "loss": 0.0388, + "step": 5077 + }, + { + "epoch": 1.54, + "learning_rate": 0.00023762897692038387, + "loss": 0.1072, + "step": 5078 + }, + { + "epoch": 1.54, + "learning_rate": 0.0002376055112199374, + "loss": 0.1219, + "step": 5079 + }, + { + "epoch": 1.54, + "learning_rate": 0.00023758204226520315, + "loss": 0.065, + "step": 5080 + }, + { + "epoch": 1.54, + "learning_rate": 0.00023755857005705282, + "loss": 0.0591, + "step": 5081 + }, + { + "epoch": 1.54, + "learning_rate": 0.00023753509459635834, + "loss": 0.2352, + "step": 5082 + }, + { + "epoch": 1.54, + "learning_rate": 0.00023751161588399185, + "loss": 0.1683, + "step": 5083 + }, + { + "epoch": 1.54, + "learning_rate": 0.0002374881339208254, + "loss": 0.1731, + "step": 5084 + }, + { + "epoch": 1.54, + "learning_rate": 0.00023746464870773144, + "loss": 0.2199, + "step": 5085 + }, + { + "epoch": 1.54, + "learning_rate": 0.00023744116024558215, + "loss": 0.2127, + "step": 5086 + }, + { + "epoch": 1.54, + "learning_rate": 0.0002374176685352502, + "loss": 0.3046, + "step": 5087 + }, + { + "epoch": 1.54, + "learning_rate": 0.00023739417357760827, + "loss": 0.1148, + "step": 5088 + }, + { + "epoch": 1.55, + "learning_rate": 0.000237370675373529, + "loss": 0.0875, + "step": 5089 + }, + { + "epoch": 1.55, + "learning_rate": 0.00023734717392388541, + "loss": 0.2635, + "step": 5090 + }, + { + "epoch": 1.55, + "learning_rate": 0.00023732366922955044, + "loss": 0.0394, + "step": 5091 + }, + { + "epoch": 1.55, + "learning_rate": 0.0002373001612913972, + "loss": 0.1275, + "step": 5092 + }, + { + "epoch": 1.55, + "learning_rate": 0.00023727665011029903, + "loss": 0.1585, + "step": 5093 + }, + { + "epoch": 1.55, + "learning_rate": 0.0002372531356871292, + "loss": 0.1909, + "step": 5094 + }, + { + "epoch": 1.55, + "learning_rate": 0.00023722961802276126, + "loss": 0.1861, + "step": 5095 + }, + { + "epoch": 1.55, + "learning_rate": 0.0002372060971180688, + "loss": 0.0451, + "step": 5096 + }, + { + "epoch": 1.55, + "learning_rate": 0.00023718257297392556, + "loss": 0.1527, + "step": 5097 + }, + { + "epoch": 1.55, + "learning_rate": 0.00023715904559120545, + "loss": 0.1409, + "step": 5098 + }, + { + "epoch": 1.55, + "learning_rate": 0.00023713551497078232, + "loss": 0.0657, + "step": 5099 + }, + { + "epoch": 1.55, + "learning_rate": 0.00023711198111353037, + "loss": 0.2353, + "step": 5100 + }, + { + "epoch": 1.55, + "learning_rate": 0.00023708844402032375, + "loss": 0.1664, + "step": 5101 + }, + { + "epoch": 1.55, + "learning_rate": 0.0002370649036920368, + "loss": 0.3046, + "step": 5102 + }, + { + "epoch": 1.55, + "learning_rate": 0.0002370413601295441, + "loss": 0.1967, + "step": 5103 + }, + { + "epoch": 1.55, + "learning_rate": 0.00023701781333372002, + "loss": 0.2013, + "step": 5104 + }, + { + "epoch": 1.55, + "learning_rate": 0.00023699426330543933, + "loss": 0.0777, + "step": 5105 + }, + { + "epoch": 1.55, + "learning_rate": 0.00023697071004557692, + "loss": 0.25, + "step": 5106 + }, + { + "epoch": 1.55, + "learning_rate": 0.00023694715355500768, + "loss": 0.1802, + "step": 5107 + }, + { + "epoch": 1.55, + "learning_rate": 0.00023692359383460665, + "loss": 0.1548, + "step": 5108 + }, + { + "epoch": 1.55, + "learning_rate": 0.000236900030885249, + "loss": 0.3857, + "step": 5109 + }, + { + "epoch": 1.55, + "learning_rate": 0.00023687646470781, + "loss": 0.2961, + "step": 5110 + }, + { + "epoch": 1.55, + "learning_rate": 0.00023685289530316517, + "loss": 0.2265, + "step": 5111 + }, + { + "epoch": 1.55, + "learning_rate": 0.0002368293226721899, + "loss": 0.1199, + "step": 5112 + }, + { + "epoch": 1.55, + "learning_rate": 0.00023680574681575998, + "loss": 0.3478, + "step": 5113 + }, + { + "epoch": 1.55, + "learning_rate": 0.0002367821677347511, + "loss": 0.1874, + "step": 5114 + }, + { + "epoch": 1.55, + "learning_rate": 0.00023675858543003916, + "loss": 0.1617, + "step": 5115 + }, + { + "epoch": 1.55, + "learning_rate": 0.0002367349999025002, + "loss": 0.2252, + "step": 5116 + }, + { + "epoch": 1.55, + "learning_rate": 0.00023671141115301034, + "loss": 0.2811, + "step": 5117 + }, + { + "epoch": 1.55, + "learning_rate": 0.00023668781918244585, + "loss": 0.0342, + "step": 5118 + }, + { + "epoch": 1.55, + "learning_rate": 0.00023666422399168307, + "loss": 0.1806, + "step": 5119 + }, + { + "epoch": 1.55, + "learning_rate": 0.0002366406255815985, + "loss": 0.2179, + "step": 5120 + }, + { + "epoch": 1.55, + "learning_rate": 0.0002366170239530688, + "loss": 0.2683, + "step": 5121 + }, + { + "epoch": 1.56, + "learning_rate": 0.00023659341910697062, + "loss": 0.1873, + "step": 5122 + }, + { + "epoch": 1.56, + "learning_rate": 0.00023656981104418088, + "loss": 0.2122, + "step": 5123 + }, + { + "epoch": 1.56, + "learning_rate": 0.0002365461997655765, + "loss": 0.2207, + "step": 5124 + }, + { + "epoch": 1.56, + "learning_rate": 0.00023652258527203458, + "loss": 0.2362, + "step": 5125 + }, + { + "epoch": 1.56, + "learning_rate": 0.0002364989675644324, + "loss": 0.1312, + "step": 5126 + }, + { + "epoch": 1.56, + "learning_rate": 0.0002364753466436472, + "loss": 0.1584, + "step": 5127 + }, + { + "epoch": 1.56, + "learning_rate": 0.00023645172251055645, + "loss": 0.17, + "step": 5128 + }, + { + "epoch": 1.56, + "learning_rate": 0.00023642809516603776, + "loss": 0.1733, + "step": 5129 + }, + { + "epoch": 1.56, + "learning_rate": 0.0002364044646109687, + "loss": 0.2235, + "step": 5130 + }, + { + "epoch": 1.56, + "learning_rate": 0.00023638083084622728, + "loss": 0.2873, + "step": 5131 + }, + { + "epoch": 1.56, + "learning_rate": 0.0002363571938726912, + "loss": 0.1828, + "step": 5132 + }, + { + "epoch": 1.56, + "learning_rate": 0.00023633355369123867, + "loss": 0.1316, + "step": 5133 + }, + { + "epoch": 1.56, + "learning_rate": 0.00023630991030274776, + "loss": 0.1998, + "step": 5134 + }, + { + "epoch": 1.56, + "learning_rate": 0.0002362862637080968, + "loss": 0.0698, + "step": 5135 + }, + { + "epoch": 1.56, + "learning_rate": 0.00023626261390816418, + "loss": 0.1407, + "step": 5136 + }, + { + "epoch": 1.56, + "learning_rate": 0.0002362389609038284, + "loss": 0.1295, + "step": 5137 + }, + { + "epoch": 1.56, + "learning_rate": 0.0002362153046959681, + "loss": 0.2411, + "step": 5138 + }, + { + "epoch": 1.56, + "learning_rate": 0.00023619164528546207, + "loss": 0.1412, + "step": 5139 + }, + { + "epoch": 1.56, + "learning_rate": 0.0002361679826731892, + "loss": 0.1354, + "step": 5140 + }, + { + "epoch": 1.56, + "learning_rate": 0.00023614431686002846, + "loss": 0.2557, + "step": 5141 + }, + { + "epoch": 1.56, + "learning_rate": 0.0002361206478468589, + "loss": 0.1248, + "step": 5142 + }, + { + "epoch": 1.56, + "learning_rate": 0.00023609697563455986, + "loss": 0.2325, + "step": 5143 + }, + { + "epoch": 1.56, + "learning_rate": 0.0002360733002240107, + "loss": 0.1865, + "step": 5144 + }, + { + "epoch": 1.56, + "learning_rate": 0.0002360496216160908, + "loss": 0.1871, + "step": 5145 + }, + { + "epoch": 1.56, + "learning_rate": 0.00023602593981167982, + "loss": 0.1527, + "step": 5146 + }, + { + "epoch": 1.56, + "learning_rate": 0.0002360022548116574, + "loss": 0.2027, + "step": 5147 + }, + { + "epoch": 1.56, + "learning_rate": 0.00023597856661690343, + "loss": 0.2909, + "step": 5148 + }, + { + "epoch": 1.56, + "learning_rate": 0.00023595487522829785, + "loss": 0.0884, + "step": 5149 + }, + { + "epoch": 1.56, + "learning_rate": 0.0002359311806467207, + "loss": 0.1674, + "step": 5150 + }, + { + "epoch": 1.56, + "learning_rate": 0.0002359074828730522, + "loss": 0.2346, + "step": 5151 + }, + { + "epoch": 1.56, + "learning_rate": 0.0002358837819081726, + "loss": 0.1567, + "step": 5152 + }, + { + "epoch": 1.56, + "learning_rate": 0.0002358600777529624, + "loss": 0.2658, + "step": 5153 + }, + { + "epoch": 1.56, + "learning_rate": 0.00023583637040830205, + "loss": 0.1875, + "step": 5154 + }, + { + "epoch": 1.57, + "learning_rate": 0.00023581265987507224, + "loss": 0.2446, + "step": 5155 + }, + { + "epoch": 1.57, + "learning_rate": 0.00023578894615415384, + "loss": 0.2497, + "step": 5156 + }, + { + "epoch": 1.57, + "learning_rate": 0.0002357652292464276, + "loss": 0.0495, + "step": 5157 + }, + { + "epoch": 1.57, + "learning_rate": 0.0002357415091527746, + "loss": 0.2376, + "step": 5158 + }, + { + "epoch": 1.57, + "learning_rate": 0.000235717785874076, + "loss": 0.1314, + "step": 5159 + }, + { + "epoch": 1.57, + "learning_rate": 0.000235694059411213, + "loss": 0.1556, + "step": 5160 + }, + { + "epoch": 1.57, + "learning_rate": 0.00023567032976506698, + "loss": 0.3135, + "step": 5161 + }, + { + "epoch": 1.57, + "learning_rate": 0.00023564659693651945, + "loss": 0.2418, + "step": 5162 + }, + { + "epoch": 1.57, + "learning_rate": 0.000235622860926452, + "loss": 0.1955, + "step": 5163 + }, + { + "epoch": 1.57, + "learning_rate": 0.0002355991217357464, + "loss": 0.1771, + "step": 5164 + }, + { + "epoch": 1.57, + "learning_rate": 0.00023557537936528438, + "loss": 0.1426, + "step": 5165 + }, + { + "epoch": 1.57, + "learning_rate": 0.00023555163381594794, + "loss": 0.2492, + "step": 5166 + }, + { + "epoch": 1.57, + "learning_rate": 0.00023552788508861925, + "loss": 0.2686, + "step": 5167 + }, + { + "epoch": 1.57, + "learning_rate": 0.0002355041331841804, + "loss": 0.244, + "step": 5168 + }, + { + "epoch": 1.57, + "learning_rate": 0.0002354803781035138, + "loss": 0.2981, + "step": 5169 + }, + { + "epoch": 1.57, + "learning_rate": 0.00023545661984750174, + "loss": 0.1789, + "step": 5170 + }, + { + "epoch": 1.57, + "learning_rate": 0.0002354328584170269, + "loss": 0.2373, + "step": 5171 + }, + { + "epoch": 1.57, + "learning_rate": 0.00023540909381297189, + "loss": 0.2449, + "step": 5172 + }, + { + "epoch": 1.57, + "learning_rate": 0.0002353853260362195, + "loss": 0.1628, + "step": 5173 + }, + { + "epoch": 1.57, + "learning_rate": 0.0002353615550876526, + "loss": 0.1879, + "step": 5174 + }, + { + "epoch": 1.57, + "learning_rate": 0.0002353377809681543, + "loss": 0.0188, + "step": 5175 + }, + { + "epoch": 1.57, + "learning_rate": 0.0002353140036786077, + "loss": 0.0523, + "step": 5176 + }, + { + "epoch": 1.57, + "learning_rate": 0.00023529022321989602, + "loss": 0.0973, + "step": 5177 + }, + { + "epoch": 1.57, + "learning_rate": 0.00023526643959290264, + "loss": 0.2983, + "step": 5178 + }, + { + "epoch": 1.57, + "learning_rate": 0.0002352426527985111, + "loss": 0.2471, + "step": 5179 + }, + { + "epoch": 1.57, + "learning_rate": 0.000235218862837605, + "loss": 0.1444, + "step": 5180 + }, + { + "epoch": 1.57, + "learning_rate": 0.000235195069711068, + "loss": 0.0865, + "step": 5181 + }, + { + "epoch": 1.57, + "learning_rate": 0.00023517127341978402, + "loss": 0.1156, + "step": 5182 + }, + { + "epoch": 1.57, + "learning_rate": 0.000235147473964637, + "loss": 0.2391, + "step": 5183 + }, + { + "epoch": 1.57, + "learning_rate": 0.000235123671346511, + "loss": 0.1601, + "step": 5184 + }, + { + "epoch": 1.57, + "learning_rate": 0.00023509986556629023, + "loss": 0.1615, + "step": 5185 + }, + { + "epoch": 1.57, + "learning_rate": 0.000235076056624859, + "loss": 0.1859, + "step": 5186 + }, + { + "epoch": 1.57, + "learning_rate": 0.0002350522445231018, + "loss": 0.0989, + "step": 5187 + }, + { + "epoch": 1.58, + "learning_rate": 0.00023502842926190304, + "loss": 0.1609, + "step": 5188 + }, + { + "epoch": 1.58, + "learning_rate": 0.00023500461084214756, + "loss": 0.1585, + "step": 5189 + }, + { + "epoch": 1.58, + "learning_rate": 0.00023498078926472, + "loss": 0.1263, + "step": 5190 + }, + { + "epoch": 1.58, + "learning_rate": 0.00023495696453050536, + "loss": 0.1606, + "step": 5191 + }, + { + "epoch": 1.58, + "learning_rate": 0.00023493313664038864, + "loss": 0.1483, + "step": 5192 + }, + { + "epoch": 1.58, + "learning_rate": 0.0002349093055952549, + "loss": 0.3372, + "step": 5193 + }, + { + "epoch": 1.58, + "learning_rate": 0.0002348854713959895, + "loss": 0.1067, + "step": 5194 + }, + { + "epoch": 1.58, + "learning_rate": 0.00023486163404347774, + "loss": 0.1232, + "step": 5195 + }, + { + "epoch": 1.58, + "learning_rate": 0.00023483779353860513, + "loss": 0.2353, + "step": 5196 + }, + { + "epoch": 1.58, + "learning_rate": 0.0002348139498822573, + "loss": 0.2022, + "step": 5197 + }, + { + "epoch": 1.58, + "learning_rate": 0.00023479010307531992, + "loss": 0.3067, + "step": 5198 + }, + { + "epoch": 1.58, + "learning_rate": 0.00023476625311867882, + "loss": 0.1425, + "step": 5199 + }, + { + "epoch": 1.58, + "learning_rate": 0.00023474240001322005, + "loss": 0.1639, + "step": 5200 + }, + { + "epoch": 1.58, + "learning_rate": 0.00023471854375982967, + "loss": 0.0642, + "step": 5201 + }, + { + "epoch": 1.58, + "learning_rate": 0.00023469468435939375, + "loss": 0.1281, + "step": 5202 + }, + { + "epoch": 1.58, + "learning_rate": 0.00023467082181279867, + "loss": 0.2481, + "step": 5203 + }, + { + "epoch": 1.58, + "learning_rate": 0.00023464695612093086, + "loss": 0.2231, + "step": 5204 + }, + { + "epoch": 1.58, + "learning_rate": 0.0002346230872846769, + "loss": 0.1351, + "step": 5205 + }, + { + "epoch": 1.58, + "learning_rate": 0.00023459921530492333, + "loss": 0.1821, + "step": 5206 + }, + { + "epoch": 1.58, + "learning_rate": 0.00023457534018255706, + "loss": 0.2734, + "step": 5207 + }, + { + "epoch": 1.58, + "learning_rate": 0.00023455146191846487, + "loss": 0.1711, + "step": 5208 + }, + { + "epoch": 1.58, + "learning_rate": 0.0002345275805135338, + "loss": 0.1194, + "step": 5209 + }, + { + "epoch": 1.58, + "learning_rate": 0.00023450369596865106, + "loss": 0.1934, + "step": 5210 + }, + { + "epoch": 1.58, + "learning_rate": 0.00023447980828470376, + "loss": 0.0135, + "step": 5211 + }, + { + "epoch": 1.58, + "learning_rate": 0.0002344559174625794, + "loss": 0.2411, + "step": 5212 + }, + { + "epoch": 1.58, + "learning_rate": 0.0002344320235031653, + "loss": 0.1751, + "step": 5213 + }, + { + "epoch": 1.58, + "learning_rate": 0.00023440812640734913, + "loss": 0.2132, + "step": 5214 + }, + { + "epoch": 1.58, + "learning_rate": 0.0002343842261760186, + "loss": 0.2038, + "step": 5215 + }, + { + "epoch": 1.58, + "learning_rate": 0.0002343603228100615, + "loss": 0.1714, + "step": 5216 + }, + { + "epoch": 1.58, + "learning_rate": 0.00023433641631036585, + "loss": 0.1407, + "step": 5217 + }, + { + "epoch": 1.58, + "learning_rate": 0.00023431250667781958, + "loss": 0.1532, + "step": 5218 + }, + { + "epoch": 1.58, + "learning_rate": 0.00023428859391331095, + "loss": 0.2543, + "step": 5219 + }, + { + "epoch": 1.58, + "learning_rate": 0.00023426467801772826, + "loss": 0.1301, + "step": 5220 + }, + { + "epoch": 1.59, + "learning_rate": 0.00023424075899195984, + "loss": 0.2581, + "step": 5221 + }, + { + "epoch": 1.59, + "learning_rate": 0.0002342168368368943, + "loss": 0.1401, + "step": 5222 + }, + { + "epoch": 1.59, + "learning_rate": 0.00023419291155342019, + "loss": 0.1674, + "step": 5223 + }, + { + "epoch": 1.59, + "learning_rate": 0.00023416898314242637, + "loss": 0.1811, + "step": 5224 + }, + { + "epoch": 1.59, + "learning_rate": 0.0002341450516048016, + "loss": 0.1314, + "step": 5225 + }, + { + "epoch": 1.59, + "learning_rate": 0.00023412111694143492, + "loss": 0.2705, + "step": 5226 + }, + { + "epoch": 1.59, + "learning_rate": 0.0002340971791532155, + "loss": 0.1528, + "step": 5227 + }, + { + "epoch": 1.59, + "learning_rate": 0.00023407323824103238, + "loss": 0.2879, + "step": 5228 + }, + { + "epoch": 1.59, + "learning_rate": 0.00023404929420577504, + "loss": 0.2027, + "step": 5229 + }, + { + "epoch": 1.59, + "learning_rate": 0.00023402534704833293, + "loss": 0.178, + "step": 5230 + }, + { + "epoch": 1.59, + "learning_rate": 0.00023400139676959554, + "loss": 0.1783, + "step": 5231 + }, + { + "epoch": 1.59, + "learning_rate": 0.0002339774433704526, + "loss": 0.1422, + "step": 5232 + }, + { + "epoch": 1.59, + "learning_rate": 0.00023395348685179392, + "loss": 0.2072, + "step": 5233 + }, + { + "epoch": 1.59, + "learning_rate": 0.00023392952721450937, + "loss": 0.1984, + "step": 5234 + }, + { + "epoch": 1.59, + "learning_rate": 0.00023390556445948904, + "loss": 0.2829, + "step": 5235 + }, + { + "epoch": 1.59, + "learning_rate": 0.000233881598587623, + "loss": 0.213, + "step": 5236 + }, + { + "epoch": 1.59, + "learning_rate": 0.00023385762959980154, + "loss": 0.2701, + "step": 5237 + }, + { + "epoch": 1.59, + "learning_rate": 0.00023383365749691514, + "loss": 0.2015, + "step": 5238 + }, + { + "epoch": 1.59, + "learning_rate": 0.00023380968227985408, + "loss": 0.1391, + "step": 5239 + }, + { + "epoch": 1.59, + "learning_rate": 0.0002337857039495092, + "loss": 0.1557, + "step": 5240 + }, + { + "epoch": 1.59, + "learning_rate": 0.0002337617225067711, + "loss": 0.2871, + "step": 5241 + }, + { + "epoch": 1.59, + "learning_rate": 0.00023373773795253057, + "loss": 0.319, + "step": 5242 + }, + { + "epoch": 1.59, + "learning_rate": 0.0002337137502876787, + "loss": 0.1167, + "step": 5243 + }, + { + "epoch": 1.59, + "learning_rate": 0.0002336897595131065, + "loss": 0.1444, + "step": 5244 + }, + { + "epoch": 1.59, + "learning_rate": 0.0002336657656297051, + "loss": 0.1879, + "step": 5245 + }, + { + "epoch": 1.59, + "learning_rate": 0.00023364176863836588, + "loss": 0.2903, + "step": 5246 + }, + { + "epoch": 1.59, + "learning_rate": 0.00023361776853998023, + "loss": 0.1307, + "step": 5247 + }, + { + "epoch": 1.59, + "learning_rate": 0.00023359376533543967, + "loss": 0.1924, + "step": 5248 + }, + { + "epoch": 1.59, + "learning_rate": 0.0002335697590256359, + "loss": 0.0748, + "step": 5249 + }, + { + "epoch": 1.59, + "learning_rate": 0.00023354574961146065, + "loss": 0.0964, + "step": 5250 + }, + { + "epoch": 1.59, + "learning_rate": 0.00023352173709380577, + "loss": 0.1264, + "step": 5251 + }, + { + "epoch": 1.59, + "learning_rate": 0.00023349772147356328, + "loss": 0.1918, + "step": 5252 + }, + { + "epoch": 1.59, + "learning_rate": 0.00023347370275162533, + "loss": 0.1046, + "step": 5253 + }, + { + "epoch": 1.6, + "learning_rate": 0.00023344968092888405, + "loss": 0.2906, + "step": 5254 + }, + { + "epoch": 1.6, + "learning_rate": 0.0002334256560062319, + "loss": 0.1945, + "step": 5255 + }, + { + "epoch": 1.6, + "learning_rate": 0.0002334016279845612, + "loss": 0.1806, + "step": 5256 + }, + { + "epoch": 1.6, + "learning_rate": 0.00023337759686476464, + "loss": 0.214, + "step": 5257 + }, + { + "epoch": 1.6, + "learning_rate": 0.00023335356264773488, + "loss": 0.1464, + "step": 5258 + }, + { + "epoch": 1.6, + "learning_rate": 0.00023332952533436466, + "loss": 0.0836, + "step": 5259 + }, + { + "epoch": 1.6, + "learning_rate": 0.00023330548492554696, + "loss": 0.2864, + "step": 5260 + }, + { + "epoch": 1.6, + "learning_rate": 0.00023328144142217483, + "loss": 0.1875, + "step": 5261 + }, + { + "epoch": 1.6, + "learning_rate": 0.0002332573948251413, + "loss": 0.3533, + "step": 5262 + }, + { + "epoch": 1.6, + "learning_rate": 0.0002332333451353397, + "loss": 0.1492, + "step": 5263 + }, + { + "epoch": 1.6, + "learning_rate": 0.00023320929235366342, + "loss": 0.0532, + "step": 5264 + }, + { + "epoch": 1.6, + "learning_rate": 0.00023318523648100596, + "loss": 0.1438, + "step": 5265 + }, + { + "epoch": 1.6, + "learning_rate": 0.0002331611775182609, + "loss": 0.2041, + "step": 5266 + }, + { + "epoch": 1.6, + "learning_rate": 0.00023313711546632196, + "loss": 0.1008, + "step": 5267 + }, + { + "epoch": 1.6, + "learning_rate": 0.00023311305032608297, + "loss": 0.2681, + "step": 5268 + }, + { + "epoch": 1.6, + "learning_rate": 0.0002330889820984379, + "loss": 0.1793, + "step": 5269 + }, + { + "epoch": 1.6, + "learning_rate": 0.0002330649107842808, + "loss": 0.2443, + "step": 5270 + }, + { + "epoch": 1.6, + "learning_rate": 0.00023304083638450582, + "loss": 0.0368, + "step": 5271 + }, + { + "epoch": 1.6, + "learning_rate": 0.0002330167589000073, + "loss": 0.0445, + "step": 5272 + }, + { + "epoch": 1.6, + "learning_rate": 0.00023299267833167968, + "loss": 0.214, + "step": 5273 + }, + { + "epoch": 1.6, + "learning_rate": 0.00023296859468041736, + "loss": 0.2037, + "step": 5274 + }, + { + "epoch": 1.6, + "learning_rate": 0.00023294450794711507, + "loss": 0.1704, + "step": 5275 + }, + { + "epoch": 1.6, + "learning_rate": 0.00023292041813266755, + "loss": 0.1731, + "step": 5276 + }, + { + "epoch": 1.6, + "learning_rate": 0.00023289632523796968, + "loss": 0.2301, + "step": 5277 + }, + { + "epoch": 1.6, + "learning_rate": 0.0002328722292639164, + "loss": 0.2958, + "step": 5278 + }, + { + "epoch": 1.6, + "learning_rate": 0.00023284813021140282, + "loss": 0.1397, + "step": 5279 + }, + { + "epoch": 1.6, + "learning_rate": 0.00023282402808132415, + "loss": 0.1707, + "step": 5280 + }, + { + "epoch": 1.6, + "learning_rate": 0.00023279992287457573, + "loss": 0.1606, + "step": 5281 + }, + { + "epoch": 1.6, + "learning_rate": 0.00023277581459205297, + "loss": 0.0472, + "step": 5282 + }, + { + "epoch": 1.6, + "learning_rate": 0.00023275170323465145, + "loss": 0.161, + "step": 5283 + }, + { + "epoch": 1.6, + "learning_rate": 0.00023272758880326681, + "loss": 0.2294, + "step": 5284 + }, + { + "epoch": 1.6, + "learning_rate": 0.00023270347129879487, + "loss": 0.1491, + "step": 5285 + }, + { + "epoch": 1.6, + "learning_rate": 0.00023267935072213147, + "loss": 0.2489, + "step": 5286 + }, + { + "epoch": 1.61, + "learning_rate": 0.00023265522707417266, + "loss": 0.3624, + "step": 5287 + }, + { + "epoch": 1.61, + "learning_rate": 0.00023263110035581455, + "loss": 0.1972, + "step": 5288 + }, + { + "epoch": 1.61, + "learning_rate": 0.00023260697056795335, + "loss": 0.1642, + "step": 5289 + }, + { + "epoch": 1.61, + "learning_rate": 0.00023258283771148547, + "loss": 0.1297, + "step": 5290 + }, + { + "epoch": 1.61, + "learning_rate": 0.00023255870178730734, + "loss": 0.1705, + "step": 5291 + }, + { + "epoch": 1.61, + "learning_rate": 0.0002325345627963155, + "loss": 0.065, + "step": 5292 + }, + { + "epoch": 1.61, + "learning_rate": 0.00023251042073940667, + "loss": 0.1461, + "step": 5293 + }, + { + "epoch": 1.61, + "learning_rate": 0.00023248627561747777, + "loss": 0.0694, + "step": 5294 + }, + { + "epoch": 1.61, + "learning_rate": 0.0002324621274314255, + "loss": 0.0739, + "step": 5295 + }, + { + "epoch": 1.61, + "learning_rate": 0.00023243797618214713, + "loss": 0.1937, + "step": 5296 + }, + { + "epoch": 1.61, + "learning_rate": 0.00023241382187053961, + "loss": 0.1993, + "step": 5297 + }, + { + "epoch": 1.61, + "learning_rate": 0.0002323896644975003, + "loss": 0.0703, + "step": 5298 + }, + { + "epoch": 1.61, + "learning_rate": 0.00023236550406392656, + "loss": 0.0961, + "step": 5299 + }, + { + "epoch": 1.61, + "learning_rate": 0.00023234134057071587, + "loss": 0.1755, + "step": 5300 + }, + { + "epoch": 1.61, + "learning_rate": 0.00023231717401876587, + "loss": 0.2523, + "step": 5301 + }, + { + "epoch": 1.61, + "learning_rate": 0.00023229300440897425, + "loss": 0.2397, + "step": 5302 + }, + { + "epoch": 1.61, + "learning_rate": 0.00023226883174223882, + "loss": 0.1704, + "step": 5303 + }, + { + "epoch": 1.61, + "learning_rate": 0.00023224465601945753, + "loss": 0.1975, + "step": 5304 + }, + { + "epoch": 1.61, + "learning_rate": 0.0002322204772415285, + "loss": 0.2197, + "step": 5305 + }, + { + "epoch": 1.61, + "learning_rate": 0.0002321962954093498, + "loss": 0.196, + "step": 5306 + }, + { + "epoch": 1.61, + "learning_rate": 0.00023217211052381978, + "loss": 0.1307, + "step": 5307 + }, + { + "epoch": 1.61, + "learning_rate": 0.00023214792258583679, + "loss": 0.197, + "step": 5308 + }, + { + "epoch": 1.61, + "learning_rate": 0.00023212373159629942, + "loss": 0.3414, + "step": 5309 + }, + { + "epoch": 1.61, + "learning_rate": 0.00023209953755610622, + "loss": 0.221, + "step": 5310 + }, + { + "epoch": 1.61, + "learning_rate": 0.000232075340466156, + "loss": 0.2398, + "step": 5311 + }, + { + "epoch": 1.61, + "learning_rate": 0.0002320511403273475, + "loss": 0.1026, + "step": 5312 + }, + { + "epoch": 1.61, + "learning_rate": 0.0002320269371405798, + "loss": 0.2256, + "step": 5313 + }, + { + "epoch": 1.61, + "learning_rate": 0.0002320027309067519, + "loss": 0.0585, + "step": 5314 + }, + { + "epoch": 1.61, + "learning_rate": 0.00023197852162676306, + "loss": 0.1671, + "step": 5315 + }, + { + "epoch": 1.61, + "learning_rate": 0.00023195430930151247, + "loss": 0.1523, + "step": 5316 + }, + { + "epoch": 1.61, + "learning_rate": 0.00023193009393189968, + "loss": 0.1844, + "step": 5317 + }, + { + "epoch": 1.61, + "learning_rate": 0.0002319058755188241, + "loss": 0.237, + "step": 5318 + }, + { + "epoch": 1.61, + "learning_rate": 0.0002318816540631855, + "loss": 0.2037, + "step": 5319 + }, + { + "epoch": 1.62, + "learning_rate": 0.00023185742956588354, + "loss": 0.288, + "step": 5320 + }, + { + "epoch": 1.62, + "learning_rate": 0.0002318332020278181, + "loss": 0.1655, + "step": 5321 + }, + { + "epoch": 1.62, + "learning_rate": 0.0002318089714498892, + "loss": 0.2336, + "step": 5322 + }, + { + "epoch": 1.62, + "learning_rate": 0.0002317847378329969, + "loss": 0.0607, + "step": 5323 + }, + { + "epoch": 1.62, + "learning_rate": 0.0002317605011780414, + "loss": 0.1321, + "step": 5324 + }, + { + "epoch": 1.62, + "learning_rate": 0.00023173626148592305, + "loss": 0.1516, + "step": 5325 + }, + { + "epoch": 1.62, + "learning_rate": 0.0002317120187575423, + "loss": 0.1435, + "step": 5326 + }, + { + "epoch": 1.62, + "learning_rate": 0.00023168777299379967, + "loss": 0.2422, + "step": 5327 + }, + { + "epoch": 1.62, + "learning_rate": 0.0002316635241955958, + "loss": 0.1954, + "step": 5328 + }, + { + "epoch": 1.62, + "learning_rate": 0.00023163927236383153, + "loss": 0.1268, + "step": 5329 + }, + { + "epoch": 1.62, + "learning_rate": 0.00023161501749940764, + "loss": 0.1842, + "step": 5330 + }, + { + "epoch": 1.62, + "learning_rate": 0.00023159075960322518, + "loss": 0.1777, + "step": 5331 + }, + { + "epoch": 1.62, + "learning_rate": 0.0002315664986761853, + "loss": 0.1329, + "step": 5332 + }, + { + "epoch": 1.62, + "learning_rate": 0.00023154223471918915, + "loss": 0.271, + "step": 5333 + }, + { + "epoch": 1.62, + "learning_rate": 0.00023151796773313816, + "loss": 0.1516, + "step": 5334 + }, + { + "epoch": 1.62, + "learning_rate": 0.00023149369771893372, + "loss": 0.1383, + "step": 5335 + }, + { + "epoch": 1.62, + "learning_rate": 0.0002314694246774773, + "loss": 0.0993, + "step": 5336 + }, + { + "epoch": 1.62, + "learning_rate": 0.00023144514860967073, + "loss": 0.2177, + "step": 5337 + }, + { + "epoch": 1.62, + "learning_rate": 0.00023142086951641571, + "loss": 0.2265, + "step": 5338 + }, + { + "epoch": 1.62, + "learning_rate": 0.00023139658739861416, + "loss": 0.2654, + "step": 5339 + }, + { + "epoch": 1.62, + "learning_rate": 0.00023137230225716806, + "loss": 0.2804, + "step": 5340 + }, + { + "epoch": 1.62, + "learning_rate": 0.00023134801409297959, + "loss": 0.1506, + "step": 5341 + }, + { + "epoch": 1.62, + "learning_rate": 0.0002313237229069509, + "loss": 0.1795, + "step": 5342 + }, + { + "epoch": 1.62, + "learning_rate": 0.0002312994286999844, + "loss": 0.2252, + "step": 5343 + }, + { + "epoch": 1.62, + "learning_rate": 0.00023127513147298256, + "loss": 0.1698, + "step": 5344 + }, + { + "epoch": 1.62, + "learning_rate": 0.00023125083122684792, + "loss": 0.1044, + "step": 5345 + }, + { + "epoch": 1.62, + "learning_rate": 0.0002312265279624831, + "loss": 0.1911, + "step": 5346 + }, + { + "epoch": 1.62, + "learning_rate": 0.00023120222168079103, + "loss": 0.3269, + "step": 5347 + }, + { + "epoch": 1.62, + "learning_rate": 0.0002311779123826745, + "loss": 0.2422, + "step": 5348 + }, + { + "epoch": 1.62, + "learning_rate": 0.00023115360006903664, + "loss": 0.2011, + "step": 5349 + }, + { + "epoch": 1.62, + "learning_rate": 0.00023112928474078046, + "loss": 0.1085, + "step": 5350 + }, + { + "epoch": 1.62, + "learning_rate": 0.0002311049663988093, + "loss": 0.2305, + "step": 5351 + }, + { + "epoch": 1.63, + "learning_rate": 0.00023108064504402646, + "loss": 0.1898, + "step": 5352 + }, + { + "epoch": 1.63, + "learning_rate": 0.00023105632067733543, + "loss": 0.216, + "step": 5353 + }, + { + "epoch": 1.63, + "learning_rate": 0.00023103199329963976, + "loss": 0.1489, + "step": 5354 + }, + { + "epoch": 1.63, + "learning_rate": 0.00023100766291184318, + "loss": 0.1888, + "step": 5355 + }, + { + "epoch": 1.63, + "learning_rate": 0.00023098332951484947, + "loss": 0.3072, + "step": 5356 + }, + { + "epoch": 1.63, + "learning_rate": 0.00023095899310956257, + "loss": 0.1496, + "step": 5357 + }, + { + "epoch": 1.63, + "learning_rate": 0.00023093465369688647, + "loss": 0.2232, + "step": 5358 + }, + { + "epoch": 1.63, + "learning_rate": 0.00023091031127772533, + "loss": 0.1596, + "step": 5359 + }, + { + "epoch": 1.63, + "learning_rate": 0.00023088596585298343, + "loss": 0.155, + "step": 5360 + }, + { + "epoch": 1.63, + "learning_rate": 0.000230861617423565, + "loss": 0.0705, + "step": 5361 + }, + { + "epoch": 1.63, + "learning_rate": 0.0002308372659903747, + "loss": 0.2638, + "step": 5362 + }, + { + "epoch": 1.63, + "learning_rate": 0.00023081291155431695, + "loss": 0.121, + "step": 5363 + }, + { + "epoch": 1.63, + "learning_rate": 0.00023078855411629655, + "loss": 0.253, + "step": 5364 + }, + { + "epoch": 1.63, + "learning_rate": 0.0002307641936772183, + "loss": 0.1411, + "step": 5365 + }, + { + "epoch": 1.63, + "learning_rate": 0.00023073983023798704, + "loss": 0.2953, + "step": 5366 + }, + { + "epoch": 1.63, + "learning_rate": 0.0002307154637995079, + "loss": 0.1674, + "step": 5367 + }, + { + "epoch": 1.63, + "learning_rate": 0.00023069109436268595, + "loss": 0.2748, + "step": 5368 + }, + { + "epoch": 1.63, + "learning_rate": 0.0002306667219284265, + "loss": 0.213, + "step": 5369 + }, + { + "epoch": 1.63, + "learning_rate": 0.0002306423464976349, + "loss": 0.2681, + "step": 5370 + }, + { + "epoch": 1.63, + "learning_rate": 0.00023061796807121652, + "loss": 0.1329, + "step": 5371 + }, + { + "epoch": 1.63, + "learning_rate": 0.0002305935866500771, + "loss": 0.1266, + "step": 5372 + }, + { + "epoch": 1.63, + "learning_rate": 0.00023056920223512228, + "loss": 0.1553, + "step": 5373 + }, + { + "epoch": 1.63, + "learning_rate": 0.00023054481482725784, + "loss": 0.2031, + "step": 5374 + }, + { + "epoch": 1.63, + "learning_rate": 0.00023052042442738973, + "loss": 0.1137, + "step": 5375 + }, + { + "epoch": 1.63, + "learning_rate": 0.00023049603103642398, + "loss": 0.1303, + "step": 5376 + }, + { + "epoch": 1.63, + "learning_rate": 0.00023047163465526676, + "loss": 0.2593, + "step": 5377 + }, + { + "epoch": 1.63, + "learning_rate": 0.00023044723528482425, + "loss": 0.1185, + "step": 5378 + }, + { + "epoch": 1.63, + "learning_rate": 0.00023042283292600287, + "loss": 0.1413, + "step": 5379 + }, + { + "epoch": 1.63, + "learning_rate": 0.00023039842757970906, + "loss": 0.0738, + "step": 5380 + }, + { + "epoch": 1.63, + "learning_rate": 0.00023037401924684946, + "loss": 0.1574, + "step": 5381 + }, + { + "epoch": 1.63, + "learning_rate": 0.00023034960792833075, + "loss": 0.1416, + "step": 5382 + }, + { + "epoch": 1.63, + "learning_rate": 0.00023032519362505975, + "loss": 0.1774, + "step": 5383 + }, + { + "epoch": 1.63, + "learning_rate": 0.0002303007763379433, + "loss": 0.1759, + "step": 5384 + }, + { + "epoch": 1.64, + "learning_rate": 0.00023027635606788854, + "loss": 0.2708, + "step": 5385 + }, + { + "epoch": 1.64, + "learning_rate": 0.00023025193281580252, + "loss": 0.2315, + "step": 5386 + }, + { + "epoch": 1.64, + "learning_rate": 0.00023022750658259253, + "loss": 0.1745, + "step": 5387 + }, + { + "epoch": 1.64, + "learning_rate": 0.00023020307736916596, + "loss": 0.227, + "step": 5388 + }, + { + "epoch": 1.64, + "learning_rate": 0.00023017864517643026, + "loss": 0.2067, + "step": 5389 + }, + { + "epoch": 1.64, + "learning_rate": 0.000230154210005293, + "loss": 0.1899, + "step": 5390 + }, + { + "epoch": 1.64, + "learning_rate": 0.0002301297718566619, + "loss": 0.1882, + "step": 5391 + }, + { + "epoch": 1.64, + "learning_rate": 0.00023010533073144476, + "loss": 0.215, + "step": 5392 + }, + { + "epoch": 1.64, + "learning_rate": 0.00023008088663054946, + "loss": 0.0865, + "step": 5393 + }, + { + "epoch": 1.64, + "learning_rate": 0.00023005643955488408, + "loss": 0.1702, + "step": 5394 + }, + { + "epoch": 1.64, + "learning_rate": 0.00023003198950535676, + "loss": 0.1815, + "step": 5395 + }, + { + "epoch": 1.64, + "learning_rate": 0.00023000753648287566, + "loss": 0.2258, + "step": 5396 + }, + { + "epoch": 1.64, + "learning_rate": 0.00022998308048834923, + "loss": 0.1958, + "step": 5397 + }, + { + "epoch": 1.64, + "learning_rate": 0.00022995862152268593, + "loss": 0.1585, + "step": 5398 + }, + { + "epoch": 1.64, + "learning_rate": 0.00022993415958679427, + "loss": 0.1778, + "step": 5399 + }, + { + "epoch": 1.64, + "learning_rate": 0.00022990969468158302, + "loss": 0.1255, + "step": 5400 + }, + { + "epoch": 1.64, + "learning_rate": 0.00022988522680796096, + "loss": 0.184, + "step": 5401 + }, + { + "epoch": 1.64, + "learning_rate": 0.00022986075596683698, + "loss": 0.0975, + "step": 5402 + }, + { + "epoch": 1.64, + "learning_rate": 0.0002298362821591201, + "loss": 0.171, + "step": 5403 + }, + { + "epoch": 1.64, + "learning_rate": 0.00022981180538571945, + "loss": 0.2191, + "step": 5404 + }, + { + "epoch": 1.64, + "learning_rate": 0.00022978732564754424, + "loss": 0.1046, + "step": 5405 + }, + { + "epoch": 1.64, + "learning_rate": 0.00022976284294550387, + "loss": 0.1917, + "step": 5406 + }, + { + "epoch": 1.64, + "learning_rate": 0.00022973835728050783, + "loss": 0.2924, + "step": 5407 + }, + { + "epoch": 1.64, + "learning_rate": 0.00022971386865346563, + "loss": 0.1816, + "step": 5408 + }, + { + "epoch": 1.64, + "learning_rate": 0.00022968937706528694, + "loss": 0.2419, + "step": 5409 + }, + { + "epoch": 1.64, + "learning_rate": 0.00022966488251688162, + "loss": 0.1828, + "step": 5410 + }, + { + "epoch": 1.64, + "learning_rate": 0.0002296403850091595, + "loss": 0.0825, + "step": 5411 + }, + { + "epoch": 1.64, + "learning_rate": 0.00022961588454303066, + "loss": 0.1165, + "step": 5412 + }, + { + "epoch": 1.64, + "learning_rate": 0.00022959138111940512, + "loss": 0.0548, + "step": 5413 + }, + { + "epoch": 1.64, + "learning_rate": 0.0002295668747391932, + "loss": 0.1963, + "step": 5414 + }, + { + "epoch": 1.64, + "learning_rate": 0.00022954236540330525, + "loss": 0.1051, + "step": 5415 + }, + { + "epoch": 1.64, + "learning_rate": 0.00022951785311265164, + "loss": 0.1832, + "step": 5416 + }, + { + "epoch": 1.64, + "learning_rate": 0.00022949333786814296, + "loss": 0.1912, + "step": 5417 + }, + { + "epoch": 1.65, + "learning_rate": 0.00022946881967068992, + "loss": 0.2017, + "step": 5418 + }, + { + "epoch": 1.65, + "learning_rate": 0.00022944429852120326, + "loss": 0.1537, + "step": 5419 + }, + { + "epoch": 1.65, + "learning_rate": 0.00022941977442059386, + "loss": 0.1892, + "step": 5420 + }, + { + "epoch": 1.65, + "learning_rate": 0.00022939524736977277, + "loss": 0.1447, + "step": 5421 + }, + { + "epoch": 1.65, + "learning_rate": 0.00022937071736965105, + "loss": 0.2137, + "step": 5422 + }, + { + "epoch": 1.65, + "learning_rate": 0.00022934618442113994, + "loss": 0.1935, + "step": 5423 + }, + { + "epoch": 1.65, + "learning_rate": 0.00022932164852515076, + "loss": 0.1043, + "step": 5424 + }, + { + "epoch": 1.65, + "learning_rate": 0.0002292971096825949, + "loss": 0.1822, + "step": 5425 + }, + { + "epoch": 1.65, + "learning_rate": 0.000229272567894384, + "loss": 0.3027, + "step": 5426 + }, + { + "epoch": 1.65, + "learning_rate": 0.00022924802316142968, + "loss": 0.1693, + "step": 5427 + }, + { + "epoch": 1.65, + "learning_rate": 0.00022922347548464365, + "loss": 0.1392, + "step": 5428 + }, + { + "epoch": 1.65, + "learning_rate": 0.00022919892486493782, + "loss": 0.1333, + "step": 5429 + }, + { + "epoch": 1.65, + "learning_rate": 0.0002291743713032242, + "loss": 0.1488, + "step": 5430 + }, + { + "epoch": 1.65, + "learning_rate": 0.00022914981480041488, + "loss": 0.1532, + "step": 5431 + }, + { + "epoch": 1.65, + "learning_rate": 0.00022912525535742203, + "loss": 0.1652, + "step": 5432 + }, + { + "epoch": 1.65, + "learning_rate": 0.000229100692975158, + "loss": 0.173, + "step": 5433 + }, + { + "epoch": 1.65, + "learning_rate": 0.00022907612765453518, + "loss": 0.0571, + "step": 5434 + }, + { + "epoch": 1.65, + "learning_rate": 0.00022905155939646608, + "loss": 0.1208, + "step": 5435 + }, + { + "epoch": 1.65, + "learning_rate": 0.00022902698820186337, + "loss": 0.039, + "step": 5436 + }, + { + "epoch": 1.65, + "learning_rate": 0.00022900241407163978, + "loss": 0.1962, + "step": 5437 + }, + { + "epoch": 1.65, + "learning_rate": 0.00022897783700670822, + "loss": 0.1832, + "step": 5438 + }, + { + "epoch": 1.65, + "learning_rate": 0.0002289532570079816, + "loss": 0.1943, + "step": 5439 + }, + { + "epoch": 1.65, + "learning_rate": 0.000228928674076373, + "loss": 0.1142, + "step": 5440 + }, + { + "epoch": 1.65, + "learning_rate": 0.00022890408821279565, + "loss": 0.2505, + "step": 5441 + }, + { + "epoch": 1.65, + "learning_rate": 0.00022887949941816278, + "loss": 0.1559, + "step": 5442 + }, + { + "epoch": 1.65, + "learning_rate": 0.00022885490769338783, + "loss": 0.1074, + "step": 5443 + }, + { + "epoch": 1.65, + "learning_rate": 0.00022883031303938428, + "loss": 0.0705, + "step": 5444 + }, + { + "epoch": 1.65, + "learning_rate": 0.00022880571545706577, + "loss": 0.1852, + "step": 5445 + }, + { + "epoch": 1.65, + "learning_rate": 0.00022878111494734602, + "loss": 0.22, + "step": 5446 + }, + { + "epoch": 1.65, + "learning_rate": 0.00022875651151113888, + "loss": 0.2817, + "step": 5447 + }, + { + "epoch": 1.65, + "learning_rate": 0.0002287319051493583, + "loss": 0.1371, + "step": 5448 + }, + { + "epoch": 1.65, + "learning_rate": 0.00022870729586291832, + "loss": 0.1621, + "step": 5449 + }, + { + "epoch": 1.65, + "learning_rate": 0.00022868268365273307, + "loss": 0.17, + "step": 5450 + }, + { + "epoch": 1.66, + "learning_rate": 0.00022865806851971694, + "loss": 0.1525, + "step": 5451 + }, + { + "epoch": 1.66, + "learning_rate": 0.00022863345046478419, + "loss": 0.2471, + "step": 5452 + }, + { + "epoch": 1.66, + "learning_rate": 0.00022860882948884932, + "loss": 0.0706, + "step": 5453 + }, + { + "epoch": 1.66, + "learning_rate": 0.00022858420559282696, + "loss": 0.1443, + "step": 5454 + }, + { + "epoch": 1.66, + "learning_rate": 0.0002285595787776318, + "loss": 0.19, + "step": 5455 + }, + { + "epoch": 1.66, + "learning_rate": 0.00022853494904417868, + "loss": 0.1061, + "step": 5456 + }, + { + "epoch": 1.66, + "learning_rate": 0.00022851031639338251, + "loss": 0.0992, + "step": 5457 + }, + { + "epoch": 1.66, + "learning_rate": 0.00022848568082615831, + "loss": 0.1572, + "step": 5458 + }, + { + "epoch": 1.66, + "learning_rate": 0.00022846104234342123, + "loss": 0.2101, + "step": 5459 + }, + { + "epoch": 1.66, + "learning_rate": 0.00022843640094608652, + "loss": 0.2091, + "step": 5460 + }, + { + "epoch": 1.66, + "learning_rate": 0.00022841175663506946, + "loss": 0.2354, + "step": 5461 + }, + { + "epoch": 1.66, + "learning_rate": 0.00022838710941128568, + "loss": 0.3019, + "step": 5462 + }, + { + "epoch": 1.66, + "learning_rate": 0.0002283624592756506, + "loss": 0.2564, + "step": 5463 + }, + { + "epoch": 1.66, + "learning_rate": 0.00022833780622907995, + "loss": 0.3279, + "step": 5464 + }, + { + "epoch": 1.66, + "learning_rate": 0.00022831315027248956, + "loss": 0.1277, + "step": 5465 + }, + { + "epoch": 1.66, + "learning_rate": 0.00022828849140679527, + "loss": 0.1443, + "step": 5466 + }, + { + "epoch": 1.66, + "learning_rate": 0.00022826382963291307, + "loss": 0.317, + "step": 5467 + }, + { + "epoch": 1.66, + "learning_rate": 0.00022823916495175913, + "loss": 0.193, + "step": 5468 + }, + { + "epoch": 1.66, + "learning_rate": 0.00022821449736424963, + "loss": 0.1554, + "step": 5469 + }, + { + "epoch": 1.66, + "learning_rate": 0.00022818982687130096, + "loss": 0.1505, + "step": 5470 + }, + { + "epoch": 1.66, + "learning_rate": 0.00022816515347382952, + "loss": 0.179, + "step": 5471 + }, + { + "epoch": 1.66, + "learning_rate": 0.00022814047717275177, + "loss": 0.1434, + "step": 5472 + }, + { + "epoch": 1.66, + "learning_rate": 0.0002281157979689845, + "loss": 0.1027, + "step": 5473 + }, + { + "epoch": 1.66, + "learning_rate": 0.0002280911158634444, + "loss": 0.0922, + "step": 5474 + }, + { + "epoch": 1.66, + "learning_rate": 0.00022806643085704833, + "loss": 0.2323, + "step": 5475 + }, + { + "epoch": 1.66, + "learning_rate": 0.0002280417429507133, + "loss": 0.2096, + "step": 5476 + }, + { + "epoch": 1.66, + "learning_rate": 0.00022801705214535632, + "loss": 0.1692, + "step": 5477 + }, + { + "epoch": 1.66, + "learning_rate": 0.00022799235844189465, + "loss": 0.1307, + "step": 5478 + }, + { + "epoch": 1.66, + "learning_rate": 0.00022796766184124563, + "loss": 0.1327, + "step": 5479 + }, + { + "epoch": 1.66, + "learning_rate": 0.00022794296234432656, + "loss": 0.1446, + "step": 5480 + }, + { + "epoch": 1.66, + "learning_rate": 0.00022791825995205505, + "loss": 0.2114, + "step": 5481 + }, + { + "epoch": 1.66, + "learning_rate": 0.00022789355466534865, + "loss": 0.2348, + "step": 5482 + }, + { + "epoch": 1.66, + "learning_rate": 0.0002278688464851251, + "loss": 0.1768, + "step": 5483 + }, + { + "epoch": 1.67, + "learning_rate": 0.00022784413541230233, + "loss": 0.1451, + "step": 5484 + }, + { + "epoch": 1.67, + "learning_rate": 0.0002278194214477981, + "loss": 0.2417, + "step": 5485 + }, + { + "epoch": 1.67, + "learning_rate": 0.0002277947045925306, + "loss": 0.3072, + "step": 5486 + }, + { + "epoch": 1.67, + "learning_rate": 0.00022776998484741798, + "loss": 0.2184, + "step": 5487 + }, + { + "epoch": 1.67, + "learning_rate": 0.00022774526221337846, + "loss": 0.1253, + "step": 5488 + }, + { + "epoch": 1.67, + "learning_rate": 0.00022772053669133045, + "loss": 0.1839, + "step": 5489 + }, + { + "epoch": 1.67, + "learning_rate": 0.00022769580828219243, + "loss": 0.2429, + "step": 5490 + }, + { + "epoch": 1.67, + "learning_rate": 0.00022767107698688297, + "loss": 0.1177, + "step": 5491 + }, + { + "epoch": 1.67, + "learning_rate": 0.00022764634280632073, + "loss": 0.188, + "step": 5492 + }, + { + "epoch": 1.67, + "learning_rate": 0.00022762160574142458, + "loss": 0.2874, + "step": 5493 + }, + { + "epoch": 1.67, + "learning_rate": 0.0002275968657931134, + "loss": 0.2323, + "step": 5494 + }, + { + "epoch": 1.67, + "learning_rate": 0.00022757212296230619, + "loss": 0.2153, + "step": 5495 + }, + { + "epoch": 1.67, + "learning_rate": 0.00022754737724992204, + "loss": 0.3515, + "step": 5496 + }, + { + "epoch": 1.67, + "learning_rate": 0.00022752262865688034, + "loss": 0.1379, + "step": 5497 + }, + { + "epoch": 1.67, + "learning_rate": 0.00022749787718410024, + "loss": 0.2434, + "step": 5498 + }, + { + "epoch": 1.67, + "learning_rate": 0.00022747312283250134, + "loss": 0.2472, + "step": 5499 + }, + { + "epoch": 1.67, + "learning_rate": 0.00022744836560300303, + "loss": 0.1718, + "step": 5500 + }, + { + "epoch": 1.67, + "learning_rate": 0.0002274236054965251, + "loss": 0.2945, + "step": 5501 + }, + { + "epoch": 1.67, + "learning_rate": 0.00022739884251398728, + "loss": 0.1162, + "step": 5502 + }, + { + "epoch": 1.67, + "learning_rate": 0.0002273740766563094, + "loss": 0.1724, + "step": 5503 + }, + { + "epoch": 1.67, + "learning_rate": 0.00022734930792441148, + "loss": 0.2083, + "step": 5504 + }, + { + "epoch": 1.67, + "learning_rate": 0.0002273245363192136, + "loss": 0.1233, + "step": 5505 + }, + { + "epoch": 1.67, + "learning_rate": 0.0002272997618416359, + "loss": 0.0848, + "step": 5506 + }, + { + "epoch": 1.67, + "learning_rate": 0.0002272749844925988, + "loss": 0.1178, + "step": 5507 + }, + { + "epoch": 1.67, + "learning_rate": 0.00022725020427302262, + "loss": 0.2878, + "step": 5508 + }, + { + "epoch": 1.67, + "learning_rate": 0.00022722542118382784, + "loss": 0.2094, + "step": 5509 + }, + { + "epoch": 1.67, + "learning_rate": 0.00022720063522593516, + "loss": 0.1794, + "step": 5510 + }, + { + "epoch": 1.67, + "learning_rate": 0.00022717584640026526, + "loss": 0.1792, + "step": 5511 + }, + { + "epoch": 1.67, + "learning_rate": 0.000227151054707739, + "loss": 0.1802, + "step": 5512 + }, + { + "epoch": 1.67, + "learning_rate": 0.00022712626014927728, + "loss": 0.1388, + "step": 5513 + }, + { + "epoch": 1.67, + "learning_rate": 0.00022710146272580118, + "loss": 0.0617, + "step": 5514 + }, + { + "epoch": 1.67, + "learning_rate": 0.00022707666243823184, + "loss": 0.2702, + "step": 5515 + }, + { + "epoch": 1.67, + "learning_rate": 0.00022705185928749048, + "loss": 0.1959, + "step": 5516 + }, + { + "epoch": 1.68, + "learning_rate": 0.00022702705327449856, + "loss": 0.152, + "step": 5517 + }, + { + "epoch": 1.68, + "learning_rate": 0.00022700224440017747, + "loss": 0.252, + "step": 5518 + }, + { + "epoch": 1.68, + "learning_rate": 0.00022697743266544875, + "loss": 0.1385, + "step": 5519 + }, + { + "epoch": 1.68, + "learning_rate": 0.0002269526180712342, + "loss": 0.1902, + "step": 5520 + }, + { + "epoch": 1.68, + "learning_rate": 0.00022692780061845554, + "loss": 0.2582, + "step": 5521 + }, + { + "epoch": 1.68, + "learning_rate": 0.00022690298030803466, + "loss": 0.2341, + "step": 5522 + }, + { + "epoch": 1.68, + "learning_rate": 0.00022687815714089358, + "loss": 0.2845, + "step": 5523 + }, + { + "epoch": 1.68, + "learning_rate": 0.00022685333111795446, + "loss": 0.162, + "step": 5524 + }, + { + "epoch": 1.68, + "learning_rate": 0.00022682850224013941, + "loss": 0.3112, + "step": 5525 + }, + { + "epoch": 1.68, + "learning_rate": 0.00022680367050837083, + "loss": 0.23, + "step": 5526 + }, + { + "epoch": 1.68, + "learning_rate": 0.00022677883592357108, + "loss": 0.2386, + "step": 5527 + }, + { + "epoch": 1.68, + "learning_rate": 0.00022675399848666268, + "loss": 0.2431, + "step": 5528 + }, + { + "epoch": 1.68, + "learning_rate": 0.0002267291581985684, + "loss": 0.1023, + "step": 5529 + }, + { + "epoch": 1.68, + "learning_rate": 0.00022670431506021087, + "loss": 0.2842, + "step": 5530 + }, + { + "epoch": 1.68, + "learning_rate": 0.00022667946907251299, + "loss": 0.0973, + "step": 5531 + }, + { + "epoch": 1.68, + "learning_rate": 0.0002266546202363977, + "loss": 0.3037, + "step": 5532 + }, + { + "epoch": 1.68, + "learning_rate": 0.000226629768552788, + "loss": 0.098, + "step": 5533 + }, + { + "epoch": 1.68, + "learning_rate": 0.00022660491402260715, + "loss": 0.1653, + "step": 5534 + }, + { + "epoch": 1.68, + "learning_rate": 0.00022658005664677838, + "loss": 0.2562, + "step": 5535 + }, + { + "epoch": 1.68, + "learning_rate": 0.00022655519642622506, + "loss": 0.0628, + "step": 5536 + }, + { + "epoch": 1.68, + "learning_rate": 0.00022653033336187073, + "loss": 0.2001, + "step": 5537 + }, + { + "epoch": 1.68, + "learning_rate": 0.0002265054674546389, + "loss": 0.3274, + "step": 5538 + }, + { + "epoch": 1.68, + "learning_rate": 0.00022648059870545334, + "loss": 0.147, + "step": 5539 + }, + { + "epoch": 1.68, + "learning_rate": 0.0002264557271152378, + "loss": 0.2276, + "step": 5540 + }, + { + "epoch": 1.68, + "learning_rate": 0.0002264308526849162, + "loss": 0.2087, + "step": 5541 + }, + { + "epoch": 1.68, + "learning_rate": 0.00022640597541541253, + "loss": 0.2986, + "step": 5542 + }, + { + "epoch": 1.68, + "learning_rate": 0.00022638109530765093, + "loss": 0.1003, + "step": 5543 + }, + { + "epoch": 1.68, + "learning_rate": 0.00022635621236255567, + "loss": 0.1924, + "step": 5544 + }, + { + "epoch": 1.68, + "learning_rate": 0.00022633132658105105, + "loss": 0.1579, + "step": 5545 + }, + { + "epoch": 1.68, + "learning_rate": 0.00022630643796406143, + "loss": 0.1522, + "step": 5546 + }, + { + "epoch": 1.68, + "learning_rate": 0.00022628154651251143, + "loss": 0.1121, + "step": 5547 + }, + { + "epoch": 1.68, + "learning_rate": 0.00022625665222732565, + "loss": 0.1988, + "step": 5548 + }, + { + "epoch": 1.68, + "learning_rate": 0.0002262317551094289, + "loss": 0.1535, + "step": 5549 + }, + { + "epoch": 1.69, + "learning_rate": 0.00022620685515974598, + "loss": 0.2663, + "step": 5550 + }, + { + "epoch": 1.69, + "learning_rate": 0.00022618195237920184, + "loss": 0.1757, + "step": 5551 + }, + { + "epoch": 1.69, + "learning_rate": 0.00022615704676872157, + "loss": 0.1129, + "step": 5552 + }, + { + "epoch": 1.69, + "learning_rate": 0.00022613213832923038, + "loss": 0.357, + "step": 5553 + }, + { + "epoch": 1.69, + "learning_rate": 0.00022610722706165344, + "loss": 0.2189, + "step": 5554 + }, + { + "epoch": 1.69, + "learning_rate": 0.00022608231296691628, + "loss": 0.0342, + "step": 5555 + }, + { + "epoch": 1.69, + "learning_rate": 0.00022605739604594427, + "loss": 0.1809, + "step": 5556 + }, + { + "epoch": 1.69, + "learning_rate": 0.00022603247629966302, + "loss": 0.224, + "step": 5557 + }, + { + "epoch": 1.69, + "learning_rate": 0.0002260075537289983, + "loss": 0.0821, + "step": 5558 + }, + { + "epoch": 1.69, + "learning_rate": 0.00022598262833487575, + "loss": 0.1556, + "step": 5559 + }, + { + "epoch": 1.69, + "learning_rate": 0.00022595770011822145, + "loss": 0.1681, + "step": 5560 + }, + { + "epoch": 1.69, + "learning_rate": 0.00022593276907996126, + "loss": 0.2349, + "step": 5561 + }, + { + "epoch": 1.69, + "learning_rate": 0.00022590783522102143, + "loss": 0.229, + "step": 5562 + }, + { + "epoch": 1.69, + "learning_rate": 0.00022588289854232814, + "loss": 0.0549, + "step": 5563 + }, + { + "epoch": 1.69, + "learning_rate": 0.00022585795904480764, + "loss": 0.2592, + "step": 5564 + }, + { + "epoch": 1.69, + "learning_rate": 0.00022583301672938645, + "loss": 0.1757, + "step": 5565 + }, + { + "epoch": 1.69, + "learning_rate": 0.0002258080715969911, + "loss": 0.1048, + "step": 5566 + }, + { + "epoch": 1.69, + "learning_rate": 0.00022578312364854812, + "loss": 0.224, + "step": 5567 + }, + { + "epoch": 1.69, + "learning_rate": 0.00022575817288498435, + "loss": 0.2521, + "step": 5568 + }, + { + "epoch": 1.69, + "learning_rate": 0.00022573321930722656, + "loss": 0.0911, + "step": 5569 + }, + { + "epoch": 1.69, + "learning_rate": 0.00022570826291620186, + "loss": 0.1162, + "step": 5570 + }, + { + "epoch": 1.69, + "learning_rate": 0.00022568330371283717, + "loss": 0.1765, + "step": 5571 + }, + { + "epoch": 1.69, + "learning_rate": 0.00022565834169805969, + "loss": 0.1026, + "step": 5572 + }, + { + "epoch": 1.69, + "learning_rate": 0.00022563337687279666, + "loss": 0.2524, + "step": 5573 + }, + { + "epoch": 1.69, + "learning_rate": 0.00022560840923797548, + "loss": 0.1537, + "step": 5574 + }, + { + "epoch": 1.69, + "learning_rate": 0.00022558343879452365, + "loss": 0.1657, + "step": 5575 + }, + { + "epoch": 1.69, + "learning_rate": 0.00022555846554336866, + "loss": 0.1063, + "step": 5576 + }, + { + "epoch": 1.69, + "learning_rate": 0.00022553348948543825, + "loss": 0.2179, + "step": 5577 + }, + { + "epoch": 1.69, + "learning_rate": 0.00022550851062166027, + "loss": 0.1639, + "step": 5578 + }, + { + "epoch": 1.69, + "learning_rate": 0.00022548352895296252, + "loss": 0.1409, + "step": 5579 + }, + { + "epoch": 1.69, + "learning_rate": 0.00022545854448027298, + "loss": 0.1295, + "step": 5580 + }, + { + "epoch": 1.69, + "learning_rate": 0.00022543355720451986, + "loss": 0.1275, + "step": 5581 + }, + { + "epoch": 1.69, + "learning_rate": 0.00022540856712663124, + "loss": 0.1626, + "step": 5582 + }, + { + "epoch": 1.7, + "learning_rate": 0.00022538357424753552, + "loss": 0.2818, + "step": 5583 + }, + { + "epoch": 1.7, + "learning_rate": 0.00022535857856816104, + "loss": 0.1829, + "step": 5584 + }, + { + "epoch": 1.7, + "learning_rate": 0.00022533358008943637, + "loss": 0.2435, + "step": 5585 + }, + { + "epoch": 1.7, + "learning_rate": 0.00022530857881229016, + "loss": 0.2539, + "step": 5586 + }, + { + "epoch": 1.7, + "learning_rate": 0.00022528357473765103, + "loss": 0.2051, + "step": 5587 + }, + { + "epoch": 1.7, + "learning_rate": 0.0002252585678664479, + "loss": 0.1013, + "step": 5588 + }, + { + "epoch": 1.7, + "learning_rate": 0.00022523355819960964, + "loss": 0.1725, + "step": 5589 + }, + { + "epoch": 1.7, + "learning_rate": 0.00022520854573806535, + "loss": 0.0164, + "step": 5590 + }, + { + "epoch": 1.7, + "learning_rate": 0.0002251835304827441, + "loss": 0.1434, + "step": 5591 + }, + { + "epoch": 1.7, + "learning_rate": 0.00022515851243457515, + "loss": 0.1873, + "step": 5592 + }, + { + "epoch": 1.7, + "learning_rate": 0.0002251334915944879, + "loss": 0.1677, + "step": 5593 + }, + { + "epoch": 1.7, + "learning_rate": 0.00022510846796341177, + "loss": 0.1181, + "step": 5594 + }, + { + "epoch": 1.7, + "learning_rate": 0.00022508344154227627, + "loss": 0.2315, + "step": 5595 + }, + { + "epoch": 1.7, + "learning_rate": 0.00022505841233201116, + "loss": 0.2881, + "step": 5596 + }, + { + "epoch": 1.7, + "learning_rate": 0.00022503338033354604, + "loss": 0.2503, + "step": 5597 + }, + { + "epoch": 1.7, + "learning_rate": 0.000225008345547811, + "loss": 0.2352, + "step": 5598 + }, + { + "epoch": 1.7, + "learning_rate": 0.00022498330797573579, + "loss": 0.104, + "step": 5599 + }, + { + "epoch": 1.7, + "learning_rate": 0.0002249582676182506, + "loss": 0.194, + "step": 5600 + }, + { + "epoch": 1.7, + "learning_rate": 0.00022493322447628556, + "loss": 0.2312, + "step": 5601 + }, + { + "epoch": 1.7, + "learning_rate": 0.00022490817855077102, + "loss": 0.0721, + "step": 5602 + }, + { + "epoch": 1.7, + "learning_rate": 0.0002248831298426373, + "loss": 0.3291, + "step": 5603 + }, + { + "epoch": 1.7, + "learning_rate": 0.00022485807835281489, + "loss": 0.1244, + "step": 5604 + }, + { + "epoch": 1.7, + "learning_rate": 0.00022483302408223436, + "loss": 0.1643, + "step": 5605 + }, + { + "epoch": 1.7, + "learning_rate": 0.00022480796703182646, + "loss": 0.1954, + "step": 5606 + }, + { + "epoch": 1.7, + "learning_rate": 0.00022478290720252196, + "loss": 0.1557, + "step": 5607 + }, + { + "epoch": 1.7, + "learning_rate": 0.00022475784459525172, + "loss": 0.144, + "step": 5608 + }, + { + "epoch": 1.7, + "learning_rate": 0.00022473277921094683, + "loss": 0.2848, + "step": 5609 + }, + { + "epoch": 1.7, + "learning_rate": 0.00022470771105053832, + "loss": 0.1626, + "step": 5610 + }, + { + "epoch": 1.7, + "learning_rate": 0.00022468264011495743, + "loss": 0.1027, + "step": 5611 + }, + { + "epoch": 1.7, + "learning_rate": 0.00022465756640513545, + "loss": 0.1864, + "step": 5612 + }, + { + "epoch": 1.7, + "learning_rate": 0.0002246324899220038, + "loss": 0.1997, + "step": 5613 + }, + { + "epoch": 1.7, + "learning_rate": 0.00022460741066649407, + "loss": 0.1676, + "step": 5614 + }, + { + "epoch": 1.7, + "learning_rate": 0.00022458232863953776, + "loss": 0.2615, + "step": 5615 + }, + { + "epoch": 1.71, + "learning_rate": 0.00022455724384206664, + "loss": 0.0773, + "step": 5616 + }, + { + "epoch": 1.71, + "learning_rate": 0.00022453215627501257, + "loss": 0.1755, + "step": 5617 + }, + { + "epoch": 1.71, + "learning_rate": 0.00022450706593930746, + "loss": 0.1659, + "step": 5618 + }, + { + "epoch": 1.71, + "learning_rate": 0.00022448197283588336, + "loss": 0.1982, + "step": 5619 + }, + { + "epoch": 1.71, + "learning_rate": 0.00022445687696567236, + "loss": 0.1512, + "step": 5620 + }, + { + "epoch": 1.71, + "learning_rate": 0.00022443177832960677, + "loss": 0.12, + "step": 5621 + }, + { + "epoch": 1.71, + "learning_rate": 0.00022440667692861884, + "loss": 0.1743, + "step": 5622 + }, + { + "epoch": 1.71, + "learning_rate": 0.00022438157276364107, + "loss": 0.1985, + "step": 5623 + }, + { + "epoch": 1.71, + "learning_rate": 0.00022435646583560597, + "loss": 0.2644, + "step": 5624 + }, + { + "epoch": 1.71, + "learning_rate": 0.00022433135614544623, + "loss": 0.1592, + "step": 5625 + }, + { + "epoch": 1.71, + "learning_rate": 0.0002243062436940946, + "loss": 0.1527, + "step": 5626 + }, + { + "epoch": 1.71, + "learning_rate": 0.0002242811284824839, + "loss": 0.1334, + "step": 5627 + }, + { + "epoch": 1.71, + "learning_rate": 0.0002242560105115471, + "loss": 0.1007, + "step": 5628 + }, + { + "epoch": 1.71, + "learning_rate": 0.0002242308897822173, + "loss": 0.0726, + "step": 5629 + }, + { + "epoch": 1.71, + "learning_rate": 0.0002242057662954276, + "loss": 0.1442, + "step": 5630 + }, + { + "epoch": 1.71, + "learning_rate": 0.0002241806400521113, + "loss": 0.1229, + "step": 5631 + }, + { + "epoch": 1.71, + "learning_rate": 0.00022415551105320174, + "loss": 0.0973, + "step": 5632 + }, + { + "epoch": 1.71, + "learning_rate": 0.00022413037929963238, + "loss": 0.1055, + "step": 5633 + }, + { + "epoch": 1.71, + "learning_rate": 0.00022410524479233684, + "loss": 0.1201, + "step": 5634 + }, + { + "epoch": 1.71, + "learning_rate": 0.0002240801075322488, + "loss": 0.1595, + "step": 5635 + }, + { + "epoch": 1.71, + "learning_rate": 0.00022405496752030192, + "loss": 0.0657, + "step": 5636 + }, + { + "epoch": 1.71, + "learning_rate": 0.00022402982475743023, + "loss": 0.1303, + "step": 5637 + }, + { + "epoch": 1.71, + "learning_rate": 0.00022400467924456763, + "loss": 0.1389, + "step": 5638 + }, + { + "epoch": 1.71, + "learning_rate": 0.00022397953098264825, + "loss": 0.3142, + "step": 5639 + }, + { + "epoch": 1.71, + "learning_rate": 0.00022395437997260617, + "loss": 0.2238, + "step": 5640 + }, + { + "epoch": 1.71, + "learning_rate": 0.00022392922621537577, + "loss": 0.1279, + "step": 5641 + }, + { + "epoch": 1.71, + "learning_rate": 0.0002239040697118914, + "loss": 0.2859, + "step": 5642 + }, + { + "epoch": 1.71, + "learning_rate": 0.00022387891046308755, + "loss": 0.1215, + "step": 5643 + }, + { + "epoch": 1.71, + "learning_rate": 0.00022385374846989887, + "loss": 0.107, + "step": 5644 + }, + { + "epoch": 1.71, + "learning_rate": 0.00022382858373325997, + "loss": 0.2416, + "step": 5645 + }, + { + "epoch": 1.71, + "learning_rate": 0.00022380341625410569, + "loss": 0.1045, + "step": 5646 + }, + { + "epoch": 1.71, + "learning_rate": 0.00022377824603337095, + "loss": 0.0686, + "step": 5647 + }, + { + "epoch": 1.71, + "learning_rate": 0.0002237530730719907, + "loss": 0.334, + "step": 5648 + }, + { + "epoch": 1.72, + "learning_rate": 0.00022372789737090006, + "loss": 0.1753, + "step": 5649 + }, + { + "epoch": 1.72, + "learning_rate": 0.00022370271893103425, + "loss": 0.1634, + "step": 5650 + }, + { + "epoch": 1.72, + "learning_rate": 0.00022367753775332853, + "loss": 0.1206, + "step": 5651 + }, + { + "epoch": 1.72, + "learning_rate": 0.0002236523538387184, + "loss": 0.1903, + "step": 5652 + }, + { + "epoch": 1.72, + "learning_rate": 0.0002236271671881393, + "loss": 0.0468, + "step": 5653 + }, + { + "epoch": 1.72, + "learning_rate": 0.00022360197780252683, + "loss": 0.238, + "step": 5654 + }, + { + "epoch": 1.72, + "learning_rate": 0.00022357678568281667, + "loss": 0.1713, + "step": 5655 + }, + { + "epoch": 1.72, + "learning_rate": 0.00022355159082994476, + "loss": 0.1206, + "step": 5656 + }, + { + "epoch": 1.72, + "learning_rate": 0.0002235263932448469, + "loss": 0.1094, + "step": 5657 + }, + { + "epoch": 1.72, + "learning_rate": 0.00022350119292845912, + "loss": 0.0366, + "step": 5658 + }, + { + "epoch": 1.72, + "learning_rate": 0.00022347598988171763, + "loss": 0.137, + "step": 5659 + }, + { + "epoch": 1.72, + "learning_rate": 0.00022345078410555857, + "loss": 0.1322, + "step": 5660 + }, + { + "epoch": 1.72, + "learning_rate": 0.00022342557560091824, + "loss": 0.2271, + "step": 5661 + }, + { + "epoch": 1.72, + "learning_rate": 0.0002234003643687331, + "loss": 0.2333, + "step": 5662 + }, + { + "epoch": 1.72, + "learning_rate": 0.0002233751504099397, + "loss": 0.0265, + "step": 5663 + }, + { + "epoch": 1.72, + "learning_rate": 0.00022334993372547462, + "loss": 0.0879, + "step": 5664 + }, + { + "epoch": 1.72, + "learning_rate": 0.00022332471431627457, + "loss": 0.1389, + "step": 5665 + }, + { + "epoch": 1.72, + "learning_rate": 0.00022329949218327638, + "loss": 0.1582, + "step": 5666 + }, + { + "epoch": 1.72, + "learning_rate": 0.0002232742673274171, + "loss": 0.2132, + "step": 5667 + }, + { + "epoch": 1.72, + "learning_rate": 0.00022324903974963361, + "loss": 0.1325, + "step": 5668 + }, + { + "epoch": 1.72, + "learning_rate": 0.00022322380945086307, + "loss": 0.1228, + "step": 5669 + }, + { + "epoch": 1.72, + "learning_rate": 0.0002231985764320428, + "loss": 0.0741, + "step": 5670 + }, + { + "epoch": 1.72, + "learning_rate": 0.00022317334069411004, + "loss": 0.1901, + "step": 5671 + }, + { + "epoch": 1.72, + "learning_rate": 0.00022314810223800224, + "loss": 0.2569, + "step": 5672 + }, + { + "epoch": 1.72, + "learning_rate": 0.00022312286106465696, + "loss": 0.2421, + "step": 5673 + }, + { + "epoch": 1.72, + "learning_rate": 0.00022309761717501176, + "loss": 0.2047, + "step": 5674 + }, + { + "epoch": 1.72, + "learning_rate": 0.00022307237057000453, + "loss": 0.1843, + "step": 5675 + }, + { + "epoch": 1.72, + "learning_rate": 0.00022304712125057296, + "loss": 0.1487, + "step": 5676 + }, + { + "epoch": 1.72, + "learning_rate": 0.00022302186921765508, + "loss": 0.2109, + "step": 5677 + }, + { + "epoch": 1.72, + "learning_rate": 0.00022299661447218892, + "loss": 0.1302, + "step": 5678 + }, + { + "epoch": 1.72, + "learning_rate": 0.00022297135701511252, + "loss": 0.1671, + "step": 5679 + }, + { + "epoch": 1.72, + "learning_rate": 0.00022294609684736424, + "loss": 0.123, + "step": 5680 + }, + { + "epoch": 1.72, + "learning_rate": 0.00022292083396988237, + "loss": 0.0747, + "step": 5681 + }, + { + "epoch": 1.73, + "learning_rate": 0.00022289556838360532, + "loss": 0.2025, + "step": 5682 + }, + { + "epoch": 1.73, + "learning_rate": 0.00022287030008947168, + "loss": 0.1124, + "step": 5683 + }, + { + "epoch": 1.73, + "learning_rate": 0.0002228450290884201, + "loss": 0.2547, + "step": 5684 + }, + { + "epoch": 1.73, + "learning_rate": 0.00022281975538138932, + "loss": 0.1366, + "step": 5685 + }, + { + "epoch": 1.73, + "learning_rate": 0.00022279447896931815, + "loss": 0.2608, + "step": 5686 + }, + { + "epoch": 1.73, + "learning_rate": 0.00022276919985314558, + "loss": 0.1872, + "step": 5687 + }, + { + "epoch": 1.73, + "learning_rate": 0.00022274391803381058, + "loss": 0.2987, + "step": 5688 + }, + { + "epoch": 1.73, + "learning_rate": 0.00022271863351225237, + "loss": 0.2127, + "step": 5689 + }, + { + "epoch": 1.73, + "learning_rate": 0.00022269334628941017, + "loss": 0.2432, + "step": 5690 + }, + { + "epoch": 1.73, + "learning_rate": 0.00022266805636622332, + "loss": 0.0165, + "step": 5691 + }, + { + "epoch": 1.73, + "learning_rate": 0.00022264276374363133, + "loss": 0.2366, + "step": 5692 + }, + { + "epoch": 1.73, + "learning_rate": 0.0002226174684225736, + "loss": 0.1724, + "step": 5693 + }, + { + "epoch": 1.73, + "learning_rate": 0.0002225921704039899, + "loss": 0.1177, + "step": 5694 + }, + { + "epoch": 1.73, + "learning_rate": 0.00022256686968881998, + "loss": 0.2357, + "step": 5695 + }, + { + "epoch": 1.73, + "learning_rate": 0.0002225415662780036, + "loss": 0.0829, + "step": 5696 + }, + { + "epoch": 1.73, + "learning_rate": 0.00022251626017248078, + "loss": 0.132, + "step": 5697 + }, + { + "epoch": 1.73, + "learning_rate": 0.00022249095137319156, + "loss": 0.2055, + "step": 5698 + }, + { + "epoch": 1.73, + "learning_rate": 0.00022246563988107606, + "loss": 0.1504, + "step": 5699 + }, + { + "epoch": 1.73, + "learning_rate": 0.00022244032569707458, + "loss": 0.1899, + "step": 5700 + }, + { + "epoch": 1.73, + "learning_rate": 0.00022241500882212739, + "loss": 0.1835, + "step": 5701 + }, + { + "epoch": 1.73, + "learning_rate": 0.00022238968925717496, + "loss": 0.1529, + "step": 5702 + }, + { + "epoch": 1.73, + "learning_rate": 0.00022236436700315789, + "loss": 0.0917, + "step": 5703 + }, + { + "epoch": 1.73, + "learning_rate": 0.0002223390420610168, + "loss": 0.1244, + "step": 5704 + }, + { + "epoch": 1.73, + "learning_rate": 0.00022231371443169243, + "loss": 0.0672, + "step": 5705 + }, + { + "epoch": 1.73, + "learning_rate": 0.00022228838411612558, + "loss": 0.1729, + "step": 5706 + }, + { + "epoch": 1.73, + "learning_rate": 0.00022226305111525726, + "loss": 0.3468, + "step": 5707 + }, + { + "epoch": 1.73, + "learning_rate": 0.00022223771543002853, + "loss": 0.1895, + "step": 5708 + }, + { + "epoch": 1.73, + "learning_rate": 0.0002222123770613805, + "loss": 0.1342, + "step": 5709 + }, + { + "epoch": 1.73, + "learning_rate": 0.00022218703601025442, + "loss": 0.1795, + "step": 5710 + }, + { + "epoch": 1.73, + "learning_rate": 0.0002221616922775916, + "loss": 0.2486, + "step": 5711 + }, + { + "epoch": 1.73, + "learning_rate": 0.00022213634586433357, + "loss": 0.1358, + "step": 5712 + }, + { + "epoch": 1.73, + "learning_rate": 0.00022211099677142183, + "loss": 0.1062, + "step": 5713 + }, + { + "epoch": 1.73, + "learning_rate": 0.000222085644999798, + "loss": 0.1281, + "step": 5714 + }, + { + "epoch": 1.74, + "learning_rate": 0.00022206029055040388, + "loss": 0.1494, + "step": 5715 + }, + { + "epoch": 1.74, + "learning_rate": 0.00022203493342418126, + "loss": 0.1772, + "step": 5716 + }, + { + "epoch": 1.74, + "learning_rate": 0.00022200957362207208, + "loss": 0.0848, + "step": 5717 + }, + { + "epoch": 1.74, + "learning_rate": 0.00022198421114501846, + "loss": 0.1503, + "step": 5718 + }, + { + "epoch": 1.74, + "learning_rate": 0.00022195884599396246, + "loss": 0.1618, + "step": 5719 + }, + { + "epoch": 1.74, + "learning_rate": 0.00022193347816984633, + "loss": 0.161, + "step": 5720 + }, + { + "epoch": 1.74, + "learning_rate": 0.00022190810767361248, + "loss": 0.1217, + "step": 5721 + }, + { + "epoch": 1.74, + "learning_rate": 0.00022188273450620324, + "loss": 0.0522, + "step": 5722 + }, + { + "epoch": 1.74, + "learning_rate": 0.00022185735866856121, + "loss": 0.0955, + "step": 5723 + }, + { + "epoch": 1.74, + "learning_rate": 0.00022183198016162902, + "loss": 0.1529, + "step": 5724 + }, + { + "epoch": 1.74, + "learning_rate": 0.0002218065989863495, + "loss": 0.084, + "step": 5725 + }, + { + "epoch": 1.74, + "learning_rate": 0.00022178121514366533, + "loss": 0.2024, + "step": 5726 + }, + { + "epoch": 1.74, + "learning_rate": 0.0002217558286345195, + "loss": 0.2164, + "step": 5727 + }, + { + "epoch": 1.74, + "learning_rate": 0.00022173043945985512, + "loss": 0.2277, + "step": 5728 + }, + { + "epoch": 1.74, + "learning_rate": 0.0002217050476206152, + "loss": 0.2605, + "step": 5729 + }, + { + "epoch": 1.74, + "learning_rate": 0.00022167965311774304, + "loss": 0.0805, + "step": 5730 + }, + { + "epoch": 1.74, + "learning_rate": 0.000221654255952182, + "loss": 0.2349, + "step": 5731 + }, + { + "epoch": 1.74, + "learning_rate": 0.00022162885612487542, + "loss": 0.1288, + "step": 5732 + }, + { + "epoch": 1.74, + "learning_rate": 0.00022160345363676697, + "loss": 0.1827, + "step": 5733 + }, + { + "epoch": 1.74, + "learning_rate": 0.00022157804848880013, + "loss": 0.1814, + "step": 5734 + }, + { + "epoch": 1.74, + "learning_rate": 0.0002215526406819187, + "loss": 0.2622, + "step": 5735 + }, + { + "epoch": 1.74, + "learning_rate": 0.00022152723021706653, + "loss": 0.1571, + "step": 5736 + }, + { + "epoch": 1.74, + "learning_rate": 0.00022150181709518744, + "loss": 0.089, + "step": 5737 + }, + { + "epoch": 1.74, + "learning_rate": 0.00022147640131722556, + "loss": 0.1592, + "step": 5738 + }, + { + "epoch": 1.74, + "learning_rate": 0.000221450982884125, + "loss": 0.3107, + "step": 5739 + }, + { + "epoch": 1.74, + "learning_rate": 0.0002214255617968299, + "loss": 0.1476, + "step": 5740 + }, + { + "epoch": 1.74, + "learning_rate": 0.00022140013805628468, + "loss": 0.1726, + "step": 5741 + }, + { + "epoch": 1.74, + "learning_rate": 0.00022137471166343368, + "loss": 0.1851, + "step": 5742 + }, + { + "epoch": 1.74, + "learning_rate": 0.0002213492826192215, + "loss": 0.0926, + "step": 5743 + }, + { + "epoch": 1.74, + "learning_rate": 0.00022132385092459266, + "loss": 0.1282, + "step": 5744 + }, + { + "epoch": 1.74, + "learning_rate": 0.00022129841658049187, + "loss": 0.1026, + "step": 5745 + }, + { + "epoch": 1.74, + "learning_rate": 0.00022127297958786406, + "loss": 0.1477, + "step": 5746 + }, + { + "epoch": 1.74, + "learning_rate": 0.00022124753994765402, + "loss": 0.1815, + "step": 5747 + }, + { + "epoch": 1.75, + "learning_rate": 0.00022122209766080683, + "loss": 0.1502, + "step": 5748 + }, + { + "epoch": 1.75, + "learning_rate": 0.00022119665272826754, + "loss": 0.1554, + "step": 5749 + }, + { + "epoch": 1.75, + "learning_rate": 0.00022117120515098142, + "loss": 0.1553, + "step": 5750 + }, + { + "epoch": 1.75, + "learning_rate": 0.0002211457549298937, + "loss": 0.237, + "step": 5751 + }, + { + "epoch": 1.75, + "learning_rate": 0.00022112030206594983, + "loss": 0.1517, + "step": 5752 + }, + { + "epoch": 1.75, + "learning_rate": 0.00022109484656009532, + "loss": 0.1898, + "step": 5753 + }, + { + "epoch": 1.75, + "learning_rate": 0.00022106938841327573, + "loss": 0.1777, + "step": 5754 + }, + { + "epoch": 1.75, + "learning_rate": 0.00022104392762643677, + "loss": 0.2366, + "step": 5755 + }, + { + "epoch": 1.75, + "learning_rate": 0.0002210184642005242, + "loss": 0.1038, + "step": 5756 + }, + { + "epoch": 1.75, + "learning_rate": 0.00022099299813648396, + "loss": 0.1902, + "step": 5757 + }, + { + "epoch": 1.75, + "learning_rate": 0.00022096752943526207, + "loss": 0.1369, + "step": 5758 + }, + { + "epoch": 1.75, + "learning_rate": 0.00022094205809780452, + "loss": 0.2965, + "step": 5759 + }, + { + "epoch": 1.75, + "learning_rate": 0.00022091658412505755, + "loss": 0.1144, + "step": 5760 + }, + { + "epoch": 1.75, + "learning_rate": 0.0002208911075179675, + "loss": 0.1517, + "step": 5761 + }, + { + "epoch": 1.75, + "learning_rate": 0.00022086562827748065, + "loss": 0.1231, + "step": 5762 + }, + { + "epoch": 1.75, + "learning_rate": 0.0002208401464045435, + "loss": 0.1907, + "step": 5763 + }, + { + "epoch": 1.75, + "learning_rate": 0.00022081466190010266, + "loss": 0.1601, + "step": 5764 + }, + { + "epoch": 1.75, + "learning_rate": 0.0002207891747651048, + "loss": 0.218, + "step": 5765 + }, + { + "epoch": 1.75, + "learning_rate": 0.0002207636850004967, + "loss": 0.1275, + "step": 5766 + }, + { + "epoch": 1.75, + "learning_rate": 0.00022073819260722517, + "loss": 0.2764, + "step": 5767 + }, + { + "epoch": 1.75, + "learning_rate": 0.00022071269758623725, + "loss": 0.0712, + "step": 5768 + }, + { + "epoch": 1.75, + "learning_rate": 0.00022068719993848, + "loss": 0.0713, + "step": 5769 + }, + { + "epoch": 1.75, + "learning_rate": 0.00022066169966490054, + "loss": 0.2222, + "step": 5770 + }, + { + "epoch": 1.75, + "learning_rate": 0.00022063619676644615, + "loss": 0.0889, + "step": 5771 + }, + { + "epoch": 1.75, + "learning_rate": 0.00022061069124406418, + "loss": 0.194, + "step": 5772 + }, + { + "epoch": 1.75, + "learning_rate": 0.0002205851830987021, + "loss": 0.0966, + "step": 5773 + }, + { + "epoch": 1.75, + "learning_rate": 0.0002205596723313075, + "loss": 0.2432, + "step": 5774 + }, + { + "epoch": 1.75, + "learning_rate": 0.00022053415894282793, + "loss": 0.0449, + "step": 5775 + }, + { + "epoch": 1.75, + "learning_rate": 0.00022050864293421122, + "loss": 0.1714, + "step": 5776 + }, + { + "epoch": 1.75, + "learning_rate": 0.0002204831243064052, + "loss": 0.2422, + "step": 5777 + }, + { + "epoch": 1.75, + "learning_rate": 0.00022045760306035778, + "loss": 0.1671, + "step": 5778 + }, + { + "epoch": 1.75, + "learning_rate": 0.000220432079197017, + "loss": 0.2976, + "step": 5779 + }, + { + "epoch": 1.75, + "learning_rate": 0.00022040655271733105, + "loss": 0.1397, + "step": 5780 + }, + { + "epoch": 1.76, + "learning_rate": 0.00022038102362224816, + "loss": 0.1271, + "step": 5781 + }, + { + "epoch": 1.76, + "learning_rate": 0.00022035549191271657, + "loss": 0.206, + "step": 5782 + }, + { + "epoch": 1.76, + "learning_rate": 0.00022032995758968483, + "loss": 0.1155, + "step": 5783 + }, + { + "epoch": 1.76, + "learning_rate": 0.00022030442065410142, + "loss": 0.1356, + "step": 5784 + }, + { + "epoch": 1.76, + "learning_rate": 0.00022027888110691488, + "loss": 0.2097, + "step": 5785 + }, + { + "epoch": 1.76, + "learning_rate": 0.00022025333894907406, + "loss": 0.1765, + "step": 5786 + }, + { + "epoch": 1.76, + "learning_rate": 0.00022022779418152767, + "loss": 0.1989, + "step": 5787 + }, + { + "epoch": 1.76, + "learning_rate": 0.00022020224680522466, + "loss": 0.3576, + "step": 5788 + }, + { + "epoch": 1.76, + "learning_rate": 0.00022017669682111406, + "loss": 0.1131, + "step": 5789 + }, + { + "epoch": 1.76, + "learning_rate": 0.00022015114423014494, + "loss": 0.213, + "step": 5790 + }, + { + "epoch": 1.76, + "learning_rate": 0.0002201255890332666, + "loss": 0.1494, + "step": 5791 + }, + { + "epoch": 1.76, + "learning_rate": 0.00022010003123142823, + "loss": 0.1606, + "step": 5792 + }, + { + "epoch": 1.76, + "learning_rate": 0.00022007447082557927, + "loss": 0.1402, + "step": 5793 + }, + { + "epoch": 1.76, + "learning_rate": 0.00022004890781666923, + "loss": 0.2842, + "step": 5794 + }, + { + "epoch": 1.76, + "learning_rate": 0.00022002334220564763, + "loss": 0.2035, + "step": 5795 + }, + { + "epoch": 1.76, + "learning_rate": 0.00021999777399346422, + "loss": 0.3049, + "step": 5796 + }, + { + "epoch": 1.76, + "learning_rate": 0.00021997220318106879, + "loss": 0.2839, + "step": 5797 + }, + { + "epoch": 1.76, + "learning_rate": 0.0002199466297694112, + "loss": 0.2056, + "step": 5798 + }, + { + "epoch": 1.76, + "learning_rate": 0.00021992105375944144, + "loss": 0.2256, + "step": 5799 + }, + { + "epoch": 1.76, + "learning_rate": 0.00021989547515210956, + "loss": 0.2402, + "step": 5800 + }, + { + "epoch": 1.76, + "learning_rate": 0.00021986989394836575, + "loss": 0.3575, + "step": 5801 + }, + { + "epoch": 1.76, + "learning_rate": 0.0002198443101491603, + "loss": 0.0803, + "step": 5802 + }, + { + "epoch": 1.76, + "learning_rate": 0.0002198187237554435, + "loss": 0.1903, + "step": 5803 + }, + { + "epoch": 1.76, + "learning_rate": 0.00021979313476816587, + "loss": 0.0983, + "step": 5804 + }, + { + "epoch": 1.76, + "learning_rate": 0.00021976754318827797, + "loss": 0.134, + "step": 5805 + }, + { + "epoch": 1.76, + "learning_rate": 0.0002197419490167304, + "loss": 0.216, + "step": 5806 + }, + { + "epoch": 1.76, + "learning_rate": 0.00021971635225447397, + "loss": 0.1114, + "step": 5807 + }, + { + "epoch": 1.76, + "learning_rate": 0.00021969075290245949, + "loss": 0.1945, + "step": 5808 + }, + { + "epoch": 1.76, + "learning_rate": 0.0002196651509616379, + "loss": 0.162, + "step": 5809 + }, + { + "epoch": 1.76, + "learning_rate": 0.00021963954643296026, + "loss": 0.2444, + "step": 5810 + }, + { + "epoch": 1.76, + "learning_rate": 0.00021961393931737764, + "loss": 0.2279, + "step": 5811 + }, + { + "epoch": 1.76, + "learning_rate": 0.00021958832961584132, + "loss": 0.1555, + "step": 5812 + }, + { + "epoch": 1.76, + "learning_rate": 0.00021956271732930264, + "loss": 0.1934, + "step": 5813 + }, + { + "epoch": 1.77, + "learning_rate": 0.00021953710245871302, + "loss": 0.2279, + "step": 5814 + }, + { + "epoch": 1.77, + "learning_rate": 0.00021951148500502394, + "loss": 0.2681, + "step": 5815 + }, + { + "epoch": 1.77, + "learning_rate": 0.00021948586496918702, + "loss": 0.2274, + "step": 5816 + }, + { + "epoch": 1.77, + "learning_rate": 0.00021946024235215404, + "loss": 0.0598, + "step": 5817 + }, + { + "epoch": 1.77, + "learning_rate": 0.0002194346171548767, + "loss": 0.0697, + "step": 5818 + }, + { + "epoch": 1.77, + "learning_rate": 0.00021940898937830697, + "loss": 0.1889, + "step": 5819 + }, + { + "epoch": 1.77, + "learning_rate": 0.0002193833590233968, + "loss": 0.2501, + "step": 5820 + }, + { + "epoch": 1.77, + "learning_rate": 0.0002193577260910983, + "loss": 0.3253, + "step": 5821 + }, + { + "epoch": 1.77, + "learning_rate": 0.0002193320905823637, + "loss": 0.2238, + "step": 5822 + }, + { + "epoch": 1.77, + "learning_rate": 0.0002193064524981452, + "loss": 0.2848, + "step": 5823 + }, + { + "epoch": 1.77, + "learning_rate": 0.00021928081183939524, + "loss": 0.1997, + "step": 5824 + }, + { + "epoch": 1.77, + "learning_rate": 0.00021925516860706637, + "loss": 0.0751, + "step": 5825 + }, + { + "epoch": 1.77, + "learning_rate": 0.00021922952280211097, + "loss": 0.2819, + "step": 5826 + }, + { + "epoch": 1.77, + "learning_rate": 0.0002192038744254819, + "loss": 0.1599, + "step": 5827 + }, + { + "epoch": 1.77, + "learning_rate": 0.00021917822347813177, + "loss": 0.0993, + "step": 5828 + }, + { + "epoch": 1.77, + "learning_rate": 0.0002191525699610135, + "loss": 0.125, + "step": 5829 + }, + { + "epoch": 1.77, + "learning_rate": 0.0002191269138750801, + "loss": 0.1192, + "step": 5830 + }, + { + "epoch": 1.77, + "learning_rate": 0.00021910125522128448, + "loss": 0.1709, + "step": 5831 + }, + { + "epoch": 1.77, + "learning_rate": 0.00021907559400057992, + "loss": 0.2382, + "step": 5832 + }, + { + "epoch": 1.77, + "learning_rate": 0.0002190499302139196, + "loss": 0.2527, + "step": 5833 + }, + { + "epoch": 1.77, + "learning_rate": 0.00021902426386225684, + "loss": 0.1751, + "step": 5834 + }, + { + "epoch": 1.77, + "learning_rate": 0.00021899859494654513, + "loss": 0.1952, + "step": 5835 + }, + { + "epoch": 1.77, + "learning_rate": 0.00021897292346773794, + "loss": 0.1866, + "step": 5836 + }, + { + "epoch": 1.77, + "learning_rate": 0.00021894724942678886, + "loss": 0.245, + "step": 5837 + }, + { + "epoch": 1.77, + "learning_rate": 0.0002189215728246517, + "loss": 0.172, + "step": 5838 + }, + { + "epoch": 1.77, + "learning_rate": 0.0002188958936622802, + "loss": 0.1802, + "step": 5839 + }, + { + "epoch": 1.77, + "learning_rate": 0.0002188702119406283, + "loss": 0.1246, + "step": 5840 + }, + { + "epoch": 1.77, + "learning_rate": 0.00021884452766064995, + "loss": 0.048, + "step": 5841 + }, + { + "epoch": 1.77, + "learning_rate": 0.00021881884082329932, + "loss": 0.0424, + "step": 5842 + }, + { + "epoch": 1.77, + "learning_rate": 0.00021879315142953057, + "loss": 0.0916, + "step": 5843 + }, + { + "epoch": 1.77, + "learning_rate": 0.00021876745948029797, + "loss": 0.2559, + "step": 5844 + }, + { + "epoch": 1.77, + "learning_rate": 0.00021874176497655587, + "loss": 0.1281, + "step": 5845 + }, + { + "epoch": 1.78, + "learning_rate": 0.0002187160679192588, + "loss": 0.095, + "step": 5846 + }, + { + "epoch": 1.78, + "learning_rate": 0.00021869036830936134, + "loss": 0.1816, + "step": 5847 + }, + { + "epoch": 1.78, + "learning_rate": 0.00021866466614781815, + "loss": 0.2039, + "step": 5848 + }, + { + "epoch": 1.78, + "learning_rate": 0.00021863896143558393, + "loss": 0.1833, + "step": 5849 + }, + { + "epoch": 1.78, + "learning_rate": 0.0002186132541736136, + "loss": 0.0906, + "step": 5850 + }, + { + "epoch": 1.78, + "learning_rate": 0.00021858754436286206, + "loss": 0.1499, + "step": 5851 + }, + { + "epoch": 1.78, + "learning_rate": 0.0002185618320042844, + "loss": 0.0942, + "step": 5852 + }, + { + "epoch": 1.78, + "learning_rate": 0.0002185361170988357, + "loss": 0.0678, + "step": 5853 + }, + { + "epoch": 1.78, + "learning_rate": 0.00021851039964747125, + "loss": 0.2739, + "step": 5854 + }, + { + "epoch": 1.78, + "learning_rate": 0.0002184846796511464, + "loss": 0.1983, + "step": 5855 + }, + { + "epoch": 1.78, + "learning_rate": 0.00021845895711081645, + "loss": 0.2356, + "step": 5856 + }, + { + "epoch": 1.78, + "learning_rate": 0.00021843323202743706, + "loss": 0.1764, + "step": 5857 + }, + { + "epoch": 1.78, + "learning_rate": 0.00021840750440196374, + "loss": 0.2491, + "step": 5858 + }, + { + "epoch": 1.78, + "learning_rate": 0.00021838177423535228, + "loss": 0.1735, + "step": 5859 + }, + { + "epoch": 1.78, + "learning_rate": 0.0002183560415285584, + "loss": 0.2061, + "step": 5860 + }, + { + "epoch": 1.78, + "learning_rate": 0.00021833030628253805, + "loss": 0.2307, + "step": 5861 + }, + { + "epoch": 1.78, + "learning_rate": 0.00021830456849824715, + "loss": 0.1803, + "step": 5862 + }, + { + "epoch": 1.78, + "learning_rate": 0.0002182788281766419, + "loss": 0.265, + "step": 5863 + }, + { + "epoch": 1.78, + "learning_rate": 0.00021825308531867838, + "loss": 0.2297, + "step": 5864 + }, + { + "epoch": 1.78, + "learning_rate": 0.0002182273399253129, + "loss": 0.0805, + "step": 5865 + }, + { + "epoch": 1.78, + "learning_rate": 0.00021820159199750178, + "loss": 0.1197, + "step": 5866 + }, + { + "epoch": 1.78, + "learning_rate": 0.00021817584153620157, + "loss": 0.2298, + "step": 5867 + }, + { + "epoch": 1.78, + "learning_rate": 0.00021815008854236877, + "loss": 0.1966, + "step": 5868 + }, + { + "epoch": 1.78, + "learning_rate": 0.00021812433301695997, + "loss": 0.2455, + "step": 5869 + }, + { + "epoch": 1.78, + "learning_rate": 0.00021809857496093199, + "loss": 0.0707, + "step": 5870 + }, + { + "epoch": 1.78, + "learning_rate": 0.00021807281437524168, + "loss": 0.1909, + "step": 5871 + }, + { + "epoch": 1.78, + "learning_rate": 0.0002180470512608459, + "loss": 0.1268, + "step": 5872 + }, + { + "epoch": 1.78, + "learning_rate": 0.00021802128561870175, + "loss": 0.1202, + "step": 5873 + }, + { + "epoch": 1.78, + "learning_rate": 0.0002179955174497663, + "loss": 0.1318, + "step": 5874 + }, + { + "epoch": 1.78, + "learning_rate": 0.0002179697467549968, + "loss": 0.172, + "step": 5875 + }, + { + "epoch": 1.78, + "learning_rate": 0.00021794397353535044, + "loss": 0.1574, + "step": 5876 + }, + { + "epoch": 1.78, + "learning_rate": 0.00021791819779178476, + "loss": 0.2678, + "step": 5877 + }, + { + "epoch": 1.78, + "learning_rate": 0.00021789241952525722, + "loss": 0.2206, + "step": 5878 + }, + { + "epoch": 1.79, + "learning_rate": 0.00021786663873672534, + "loss": 0.1529, + "step": 5879 + }, + { + "epoch": 1.79, + "learning_rate": 0.00021784085542714688, + "loss": 0.1129, + "step": 5880 + }, + { + "epoch": 1.79, + "learning_rate": 0.00021781506959747958, + "loss": 0.1651, + "step": 5881 + }, + { + "epoch": 1.79, + "learning_rate": 0.00021778928124868132, + "loss": 0.2344, + "step": 5882 + }, + { + "epoch": 1.79, + "learning_rate": 0.00021776349038171003, + "loss": 0.2285, + "step": 5883 + }, + { + "epoch": 1.79, + "learning_rate": 0.00021773769699752382, + "loss": 0.2856, + "step": 5884 + }, + { + "epoch": 1.79, + "learning_rate": 0.00021771190109708079, + "loss": 0.0709, + "step": 5885 + }, + { + "epoch": 1.79, + "learning_rate": 0.00021768610268133916, + "loss": 0.3945, + "step": 5886 + }, + { + "epoch": 1.79, + "learning_rate": 0.00021766030175125734, + "loss": 0.2398, + "step": 5887 + }, + { + "epoch": 1.79, + "learning_rate": 0.0002176344983077937, + "loss": 0.2659, + "step": 5888 + }, + { + "epoch": 1.79, + "learning_rate": 0.0002176086923519068, + "loss": 0.1727, + "step": 5889 + }, + { + "epoch": 1.79, + "learning_rate": 0.00021758288388455525, + "loss": 0.2139, + "step": 5890 + }, + { + "epoch": 1.79, + "learning_rate": 0.00021755707290669777, + "loss": 0.1572, + "step": 5891 + }, + { + "epoch": 1.79, + "learning_rate": 0.0002175312594192931, + "loss": 0.0946, + "step": 5892 + }, + { + "epoch": 1.79, + "learning_rate": 0.00021750544342330022, + "loss": 0.3522, + "step": 5893 + }, + { + "epoch": 1.79, + "learning_rate": 0.000217479624919678, + "loss": 0.1314, + "step": 5894 + }, + { + "epoch": 1.79, + "learning_rate": 0.00021745380390938561, + "loss": 0.1762, + "step": 5895 + }, + { + "epoch": 1.79, + "learning_rate": 0.00021742798039338227, + "loss": 0.0695, + "step": 5896 + }, + { + "epoch": 1.79, + "learning_rate": 0.00021740215437262715, + "loss": 0.23, + "step": 5897 + }, + { + "epoch": 1.79, + "learning_rate": 0.0002173763258480797, + "loss": 0.1213, + "step": 5898 + }, + { + "epoch": 1.79, + "learning_rate": 0.00021735049482069928, + "loss": 0.2477, + "step": 5899 + }, + { + "epoch": 1.79, + "learning_rate": 0.00021732466129144545, + "loss": 0.1821, + "step": 5900 + }, + { + "epoch": 1.79, + "learning_rate": 0.00021729882526127793, + "loss": 0.2033, + "step": 5901 + }, + { + "epoch": 1.79, + "learning_rate": 0.0002172729867311564, + "loss": 0.149, + "step": 5902 + }, + { + "epoch": 1.79, + "learning_rate": 0.00021724714570204069, + "loss": 0.1258, + "step": 5903 + }, + { + "epoch": 1.79, + "learning_rate": 0.00021722130217489069, + "loss": 0.1416, + "step": 5904 + }, + { + "epoch": 1.79, + "learning_rate": 0.00021719545615066644, + "loss": 0.1533, + "step": 5905 + }, + { + "epoch": 1.79, + "learning_rate": 0.0002171696076303281, + "loss": 0.203, + "step": 5906 + }, + { + "epoch": 1.79, + "learning_rate": 0.00021714375661483572, + "loss": 0.1647, + "step": 5907 + }, + { + "epoch": 1.79, + "learning_rate": 0.00021711790310514976, + "loss": 0.1808, + "step": 5908 + }, + { + "epoch": 1.79, + "learning_rate": 0.00021709204710223048, + "loss": 0.0574, + "step": 5909 + }, + { + "epoch": 1.79, + "learning_rate": 0.0002170661886070384, + "loss": 0.2238, + "step": 5910 + }, + { + "epoch": 1.79, + "learning_rate": 0.0002170403276205341, + "loss": 0.0929, + "step": 5911 + }, + { + "epoch": 1.8, + "learning_rate": 0.00021701446414367818, + "loss": 0.1834, + "step": 5912 + }, + { + "epoch": 1.8, + "learning_rate": 0.00021698859817743151, + "loss": 0.1721, + "step": 5913 + }, + { + "epoch": 1.8, + "learning_rate": 0.0002169627297227548, + "loss": 0.2569, + "step": 5914 + }, + { + "epoch": 1.8, + "learning_rate": 0.00021693685878060906, + "loss": 0.1728, + "step": 5915 + }, + { + "epoch": 1.8, + "learning_rate": 0.0002169109853519553, + "loss": 0.1273, + "step": 5916 + }, + { + "epoch": 1.8, + "learning_rate": 0.00021688510943775467, + "loss": 0.1543, + "step": 5917 + }, + { + "epoch": 1.8, + "learning_rate": 0.00021685923103896836, + "loss": 0.1435, + "step": 5918 + }, + { + "epoch": 1.8, + "learning_rate": 0.00021683335015655766, + "loss": 0.2009, + "step": 5919 + }, + { + "epoch": 1.8, + "learning_rate": 0.000216807466791484, + "loss": 0.2158, + "step": 5920 + }, + { + "epoch": 1.8, + "learning_rate": 0.00021678158094470885, + "loss": 0.1978, + "step": 5921 + }, + { + "epoch": 1.8, + "learning_rate": 0.00021675569261719383, + "loss": 0.1632, + "step": 5922 + }, + { + "epoch": 1.8, + "learning_rate": 0.00021672980180990057, + "loss": 0.2269, + "step": 5923 + }, + { + "epoch": 1.8, + "learning_rate": 0.00021670390852379087, + "loss": 0.2781, + "step": 5924 + }, + { + "epoch": 1.8, + "learning_rate": 0.00021667801275982655, + "loss": 0.1299, + "step": 5925 + }, + { + "epoch": 1.8, + "learning_rate": 0.00021665211451896962, + "loss": 0.0931, + "step": 5926 + }, + { + "epoch": 1.8, + "learning_rate": 0.00021662621380218208, + "loss": 0.1162, + "step": 5927 + }, + { + "epoch": 1.8, + "learning_rate": 0.00021660031061042606, + "loss": 0.0706, + "step": 5928 + }, + { + "epoch": 1.8, + "learning_rate": 0.0002165744049446638, + "loss": 0.3002, + "step": 5929 + }, + { + "epoch": 1.8, + "learning_rate": 0.00021654849680585766, + "loss": 0.1337, + "step": 5930 + }, + { + "epoch": 1.8, + "learning_rate": 0.00021652258619497003, + "loss": 0.2706, + "step": 5931 + }, + { + "epoch": 1.8, + "learning_rate": 0.00021649667311296334, + "loss": 0.1576, + "step": 5932 + }, + { + "epoch": 1.8, + "learning_rate": 0.00021647075756080027, + "loss": 0.1372, + "step": 5933 + }, + { + "epoch": 1.8, + "learning_rate": 0.00021644483953944352, + "loss": 0.1421, + "step": 5934 + }, + { + "epoch": 1.8, + "learning_rate": 0.00021641891904985574, + "loss": 0.2958, + "step": 5935 + }, + { + "epoch": 1.8, + "learning_rate": 0.00021639299609299995, + "loss": 0.2198, + "step": 5936 + }, + { + "epoch": 1.8, + "learning_rate": 0.00021636707066983904, + "loss": 0.236, + "step": 5937 + }, + { + "epoch": 1.8, + "learning_rate": 0.00021634114278133607, + "loss": 0.1391, + "step": 5938 + }, + { + "epoch": 1.8, + "learning_rate": 0.00021631521242845419, + "loss": 0.0204, + "step": 5939 + }, + { + "epoch": 1.8, + "learning_rate": 0.00021628927961215663, + "loss": 0.1734, + "step": 5940 + }, + { + "epoch": 1.8, + "learning_rate": 0.00021626334433340674, + "loss": 0.0816, + "step": 5941 + }, + { + "epoch": 1.8, + "learning_rate": 0.00021623740659316792, + "loss": 0.0423, + "step": 5942 + }, + { + "epoch": 1.8, + "learning_rate": 0.00021621146639240368, + "loss": 0.1337, + "step": 5943 + }, + { + "epoch": 1.8, + "learning_rate": 0.00021618552373207762, + "loss": 0.1474, + "step": 5944 + }, + { + "epoch": 1.81, + "learning_rate": 0.00021615957861315343, + "loss": 0.033, + "step": 5945 + }, + { + "epoch": 1.81, + "learning_rate": 0.00021613363103659497, + "loss": 0.0924, + "step": 5946 + }, + { + "epoch": 1.81, + "learning_rate": 0.00021610768100336597, + "loss": 0.2956, + "step": 5947 + }, + { + "epoch": 1.81, + "learning_rate": 0.00021608172851443052, + "loss": 0.1518, + "step": 5948 + }, + { + "epoch": 1.81, + "learning_rate": 0.00021605577357075267, + "loss": 0.1525, + "step": 5949 + }, + { + "epoch": 1.81, + "learning_rate": 0.00021602981617329652, + "loss": 0.276, + "step": 5950 + }, + { + "epoch": 1.81, + "learning_rate": 0.00021600385632302628, + "loss": 0.155, + "step": 5951 + }, + { + "epoch": 1.81, + "learning_rate": 0.00021597789402090636, + "loss": 0.1636, + "step": 5952 + }, + { + "epoch": 1.81, + "learning_rate": 0.00021595192926790116, + "loss": 0.1164, + "step": 5953 + }, + { + "epoch": 1.81, + "learning_rate": 0.0002159259620649752, + "loss": 0.1425, + "step": 5954 + }, + { + "epoch": 1.81, + "learning_rate": 0.00021589999241309308, + "loss": 0.1369, + "step": 5955 + }, + { + "epoch": 1.81, + "learning_rate": 0.00021587402031321946, + "loss": 0.1217, + "step": 5956 + }, + { + "epoch": 1.81, + "learning_rate": 0.00021584804576631919, + "loss": 0.1003, + "step": 5957 + }, + { + "epoch": 1.81, + "learning_rate": 0.0002158220687733571, + "loss": 0.2664, + "step": 5958 + }, + { + "epoch": 1.81, + "learning_rate": 0.0002157960893352982, + "loss": 0.0205, + "step": 5959 + }, + { + "epoch": 1.81, + "learning_rate": 0.00021577010745310748, + "loss": 0.3149, + "step": 5960 + }, + { + "epoch": 1.81, + "learning_rate": 0.00021574412312775016, + "loss": 0.2539, + "step": 5961 + }, + { + "epoch": 1.81, + "learning_rate": 0.0002157181363601915, + "loss": 0.1594, + "step": 5962 + }, + { + "epoch": 1.81, + "learning_rate": 0.00021569214715139675, + "loss": 0.1941, + "step": 5963 + }, + { + "epoch": 1.81, + "learning_rate": 0.0002156661555023314, + "loss": 0.2426, + "step": 5964 + }, + { + "epoch": 1.81, + "learning_rate": 0.0002156401614139609, + "loss": 0.1461, + "step": 5965 + }, + { + "epoch": 1.81, + "learning_rate": 0.0002156141648872509, + "loss": 0.109, + "step": 5966 + }, + { + "epoch": 1.81, + "learning_rate": 0.0002155881659231671, + "loss": 0.1087, + "step": 5967 + }, + { + "epoch": 1.81, + "learning_rate": 0.00021556216452267525, + "loss": 0.1651, + "step": 5968 + }, + { + "epoch": 1.81, + "learning_rate": 0.00021553616068674132, + "loss": 0.238, + "step": 5969 + }, + { + "epoch": 1.81, + "learning_rate": 0.00021551015441633112, + "loss": 0.1456, + "step": 5970 + }, + { + "epoch": 1.81, + "learning_rate": 0.0002154841457124108, + "loss": 0.0838, + "step": 5971 + }, + { + "epoch": 1.81, + "learning_rate": 0.00021545813457594655, + "loss": 0.2479, + "step": 5972 + }, + { + "epoch": 1.81, + "learning_rate": 0.0002154321210079045, + "loss": 0.1923, + "step": 5973 + }, + { + "epoch": 1.81, + "learning_rate": 0.0002154061050092511, + "loss": 0.1761, + "step": 5974 + }, + { + "epoch": 1.81, + "learning_rate": 0.00021538008658095265, + "loss": 0.2098, + "step": 5975 + }, + { + "epoch": 1.81, + "learning_rate": 0.00021535406572397567, + "loss": 0.1744, + "step": 5976 + }, + { + "epoch": 1.81, + "learning_rate": 0.00021532804243928685, + "loss": 0.1363, + "step": 5977 + }, + { + "epoch": 1.82, + "learning_rate": 0.00021530201672785283, + "loss": 0.2171, + "step": 5978 + }, + { + "epoch": 1.82, + "learning_rate": 0.00021527598859064038, + "loss": 0.1733, + "step": 5979 + }, + { + "epoch": 1.82, + "learning_rate": 0.00021524995802861636, + "loss": 0.2474, + "step": 5980 + }, + { + "epoch": 1.82, + "learning_rate": 0.00021522392504274776, + "loss": 0.1907, + "step": 5981 + }, + { + "epoch": 1.82, + "learning_rate": 0.00021519788963400162, + "loss": 0.2411, + "step": 5982 + }, + { + "epoch": 1.82, + "learning_rate": 0.00021517185180334504, + "loss": 0.107, + "step": 5983 + }, + { + "epoch": 1.82, + "learning_rate": 0.0002151458115517453, + "loss": 0.0581, + "step": 5984 + }, + { + "epoch": 1.82, + "learning_rate": 0.00021511976888016965, + "loss": 0.1249, + "step": 5985 + }, + { + "epoch": 1.82, + "learning_rate": 0.00021509372378958563, + "loss": 0.3107, + "step": 5986 + }, + { + "epoch": 1.82, + "learning_rate": 0.00021506767628096061, + "loss": 0.1429, + "step": 5987 + }, + { + "epoch": 1.82, + "learning_rate": 0.00021504162635526224, + "loss": 0.1627, + "step": 5988 + }, + { + "epoch": 1.82, + "learning_rate": 0.00021501557401345817, + "loss": 0.2253, + "step": 5989 + }, + { + "epoch": 1.82, + "learning_rate": 0.0002149895192565162, + "loss": 0.1255, + "step": 5990 + }, + { + "epoch": 1.82, + "learning_rate": 0.00021496346208540418, + "loss": 0.2594, + "step": 5991 + }, + { + "epoch": 1.82, + "learning_rate": 0.00021493740250109, + "loss": 0.1432, + "step": 5992 + }, + { + "epoch": 1.82, + "learning_rate": 0.00021491134050454177, + "loss": 0.1458, + "step": 5993 + }, + { + "epoch": 1.82, + "learning_rate": 0.00021488527609672757, + "loss": 0.2767, + "step": 5994 + }, + { + "epoch": 1.82, + "learning_rate": 0.0002148592092786157, + "loss": 0.1924, + "step": 5995 + }, + { + "epoch": 1.82, + "learning_rate": 0.00021483314005117435, + "loss": 0.2129, + "step": 5996 + }, + { + "epoch": 1.82, + "learning_rate": 0.00021480706841537202, + "loss": 0.1935, + "step": 5997 + }, + { + "epoch": 1.82, + "learning_rate": 0.0002147809943721771, + "loss": 0.1043, + "step": 5998 + }, + { + "epoch": 1.82, + "learning_rate": 0.0002147549179225582, + "loss": 0.1529, + "step": 5999 + }, + { + "epoch": 1.82, + "learning_rate": 0.000214728839067484, + "loss": 0.0977, + "step": 6000 + }, + { + "epoch": 1.82, + "learning_rate": 0.00021470275780792323, + "loss": 0.1541, + "step": 6001 + }, + { + "epoch": 1.82, + "learning_rate": 0.00021467667414484478, + "loss": 0.2207, + "step": 6002 + }, + { + "epoch": 1.82, + "learning_rate": 0.00021465058807921755, + "loss": 0.0448, + "step": 6003 + }, + { + "epoch": 1.82, + "learning_rate": 0.00021462449961201052, + "loss": 0.2815, + "step": 6004 + }, + { + "epoch": 1.82, + "learning_rate": 0.00021459840874419288, + "loss": 0.0961, + "step": 6005 + }, + { + "epoch": 1.82, + "learning_rate": 0.00021457231547673377, + "loss": 0.2052, + "step": 6006 + }, + { + "epoch": 1.82, + "learning_rate": 0.00021454621981060253, + "loss": 0.06, + "step": 6007 + }, + { + "epoch": 1.82, + "learning_rate": 0.00021452012174676843, + "loss": 0.0502, + "step": 6008 + }, + { + "epoch": 1.82, + "learning_rate": 0.00021449402128620104, + "loss": 0.1881, + "step": 6009 + }, + { + "epoch": 1.82, + "learning_rate": 0.00021446791842986995, + "loss": 0.1512, + "step": 6010 + }, + { + "epoch": 1.83, + "learning_rate": 0.00021444181317874467, + "loss": 0.2765, + "step": 6011 + }, + { + "epoch": 1.83, + "learning_rate": 0.00021441570553379501, + "loss": 0.147, + "step": 6012 + }, + { + "epoch": 1.83, + "learning_rate": 0.00021438959549599084, + "loss": 0.2724, + "step": 6013 + }, + { + "epoch": 1.83, + "learning_rate": 0.00021436348306630196, + "loss": 0.1305, + "step": 6014 + }, + { + "epoch": 1.83, + "learning_rate": 0.00021433736824569847, + "loss": 0.2101, + "step": 6015 + }, + { + "epoch": 1.83, + "learning_rate": 0.0002143112510351504, + "loss": 0.2675, + "step": 6016 + }, + { + "epoch": 1.83, + "learning_rate": 0.0002142851314356279, + "loss": 0.1694, + "step": 6017 + }, + { + "epoch": 1.83, + "learning_rate": 0.00021425900944810133, + "loss": 0.19, + "step": 6018 + }, + { + "epoch": 1.83, + "learning_rate": 0.00021423288507354098, + "loss": 0.2058, + "step": 6019 + }, + { + "epoch": 1.83, + "learning_rate": 0.00021420675831291733, + "loss": 0.0807, + "step": 6020 + }, + { + "epoch": 1.83, + "learning_rate": 0.0002141806291672009, + "loss": 0.1672, + "step": 6021 + }, + { + "epoch": 1.83, + "learning_rate": 0.0002141544976373623, + "loss": 0.0903, + "step": 6022 + }, + { + "epoch": 1.83, + "learning_rate": 0.00021412836372437224, + "loss": 0.0357, + "step": 6023 + }, + { + "epoch": 1.83, + "learning_rate": 0.0002141022274292015, + "loss": 0.2242, + "step": 6024 + }, + { + "epoch": 1.83, + "learning_rate": 0.000214076088752821, + "loss": 0.1707, + "step": 6025 + }, + { + "epoch": 1.83, + "learning_rate": 0.0002140499476962017, + "loss": 0.1977, + "step": 6026 + }, + { + "epoch": 1.83, + "learning_rate": 0.00021402380426031468, + "loss": 0.1014, + "step": 6027 + }, + { + "epoch": 1.83, + "learning_rate": 0.0002139976584461311, + "loss": 0.1686, + "step": 6028 + }, + { + "epoch": 1.83, + "learning_rate": 0.00021397151025462217, + "loss": 0.1221, + "step": 6029 + }, + { + "epoch": 1.83, + "learning_rate": 0.00021394535968675922, + "loss": 0.2597, + "step": 6030 + }, + { + "epoch": 1.83, + "learning_rate": 0.00021391920674351366, + "loss": 0.1946, + "step": 6031 + }, + { + "epoch": 1.83, + "learning_rate": 0.000213893051425857, + "loss": 0.2997, + "step": 6032 + }, + { + "epoch": 1.83, + "learning_rate": 0.00021386689373476087, + "loss": 0.2794, + "step": 6033 + }, + { + "epoch": 1.83, + "learning_rate": 0.00021384073367119693, + "loss": 0.3307, + "step": 6034 + }, + { + "epoch": 1.83, + "learning_rate": 0.00021381457123613697, + "loss": 0.1535, + "step": 6035 + }, + { + "epoch": 1.83, + "learning_rate": 0.00021378840643055278, + "loss": 0.2044, + "step": 6036 + }, + { + "epoch": 1.83, + "learning_rate": 0.00021376223925541633, + "loss": 0.1662, + "step": 6037 + }, + { + "epoch": 1.83, + "learning_rate": 0.00021373606971169975, + "loss": 0.1494, + "step": 6038 + }, + { + "epoch": 1.83, + "learning_rate": 0.00021370989780037503, + "loss": 0.1344, + "step": 6039 + }, + { + "epoch": 1.83, + "learning_rate": 0.00021368372352241442, + "loss": 0.1454, + "step": 6040 + }, + { + "epoch": 1.83, + "learning_rate": 0.00021365754687879026, + "loss": 0.1026, + "step": 6041 + }, + { + "epoch": 1.83, + "learning_rate": 0.0002136313678704749, + "loss": 0.1371, + "step": 6042 + }, + { + "epoch": 1.83, + "learning_rate": 0.00021360518649844083, + "loss": 0.1164, + "step": 6043 + }, + { + "epoch": 1.84, + "learning_rate": 0.0002135790027636606, + "loss": 0.1423, + "step": 6044 + }, + { + "epoch": 1.84, + "learning_rate": 0.00021355281666710687, + "loss": 0.0586, + "step": 6045 + }, + { + "epoch": 1.84, + "learning_rate": 0.00021352662820975238, + "loss": 0.1342, + "step": 6046 + }, + { + "epoch": 1.84, + "learning_rate": 0.00021350043739256988, + "loss": 0.2121, + "step": 6047 + }, + { + "epoch": 1.84, + "learning_rate": 0.0002134742442165324, + "loss": 0.1582, + "step": 6048 + }, + { + "epoch": 1.84, + "learning_rate": 0.00021344804868261285, + "loss": 0.2792, + "step": 6049 + }, + { + "epoch": 1.84, + "learning_rate": 0.00021342185079178434, + "loss": 0.2567, + "step": 6050 + }, + { + "epoch": 1.84, + "learning_rate": 0.0002133956505450201, + "loss": 0.1367, + "step": 6051 + }, + { + "epoch": 1.84, + "learning_rate": 0.00021336944794329333, + "loss": 0.2173, + "step": 6052 + }, + { + "epoch": 1.84, + "learning_rate": 0.00021334324298757744, + "loss": 0.1423, + "step": 6053 + }, + { + "epoch": 1.84, + "learning_rate": 0.0002133170356788458, + "loss": 0.0589, + "step": 6054 + }, + { + "epoch": 1.84, + "learning_rate": 0.0002132908260180719, + "loss": 0.2106, + "step": 6055 + }, + { + "epoch": 1.84, + "learning_rate": 0.0002132646140062295, + "loss": 0.1608, + "step": 6056 + }, + { + "epoch": 1.84, + "learning_rate": 0.00021323839964429217, + "loss": 0.1214, + "step": 6057 + }, + { + "epoch": 1.84, + "learning_rate": 0.00021321218293323372, + "loss": 0.0529, + "step": 6058 + }, + { + "epoch": 1.84, + "learning_rate": 0.00021318596387402805, + "loss": 0.3355, + "step": 6059 + }, + { + "epoch": 1.84, + "learning_rate": 0.0002131597424676491, + "loss": 0.1565, + "step": 6060 + }, + { + "epoch": 1.84, + "learning_rate": 0.000213133518715071, + "loss": 0.2941, + "step": 6061 + }, + { + "epoch": 1.84, + "learning_rate": 0.00021310729261726774, + "loss": 0.1134, + "step": 6062 + }, + { + "epoch": 1.84, + "learning_rate": 0.0002130810641752137, + "loss": 0.1839, + "step": 6063 + }, + { + "epoch": 1.84, + "learning_rate": 0.00021305483338988307, + "loss": 0.1002, + "step": 6064 + }, + { + "epoch": 1.84, + "learning_rate": 0.00021302860026225026, + "loss": 0.197, + "step": 6065 + }, + { + "epoch": 1.84, + "learning_rate": 0.0002130023647932898, + "loss": 0.2482, + "step": 6066 + }, + { + "epoch": 1.84, + "learning_rate": 0.0002129761269839762, + "loss": 0.1369, + "step": 6067 + }, + { + "epoch": 1.84, + "learning_rate": 0.00021294988683528426, + "loss": 0.163, + "step": 6068 + }, + { + "epoch": 1.84, + "learning_rate": 0.00021292364434818853, + "loss": 0.1853, + "step": 6069 + }, + { + "epoch": 1.84, + "learning_rate": 0.000212897399523664, + "loss": 0.0886, + "step": 6070 + }, + { + "epoch": 1.84, + "learning_rate": 0.00021287115236268546, + "loss": 0.1528, + "step": 6071 + }, + { + "epoch": 1.84, + "learning_rate": 0.000212844902866228, + "loss": 0.2274, + "step": 6072 + }, + { + "epoch": 1.84, + "learning_rate": 0.00021281865103526672, + "loss": 0.1732, + "step": 6073 + }, + { + "epoch": 1.84, + "learning_rate": 0.00021279239687077673, + "loss": 0.2736, + "step": 6074 + }, + { + "epoch": 1.84, + "learning_rate": 0.00021276614037373334, + "loss": 0.2093, + "step": 6075 + }, + { + "epoch": 1.84, + "learning_rate": 0.0002127398815451119, + "loss": 0.1365, + "step": 6076 + }, + { + "epoch": 1.85, + "learning_rate": 0.00021271362038588785, + "loss": 0.2324, + "step": 6077 + }, + { + "epoch": 1.85, + "learning_rate": 0.0002126873568970367, + "loss": 0.2495, + "step": 6078 + }, + { + "epoch": 1.85, + "learning_rate": 0.0002126610910795341, + "loss": 0.2085, + "step": 6079 + }, + { + "epoch": 1.85, + "learning_rate": 0.0002126348229343557, + "loss": 0.2818, + "step": 6080 + }, + { + "epoch": 1.85, + "learning_rate": 0.00021260855246247728, + "loss": 0.1716, + "step": 6081 + }, + { + "epoch": 1.85, + "learning_rate": 0.00021258227966487476, + "loss": 0.1638, + "step": 6082 + }, + { + "epoch": 1.85, + "learning_rate": 0.00021255600454252402, + "loss": 0.1626, + "step": 6083 + }, + { + "epoch": 1.85, + "learning_rate": 0.00021252972709640125, + "loss": 0.1961, + "step": 6084 + }, + { + "epoch": 1.85, + "learning_rate": 0.00021250344732748244, + "loss": 0.2072, + "step": 6085 + }, + { + "epoch": 1.85, + "learning_rate": 0.00021247716523674384, + "loss": 0.103, + "step": 6086 + }, + { + "epoch": 1.85, + "learning_rate": 0.00021245088082516176, + "loss": 0.1798, + "step": 6087 + }, + { + "epoch": 1.85, + "learning_rate": 0.00021242459409371263, + "loss": 0.0549, + "step": 6088 + }, + { + "epoch": 1.85, + "learning_rate": 0.00021239830504337284, + "loss": 0.0798, + "step": 6089 + }, + { + "epoch": 1.85, + "learning_rate": 0.000212372013675119, + "loss": 0.2627, + "step": 6090 + }, + { + "epoch": 1.85, + "learning_rate": 0.0002123457199899278, + "loss": 0.2326, + "step": 6091 + }, + { + "epoch": 1.85, + "learning_rate": 0.0002123194239887759, + "loss": 0.1292, + "step": 6092 + }, + { + "epoch": 1.85, + "learning_rate": 0.00021229312567264014, + "loss": 0.199, + "step": 6093 + }, + { + "epoch": 1.85, + "learning_rate": 0.00021226682504249747, + "loss": 0.1373, + "step": 6094 + }, + { + "epoch": 1.85, + "learning_rate": 0.00021224052209932483, + "loss": 0.3611, + "step": 6095 + }, + { + "epoch": 1.85, + "learning_rate": 0.00021221421684409933, + "loss": 0.2756, + "step": 6096 + }, + { + "epoch": 1.85, + "learning_rate": 0.00021218790927779807, + "loss": 0.1997, + "step": 6097 + }, + { + "epoch": 1.85, + "learning_rate": 0.00021216159940139838, + "loss": 0.2479, + "step": 6098 + }, + { + "epoch": 1.85, + "learning_rate": 0.0002121352872158775, + "loss": 0.0597, + "step": 6099 + }, + { + "epoch": 1.85, + "learning_rate": 0.00021210897272221297, + "loss": 0.1437, + "step": 6100 + }, + { + "epoch": 1.85, + "learning_rate": 0.00021208265592138225, + "loss": 0.1264, + "step": 6101 + }, + { + "epoch": 1.85, + "learning_rate": 0.00021205633681436288, + "loss": 0.2515, + "step": 6102 + }, + { + "epoch": 1.85, + "learning_rate": 0.00021203001540213257, + "loss": 0.1938, + "step": 6103 + }, + { + "epoch": 1.85, + "learning_rate": 0.00021200369168566913, + "loss": 0.1109, + "step": 6104 + }, + { + "epoch": 1.85, + "learning_rate": 0.00021197736566595034, + "loss": 0.2323, + "step": 6105 + }, + { + "epoch": 1.85, + "learning_rate": 0.0002119510373439542, + "loss": 0.3681, + "step": 6106 + }, + { + "epoch": 1.85, + "learning_rate": 0.00021192470672065864, + "loss": 0.1943, + "step": 6107 + }, + { + "epoch": 1.85, + "learning_rate": 0.00021189837379704182, + "loss": 0.1584, + "step": 6108 + }, + { + "epoch": 1.85, + "learning_rate": 0.00021187203857408196, + "loss": 0.1612, + "step": 6109 + }, + { + "epoch": 1.86, + "learning_rate": 0.0002118457010527573, + "loss": 0.1071, + "step": 6110 + }, + { + "epoch": 1.86, + "learning_rate": 0.0002118193612340462, + "loss": 0.213, + "step": 6111 + }, + { + "epoch": 1.86, + "learning_rate": 0.00021179301911892714, + "loss": 0.2013, + "step": 6112 + }, + { + "epoch": 1.86, + "learning_rate": 0.00021176667470837858, + "loss": 0.2826, + "step": 6113 + }, + { + "epoch": 1.86, + "learning_rate": 0.0002117403280033792, + "loss": 0.0983, + "step": 6114 + }, + { + "epoch": 1.86, + "learning_rate": 0.00021171397900490767, + "loss": 0.021, + "step": 6115 + }, + { + "epoch": 1.86, + "learning_rate": 0.00021168762771394282, + "loss": 0.1838, + "step": 6116 + }, + { + "epoch": 1.86, + "learning_rate": 0.00021166127413146352, + "loss": 0.1424, + "step": 6117 + }, + { + "epoch": 1.86, + "learning_rate": 0.0002116349182584487, + "loss": 0.2242, + "step": 6118 + }, + { + "epoch": 1.86, + "learning_rate": 0.00021160856009587738, + "loss": 0.1474, + "step": 6119 + }, + { + "epoch": 1.86, + "learning_rate": 0.00021158219964472874, + "loss": 0.1875, + "step": 6120 + }, + { + "epoch": 1.86, + "learning_rate": 0.00021155583690598195, + "loss": 0.1616, + "step": 6121 + }, + { + "epoch": 1.86, + "learning_rate": 0.0002115294718806163, + "loss": 0.2491, + "step": 6122 + }, + { + "epoch": 1.86, + "learning_rate": 0.00021150310456961125, + "loss": 0.137, + "step": 6123 + }, + { + "epoch": 1.86, + "learning_rate": 0.00021147673497394625, + "loss": 0.1071, + "step": 6124 + }, + { + "epoch": 1.86, + "learning_rate": 0.0002114503630946008, + "loss": 0.2151, + "step": 6125 + }, + { + "epoch": 1.86, + "learning_rate": 0.00021142398893255455, + "loss": 0.1334, + "step": 6126 + }, + { + "epoch": 1.86, + "learning_rate": 0.00021139761248878722, + "loss": 0.2087, + "step": 6127 + }, + { + "epoch": 1.86, + "learning_rate": 0.00021137123376427865, + "loss": 0.101, + "step": 6128 + }, + { + "epoch": 1.86, + "learning_rate": 0.00021134485276000877, + "loss": 0.1364, + "step": 6129 + }, + { + "epoch": 1.86, + "learning_rate": 0.00021131846947695742, + "loss": 0.0325, + "step": 6130 + }, + { + "epoch": 1.86, + "learning_rate": 0.00021129208391610477, + "loss": 0.1458, + "step": 6131 + }, + { + "epoch": 1.86, + "learning_rate": 0.00021126569607843093, + "loss": 0.1545, + "step": 6132 + }, + { + "epoch": 1.86, + "learning_rate": 0.00021123930596491616, + "loss": 0.157, + "step": 6133 + }, + { + "epoch": 1.86, + "learning_rate": 0.00021121291357654074, + "loss": 0.2621, + "step": 6134 + }, + { + "epoch": 1.86, + "learning_rate": 0.00021118651891428508, + "loss": 0.13, + "step": 6135 + }, + { + "epoch": 1.86, + "learning_rate": 0.0002111601219791297, + "loss": 0.2526, + "step": 6136 + }, + { + "epoch": 1.86, + "learning_rate": 0.00021113372277205514, + "loss": 0.1242, + "step": 6137 + }, + { + "epoch": 1.86, + "learning_rate": 0.00021110732129404203, + "loss": 0.149, + "step": 6138 + }, + { + "epoch": 1.86, + "learning_rate": 0.00021108091754607115, + "loss": 0.2867, + "step": 6139 + }, + { + "epoch": 1.86, + "learning_rate": 0.00021105451152912327, + "loss": 0.2436, + "step": 6140 + }, + { + "epoch": 1.86, + "learning_rate": 0.0002110281032441793, + "loss": 0.1719, + "step": 6141 + }, + { + "epoch": 1.86, + "learning_rate": 0.00021100169269222034, + "loss": 0.1219, + "step": 6142 + }, + { + "epoch": 1.87, + "learning_rate": 0.00021097527987422736, + "loss": 0.0953, + "step": 6143 + }, + { + "epoch": 1.87, + "learning_rate": 0.0002109488647911815, + "loss": 0.239, + "step": 6144 + }, + { + "epoch": 1.87, + "learning_rate": 0.0002109224474440641, + "loss": 0.1955, + "step": 6145 + }, + { + "epoch": 1.87, + "learning_rate": 0.0002108960278338564, + "loss": 0.182, + "step": 6146 + }, + { + "epoch": 1.87, + "learning_rate": 0.00021086960596153985, + "loss": 0.3709, + "step": 6147 + }, + { + "epoch": 1.87, + "learning_rate": 0.0002108431818280959, + "loss": 0.2108, + "step": 6148 + }, + { + "epoch": 1.87, + "learning_rate": 0.0002108167554345062, + "loss": 0.175, + "step": 6149 + }, + { + "epoch": 1.87, + "learning_rate": 0.0002107903267817524, + "loss": 0.1693, + "step": 6150 + }, + { + "epoch": 1.87, + "learning_rate": 0.0002107638958708162, + "loss": 0.0912, + "step": 6151 + }, + { + "epoch": 1.87, + "learning_rate": 0.00021073746270267946, + "loss": 0.1952, + "step": 6152 + }, + { + "epoch": 1.87, + "learning_rate": 0.00021071102727832409, + "loss": 0.3249, + "step": 6153 + }, + { + "epoch": 1.87, + "learning_rate": 0.00021068458959873208, + "loss": 0.0757, + "step": 6154 + }, + { + "epoch": 1.87, + "learning_rate": 0.00021065814966488553, + "loss": 0.1489, + "step": 6155 + }, + { + "epoch": 1.87, + "learning_rate": 0.0002106317074777666, + "loss": 0.3395, + "step": 6156 + }, + { + "epoch": 1.87, + "learning_rate": 0.00021060526303835755, + "loss": 0.0592, + "step": 6157 + }, + { + "epoch": 1.87, + "learning_rate": 0.00021057881634764072, + "loss": 0.2206, + "step": 6158 + }, + { + "epoch": 1.87, + "learning_rate": 0.00021055236740659847, + "loss": 0.1762, + "step": 6159 + }, + { + "epoch": 1.87, + "learning_rate": 0.00021052591621621337, + "loss": 0.1358, + "step": 6160 + }, + { + "epoch": 1.87, + "learning_rate": 0.0002104994627774679, + "loss": 0.2956, + "step": 6161 + }, + { + "epoch": 1.87, + "learning_rate": 0.0002104730070913449, + "loss": 0.2088, + "step": 6162 + }, + { + "epoch": 1.87, + "learning_rate": 0.00021044654915882698, + "loss": 0.0708, + "step": 6163 + }, + { + "epoch": 1.87, + "learning_rate": 0.00021042008898089699, + "loss": 0.1702, + "step": 6164 + }, + { + "epoch": 1.87, + "learning_rate": 0.00021039362655853793, + "loss": 0.1866, + "step": 6165 + }, + { + "epoch": 1.87, + "learning_rate": 0.00021036716189273269, + "loss": 0.1375, + "step": 6166 + }, + { + "epoch": 1.87, + "learning_rate": 0.00021034069498446445, + "loss": 0.1591, + "step": 6167 + }, + { + "epoch": 1.87, + "learning_rate": 0.00021031422583471632, + "loss": 0.2729, + "step": 6168 + }, + { + "epoch": 1.87, + "learning_rate": 0.00021028775444447158, + "loss": 0.1461, + "step": 6169 + }, + { + "epoch": 1.87, + "learning_rate": 0.00021026128081471355, + "loss": 0.0597, + "step": 6170 + }, + { + "epoch": 1.87, + "learning_rate": 0.00021023480494642566, + "loss": 0.2576, + "step": 6171 + }, + { + "epoch": 1.87, + "learning_rate": 0.00021020832684059138, + "loss": 0.0825, + "step": 6172 + }, + { + "epoch": 1.87, + "learning_rate": 0.00021018184649819433, + "loss": 0.1487, + "step": 6173 + }, + { + "epoch": 1.87, + "learning_rate": 0.00021015536392021813, + "loss": 0.0573, + "step": 6174 + }, + { + "epoch": 1.87, + "learning_rate": 0.00021012887910764664, + "loss": 0.1608, + "step": 6175 + }, + { + "epoch": 1.88, + "learning_rate": 0.0002101023920614636, + "loss": 0.2233, + "step": 6176 + }, + { + "epoch": 1.88, + "learning_rate": 0.00021007590278265288, + "loss": 0.1721, + "step": 6177 + }, + { + "epoch": 1.88, + "learning_rate": 0.0002100494112721986, + "loss": 0.2253, + "step": 6178 + }, + { + "epoch": 1.88, + "learning_rate": 0.00021002291753108478, + "loss": 0.2315, + "step": 6179 + }, + { + "epoch": 1.88, + "learning_rate": 0.00020999642156029557, + "loss": 0.2823, + "step": 6180 + }, + { + "epoch": 1.88, + "learning_rate": 0.00020996992336081525, + "loss": 0.1931, + "step": 6181 + }, + { + "epoch": 1.88, + "learning_rate": 0.00020994342293362812, + "loss": 0.0637, + "step": 6182 + }, + { + "epoch": 1.88, + "learning_rate": 0.00020991692027971866, + "loss": 0.2596, + "step": 6183 + }, + { + "epoch": 1.88, + "learning_rate": 0.00020989041540007126, + "loss": 0.1239, + "step": 6184 + }, + { + "epoch": 1.88, + "learning_rate": 0.0002098639082956706, + "loss": 0.1099, + "step": 6185 + }, + { + "epoch": 1.88, + "learning_rate": 0.00020983739896750128, + "loss": 0.1458, + "step": 6186 + }, + { + "epoch": 1.88, + "learning_rate": 0.00020981088741654804, + "loss": 0.1151, + "step": 6187 + }, + { + "epoch": 1.88, + "learning_rate": 0.00020978437364379572, + "loss": 0.2123, + "step": 6188 + }, + { + "epoch": 1.88, + "learning_rate": 0.00020975785765022927, + "loss": 0.2184, + "step": 6189 + }, + { + "epoch": 1.88, + "learning_rate": 0.00020973133943683365, + "loss": 0.2489, + "step": 6190 + }, + { + "epoch": 1.88, + "learning_rate": 0.00020970481900459392, + "loss": 0.1779, + "step": 6191 + }, + { + "epoch": 1.88, + "learning_rate": 0.0002096782963544952, + "loss": 0.1573, + "step": 6192 + }, + { + "epoch": 1.88, + "learning_rate": 0.00020965177148752283, + "loss": 0.0953, + "step": 6193 + }, + { + "epoch": 1.88, + "learning_rate": 0.00020962524440466208, + "loss": 0.2041, + "step": 6194 + }, + { + "epoch": 1.88, + "learning_rate": 0.00020959871510689836, + "loss": 0.0964, + "step": 6195 + }, + { + "epoch": 1.88, + "learning_rate": 0.00020957218359521706, + "loss": 0.1023, + "step": 6196 + }, + { + "epoch": 1.88, + "learning_rate": 0.0002095456498706039, + "loss": 0.1659, + "step": 6197 + }, + { + "epoch": 1.88, + "learning_rate": 0.0002095191139340445, + "loss": 0.1663, + "step": 6198 + }, + { + "epoch": 1.88, + "learning_rate": 0.00020949257578652448, + "loss": 0.204, + "step": 6199 + }, + { + "epoch": 1.88, + "learning_rate": 0.00020946603542902975, + "loss": 0.1106, + "step": 6200 + }, + { + "epoch": 1.88, + "learning_rate": 0.0002094394928625462, + "loss": 0.1515, + "step": 6201 + }, + { + "epoch": 1.88, + "learning_rate": 0.00020941294808805977, + "loss": 0.1559, + "step": 6202 + }, + { + "epoch": 1.88, + "learning_rate": 0.00020938640110655662, + "loss": 0.135, + "step": 6203 + }, + { + "epoch": 1.88, + "learning_rate": 0.00020935985191902277, + "loss": 0.1637, + "step": 6204 + }, + { + "epoch": 1.88, + "learning_rate": 0.00020933330052644444, + "loss": 0.1538, + "step": 6205 + }, + { + "epoch": 1.88, + "learning_rate": 0.00020930674692980805, + "loss": 0.2515, + "step": 6206 + }, + { + "epoch": 1.88, + "learning_rate": 0.0002092801911300999, + "loss": 0.1521, + "step": 6207 + }, + { + "epoch": 1.88, + "learning_rate": 0.00020925363312830652, + "loss": 0.2207, + "step": 6208 + }, + { + "epoch": 1.89, + "learning_rate": 0.0002092270729254144, + "loss": 0.1982, + "step": 6209 + }, + { + "epoch": 1.89, + "learning_rate": 0.0002092005105224102, + "loss": 0.1345, + "step": 6210 + }, + { + "epoch": 1.89, + "learning_rate": 0.0002091739459202807, + "loss": 0.2749, + "step": 6211 + }, + { + "epoch": 1.89, + "learning_rate": 0.00020914737912001256, + "loss": 0.2979, + "step": 6212 + }, + { + "epoch": 1.89, + "learning_rate": 0.00020912081012259274, + "loss": 0.1726, + "step": 6213 + }, + { + "epoch": 1.89, + "learning_rate": 0.0002090942389290082, + "loss": 0.0907, + "step": 6214 + }, + { + "epoch": 1.89, + "learning_rate": 0.000209067665540246, + "loss": 0.2007, + "step": 6215 + }, + { + "epoch": 1.89, + "learning_rate": 0.00020904108995729325, + "loss": 0.326, + "step": 6216 + }, + { + "epoch": 1.89, + "learning_rate": 0.00020901451218113714, + "loss": 0.0474, + "step": 6217 + }, + { + "epoch": 1.89, + "learning_rate": 0.00020898793221276493, + "loss": 0.1851, + "step": 6218 + }, + { + "epoch": 1.89, + "learning_rate": 0.00020896135005316405, + "loss": 0.0827, + "step": 6219 + }, + { + "epoch": 1.89, + "learning_rate": 0.0002089347657033219, + "loss": 0.1809, + "step": 6220 + }, + { + "epoch": 1.89, + "learning_rate": 0.00020890817916422604, + "loss": 0.263, + "step": 6221 + }, + { + "epoch": 1.89, + "learning_rate": 0.00020888159043686404, + "loss": 0.2434, + "step": 6222 + }, + { + "epoch": 1.89, + "learning_rate": 0.00020885499952222363, + "loss": 0.1808, + "step": 6223 + }, + { + "epoch": 1.89, + "learning_rate": 0.00020882840642129263, + "loss": 0.2758, + "step": 6224 + }, + { + "epoch": 1.89, + "learning_rate": 0.0002088018111350588, + "loss": 0.1901, + "step": 6225 + }, + { + "epoch": 1.89, + "learning_rate": 0.00020877521366451015, + "loss": 0.265, + "step": 6226 + }, + { + "epoch": 1.89, + "learning_rate": 0.00020874861401063463, + "loss": 0.1941, + "step": 6227 + }, + { + "epoch": 1.89, + "learning_rate": 0.0002087220121744204, + "loss": 0.2296, + "step": 6228 + }, + { + "epoch": 1.89, + "learning_rate": 0.0002086954081568556, + "loss": 0.0618, + "step": 6229 + }, + { + "epoch": 1.89, + "learning_rate": 0.00020866880195892853, + "loss": 0.1813, + "step": 6230 + }, + { + "epoch": 1.89, + "learning_rate": 0.00020864219358162752, + "loss": 0.1206, + "step": 6231 + }, + { + "epoch": 1.89, + "learning_rate": 0.00020861558302594097, + "loss": 0.1457, + "step": 6232 + }, + { + "epoch": 1.89, + "learning_rate": 0.0002085889702928574, + "loss": 0.3555, + "step": 6233 + }, + { + "epoch": 1.89, + "learning_rate": 0.00020856235538336542, + "loss": 0.164, + "step": 6234 + }, + { + "epoch": 1.89, + "learning_rate": 0.00020853573829845363, + "loss": 0.2991, + "step": 6235 + }, + { + "epoch": 1.89, + "learning_rate": 0.00020850911903911085, + "loss": 0.2398, + "step": 6236 + }, + { + "epoch": 1.89, + "learning_rate": 0.00020848249760632581, + "loss": 0.1186, + "step": 6237 + }, + { + "epoch": 1.89, + "learning_rate": 0.00020845587400108754, + "loss": 0.1289, + "step": 6238 + }, + { + "epoch": 1.89, + "learning_rate": 0.00020842924822438497, + "loss": 0.1985, + "step": 6239 + }, + { + "epoch": 1.89, + "learning_rate": 0.00020840262027720718, + "loss": 0.1298, + "step": 6240 + }, + { + "epoch": 1.89, + "learning_rate": 0.00020837599016054333, + "loss": 0.2804, + "step": 6241 + }, + { + "epoch": 1.9, + "learning_rate": 0.00020834935787538257, + "loss": 0.0358, + "step": 6242 + }, + { + "epoch": 1.9, + "learning_rate": 0.0002083227234227143, + "loss": 0.0555, + "step": 6243 + }, + { + "epoch": 1.9, + "learning_rate": 0.0002082960868035279, + "loss": 0.2138, + "step": 6244 + }, + { + "epoch": 1.9, + "learning_rate": 0.0002082694480188128, + "loss": 0.0289, + "step": 6245 + }, + { + "epoch": 1.9, + "learning_rate": 0.0002082428070695586, + "loss": 0.1741, + "step": 6246 + }, + { + "epoch": 1.9, + "learning_rate": 0.0002082161639567549, + "loss": 0.1409, + "step": 6247 + }, + { + "epoch": 1.9, + "learning_rate": 0.0002081895186813914, + "loss": 0.1909, + "step": 6248 + }, + { + "epoch": 1.9, + "learning_rate": 0.000208162871244458, + "loss": 0.1521, + "step": 6249 + }, + { + "epoch": 1.9, + "learning_rate": 0.0002081362216469444, + "loss": 0.3363, + "step": 6250 + }, + { + "epoch": 1.9, + "learning_rate": 0.00020810956988984074, + "loss": 0.1819, + "step": 6251 + }, + { + "epoch": 1.9, + "learning_rate": 0.00020808291597413693, + "loss": 0.143, + "step": 6252 + }, + { + "epoch": 1.9, + "learning_rate": 0.0002080562599008231, + "loss": 0.2494, + "step": 6253 + }, + { + "epoch": 1.9, + "learning_rate": 0.00020802960167088944, + "loss": 0.1312, + "step": 6254 + }, + { + "epoch": 1.9, + "learning_rate": 0.0002080029412853263, + "loss": 0.2637, + "step": 6255 + }, + { + "epoch": 1.9, + "learning_rate": 0.00020797627874512395, + "loss": 0.2539, + "step": 6256 + }, + { + "epoch": 1.9, + "learning_rate": 0.00020794961405127288, + "loss": 0.1397, + "step": 6257 + }, + { + "epoch": 1.9, + "learning_rate": 0.00020792294720476358, + "loss": 0.2001, + "step": 6258 + }, + { + "epoch": 1.9, + "learning_rate": 0.00020789627820658665, + "loss": 0.1652, + "step": 6259 + }, + { + "epoch": 1.9, + "learning_rate": 0.00020786960705773273, + "loss": 0.14, + "step": 6260 + }, + { + "epoch": 1.9, + "learning_rate": 0.00020784293375919265, + "loss": 0.0995, + "step": 6261 + }, + { + "epoch": 1.9, + "learning_rate": 0.00020781625831195718, + "loss": 0.1578, + "step": 6262 + }, + { + "epoch": 1.9, + "learning_rate": 0.00020778958071701725, + "loss": 0.0873, + "step": 6263 + }, + { + "epoch": 1.9, + "learning_rate": 0.0002077629009753639, + "loss": 0.2898, + "step": 6264 + }, + { + "epoch": 1.9, + "learning_rate": 0.00020773621908798815, + "loss": 0.0643, + "step": 6265 + }, + { + "epoch": 1.9, + "learning_rate": 0.00020770953505588115, + "loss": 0.1797, + "step": 6266 + }, + { + "epoch": 1.9, + "learning_rate": 0.0002076828488800342, + "loss": 0.2182, + "step": 6267 + }, + { + "epoch": 1.9, + "learning_rate": 0.00020765616056143854, + "loss": 0.0965, + "step": 6268 + }, + { + "epoch": 1.9, + "learning_rate": 0.00020762947010108558, + "loss": 0.0791, + "step": 6269 + }, + { + "epoch": 1.9, + "learning_rate": 0.0002076027774999668, + "loss": 0.2375, + "step": 6270 + }, + { + "epoch": 1.9, + "learning_rate": 0.00020757608275907377, + "loss": 0.1108, + "step": 6271 + }, + { + "epoch": 1.9, + "learning_rate": 0.00020754938587939814, + "loss": 0.2706, + "step": 6272 + }, + { + "epoch": 1.9, + "learning_rate": 0.00020752268686193153, + "loss": 0.3907, + "step": 6273 + }, + { + "epoch": 1.9, + "learning_rate": 0.00020749598570766583, + "loss": 0.1026, + "step": 6274 + }, + { + "epoch": 1.91, + "learning_rate": 0.0002074692824175928, + "loss": 0.2336, + "step": 6275 + }, + { + "epoch": 1.91, + "learning_rate": 0.00020744257699270455, + "loss": 0.2044, + "step": 6276 + }, + { + "epoch": 1.91, + "learning_rate": 0.00020741586943399295, + "loss": 0.208, + "step": 6277 + }, + { + "epoch": 1.91, + "learning_rate": 0.0002073891597424502, + "loss": 0.1903, + "step": 6278 + }, + { + "epoch": 1.91, + "learning_rate": 0.00020736244791906845, + "loss": 0.1278, + "step": 6279 + }, + { + "epoch": 1.91, + "learning_rate": 0.00020733573396483992, + "loss": 0.1806, + "step": 6280 + }, + { + "epoch": 1.91, + "learning_rate": 0.00020730901788075707, + "loss": 0.1493, + "step": 6281 + }, + { + "epoch": 1.91, + "learning_rate": 0.00020728229966781228, + "loss": 0.0639, + "step": 6282 + }, + { + "epoch": 1.91, + "learning_rate": 0.000207255579326998, + "loss": 0.2199, + "step": 6283 + }, + { + "epoch": 1.91, + "learning_rate": 0.00020722885685930687, + "loss": 0.1038, + "step": 6284 + }, + { + "epoch": 1.91, + "learning_rate": 0.0002072021322657315, + "loss": 0.1604, + "step": 6285 + }, + { + "epoch": 1.91, + "learning_rate": 0.0002071754055472647, + "loss": 0.0421, + "step": 6286 + }, + { + "epoch": 1.91, + "learning_rate": 0.0002071486767048992, + "loss": 0.346, + "step": 6287 + }, + { + "epoch": 1.91, + "learning_rate": 0.00020712194573962794, + "loss": 0.0872, + "step": 6288 + }, + { + "epoch": 1.91, + "learning_rate": 0.000207095212652444, + "loss": 0.1005, + "step": 6289 + }, + { + "epoch": 1.91, + "learning_rate": 0.00020706847744434025, + "loss": 0.1069, + "step": 6290 + }, + { + "epoch": 1.91, + "learning_rate": 0.0002070417401163099, + "loss": 0.3206, + "step": 6291 + }, + { + "epoch": 1.91, + "learning_rate": 0.00020701500066934623, + "loss": 0.1434, + "step": 6292 + }, + { + "epoch": 1.91, + "learning_rate": 0.00020698825910444248, + "loss": 0.2063, + "step": 6293 + }, + { + "epoch": 1.91, + "learning_rate": 0.00020696151542259203, + "loss": 0.1381, + "step": 6294 + }, + { + "epoch": 1.91, + "learning_rate": 0.00020693476962478828, + "loss": 0.1178, + "step": 6295 + }, + { + "epoch": 1.91, + "learning_rate": 0.00020690802171202482, + "loss": 0.1075, + "step": 6296 + }, + { + "epoch": 1.91, + "learning_rate": 0.00020688127168529526, + "loss": 0.0607, + "step": 6297 + }, + { + "epoch": 1.91, + "learning_rate": 0.00020685451954559323, + "loss": 0.1756, + "step": 6298 + }, + { + "epoch": 1.91, + "learning_rate": 0.00020682776529391247, + "loss": 0.1721, + "step": 6299 + }, + { + "epoch": 1.91, + "learning_rate": 0.00020680100893124696, + "loss": 0.2373, + "step": 6300 + }, + { + "epoch": 1.91, + "learning_rate": 0.0002067742504585905, + "loss": 0.1677, + "step": 6301 + }, + { + "epoch": 1.91, + "learning_rate": 0.00020674748987693713, + "loss": 0.146, + "step": 6302 + }, + { + "epoch": 1.91, + "learning_rate": 0.00020672072718728093, + "loss": 0.1463, + "step": 6303 + }, + { + "epoch": 1.91, + "learning_rate": 0.00020669396239061602, + "loss": 0.1592, + "step": 6304 + }, + { + "epoch": 1.91, + "learning_rate": 0.00020666719548793672, + "loss": 0.1352, + "step": 6305 + }, + { + "epoch": 1.91, + "learning_rate": 0.00020664042648023723, + "loss": 0.1908, + "step": 6306 + }, + { + "epoch": 1.91, + "learning_rate": 0.00020661365536851203, + "loss": 0.1919, + "step": 6307 + }, + { + "epoch": 1.92, + "learning_rate": 0.00020658688215375554, + "loss": 0.3064, + "step": 6308 + }, + { + "epoch": 1.92, + "learning_rate": 0.0002065601068369623, + "loss": 0.1196, + "step": 6309 + }, + { + "epoch": 1.92, + "learning_rate": 0.00020653332941912694, + "loss": 0.194, + "step": 6310 + }, + { + "epoch": 1.92, + "learning_rate": 0.00020650654990124416, + "loss": 0.1785, + "step": 6311 + }, + { + "epoch": 1.92, + "learning_rate": 0.0002064797682843088, + "loss": 0.1109, + "step": 6312 + }, + { + "epoch": 1.92, + "learning_rate": 0.00020645298456931568, + "loss": 0.1699, + "step": 6313 + }, + { + "epoch": 1.92, + "learning_rate": 0.0002064261987572597, + "loss": 0.1199, + "step": 6314 + }, + { + "epoch": 1.92, + "learning_rate": 0.00020639941084913595, + "loss": 0.1841, + "step": 6315 + }, + { + "epoch": 1.92, + "learning_rate": 0.00020637262084593943, + "loss": 0.2663, + "step": 6316 + }, + { + "epoch": 1.92, + "learning_rate": 0.0002063458287486654, + "loss": 0.1202, + "step": 6317 + }, + { + "epoch": 1.92, + "learning_rate": 0.00020631903455830902, + "loss": 0.1357, + "step": 6318 + }, + { + "epoch": 1.92, + "learning_rate": 0.00020629223827586563, + "loss": 0.1878, + "step": 6319 + }, + { + "epoch": 1.92, + "learning_rate": 0.00020626543990233075, + "loss": 0.0973, + "step": 6320 + }, + { + "epoch": 1.92, + "learning_rate": 0.00020623863943869972, + "loss": 0.2797, + "step": 6321 + }, + { + "epoch": 1.92, + "learning_rate": 0.0002062118368859682, + "loss": 0.1802, + "step": 6322 + }, + { + "epoch": 1.92, + "learning_rate": 0.00020618503224513173, + "loss": 0.2515, + "step": 6323 + }, + { + "epoch": 1.92, + "learning_rate": 0.0002061582255171861, + "loss": 0.2363, + "step": 6324 + }, + { + "epoch": 1.92, + "learning_rate": 0.00020613141670312708, + "loss": 0.0688, + "step": 6325 + }, + { + "epoch": 1.92, + "learning_rate": 0.0002061046058039505, + "loss": 0.1924, + "step": 6326 + }, + { + "epoch": 1.92, + "learning_rate": 0.00020607779282065238, + "loss": 0.2286, + "step": 6327 + }, + { + "epoch": 1.92, + "learning_rate": 0.00020605097775422865, + "loss": 0.1611, + "step": 6328 + }, + { + "epoch": 1.92, + "learning_rate": 0.0002060241606056755, + "loss": 0.2212, + "step": 6329 + }, + { + "epoch": 1.92, + "learning_rate": 0.00020599734137598905, + "loss": 0.0971, + "step": 6330 + }, + { + "epoch": 1.92, + "learning_rate": 0.00020597052006616562, + "loss": 0.1745, + "step": 6331 + }, + { + "epoch": 1.92, + "learning_rate": 0.00020594369667720146, + "loss": 0.1728, + "step": 6332 + }, + { + "epoch": 1.92, + "learning_rate": 0.00020591687121009308, + "loss": 0.0989, + "step": 6333 + }, + { + "epoch": 1.92, + "learning_rate": 0.00020589004366583687, + "loss": 0.1762, + "step": 6334 + }, + { + "epoch": 1.92, + "learning_rate": 0.00020586321404542942, + "loss": 0.2358, + "step": 6335 + }, + { + "epoch": 1.92, + "learning_rate": 0.0002058363823498674, + "loss": 0.1469, + "step": 6336 + }, + { + "epoch": 1.92, + "learning_rate": 0.0002058095485801475, + "loss": 0.1146, + "step": 6337 + }, + { + "epoch": 1.92, + "learning_rate": 0.0002057827127372666, + "loss": 0.1343, + "step": 6338 + }, + { + "epoch": 1.92, + "learning_rate": 0.0002057558748222214, + "loss": 0.1324, + "step": 6339 + }, + { + "epoch": 1.93, + "learning_rate": 0.00020572903483600903, + "loss": 0.1139, + "step": 6340 + }, + { + "epoch": 1.93, + "learning_rate": 0.00020570219277962643, + "loss": 0.0714, + "step": 6341 + }, + { + "epoch": 1.93, + "learning_rate": 0.00020567534865407065, + "loss": 0.159, + "step": 6342 + }, + { + "epoch": 1.93, + "learning_rate": 0.000205648502460339, + "loss": 0.2187, + "step": 6343 + }, + { + "epoch": 1.93, + "learning_rate": 0.0002056216541994286, + "loss": 0.1399, + "step": 6344 + }, + { + "epoch": 1.93, + "learning_rate": 0.00020559480387233697, + "loss": 0.1115, + "step": 6345 + }, + { + "epoch": 1.93, + "learning_rate": 0.00020556795148006133, + "loss": 0.1228, + "step": 6346 + }, + { + "epoch": 1.93, + "learning_rate": 0.00020554109702359925, + "loss": 0.119, + "step": 6347 + }, + { + "epoch": 1.93, + "learning_rate": 0.0002055142405039483, + "loss": 0.1526, + "step": 6348 + }, + { + "epoch": 1.93, + "learning_rate": 0.00020548738192210613, + "loss": 0.1323, + "step": 6349 + }, + { + "epoch": 1.93, + "learning_rate": 0.00020546052127907045, + "loss": 0.2053, + "step": 6350 + }, + { + "epoch": 1.93, + "learning_rate": 0.00020543365857583895, + "loss": 0.164, + "step": 6351 + }, + { + "epoch": 1.93, + "learning_rate": 0.0002054067938134096, + "loss": 0.1912, + "step": 6352 + }, + { + "epoch": 1.93, + "learning_rate": 0.00020537992699278044, + "loss": 0.2285, + "step": 6353 + }, + { + "epoch": 1.93, + "learning_rate": 0.00020535305811494932, + "loss": 0.1595, + "step": 6354 + }, + { + "epoch": 1.93, + "learning_rate": 0.00020532618718091446, + "loss": 0.094, + "step": 6355 + }, + { + "epoch": 1.93, + "learning_rate": 0.00020529931419167397, + "loss": 0.073, + "step": 6356 + }, + { + "epoch": 1.93, + "learning_rate": 0.00020527243914822608, + "loss": 0.11, + "step": 6357 + }, + { + "epoch": 1.93, + "learning_rate": 0.00020524556205156922, + "loss": 0.0791, + "step": 6358 + }, + { + "epoch": 1.93, + "learning_rate": 0.0002052186829027017, + "loss": 0.2017, + "step": 6359 + }, + { + "epoch": 1.93, + "learning_rate": 0.000205191801702622, + "loss": 0.1569, + "step": 6360 + }, + { + "epoch": 1.93, + "learning_rate": 0.0002051649184523288, + "loss": 0.0704, + "step": 6361 + }, + { + "epoch": 1.93, + "learning_rate": 0.00020513803315282058, + "loss": 0.2288, + "step": 6362 + }, + { + "epoch": 1.93, + "learning_rate": 0.00020511114580509617, + "loss": 0.2907, + "step": 6363 + }, + { + "epoch": 1.93, + "learning_rate": 0.00020508425641015428, + "loss": 0.0987, + "step": 6364 + }, + { + "epoch": 1.93, + "learning_rate": 0.0002050573649689938, + "loss": 0.1314, + "step": 6365 + }, + { + "epoch": 1.93, + "learning_rate": 0.00020503047148261367, + "loss": 0.2056, + "step": 6366 + }, + { + "epoch": 1.93, + "learning_rate": 0.00020500357595201288, + "loss": 0.1796, + "step": 6367 + }, + { + "epoch": 1.93, + "learning_rate": 0.0002049766783781906, + "loss": 0.2586, + "step": 6368 + }, + { + "epoch": 1.93, + "learning_rate": 0.00020494977876214587, + "loss": 0.158, + "step": 6369 + }, + { + "epoch": 1.93, + "learning_rate": 0.000204922877104878, + "loss": 0.1615, + "step": 6370 + }, + { + "epoch": 1.93, + "learning_rate": 0.00020489597340738635, + "loss": 0.142, + "step": 6371 + }, + { + "epoch": 1.93, + "learning_rate": 0.00020486906767067025, + "loss": 0.2214, + "step": 6372 + }, + { + "epoch": 1.94, + "learning_rate": 0.0002048421598957292, + "loss": 0.1867, + "step": 6373 + }, + { + "epoch": 1.94, + "learning_rate": 0.0002048152500835627, + "loss": 0.1655, + "step": 6374 + }, + { + "epoch": 1.94, + "learning_rate": 0.0002047883382351704, + "loss": 0.2072, + "step": 6375 + }, + { + "epoch": 1.94, + "learning_rate": 0.00020476142435155205, + "loss": 0.1428, + "step": 6376 + }, + { + "epoch": 1.94, + "learning_rate": 0.0002047345084337073, + "loss": 0.0608, + "step": 6377 + }, + { + "epoch": 1.94, + "learning_rate": 0.00020470759048263617, + "loss": 0.134, + "step": 6378 + }, + { + "epoch": 1.94, + "learning_rate": 0.0002046806704993384, + "loss": 0.175, + "step": 6379 + }, + { + "epoch": 1.94, + "learning_rate": 0.0002046537484848141, + "loss": 0.2568, + "step": 6380 + }, + { + "epoch": 1.94, + "learning_rate": 0.00020462682444006333, + "loss": 0.2578, + "step": 6381 + }, + { + "epoch": 1.94, + "learning_rate": 0.0002045998983660862, + "loss": 0.256, + "step": 6382 + }, + { + "epoch": 1.94, + "learning_rate": 0.00020457297026388294, + "loss": 0.0479, + "step": 6383 + }, + { + "epoch": 1.94, + "learning_rate": 0.0002045460401344539, + "loss": 0.1714, + "step": 6384 + }, + { + "epoch": 1.94, + "learning_rate": 0.0002045191079787994, + "loss": 0.1311, + "step": 6385 + }, + { + "epoch": 1.94, + "learning_rate": 0.00020449217379791995, + "loss": 0.0959, + "step": 6386 + }, + { + "epoch": 1.94, + "learning_rate": 0.000204465237592816, + "loss": 0.1153, + "step": 6387 + }, + { + "epoch": 1.94, + "learning_rate": 0.0002044382993644882, + "loss": 0.1159, + "step": 6388 + }, + { + "epoch": 1.94, + "learning_rate": 0.00020441135911393725, + "loss": 0.1247, + "step": 6389 + }, + { + "epoch": 1.94, + "learning_rate": 0.00020438441684216387, + "loss": 0.1143, + "step": 6390 + }, + { + "epoch": 1.94, + "learning_rate": 0.00020435747255016887, + "loss": 0.3133, + "step": 6391 + }, + { + "epoch": 1.94, + "learning_rate": 0.00020433052623895312, + "loss": 0.172, + "step": 6392 + }, + { + "epoch": 1.94, + "learning_rate": 0.00020430357790951766, + "loss": 0.1445, + "step": 6393 + }, + { + "epoch": 1.94, + "learning_rate": 0.00020427662756286358, + "loss": 0.1918, + "step": 6394 + }, + { + "epoch": 1.94, + "learning_rate": 0.0002042496751999919, + "loss": 0.1523, + "step": 6395 + }, + { + "epoch": 1.94, + "learning_rate": 0.00020422272082190393, + "loss": 0.1374, + "step": 6396 + }, + { + "epoch": 1.94, + "learning_rate": 0.00020419576442960082, + "loss": 0.1737, + "step": 6397 + }, + { + "epoch": 1.94, + "learning_rate": 0.000204168806024084, + "loss": 0.2275, + "step": 6398 + }, + { + "epoch": 1.94, + "learning_rate": 0.00020414184560635495, + "loss": 0.1795, + "step": 6399 + }, + { + "epoch": 1.94, + "learning_rate": 0.00020411488317741505, + "loss": 0.3322, + "step": 6400 + }, + { + "epoch": 1.94, + "learning_rate": 0.00020408791873826592, + "loss": 0.0546, + "step": 6401 + }, + { + "epoch": 1.94, + "learning_rate": 0.00020406095228990925, + "loss": 0.1813, + "step": 6402 + }, + { + "epoch": 1.94, + "learning_rate": 0.00020403398383334672, + "loss": 0.1609, + "step": 6403 + }, + { + "epoch": 1.94, + "learning_rate": 0.00020400701336958016, + "loss": 0.294, + "step": 6404 + }, + { + "epoch": 1.94, + "learning_rate": 0.0002039800408996114, + "loss": 0.1624, + "step": 6405 + }, + { + "epoch": 1.95, + "learning_rate": 0.00020395306642444246, + "loss": 0.1171, + "step": 6406 + }, + { + "epoch": 1.95, + "learning_rate": 0.00020392608994507526, + "loss": 0.1927, + "step": 6407 + }, + { + "epoch": 1.95, + "learning_rate": 0.000203899111462512, + "loss": 0.1164, + "step": 6408 + }, + { + "epoch": 1.95, + "learning_rate": 0.0002038721309777548, + "loss": 0.0979, + "step": 6409 + }, + { + "epoch": 1.95, + "learning_rate": 0.00020384514849180587, + "loss": 0.1698, + "step": 6410 + }, + { + "epoch": 1.95, + "learning_rate": 0.00020381816400566764, + "loss": 0.1349, + "step": 6411 + }, + { + "epoch": 1.95, + "learning_rate": 0.0002037911775203424, + "loss": 0.0747, + "step": 6412 + }, + { + "epoch": 1.95, + "learning_rate": 0.00020376418903683268, + "loss": 0.2688, + "step": 6413 + }, + { + "epoch": 1.95, + "learning_rate": 0.00020373719855614096, + "loss": 0.1941, + "step": 6414 + }, + { + "epoch": 1.95, + "learning_rate": 0.00020371020607926995, + "loss": 0.1179, + "step": 6415 + }, + { + "epoch": 1.95, + "learning_rate": 0.00020368321160722222, + "loss": 0.1314, + "step": 6416 + }, + { + "epoch": 1.95, + "learning_rate": 0.00020365621514100067, + "loss": 0.1471, + "step": 6417 + }, + { + "epoch": 1.95, + "learning_rate": 0.00020362921668160802, + "loss": 0.0767, + "step": 6418 + }, + { + "epoch": 1.95, + "learning_rate": 0.0002036022162300473, + "loss": 0.1604, + "step": 6419 + }, + { + "epoch": 1.95, + "learning_rate": 0.00020357521378732134, + "loss": 0.1403, + "step": 6420 + }, + { + "epoch": 1.95, + "learning_rate": 0.00020354820935443335, + "loss": 0.0549, + "step": 6421 + }, + { + "epoch": 1.95, + "learning_rate": 0.00020352120293238646, + "loss": 0.1553, + "step": 6422 + }, + { + "epoch": 1.95, + "learning_rate": 0.00020349419452218373, + "loss": 0.1597, + "step": 6423 + }, + { + "epoch": 1.95, + "learning_rate": 0.00020346718412482862, + "loss": 0.137, + "step": 6424 + }, + { + "epoch": 1.95, + "learning_rate": 0.00020344017174132439, + "loss": 0.1949, + "step": 6425 + }, + { + "epoch": 1.95, + "learning_rate": 0.00020341315737267446, + "loss": 0.105, + "step": 6426 + }, + { + "epoch": 1.95, + "learning_rate": 0.00020338614101988244, + "loss": 0.1853, + "step": 6427 + }, + { + "epoch": 1.95, + "learning_rate": 0.00020335912268395175, + "loss": 0.3262, + "step": 6428 + }, + { + "epoch": 1.95, + "learning_rate": 0.00020333210236588622, + "loss": 0.139, + "step": 6429 + }, + { + "epoch": 1.95, + "learning_rate": 0.00020330508006668942, + "loss": 0.153, + "step": 6430 + }, + { + "epoch": 1.95, + "learning_rate": 0.0002032780557873652, + "loss": 0.0962, + "step": 6431 + }, + { + "epoch": 1.95, + "learning_rate": 0.0002032510295289175, + "loss": 0.2802, + "step": 6432 + }, + { + "epoch": 1.95, + "learning_rate": 0.00020322400129235013, + "loss": 0.2325, + "step": 6433 + }, + { + "epoch": 1.95, + "learning_rate": 0.00020319697107866724, + "loss": 0.1981, + "step": 6434 + }, + { + "epoch": 1.95, + "learning_rate": 0.00020316993888887285, + "loss": 0.0954, + "step": 6435 + }, + { + "epoch": 1.95, + "learning_rate": 0.00020314290472397118, + "loss": 0.206, + "step": 6436 + }, + { + "epoch": 1.95, + "learning_rate": 0.00020311586858496645, + "loss": 0.1859, + "step": 6437 + }, + { + "epoch": 1.95, + "learning_rate": 0.00020308883047286293, + "loss": 0.0999, + "step": 6438 + }, + { + "epoch": 1.96, + "learning_rate": 0.00020306179038866507, + "loss": 0.1451, + "step": 6439 + }, + { + "epoch": 1.96, + "learning_rate": 0.00020303474833337727, + "loss": 0.2803, + "step": 6440 + }, + { + "epoch": 1.96, + "learning_rate": 0.0002030077043080041, + "loss": 0.1166, + "step": 6441 + }, + { + "epoch": 1.96, + "learning_rate": 0.00020298065831355014, + "loss": 0.2038, + "step": 6442 + }, + { + "epoch": 1.96, + "learning_rate": 0.0002029536103510201, + "loss": 0.0767, + "step": 6443 + }, + { + "epoch": 1.96, + "learning_rate": 0.00020292656042141877, + "loss": 0.2054, + "step": 6444 + }, + { + "epoch": 1.96, + "learning_rate": 0.00020289950852575084, + "loss": 0.1778, + "step": 6445 + }, + { + "epoch": 1.96, + "learning_rate": 0.00020287245466502136, + "loss": 0.1615, + "step": 6446 + }, + { + "epoch": 1.96, + "learning_rate": 0.00020284539884023523, + "loss": 0.2153, + "step": 6447 + }, + { + "epoch": 1.96, + "learning_rate": 0.00020281834105239746, + "loss": 0.1042, + "step": 6448 + }, + { + "epoch": 1.96, + "learning_rate": 0.00020279128130251326, + "loss": 0.0544, + "step": 6449 + }, + { + "epoch": 1.96, + "learning_rate": 0.00020276421959158775, + "loss": 0.212, + "step": 6450 + }, + { + "epoch": 1.96, + "learning_rate": 0.0002027371559206262, + "loss": 0.1955, + "step": 6451 + }, + { + "epoch": 1.96, + "learning_rate": 0.00020271009029063398, + "loss": 0.1399, + "step": 6452 + }, + { + "epoch": 1.96, + "learning_rate": 0.00020268302270261645, + "loss": 0.1421, + "step": 6453 + }, + { + "epoch": 1.96, + "learning_rate": 0.0002026559531575791, + "loss": 0.214, + "step": 6454 + }, + { + "epoch": 1.96, + "learning_rate": 0.00020262888165652754, + "loss": 0.2137, + "step": 6455 + }, + { + "epoch": 1.96, + "learning_rate": 0.0002026018082004674, + "loss": 0.1465, + "step": 6456 + }, + { + "epoch": 1.96, + "learning_rate": 0.00020257473279040425, + "loss": 0.2251, + "step": 6457 + }, + { + "epoch": 1.96, + "learning_rate": 0.00020254765542734396, + "loss": 0.2908, + "step": 6458 + }, + { + "epoch": 1.96, + "learning_rate": 0.0002025205761122924, + "loss": 0.1883, + "step": 6459 + }, + { + "epoch": 1.96, + "learning_rate": 0.00020249349484625548, + "loss": 0.1581, + "step": 6460 + }, + { + "epoch": 1.96, + "learning_rate": 0.00020246641163023914, + "loss": 0.1487, + "step": 6461 + }, + { + "epoch": 1.96, + "learning_rate": 0.00020243932646524945, + "loss": 0.1789, + "step": 6462 + }, + { + "epoch": 1.96, + "learning_rate": 0.00020241223935229258, + "loss": 0.1321, + "step": 6463 + }, + { + "epoch": 1.96, + "learning_rate": 0.00020238515029237468, + "loss": 0.1683, + "step": 6464 + }, + { + "epoch": 1.96, + "learning_rate": 0.00020235805928650212, + "loss": 0.2893, + "step": 6465 + }, + { + "epoch": 1.96, + "learning_rate": 0.00020233096633568112, + "loss": 0.2507, + "step": 6466 + }, + { + "epoch": 1.96, + "learning_rate": 0.00020230387144091826, + "loss": 0.2429, + "step": 6467 + }, + { + "epoch": 1.96, + "learning_rate": 0.0002022767746032199, + "loss": 0.1395, + "step": 6468 + }, + { + "epoch": 1.96, + "learning_rate": 0.00020224967582359267, + "loss": 0.1323, + "step": 6469 + }, + { + "epoch": 1.96, + "learning_rate": 0.00020222257510304323, + "loss": 0.1403, + "step": 6470 + }, + { + "epoch": 1.96, + "learning_rate": 0.00020219547244257826, + "loss": 0.2268, + "step": 6471 + }, + { + "epoch": 1.97, + "learning_rate": 0.0002021683678432046, + "loss": 0.1705, + "step": 6472 + }, + { + "epoch": 1.97, + "learning_rate": 0.00020214126130592894, + "loss": 0.2158, + "step": 6473 + }, + { + "epoch": 1.97, + "learning_rate": 0.00020211415283175838, + "loss": 0.1808, + "step": 6474 + }, + { + "epoch": 1.97, + "learning_rate": 0.00020208704242169988, + "loss": 0.2172, + "step": 6475 + }, + { + "epoch": 1.97, + "learning_rate": 0.0002020599300767605, + "loss": 0.2252, + "step": 6476 + }, + { + "epoch": 1.97, + "learning_rate": 0.0002020328157979474, + "loss": 0.2293, + "step": 6477 + }, + { + "epoch": 1.97, + "learning_rate": 0.00020200569958626774, + "loss": 0.1629, + "step": 6478 + }, + { + "epoch": 1.97, + "learning_rate": 0.0002019785814427288, + "loss": 0.0803, + "step": 6479 + }, + { + "epoch": 1.97, + "learning_rate": 0.00020195146136833808, + "loss": 0.1036, + "step": 6480 + }, + { + "epoch": 1.97, + "learning_rate": 0.00020192433936410287, + "loss": 0.0399, + "step": 6481 + }, + { + "epoch": 1.97, + "learning_rate": 0.00020189721543103072, + "loss": 0.0458, + "step": 6482 + }, + { + "epoch": 1.97, + "learning_rate": 0.00020187008957012922, + "loss": 0.2962, + "step": 6483 + }, + { + "epoch": 1.97, + "learning_rate": 0.00020184296178240598, + "loss": 0.14, + "step": 6484 + }, + { + "epoch": 1.97, + "learning_rate": 0.00020181583206886877, + "loss": 0.2634, + "step": 6485 + }, + { + "epoch": 1.97, + "learning_rate": 0.00020178870043052532, + "loss": 0.1354, + "step": 6486 + }, + { + "epoch": 1.97, + "learning_rate": 0.00020176156686838351, + "loss": 0.258, + "step": 6487 + }, + { + "epoch": 1.97, + "learning_rate": 0.00020173443138345133, + "loss": 0.0825, + "step": 6488 + }, + { + "epoch": 1.97, + "learning_rate": 0.0002017072939767367, + "loss": 0.1078, + "step": 6489 + }, + { + "epoch": 1.97, + "learning_rate": 0.0002016801546492477, + "loss": 0.0981, + "step": 6490 + }, + { + "epoch": 1.97, + "learning_rate": 0.00020165301340199253, + "loss": 0.0807, + "step": 6491 + }, + { + "epoch": 1.97, + "learning_rate": 0.00020162587023597937, + "loss": 0.0905, + "step": 6492 + }, + { + "epoch": 1.97, + "learning_rate": 0.00020159872515221656, + "loss": 0.2002, + "step": 6493 + }, + { + "epoch": 1.97, + "learning_rate": 0.0002015715781517124, + "loss": 0.2222, + "step": 6494 + }, + { + "epoch": 1.97, + "learning_rate": 0.00020154442923547535, + "loss": 0.2245, + "step": 6495 + }, + { + "epoch": 1.97, + "learning_rate": 0.00020151727840451392, + "loss": 0.1424, + "step": 6496 + }, + { + "epoch": 1.97, + "learning_rate": 0.00020149012565983665, + "loss": 0.106, + "step": 6497 + }, + { + "epoch": 1.97, + "learning_rate": 0.00020146297100245218, + "loss": 0.185, + "step": 6498 + }, + { + "epoch": 1.97, + "learning_rate": 0.0002014358144333693, + "loss": 0.0394, + "step": 6499 + }, + { + "epoch": 1.97, + "learning_rate": 0.00020140865595359673, + "loss": 0.2461, + "step": 6500 + }, + { + "epoch": 1.97, + "learning_rate": 0.0002013814955641433, + "loss": 0.1448, + "step": 6501 + }, + { + "epoch": 1.97, + "learning_rate": 0.00020135433326601805, + "loss": 0.1059, + "step": 6502 + }, + { + "epoch": 1.97, + "learning_rate": 0.00020132716906022993, + "loss": 0.104, + "step": 6503 + }, + { + "epoch": 1.97, + "learning_rate": 0.00020130000294778794, + "loss": 0.2948, + "step": 6504 + }, + { + "epoch": 1.98, + "learning_rate": 0.0002012728349297013, + "loss": 0.1422, + "step": 6505 + }, + { + "epoch": 1.98, + "learning_rate": 0.0002012456650069792, + "loss": 0.1699, + "step": 6506 + }, + { + "epoch": 1.98, + "learning_rate": 0.00020121849318063085, + "loss": 0.2046, + "step": 6507 + }, + { + "epoch": 1.98, + "learning_rate": 0.0002011913194516658, + "loss": 0.2251, + "step": 6508 + }, + { + "epoch": 1.98, + "learning_rate": 0.00020116414382109326, + "loss": 0.2199, + "step": 6509 + }, + { + "epoch": 1.98, + "learning_rate": 0.00020113696628992287, + "loss": 0.1794, + "step": 6510 + }, + { + "epoch": 1.98, + "learning_rate": 0.0002011097868591641, + "loss": 0.1328, + "step": 6511 + }, + { + "epoch": 1.98, + "learning_rate": 0.00020108260552982664, + "loss": 0.1743, + "step": 6512 + }, + { + "epoch": 1.98, + "learning_rate": 0.00020105542230292023, + "loss": 0.1892, + "step": 6513 + }, + { + "epoch": 1.98, + "learning_rate": 0.00020102823717945453, + "loss": 0.2045, + "step": 6514 + }, + { + "epoch": 1.98, + "learning_rate": 0.00020100105016043946, + "loss": 0.0826, + "step": 6515 + }, + { + "epoch": 1.98, + "learning_rate": 0.00020097386124688497, + "loss": 0.2505, + "step": 6516 + }, + { + "epoch": 1.98, + "learning_rate": 0.00020094667043980098, + "loss": 0.1627, + "step": 6517 + }, + { + "epoch": 1.98, + "learning_rate": 0.00020091947774019764, + "loss": 0.392, + "step": 6518 + }, + { + "epoch": 1.98, + "learning_rate": 0.00020089228314908498, + "loss": 0.1916, + "step": 6519 + }, + { + "epoch": 1.98, + "learning_rate": 0.00020086508666747324, + "loss": 0.19, + "step": 6520 + }, + { + "epoch": 1.98, + "learning_rate": 0.00020083788829637273, + "loss": 0.1548, + "step": 6521 + }, + { + "epoch": 1.98, + "learning_rate": 0.00020081068803679371, + "loss": 0.2684, + "step": 6522 + }, + { + "epoch": 1.98, + "learning_rate": 0.00020078348588974666, + "loss": 0.1503, + "step": 6523 + }, + { + "epoch": 1.98, + "learning_rate": 0.00020075628185624197, + "loss": 0.117, + "step": 6524 + }, + { + "epoch": 1.98, + "learning_rate": 0.00020072907593729028, + "loss": 0.2031, + "step": 6525 + }, + { + "epoch": 1.98, + "learning_rate": 0.00020070186813390224, + "loss": 0.1588, + "step": 6526 + }, + { + "epoch": 1.98, + "learning_rate": 0.0002006746584470884, + "loss": 0.2152, + "step": 6527 + }, + { + "epoch": 1.98, + "learning_rate": 0.00020064744687785967, + "loss": 0.1852, + "step": 6528 + }, + { + "epoch": 1.98, + "learning_rate": 0.00020062023342722675, + "loss": 0.1448, + "step": 6529 + }, + { + "epoch": 1.98, + "learning_rate": 0.00020059301809620064, + "loss": 0.1366, + "step": 6530 + }, + { + "epoch": 1.98, + "learning_rate": 0.00020056580088579222, + "loss": 0.1287, + "step": 6531 + }, + { + "epoch": 1.98, + "learning_rate": 0.00020053858179701264, + "loss": 0.2308, + "step": 6532 + }, + { + "epoch": 1.98, + "learning_rate": 0.00020051136083087294, + "loss": 0.1163, + "step": 6533 + }, + { + "epoch": 1.98, + "learning_rate": 0.0002004841379883843, + "loss": 0.1288, + "step": 6534 + }, + { + "epoch": 1.98, + "learning_rate": 0.00020045691327055793, + "loss": 0.1183, + "step": 6535 + }, + { + "epoch": 1.98, + "learning_rate": 0.0002004296866784053, + "loss": 0.2121, + "step": 6536 + }, + { + "epoch": 1.98, + "learning_rate": 0.0002004024582129376, + "loss": 0.0691, + "step": 6537 + }, + { + "epoch": 1.99, + "learning_rate": 0.00020037522787516644, + "loss": 0.2221, + "step": 6538 + }, + { + "epoch": 1.99, + "learning_rate": 0.00020034799566610322, + "loss": 0.1816, + "step": 6539 + }, + { + "epoch": 1.99, + "learning_rate": 0.00020032076158675962, + "loss": 0.2292, + "step": 6540 + }, + { + "epoch": 1.99, + "learning_rate": 0.00020029352563814736, + "loss": 0.1559, + "step": 6541 + }, + { + "epoch": 1.99, + "learning_rate": 0.00020026628782127807, + "loss": 0.1661, + "step": 6542 + }, + { + "epoch": 1.99, + "learning_rate": 0.00020023904813716359, + "loss": 0.1828, + "step": 6543 + }, + { + "epoch": 1.99, + "learning_rate": 0.00020021180658681578, + "loss": 0.2098, + "step": 6544 + }, + { + "epoch": 1.99, + "learning_rate": 0.00020018456317124662, + "loss": 0.1811, + "step": 6545 + }, + { + "epoch": 1.99, + "learning_rate": 0.00020015731789146813, + "loss": 0.2079, + "step": 6546 + }, + { + "epoch": 1.99, + "learning_rate": 0.00020013007074849232, + "loss": 0.2222, + "step": 6547 + }, + { + "epoch": 1.99, + "learning_rate": 0.00020010282174333136, + "loss": 0.0585, + "step": 6548 + }, + { + "epoch": 1.99, + "learning_rate": 0.00020007557087699753, + "loss": 0.0898, + "step": 6549 + }, + { + "epoch": 1.99, + "learning_rate": 0.0002000483181505031, + "loss": 0.1046, + "step": 6550 + }, + { + "epoch": 1.99, + "learning_rate": 0.00020002106356486045, + "loss": 0.1029, + "step": 6551 + }, + { + "epoch": 1.99, + "learning_rate": 0.00019999380712108192, + "loss": 0.2901, + "step": 6552 + }, + { + "epoch": 1.99, + "learning_rate": 0.00019996654882018007, + "loss": 0.144, + "step": 6553 + }, + { + "epoch": 1.99, + "learning_rate": 0.00019993928866316748, + "loss": 0.206, + "step": 6554 + }, + { + "epoch": 1.99, + "learning_rate": 0.00019991202665105672, + "loss": 0.0751, + "step": 6555 + }, + { + "epoch": 1.99, + "learning_rate": 0.00019988476278486058, + "loss": 0.0762, + "step": 6556 + }, + { + "epoch": 1.99, + "learning_rate": 0.00019985749706559177, + "loss": 0.1997, + "step": 6557 + }, + { + "epoch": 1.99, + "learning_rate": 0.00019983022949426313, + "loss": 0.2234, + "step": 6558 + }, + { + "epoch": 1.99, + "learning_rate": 0.00019980296007188764, + "loss": 0.0588, + "step": 6559 + }, + { + "epoch": 1.99, + "learning_rate": 0.0001997756887994782, + "loss": 0.1718, + "step": 6560 + }, + { + "epoch": 1.99, + "learning_rate": 0.00019974841567804786, + "loss": 0.0743, + "step": 6561 + }, + { + "epoch": 1.99, + "learning_rate": 0.0001997211407086098, + "loss": 0.1517, + "step": 6562 + }, + { + "epoch": 1.99, + "learning_rate": 0.00019969386389217714, + "loss": 0.1824, + "step": 6563 + }, + { + "epoch": 1.99, + "learning_rate": 0.0001996665852297632, + "loss": 0.0874, + "step": 6564 + }, + { + "epoch": 1.99, + "learning_rate": 0.00019963930472238124, + "loss": 0.1585, + "step": 6565 + }, + { + "epoch": 1.99, + "learning_rate": 0.00019961202237104469, + "loss": 0.2098, + "step": 6566 + }, + { + "epoch": 1.99, + "learning_rate": 0.000199584738176767, + "loss": 0.1497, + "step": 6567 + }, + { + "epoch": 1.99, + "learning_rate": 0.00019955745214056162, + "loss": 0.0977, + "step": 6568 + }, + { + "epoch": 1.99, + "learning_rate": 0.0001995301642634423, + "loss": 0.2779, + "step": 6569 + }, + { + "epoch": 1.99, + "learning_rate": 0.00019950287454642261, + "loss": 0.1236, + "step": 6570 + }, + { + "epoch": 2.0, + "learning_rate": 0.00019947558299051628, + "loss": 0.2935, + "step": 6571 + }, + { + "epoch": 2.0, + "learning_rate": 0.0001994482895967371, + "loss": 0.1249, + "step": 6572 + }, + { + "epoch": 2.0, + "learning_rate": 0.00019942099436609902, + "loss": 0.2706, + "step": 6573 + }, + { + "epoch": 2.0, + "learning_rate": 0.00019939369729961594, + "loss": 0.1159, + "step": 6574 + }, + { + "epoch": 2.0, + "learning_rate": 0.0001993663983983018, + "loss": 0.16, + "step": 6575 + }, + { + "epoch": 2.0, + "learning_rate": 0.00019933909766317075, + "loss": 0.215, + "step": 6576 + }, + { + "epoch": 2.0, + "learning_rate": 0.0001993117950952369, + "loss": 0.1649, + "step": 6577 + }, + { + "epoch": 2.0, + "learning_rate": 0.00019928449069551445, + "loss": 0.0951, + "step": 6578 + }, + { + "epoch": 2.0, + "learning_rate": 0.00019925718446501777, + "loss": 0.1392, + "step": 6579 + }, + { + "epoch": 2.0, + "learning_rate": 0.00019922987640476106, + "loss": 0.1139, + "step": 6580 + }, + { + "epoch": 2.0, + "learning_rate": 0.0001992025665157588, + "loss": 0.1607, + "step": 6581 + }, + { + "epoch": 2.0, + "learning_rate": 0.0001991752547990255, + "loss": 0.21, + "step": 6582 + }, + { + "epoch": 2.0, + "learning_rate": 0.0001991479412555757, + "loss": 0.192, + "step": 6583 + }, + { + "epoch": 2.0, + "learning_rate": 0.00019912062588642402, + "loss": 0.0858, + "step": 6584 + }, + { + "epoch": 2.0, + "learning_rate": 0.00019909330869258512, + "loss": 0.2061, + "step": 6585 + }, + { + "epoch": 2.0, + "learning_rate": 0.00019906598967507378, + "loss": 0.163, + "step": 6586 + }, + { + "epoch": 2.0, + "learning_rate": 0.0001990386688349048, + "loss": 0.1742, + "step": 6587 + }, + { + "epoch": 2.0, + "learning_rate": 0.00019901134617309304, + "loss": 0.152, + "step": 6588 + }, + { + "epoch": 2.0, + "learning_rate": 0.0001989840216906535, + "loss": 0.099, + "step": 6589 + }, + { + "epoch": 2.0, + "learning_rate": 0.00019895669538860122, + "loss": 0.1044, + "step": 6590 + }, + { + "epoch": 2.0, + "learning_rate": 0.00019892936726795127, + "loss": 0.081, + "step": 6591 + }, + { + "epoch": 2.0, + "learning_rate": 0.00019890203732971887, + "loss": 0.155, + "step": 6592 + }, + { + "epoch": 2.0, + "learning_rate": 0.00019887470557491908, + "loss": 0.1077, + "step": 6593 + }, + { + "epoch": 2.0, + "learning_rate": 0.00019884737200456738, + "loss": 0.1093, + "step": 6594 + }, + { + "epoch": 2.0, + "learning_rate": 0.00019882003661967902, + "loss": 0.0487, + "step": 6595 + }, + { + "epoch": 2.0, + "learning_rate": 0.00019879269942126945, + "loss": 0.1655, + "step": 6596 + }, + { + "epoch": 2.0, + "learning_rate": 0.0001987653604103542, + "loss": 0.0898, + "step": 6597 + }, + { + "epoch": 2.0, + "learning_rate": 0.00019873801958794878, + "loss": 0.1563, + "step": 6598 + }, + { + "epoch": 2.0, + "learning_rate": 0.00019871067695506896, + "loss": 0.0838, + "step": 6599 + }, + { + "epoch": 2.0, + "learning_rate": 0.00019868333251273022, + "loss": 0.0995, + "step": 6600 + }, + { + "epoch": 2.0, + "learning_rate": 0.00019865598626194847, + "loss": 0.0735, + "step": 6601 + }, + { + "epoch": 2.0, + "learning_rate": 0.00019862863820373954, + "loss": 0.0616, + "step": 6602 + }, + { + "epoch": 2.0, + "learning_rate": 0.00019860128833911928, + "loss": 0.1407, + "step": 6603 + }, + { + "epoch": 2.01, + "learning_rate": 0.0001985739366691037, + "loss": 0.1303, + "step": 6604 + }, + { + "epoch": 2.01, + "learning_rate": 0.0001985465831947088, + "loss": 0.0746, + "step": 6605 + }, + { + "epoch": 2.01, + "learning_rate": 0.00019851922791695067, + "loss": 0.0898, + "step": 6606 + }, + { + "epoch": 2.01, + "learning_rate": 0.00019849187083684555, + "loss": 0.0989, + "step": 6607 + }, + { + "epoch": 2.01, + "learning_rate": 0.0001984645119554096, + "loss": 0.0604, + "step": 6608 + }, + { + "epoch": 2.01, + "learning_rate": 0.00019843715127365916, + "loss": 0.0652, + "step": 6609 + }, + { + "epoch": 2.01, + "learning_rate": 0.0001984097887926106, + "loss": 0.107, + "step": 6610 + }, + { + "epoch": 2.01, + "learning_rate": 0.00019838242451328036, + "loss": 0.1094, + "step": 6611 + }, + { + "epoch": 2.01, + "learning_rate": 0.0001983550584366849, + "loss": 0.0517, + "step": 6612 + }, + { + "epoch": 2.01, + "learning_rate": 0.00019832769056384082, + "loss": 0.1035, + "step": 6613 + }, + { + "epoch": 2.01, + "learning_rate": 0.00019830032089576476, + "loss": 0.0379, + "step": 6614 + }, + { + "epoch": 2.01, + "learning_rate": 0.00019827294943347345, + "loss": 0.1229, + "step": 6615 + }, + { + "epoch": 2.01, + "learning_rate": 0.00019824557617798358, + "loss": 0.1159, + "step": 6616 + }, + { + "epoch": 2.01, + "learning_rate": 0.0001982182011303121, + "loss": 0.0655, + "step": 6617 + }, + { + "epoch": 2.01, + "learning_rate": 0.00019819082429147579, + "loss": 0.0521, + "step": 6618 + }, + { + "epoch": 2.01, + "learning_rate": 0.00019816344566249167, + "loss": 0.1184, + "step": 6619 + }, + { + "epoch": 2.01, + "learning_rate": 0.0001981360652443768, + "loss": 0.0613, + "step": 6620 + }, + { + "epoch": 2.01, + "learning_rate": 0.00019810868303814824, + "loss": 0.129, + "step": 6621 + }, + { + "epoch": 2.01, + "learning_rate": 0.00019808129904482324, + "loss": 0.0893, + "step": 6622 + }, + { + "epoch": 2.01, + "learning_rate": 0.00019805391326541892, + "loss": 0.088, + "step": 6623 + }, + { + "epoch": 2.01, + "learning_rate": 0.00019802652570095268, + "loss": 0.0892, + "step": 6624 + }, + { + "epoch": 2.01, + "learning_rate": 0.00019799913635244182, + "loss": 0.0649, + "step": 6625 + }, + { + "epoch": 2.01, + "learning_rate": 0.00019797174522090383, + "loss": 0.0533, + "step": 6626 + }, + { + "epoch": 2.01, + "learning_rate": 0.00019794435230735617, + "loss": 0.064, + "step": 6627 + }, + { + "epoch": 2.01, + "learning_rate": 0.00019791695761281638, + "loss": 0.058, + "step": 6628 + }, + { + "epoch": 2.01, + "learning_rate": 0.0001978895611383022, + "loss": 0.1334, + "step": 6629 + }, + { + "epoch": 2.01, + "learning_rate": 0.00019786216288483118, + "loss": 0.1659, + "step": 6630 + }, + { + "epoch": 2.01, + "learning_rate": 0.0001978347628534212, + "loss": 0.055, + "step": 6631 + }, + { + "epoch": 2.01, + "learning_rate": 0.00019780736104509008, + "loss": 0.1252, + "step": 6632 + }, + { + "epoch": 2.01, + "learning_rate": 0.00019777995746085564, + "loss": 0.076, + "step": 6633 + }, + { + "epoch": 2.01, + "learning_rate": 0.00019775255210173592, + "loss": 0.1098, + "step": 6634 + }, + { + "epoch": 2.01, + "learning_rate": 0.00019772514496874897, + "loss": 0.0832, + "step": 6635 + }, + { + "epoch": 2.01, + "learning_rate": 0.00019769773606291276, + "loss": 0.0476, + "step": 6636 + }, + { + "epoch": 2.02, + "learning_rate": 0.00019767032538524555, + "loss": 0.1565, + "step": 6637 + }, + { + "epoch": 2.02, + "learning_rate": 0.00019764291293676553, + "loss": 0.0933, + "step": 6638 + }, + { + "epoch": 2.02, + "learning_rate": 0.00019761549871849104, + "loss": 0.2262, + "step": 6639 + }, + { + "epoch": 2.02, + "learning_rate": 0.0001975880827314404, + "loss": 0.0698, + "step": 6640 + }, + { + "epoch": 2.02, + "learning_rate": 0.00019756066497663203, + "loss": 0.1167, + "step": 6641 + }, + { + "epoch": 2.02, + "learning_rate": 0.00019753324545508443, + "loss": 0.095, + "step": 6642 + }, + { + "epoch": 2.02, + "learning_rate": 0.00019750582416781616, + "loss": 0.0433, + "step": 6643 + }, + { + "epoch": 2.02, + "learning_rate": 0.00019747840111584582, + "loss": 0.11, + "step": 6644 + }, + { + "epoch": 2.02, + "learning_rate": 0.00019745097630019211, + "loss": 0.1337, + "step": 6645 + }, + { + "epoch": 2.02, + "learning_rate": 0.00019742354972187377, + "loss": 0.1275, + "step": 6646 + }, + { + "epoch": 2.02, + "learning_rate": 0.0001973961213819096, + "loss": 0.1213, + "step": 6647 + }, + { + "epoch": 2.02, + "learning_rate": 0.00019736869128131858, + "loss": 0.0768, + "step": 6648 + }, + { + "epoch": 2.02, + "learning_rate": 0.00019734125942111953, + "loss": 0.0968, + "step": 6649 + }, + { + "epoch": 2.02, + "learning_rate": 0.00019731382580233156, + "loss": 0.1606, + "step": 6650 + }, + { + "epoch": 2.02, + "learning_rate": 0.0001972863904259737, + "loss": 0.0982, + "step": 6651 + }, + { + "epoch": 2.02, + "learning_rate": 0.00019725895329306505, + "loss": 0.1728, + "step": 6652 + }, + { + "epoch": 2.02, + "learning_rate": 0.0001972315144046249, + "loss": 0.1964, + "step": 6653 + }, + { + "epoch": 2.02, + "learning_rate": 0.0001972040737616725, + "loss": 0.0194, + "step": 6654 + }, + { + "epoch": 2.02, + "learning_rate": 0.00019717663136522721, + "loss": 0.0423, + "step": 6655 + }, + { + "epoch": 2.02, + "learning_rate": 0.00019714918721630838, + "loss": 0.0852, + "step": 6656 + }, + { + "epoch": 2.02, + "learning_rate": 0.0001971217413159355, + "loss": 0.0712, + "step": 6657 + }, + { + "epoch": 2.02, + "learning_rate": 0.00019709429366512817, + "loss": 0.0652, + "step": 6658 + }, + { + "epoch": 2.02, + "learning_rate": 0.0001970668442649059, + "loss": 0.192, + "step": 6659 + }, + { + "epoch": 2.02, + "learning_rate": 0.0001970393931162884, + "loss": 0.0592, + "step": 6660 + }, + { + "epoch": 2.02, + "learning_rate": 0.00019701194022029537, + "loss": 0.0797, + "step": 6661 + }, + { + "epoch": 2.02, + "learning_rate": 0.00019698448557794654, + "loss": 0.0986, + "step": 6662 + }, + { + "epoch": 2.02, + "learning_rate": 0.00019695702919026196, + "loss": 0.0781, + "step": 6663 + }, + { + "epoch": 2.02, + "learning_rate": 0.00019692957105826143, + "loss": 0.132, + "step": 6664 + }, + { + "epoch": 2.02, + "learning_rate": 0.00019690211118296495, + "loss": 0.1275, + "step": 6665 + }, + { + "epoch": 2.02, + "learning_rate": 0.0001968746495653926, + "loss": 0.0971, + "step": 6666 + }, + { + "epoch": 2.02, + "learning_rate": 0.0001968471862065644, + "loss": 0.128, + "step": 6667 + }, + { + "epoch": 2.02, + "learning_rate": 0.00019681972110750068, + "loss": 0.1529, + "step": 6668 + }, + { + "epoch": 2.02, + "learning_rate": 0.00019679225426922159, + "loss": 0.1286, + "step": 6669 + }, + { + "epoch": 2.03, + "learning_rate": 0.00019676478569274748, + "loss": 0.0492, + "step": 6670 + }, + { + "epoch": 2.03, + "learning_rate": 0.0001967373153790987, + "loss": 0.0985, + "step": 6671 + }, + { + "epoch": 2.03, + "learning_rate": 0.00019670984332929574, + "loss": 0.1038, + "step": 6672 + }, + { + "epoch": 2.03, + "learning_rate": 0.00019668236954435906, + "loss": 0.1182, + "step": 6673 + }, + { + "epoch": 2.03, + "learning_rate": 0.00019665489402530926, + "loss": 0.1697, + "step": 6674 + }, + { + "epoch": 2.03, + "learning_rate": 0.00019662741677316692, + "loss": 0.1379, + "step": 6675 + }, + { + "epoch": 2.03, + "learning_rate": 0.00019659993778895288, + "loss": 0.0494, + "step": 6676 + }, + { + "epoch": 2.03, + "learning_rate": 0.00019657245707368768, + "loss": 0.1192, + "step": 6677 + }, + { + "epoch": 2.03, + "learning_rate": 0.00019654497462839234, + "loss": 0.0522, + "step": 6678 + }, + { + "epoch": 2.03, + "learning_rate": 0.00019651749045408764, + "loss": 0.1998, + "step": 6679 + }, + { + "epoch": 2.03, + "learning_rate": 0.00019649000455179463, + "loss": 0.0286, + "step": 6680 + }, + { + "epoch": 2.03, + "learning_rate": 0.0001964625169225343, + "loss": 0.0266, + "step": 6681 + }, + { + "epoch": 2.03, + "learning_rate": 0.00019643502756732766, + "loss": 0.0291, + "step": 6682 + }, + { + "epoch": 2.03, + "learning_rate": 0.00019640753648719598, + "loss": 0.1002, + "step": 6683 + }, + { + "epoch": 2.03, + "learning_rate": 0.00019638004368316035, + "loss": 0.033, + "step": 6684 + }, + { + "epoch": 2.03, + "learning_rate": 0.0001963525491562421, + "loss": 0.1245, + "step": 6685 + }, + { + "epoch": 2.03, + "learning_rate": 0.00019632505290746258, + "loss": 0.0674, + "step": 6686 + }, + { + "epoch": 2.03, + "learning_rate": 0.00019629755493784322, + "loss": 0.131, + "step": 6687 + }, + { + "epoch": 2.03, + "learning_rate": 0.00019627005524840548, + "loss": 0.0997, + "step": 6688 + }, + { + "epoch": 2.03, + "learning_rate": 0.00019624255384017084, + "loss": 0.103, + "step": 6689 + }, + { + "epoch": 2.03, + "learning_rate": 0.00019621505071416092, + "loss": 0.1327, + "step": 6690 + }, + { + "epoch": 2.03, + "learning_rate": 0.0001961875458713974, + "loss": 0.1411, + "step": 6691 + }, + { + "epoch": 2.03, + "learning_rate": 0.000196160039312902, + "loss": 0.0855, + "step": 6692 + }, + { + "epoch": 2.03, + "learning_rate": 0.00019613253103969652, + "loss": 0.0136, + "step": 6693 + }, + { + "epoch": 2.03, + "learning_rate": 0.00019610502105280272, + "loss": 0.1577, + "step": 6694 + }, + { + "epoch": 2.03, + "learning_rate": 0.00019607750935324268, + "loss": 0.1101, + "step": 6695 + }, + { + "epoch": 2.03, + "learning_rate": 0.00019604999594203826, + "loss": 0.0929, + "step": 6696 + }, + { + "epoch": 2.03, + "learning_rate": 0.00019602248082021151, + "loss": 0.1223, + "step": 6697 + }, + { + "epoch": 2.03, + "learning_rate": 0.0001959949639887846, + "loss": 0.1024, + "step": 6698 + }, + { + "epoch": 2.03, + "learning_rate": 0.00019596744544877963, + "loss": 0.0891, + "step": 6699 + }, + { + "epoch": 2.03, + "learning_rate": 0.00019593992520121884, + "loss": 0.0462, + "step": 6700 + }, + { + "epoch": 2.03, + "learning_rate": 0.0001959124032471246, + "loss": 0.0461, + "step": 6701 + }, + { + "epoch": 2.03, + "learning_rate": 0.00019588487958751918, + "loss": 0.1957, + "step": 6702 + }, + { + "epoch": 2.04, + "learning_rate": 0.000195857354223425, + "loss": 0.1323, + "step": 6703 + }, + { + "epoch": 2.04, + "learning_rate": 0.00019582982715586465, + "loss": 0.0561, + "step": 6704 + }, + { + "epoch": 2.04, + "learning_rate": 0.00019580229838586056, + "loss": 0.1469, + "step": 6705 + }, + { + "epoch": 2.04, + "learning_rate": 0.00019577476791443548, + "loss": 0.0741, + "step": 6706 + }, + { + "epoch": 2.04, + "learning_rate": 0.00019574723574261195, + "loss": 0.1129, + "step": 6707 + }, + { + "epoch": 2.04, + "learning_rate": 0.00019571970187141276, + "loss": 0.1693, + "step": 6708 + }, + { + "epoch": 2.04, + "learning_rate": 0.00019569216630186073, + "loss": 0.0876, + "step": 6709 + }, + { + "epoch": 2.04, + "learning_rate": 0.00019566462903497874, + "loss": 0.1046, + "step": 6710 + }, + { + "epoch": 2.04, + "learning_rate": 0.00019563709007178962, + "loss": 0.0408, + "step": 6711 + }, + { + "epoch": 2.04, + "learning_rate": 0.00019560954941331648, + "loss": 0.1421, + "step": 6712 + }, + { + "epoch": 2.04, + "learning_rate": 0.00019558200706058231, + "loss": 0.0488, + "step": 6713 + }, + { + "epoch": 2.04, + "learning_rate": 0.00019555446301461031, + "loss": 0.1088, + "step": 6714 + }, + { + "epoch": 2.04, + "learning_rate": 0.00019552691727642356, + "loss": 0.1136, + "step": 6715 + }, + { + "epoch": 2.04, + "learning_rate": 0.00019549936984704534, + "loss": 0.0746, + "step": 6716 + }, + { + "epoch": 2.04, + "learning_rate": 0.00019547182072749894, + "loss": 0.1083, + "step": 6717 + }, + { + "epoch": 2.04, + "learning_rate": 0.00019544426991880773, + "loss": 0.118, + "step": 6718 + }, + { + "epoch": 2.04, + "learning_rate": 0.00019541671742199517, + "loss": 0.1519, + "step": 6719 + }, + { + "epoch": 2.04, + "learning_rate": 0.00019538916323808475, + "loss": 0.137, + "step": 6720 + }, + { + "epoch": 2.04, + "learning_rate": 0.00019536160736810003, + "loss": 0.0933, + "step": 6721 + }, + { + "epoch": 2.04, + "learning_rate": 0.0001953340498130646, + "loss": 0.1319, + "step": 6722 + }, + { + "epoch": 2.04, + "learning_rate": 0.00019530649057400215, + "loss": 0.1207, + "step": 6723 + }, + { + "epoch": 2.04, + "learning_rate": 0.00019527892965193647, + "loss": 0.0669, + "step": 6724 + }, + { + "epoch": 2.04, + "learning_rate": 0.0001952513670478913, + "loss": 0.0289, + "step": 6725 + }, + { + "epoch": 2.04, + "learning_rate": 0.00019522380276289057, + "loss": 0.0891, + "step": 6726 + }, + { + "epoch": 2.04, + "learning_rate": 0.00019519623679795816, + "loss": 0.0542, + "step": 6727 + }, + { + "epoch": 2.04, + "learning_rate": 0.00019516866915411811, + "loss": 0.1598, + "step": 6728 + }, + { + "epoch": 2.04, + "learning_rate": 0.00019514109983239443, + "loss": 0.0753, + "step": 6729 + }, + { + "epoch": 2.04, + "learning_rate": 0.0001951135288338113, + "loss": 0.1523, + "step": 6730 + }, + { + "epoch": 2.04, + "learning_rate": 0.00019508595615939288, + "loss": 0.0539, + "step": 6731 + }, + { + "epoch": 2.04, + "learning_rate": 0.00019505838181016336, + "loss": 0.1045, + "step": 6732 + }, + { + "epoch": 2.04, + "learning_rate": 0.00019503080578714708, + "loss": 0.1254, + "step": 6733 + }, + { + "epoch": 2.04, + "learning_rate": 0.00019500322809136848, + "loss": 0.1324, + "step": 6734 + }, + { + "epoch": 2.04, + "learning_rate": 0.00019497564872385183, + "loss": 0.1417, + "step": 6735 + }, + { + "epoch": 2.05, + "learning_rate": 0.0001949480676856218, + "loss": 0.0855, + "step": 6736 + }, + { + "epoch": 2.05, + "learning_rate": 0.00019492048497770284, + "loss": 0.0412, + "step": 6737 + }, + { + "epoch": 2.05, + "learning_rate": 0.0001948929006011196, + "loss": 0.0699, + "step": 6738 + }, + { + "epoch": 2.05, + "learning_rate": 0.00019486531455689674, + "loss": 0.1477, + "step": 6739 + }, + { + "epoch": 2.05, + "learning_rate": 0.00019483772684605904, + "loss": 0.0529, + "step": 6740 + }, + { + "epoch": 2.05, + "learning_rate": 0.00019481013746963123, + "loss": 0.1098, + "step": 6741 + }, + { + "epoch": 2.05, + "learning_rate": 0.00019478254642863824, + "loss": 0.0578, + "step": 6742 + }, + { + "epoch": 2.05, + "learning_rate": 0.00019475495372410498, + "loss": 0.0872, + "step": 6743 + }, + { + "epoch": 2.05, + "learning_rate": 0.0001947273593570564, + "loss": 0.0542, + "step": 6744 + }, + { + "epoch": 2.05, + "learning_rate": 0.00019469976332851758, + "loss": 0.1131, + "step": 6745 + }, + { + "epoch": 2.05, + "learning_rate": 0.00019467216563951364, + "loss": 0.1369, + "step": 6746 + }, + { + "epoch": 2.05, + "learning_rate": 0.00019464456629106978, + "loss": 0.083, + "step": 6747 + }, + { + "epoch": 2.05, + "learning_rate": 0.0001946169652842112, + "loss": 0.0044, + "step": 6748 + }, + { + "epoch": 2.05, + "learning_rate": 0.0001945893626199632, + "loss": 0.1087, + "step": 6749 + }, + { + "epoch": 2.05, + "learning_rate": 0.00019456175829935112, + "loss": 0.0891, + "step": 6750 + }, + { + "epoch": 2.05, + "learning_rate": 0.00019453415232340037, + "loss": 0.1108, + "step": 6751 + }, + { + "epoch": 2.05, + "learning_rate": 0.00019450654469313649, + "loss": 0.1154, + "step": 6752 + }, + { + "epoch": 2.05, + "learning_rate": 0.00019447893540958496, + "loss": 0.1962, + "step": 6753 + }, + { + "epoch": 2.05, + "learning_rate": 0.00019445132447377145, + "loss": 0.1238, + "step": 6754 + }, + { + "epoch": 2.05, + "learning_rate": 0.00019442371188672155, + "loss": 0.0531, + "step": 6755 + }, + { + "epoch": 2.05, + "learning_rate": 0.00019439609764946105, + "loss": 0.0556, + "step": 6756 + }, + { + "epoch": 2.05, + "learning_rate": 0.00019436848176301573, + "loss": 0.1588, + "step": 6757 + }, + { + "epoch": 2.05, + "learning_rate": 0.0001943408642284114, + "loss": 0.0644, + "step": 6758 + }, + { + "epoch": 2.05, + "learning_rate": 0.00019431324504667397, + "loss": 0.125, + "step": 6759 + }, + { + "epoch": 2.05, + "learning_rate": 0.00019428562421882942, + "loss": 0.1123, + "step": 6760 + }, + { + "epoch": 2.05, + "learning_rate": 0.00019425800174590385, + "loss": 0.1773, + "step": 6761 + }, + { + "epoch": 2.05, + "learning_rate": 0.00019423037762892332, + "loss": 0.1005, + "step": 6762 + }, + { + "epoch": 2.05, + "learning_rate": 0.00019420275186891393, + "loss": 0.0391, + "step": 6763 + }, + { + "epoch": 2.05, + "learning_rate": 0.0001941751244669019, + "loss": 0.1008, + "step": 6764 + }, + { + "epoch": 2.05, + "learning_rate": 0.0001941474954239136, + "loss": 0.0527, + "step": 6765 + }, + { + "epoch": 2.05, + "learning_rate": 0.00019411986474097523, + "loss": 0.0628, + "step": 6766 + }, + { + "epoch": 2.05, + "learning_rate": 0.00019409223241911332, + "loss": 0.1305, + "step": 6767 + }, + { + "epoch": 2.05, + "learning_rate": 0.00019406459845935426, + "loss": 0.1731, + "step": 6768 + }, + { + "epoch": 2.06, + "learning_rate": 0.00019403696286272458, + "loss": 0.0847, + "step": 6769 + }, + { + "epoch": 2.06, + "learning_rate": 0.00019400932563025088, + "loss": 0.1286, + "step": 6770 + }, + { + "epoch": 2.06, + "learning_rate": 0.0001939816867629598, + "loss": 0.1458, + "step": 6771 + }, + { + "epoch": 2.06, + "learning_rate": 0.00019395404626187802, + "loss": 0.028, + "step": 6772 + }, + { + "epoch": 2.06, + "learning_rate": 0.0001939264041280323, + "loss": 0.0413, + "step": 6773 + }, + { + "epoch": 2.06, + "learning_rate": 0.0001938987603624495, + "loss": 0.2061, + "step": 6774 + }, + { + "epoch": 2.06, + "learning_rate": 0.00019387111496615652, + "loss": 0.0726, + "step": 6775 + }, + { + "epoch": 2.06, + "learning_rate": 0.0001938434679401802, + "loss": 0.1517, + "step": 6776 + }, + { + "epoch": 2.06, + "learning_rate": 0.00019381581928554765, + "loss": 0.1344, + "step": 6777 + }, + { + "epoch": 2.06, + "learning_rate": 0.0001937881690032859, + "loss": 0.1009, + "step": 6778 + }, + { + "epoch": 2.06, + "learning_rate": 0.00019376051709442204, + "loss": 0.0835, + "step": 6779 + }, + { + "epoch": 2.06, + "learning_rate": 0.00019373286355998338, + "loss": 0.0533, + "step": 6780 + }, + { + "epoch": 2.06, + "learning_rate": 0.00019370520840099702, + "loss": 0.1652, + "step": 6781 + }, + { + "epoch": 2.06, + "learning_rate": 0.00019367755161849037, + "loss": 0.1197, + "step": 6782 + }, + { + "epoch": 2.06, + "learning_rate": 0.00019364989321349075, + "loss": 0.122, + "step": 6783 + }, + { + "epoch": 2.06, + "learning_rate": 0.00019362223318702555, + "loss": 0.1332, + "step": 6784 + }, + { + "epoch": 2.06, + "learning_rate": 0.00019359457154012236, + "loss": 0.0863, + "step": 6785 + }, + { + "epoch": 2.06, + "learning_rate": 0.00019356690827380865, + "loss": 0.1003, + "step": 6786 + }, + { + "epoch": 2.06, + "learning_rate": 0.00019353924338911212, + "loss": 0.1503, + "step": 6787 + }, + { + "epoch": 2.06, + "learning_rate": 0.00019351157688706028, + "loss": 0.1316, + "step": 6788 + }, + { + "epoch": 2.06, + "learning_rate": 0.000193483908768681, + "loss": 0.0951, + "step": 6789 + }, + { + "epoch": 2.06, + "learning_rate": 0.00019345623903500205, + "loss": 0.0584, + "step": 6790 + }, + { + "epoch": 2.06, + "learning_rate": 0.00019342856768705122, + "loss": 0.1735, + "step": 6791 + }, + { + "epoch": 2.06, + "learning_rate": 0.00019340089472585646, + "loss": 0.0847, + "step": 6792 + }, + { + "epoch": 2.06, + "learning_rate": 0.00019337322015244572, + "loss": 0.1156, + "step": 6793 + }, + { + "epoch": 2.06, + "learning_rate": 0.00019334554396784707, + "loss": 0.0893, + "step": 6794 + }, + { + "epoch": 2.06, + "learning_rate": 0.00019331786617308858, + "loss": 0.077, + "step": 6795 + }, + { + "epoch": 2.06, + "learning_rate": 0.00019329018676919837, + "loss": 0.0571, + "step": 6796 + }, + { + "epoch": 2.06, + "learning_rate": 0.00019326250575720468, + "loss": 0.0493, + "step": 6797 + }, + { + "epoch": 2.06, + "learning_rate": 0.00019323482313813575, + "loss": 0.0843, + "step": 6798 + }, + { + "epoch": 2.06, + "learning_rate": 0.00019320713891301996, + "loss": 0.0991, + "step": 6799 + }, + { + "epoch": 2.06, + "learning_rate": 0.00019317945308288564, + "loss": 0.0941, + "step": 6800 + }, + { + "epoch": 2.06, + "learning_rate": 0.00019315176564876124, + "loss": 0.1398, + "step": 6801 + }, + { + "epoch": 2.07, + "learning_rate": 0.00019312407661167526, + "loss": 0.0865, + "step": 6802 + }, + { + "epoch": 2.07, + "learning_rate": 0.00019309638597265637, + "loss": 0.0612, + "step": 6803 + }, + { + "epoch": 2.07, + "learning_rate": 0.00019306869373273309, + "loss": 0.187, + "step": 6804 + }, + { + "epoch": 2.07, + "learning_rate": 0.00019304099989293417, + "loss": 0.1163, + "step": 6805 + }, + { + "epoch": 2.07, + "learning_rate": 0.00019301330445428827, + "loss": 0.097, + "step": 6806 + }, + { + "epoch": 2.07, + "learning_rate": 0.00019298560741782425, + "loss": 0.1636, + "step": 6807 + }, + { + "epoch": 2.07, + "learning_rate": 0.00019295790878457096, + "loss": 0.0572, + "step": 6808 + }, + { + "epoch": 2.07, + "learning_rate": 0.00019293020855555733, + "loss": 0.0586, + "step": 6809 + }, + { + "epoch": 2.07, + "learning_rate": 0.0001929025067318124, + "loss": 0.0597, + "step": 6810 + }, + { + "epoch": 2.07, + "learning_rate": 0.0001928748033143651, + "loss": 0.1451, + "step": 6811 + }, + { + "epoch": 2.07, + "learning_rate": 0.0001928470983042446, + "loss": 0.0451, + "step": 6812 + }, + { + "epoch": 2.07, + "learning_rate": 0.0001928193917024801, + "loss": 0.1297, + "step": 6813 + }, + { + "epoch": 2.07, + "learning_rate": 0.00019279168351010072, + "loss": 0.1347, + "step": 6814 + }, + { + "epoch": 2.07, + "learning_rate": 0.0001927639737281358, + "loss": 0.0791, + "step": 6815 + }, + { + "epoch": 2.07, + "learning_rate": 0.0001927362623576146, + "loss": 0.1127, + "step": 6816 + }, + { + "epoch": 2.07, + "learning_rate": 0.00019270854939956662, + "loss": 0.0865, + "step": 6817 + }, + { + "epoch": 2.07, + "learning_rate": 0.00019268083485502125, + "loss": 0.1423, + "step": 6818 + }, + { + "epoch": 2.07, + "learning_rate": 0.00019265311872500806, + "loss": 0.1259, + "step": 6819 + }, + { + "epoch": 2.07, + "learning_rate": 0.0001926254010105566, + "loss": 0.1809, + "step": 6820 + }, + { + "epoch": 2.07, + "learning_rate": 0.00019259768171269646, + "loss": 0.1344, + "step": 6821 + }, + { + "epoch": 2.07, + "learning_rate": 0.00019256996083245734, + "loss": 0.0587, + "step": 6822 + }, + { + "epoch": 2.07, + "learning_rate": 0.00019254223837086908, + "loss": 0.0887, + "step": 6823 + }, + { + "epoch": 2.07, + "learning_rate": 0.00019251451432896133, + "loss": 0.0024, + "step": 6824 + }, + { + "epoch": 2.07, + "learning_rate": 0.0001924867887077641, + "loss": 0.0926, + "step": 6825 + }, + { + "epoch": 2.07, + "learning_rate": 0.00019245906150830729, + "loss": 0.1559, + "step": 6826 + }, + { + "epoch": 2.07, + "learning_rate": 0.00019243133273162078, + "loss": 0.0376, + "step": 6827 + }, + { + "epoch": 2.07, + "learning_rate": 0.00019240360237873476, + "loss": 0.074, + "step": 6828 + }, + { + "epoch": 2.07, + "learning_rate": 0.00019237587045067923, + "loss": 0.0881, + "step": 6829 + }, + { + "epoch": 2.07, + "learning_rate": 0.00019234813694848433, + "loss": 0.0918, + "step": 6830 + }, + { + "epoch": 2.07, + "learning_rate": 0.00019232040187318038, + "loss": 0.0528, + "step": 6831 + }, + { + "epoch": 2.07, + "learning_rate": 0.0001922926652257976, + "loss": 0.0527, + "step": 6832 + }, + { + "epoch": 2.07, + "learning_rate": 0.0001922649270073663, + "loss": 0.1008, + "step": 6833 + }, + { + "epoch": 2.07, + "learning_rate": 0.00019223718721891693, + "loss": 0.1022, + "step": 6834 + }, + { + "epoch": 2.08, + "learning_rate": 0.0001922094458614799, + "loss": 0.1322, + "step": 6835 + }, + { + "epoch": 2.08, + "learning_rate": 0.00019218170293608579, + "loss": 0.0727, + "step": 6836 + }, + { + "epoch": 2.08, + "learning_rate": 0.00019215395844376506, + "loss": 0.0393, + "step": 6837 + }, + { + "epoch": 2.08, + "learning_rate": 0.0001921262123855484, + "loss": 0.1284, + "step": 6838 + }, + { + "epoch": 2.08, + "learning_rate": 0.00019209846476246647, + "loss": 0.0934, + "step": 6839 + }, + { + "epoch": 2.08, + "learning_rate": 0.00019207071557555004, + "loss": 0.0513, + "step": 6840 + }, + { + "epoch": 2.08, + "learning_rate": 0.00019204296482582987, + "loss": 0.0903, + "step": 6841 + }, + { + "epoch": 2.08, + "learning_rate": 0.00019201521251433685, + "loss": 0.1626, + "step": 6842 + }, + { + "epoch": 2.08, + "learning_rate": 0.00019198745864210196, + "loss": 0.131, + "step": 6843 + }, + { + "epoch": 2.08, + "learning_rate": 0.00019195970321015605, + "loss": 0.0374, + "step": 6844 + }, + { + "epoch": 2.08, + "learning_rate": 0.00019193194621953022, + "loss": 0.0494, + "step": 6845 + }, + { + "epoch": 2.08, + "learning_rate": 0.00019190418767125558, + "loss": 0.0856, + "step": 6846 + }, + { + "epoch": 2.08, + "learning_rate": 0.00019187642756636323, + "loss": 0.1011, + "step": 6847 + }, + { + "epoch": 2.08, + "learning_rate": 0.00019184866590588439, + "loss": 0.0963, + "step": 6848 + }, + { + "epoch": 2.08, + "learning_rate": 0.00019182090269085034, + "loss": 0.1409, + "step": 6849 + }, + { + "epoch": 2.08, + "learning_rate": 0.00019179313792229235, + "loss": 0.1167, + "step": 6850 + }, + { + "epoch": 2.08, + "learning_rate": 0.00019176537160124193, + "loss": 0.0727, + "step": 6851 + }, + { + "epoch": 2.08, + "learning_rate": 0.00019173760372873038, + "loss": 0.09, + "step": 6852 + }, + { + "epoch": 2.08, + "learning_rate": 0.00019170983430578928, + "loss": 0.1231, + "step": 6853 + }, + { + "epoch": 2.08, + "learning_rate": 0.0001916820633334501, + "loss": 0.1608, + "step": 6854 + }, + { + "epoch": 2.08, + "learning_rate": 0.0001916542908127445, + "loss": 0.0677, + "step": 6855 + }, + { + "epoch": 2.08, + "learning_rate": 0.0001916265167447042, + "loss": 0.1052, + "step": 6856 + }, + { + "epoch": 2.08, + "learning_rate": 0.0001915987411303608, + "loss": 0.105, + "step": 6857 + }, + { + "epoch": 2.08, + "learning_rate": 0.0001915709639707462, + "loss": 0.1099, + "step": 6858 + }, + { + "epoch": 2.08, + "learning_rate": 0.00019154318526689218, + "loss": 0.0617, + "step": 6859 + }, + { + "epoch": 2.08, + "learning_rate": 0.00019151540501983065, + "loss": 0.092, + "step": 6860 + }, + { + "epoch": 2.08, + "learning_rate": 0.0001914876232305936, + "loss": 0.0868, + "step": 6861 + }, + { + "epoch": 2.08, + "learning_rate": 0.000191459839900213, + "loss": 0.0767, + "step": 6862 + }, + { + "epoch": 2.08, + "learning_rate": 0.00019143205502972089, + "loss": 0.1354, + "step": 6863 + }, + { + "epoch": 2.08, + "learning_rate": 0.00019140426862014948, + "loss": 0.0708, + "step": 6864 + }, + { + "epoch": 2.08, + "learning_rate": 0.00019137648067253085, + "loss": 0.0557, + "step": 6865 + }, + { + "epoch": 2.08, + "learning_rate": 0.00019134869118789732, + "loss": 0.1484, + "step": 6866 + }, + { + "epoch": 2.09, + "learning_rate": 0.0001913209001672812, + "loss": 0.0435, + "step": 6867 + }, + { + "epoch": 2.09, + "learning_rate": 0.00019129310761171475, + "loss": 0.1121, + "step": 6868 + }, + { + "epoch": 2.09, + "learning_rate": 0.0001912653135222305, + "loss": 0.1448, + "step": 6869 + }, + { + "epoch": 2.09, + "learning_rate": 0.00019123751789986085, + "loss": 0.0728, + "step": 6870 + }, + { + "epoch": 2.09, + "learning_rate": 0.00019120972074563835, + "loss": 0.0566, + "step": 6871 + }, + { + "epoch": 2.09, + "learning_rate": 0.00019118192206059553, + "loss": 0.042, + "step": 6872 + }, + { + "epoch": 2.09, + "learning_rate": 0.00019115412184576508, + "loss": 0.1299, + "step": 6873 + }, + { + "epoch": 2.09, + "learning_rate": 0.00019112632010217972, + "loss": 0.0704, + "step": 6874 + }, + { + "epoch": 2.09, + "learning_rate": 0.00019109851683087213, + "loss": 0.1165, + "step": 6875 + }, + { + "epoch": 2.09, + "learning_rate": 0.0001910707120328752, + "loss": 0.1006, + "step": 6876 + }, + { + "epoch": 2.09, + "learning_rate": 0.00019104290570922172, + "loss": 0.0727, + "step": 6877 + }, + { + "epoch": 2.09, + "learning_rate": 0.0001910150978609447, + "loss": 0.1507, + "step": 6878 + }, + { + "epoch": 2.09, + "learning_rate": 0.00019098728848907705, + "loss": 0.0571, + "step": 6879 + }, + { + "epoch": 2.09, + "learning_rate": 0.0001909594775946518, + "loss": 0.0951, + "step": 6880 + }, + { + "epoch": 2.09, + "learning_rate": 0.0001909316651787021, + "loss": 0.0538, + "step": 6881 + }, + { + "epoch": 2.09, + "learning_rate": 0.000190903851242261, + "loss": 0.0789, + "step": 6882 + }, + { + "epoch": 2.09, + "learning_rate": 0.00019087603578636184, + "loss": 0.0806, + "step": 6883 + }, + { + "epoch": 2.09, + "learning_rate": 0.00019084821881203782, + "loss": 0.1727, + "step": 6884 + }, + { + "epoch": 2.09, + "learning_rate": 0.00019082040032032224, + "loss": 0.1649, + "step": 6885 + }, + { + "epoch": 2.09, + "learning_rate": 0.00019079258031224852, + "loss": 0.1255, + "step": 6886 + }, + { + "epoch": 2.09, + "learning_rate": 0.00019076475878885004, + "loss": 0.0931, + "step": 6887 + }, + { + "epoch": 2.09, + "learning_rate": 0.00019073693575116029, + "loss": 0.0924, + "step": 6888 + }, + { + "epoch": 2.09, + "learning_rate": 0.00019070911120021292, + "loss": 0.1795, + "step": 6889 + }, + { + "epoch": 2.09, + "learning_rate": 0.00019068128513704134, + "loss": 0.1555, + "step": 6890 + }, + { + "epoch": 2.09, + "learning_rate": 0.00019065345756267933, + "loss": 0.0542, + "step": 6891 + }, + { + "epoch": 2.09, + "learning_rate": 0.00019062562847816064, + "loss": 0.1238, + "step": 6892 + }, + { + "epoch": 2.09, + "learning_rate": 0.00019059779788451898, + "loss": 0.0683, + "step": 6893 + }, + { + "epoch": 2.09, + "learning_rate": 0.00019056996578278819, + "loss": 0.0838, + "step": 6894 + }, + { + "epoch": 2.09, + "learning_rate": 0.00019054213217400214, + "loss": 0.0588, + "step": 6895 + }, + { + "epoch": 2.09, + "learning_rate": 0.00019051429705919473, + "loss": 0.0916, + "step": 6896 + }, + { + "epoch": 2.09, + "learning_rate": 0.00019048646043940003, + "loss": 0.0752, + "step": 6897 + }, + { + "epoch": 2.09, + "learning_rate": 0.000190458622315652, + "loss": 0.0883, + "step": 6898 + }, + { + "epoch": 2.09, + "learning_rate": 0.0001904307826889848, + "loss": 0.1338, + "step": 6899 + }, + { + "epoch": 2.1, + "learning_rate": 0.00019040294156043262, + "loss": 0.068, + "step": 6900 + }, + { + "epoch": 2.1, + "learning_rate": 0.00019037509893102963, + "loss": 0.1015, + "step": 6901 + }, + { + "epoch": 2.1, + "learning_rate": 0.00019034725480181016, + "loss": 0.0631, + "step": 6902 + }, + { + "epoch": 2.1, + "learning_rate": 0.00019031940917380843, + "loss": 0.1185, + "step": 6903 + }, + { + "epoch": 2.1, + "learning_rate": 0.0001902915620480589, + "loss": 0.1999, + "step": 6904 + }, + { + "epoch": 2.1, + "learning_rate": 0.000190263713425596, + "loss": 0.0741, + "step": 6905 + }, + { + "epoch": 2.1, + "learning_rate": 0.0001902358633074542, + "loss": 0.0719, + "step": 6906 + }, + { + "epoch": 2.1, + "learning_rate": 0.0001902080116946681, + "loss": 0.0839, + "step": 6907 + }, + { + "epoch": 2.1, + "learning_rate": 0.00019018015858827223, + "loss": 0.1287, + "step": 6908 + }, + { + "epoch": 2.1, + "learning_rate": 0.00019015230398930136, + "loss": 0.0932, + "step": 6909 + }, + { + "epoch": 2.1, + "learning_rate": 0.00019012444789879012, + "loss": 0.1029, + "step": 6910 + }, + { + "epoch": 2.1, + "learning_rate": 0.00019009659031777326, + "loss": 0.1087, + "step": 6911 + }, + { + "epoch": 2.1, + "learning_rate": 0.0001900687312472857, + "loss": 0.0988, + "step": 6912 + }, + { + "epoch": 2.1, + "learning_rate": 0.00019004087068836225, + "loss": 0.0862, + "step": 6913 + }, + { + "epoch": 2.1, + "learning_rate": 0.00019001300864203788, + "loss": 0.071, + "step": 6914 + }, + { + "epoch": 2.1, + "learning_rate": 0.00018998514510934754, + "loss": 0.1223, + "step": 6915 + }, + { + "epoch": 2.1, + "learning_rate": 0.00018995728009132636, + "loss": 0.1725, + "step": 6916 + }, + { + "epoch": 2.1, + "learning_rate": 0.00018992941358900943, + "loss": 0.054, + "step": 6917 + }, + { + "epoch": 2.1, + "learning_rate": 0.00018990154560343182, + "loss": 0.1442, + "step": 6918 + }, + { + "epoch": 2.1, + "learning_rate": 0.00018987367613562884, + "loss": 0.0969, + "step": 6919 + }, + { + "epoch": 2.1, + "learning_rate": 0.0001898458051866357, + "loss": 0.0815, + "step": 6920 + }, + { + "epoch": 2.1, + "learning_rate": 0.00018981793275748773, + "loss": 0.0764, + "step": 6921 + }, + { + "epoch": 2.1, + "learning_rate": 0.00018979005884922035, + "loss": 0.1018, + "step": 6922 + }, + { + "epoch": 2.1, + "learning_rate": 0.00018976218346286892, + "loss": 0.1868, + "step": 6923 + }, + { + "epoch": 2.1, + "learning_rate": 0.00018973430659946898, + "loss": 0.0777, + "step": 6924 + }, + { + "epoch": 2.1, + "learning_rate": 0.00018970642826005612, + "loss": 0.1001, + "step": 6925 + }, + { + "epoch": 2.1, + "learning_rate": 0.00018967854844566587, + "loss": 0.0625, + "step": 6926 + }, + { + "epoch": 2.1, + "learning_rate": 0.00018965066715733394, + "loss": 0.0851, + "step": 6927 + }, + { + "epoch": 2.1, + "learning_rate": 0.0001896227843960959, + "loss": 0.093, + "step": 6928 + }, + { + "epoch": 2.1, + "learning_rate": 0.0001895949001629877, + "loss": 0.0771, + "step": 6929 + }, + { + "epoch": 2.1, + "learning_rate": 0.00018956701445904504, + "loss": 0.1545, + "step": 6930 + }, + { + "epoch": 2.1, + "learning_rate": 0.00018953912728530381, + "loss": 0.052, + "step": 6931 + }, + { + "epoch": 2.1, + "learning_rate": 0.00018951123864279995, + "loss": 0.1566, + "step": 6932 + }, + { + "epoch": 2.11, + "learning_rate": 0.00018948334853256945, + "loss": 0.1371, + "step": 6933 + }, + { + "epoch": 2.11, + "learning_rate": 0.0001894554569556483, + "loss": 0.0961, + "step": 6934 + }, + { + "epoch": 2.11, + "learning_rate": 0.00018942756391307272, + "loss": 0.1009, + "step": 6935 + }, + { + "epoch": 2.11, + "learning_rate": 0.0001893996694058787, + "loss": 0.0742, + "step": 6936 + }, + { + "epoch": 2.11, + "learning_rate": 0.00018937177343510256, + "loss": 0.0839, + "step": 6937 + }, + { + "epoch": 2.11, + "learning_rate": 0.00018934387600178038, + "loss": 0.128, + "step": 6938 + }, + { + "epoch": 2.11, + "learning_rate": 0.00018931597710694864, + "loss": 0.1033, + "step": 6939 + }, + { + "epoch": 2.11, + "learning_rate": 0.00018928807675164364, + "loss": 0.1062, + "step": 6940 + }, + { + "epoch": 2.11, + "learning_rate": 0.0001892601749369018, + "loss": 0.0826, + "step": 6941 + }, + { + "epoch": 2.11, + "learning_rate": 0.00018923227166375964, + "loss": 0.1013, + "step": 6942 + }, + { + "epoch": 2.11, + "learning_rate": 0.00018920436693325358, + "loss": 0.1458, + "step": 6943 + }, + { + "epoch": 2.11, + "learning_rate": 0.00018917646074642026, + "loss": 0.0745, + "step": 6944 + }, + { + "epoch": 2.11, + "learning_rate": 0.00018914855310429632, + "loss": 0.1108, + "step": 6945 + }, + { + "epoch": 2.11, + "learning_rate": 0.0001891206440079184, + "loss": 0.1596, + "step": 6946 + }, + { + "epoch": 2.11, + "learning_rate": 0.0001890927334583233, + "loss": 0.0752, + "step": 6947 + }, + { + "epoch": 2.11, + "learning_rate": 0.0001890648214565478, + "loss": 0.1447, + "step": 6948 + }, + { + "epoch": 2.11, + "learning_rate": 0.0001890369080036287, + "loss": 0.0649, + "step": 6949 + }, + { + "epoch": 2.11, + "learning_rate": 0.00018900899310060298, + "loss": 0.088, + "step": 6950 + }, + { + "epoch": 2.11, + "learning_rate": 0.00018898107674850757, + "loss": 0.1152, + "step": 6951 + }, + { + "epoch": 2.11, + "learning_rate": 0.00018895315894837945, + "loss": 0.0679, + "step": 6952 + }, + { + "epoch": 2.11, + "learning_rate": 0.0001889252397012557, + "loss": 0.0713, + "step": 6953 + }, + { + "epoch": 2.11, + "learning_rate": 0.00018889731900817346, + "loss": 0.0633, + "step": 6954 + }, + { + "epoch": 2.11, + "learning_rate": 0.00018886939687016985, + "loss": 0.1086, + "step": 6955 + }, + { + "epoch": 2.11, + "learning_rate": 0.00018884147328828213, + "loss": 0.1631, + "step": 6956 + }, + { + "epoch": 2.11, + "learning_rate": 0.00018881354826354762, + "loss": 0.1477, + "step": 6957 + }, + { + "epoch": 2.11, + "learning_rate": 0.00018878562179700363, + "loss": 0.0524, + "step": 6958 + }, + { + "epoch": 2.11, + "learning_rate": 0.00018875769388968747, + "loss": 0.0758, + "step": 6959 + }, + { + "epoch": 2.11, + "learning_rate": 0.00018872976454263672, + "loss": 0.1016, + "step": 6960 + }, + { + "epoch": 2.11, + "learning_rate": 0.0001887018337568887, + "loss": 0.0699, + "step": 6961 + }, + { + "epoch": 2.11, + "learning_rate": 0.00018867390153348107, + "loss": 0.0852, + "step": 6962 + }, + { + "epoch": 2.11, + "learning_rate": 0.00018864596787345145, + "loss": 0.1186, + "step": 6963 + }, + { + "epoch": 2.11, + "learning_rate": 0.00018861803277783735, + "loss": 0.1142, + "step": 6964 + }, + { + "epoch": 2.11, + "learning_rate": 0.0001885900962476767, + "loss": 0.107, + "step": 6965 + }, + { + "epoch": 2.12, + "learning_rate": 0.00018856215828400705, + "loss": 0.042, + "step": 6966 + }, + { + "epoch": 2.12, + "learning_rate": 0.00018853421888786632, + "loss": 0.135, + "step": 6967 + }, + { + "epoch": 2.12, + "learning_rate": 0.00018850627806029244, + "loss": 0.0868, + "step": 6968 + }, + { + "epoch": 2.12, + "learning_rate": 0.00018847833580232318, + "loss": 0.0399, + "step": 6969 + }, + { + "epoch": 2.12, + "learning_rate": 0.00018845039211499661, + "loss": 0.0396, + "step": 6970 + }, + { + "epoch": 2.12, + "learning_rate": 0.0001884224469993507, + "loss": 0.155, + "step": 6971 + }, + { + "epoch": 2.12, + "learning_rate": 0.00018839450045642355, + "loss": 0.1082, + "step": 6972 + }, + { + "epoch": 2.12, + "learning_rate": 0.0001883665524872533, + "loss": 0.0772, + "step": 6973 + }, + { + "epoch": 2.12, + "learning_rate": 0.00018833860309287813, + "loss": 0.1644, + "step": 6974 + }, + { + "epoch": 2.12, + "learning_rate": 0.00018831065227433634, + "loss": 0.1268, + "step": 6975 + }, + { + "epoch": 2.12, + "learning_rate": 0.00018828270003266612, + "loss": 0.121, + "step": 6976 + }, + { + "epoch": 2.12, + "learning_rate": 0.00018825474636890584, + "loss": 0.012, + "step": 6977 + }, + { + "epoch": 2.12, + "learning_rate": 0.00018822679128409396, + "loss": 0.1896, + "step": 6978 + }, + { + "epoch": 2.12, + "learning_rate": 0.00018819883477926884, + "loss": 0.0605, + "step": 6979 + }, + { + "epoch": 2.12, + "learning_rate": 0.00018817087685546902, + "loss": 0.1233, + "step": 6980 + }, + { + "epoch": 2.12, + "learning_rate": 0.00018814291751373307, + "loss": 0.1312, + "step": 6981 + }, + { + "epoch": 2.12, + "learning_rate": 0.0001881149567550996, + "loss": 0.072, + "step": 6982 + }, + { + "epoch": 2.12, + "learning_rate": 0.00018808699458060729, + "loss": 0.0884, + "step": 6983 + }, + { + "epoch": 2.12, + "learning_rate": 0.00018805903099129479, + "loss": 0.0887, + "step": 6984 + }, + { + "epoch": 2.12, + "learning_rate": 0.00018803106598820087, + "loss": 0.1176, + "step": 6985 + }, + { + "epoch": 2.12, + "learning_rate": 0.00018800309957236441, + "loss": 0.1996, + "step": 6986 + }, + { + "epoch": 2.12, + "learning_rate": 0.00018797513174482423, + "loss": 0.0939, + "step": 6987 + }, + { + "epoch": 2.12, + "learning_rate": 0.00018794716250661924, + "loss": 0.0725, + "step": 6988 + }, + { + "epoch": 2.12, + "learning_rate": 0.00018791919185878845, + "loss": 0.0934, + "step": 6989 + }, + { + "epoch": 2.12, + "learning_rate": 0.00018789121980237092, + "loss": 0.0813, + "step": 6990 + }, + { + "epoch": 2.12, + "learning_rate": 0.00018786324633840567, + "loss": 0.101, + "step": 6991 + }, + { + "epoch": 2.12, + "learning_rate": 0.00018783527146793184, + "loss": 0.168, + "step": 6992 + }, + { + "epoch": 2.12, + "learning_rate": 0.00018780729519198865, + "loss": 0.0703, + "step": 6993 + }, + { + "epoch": 2.12, + "learning_rate": 0.0001877793175116153, + "loss": 0.06, + "step": 6994 + }, + { + "epoch": 2.12, + "learning_rate": 0.00018775133842785112, + "loss": 0.0935, + "step": 6995 + }, + { + "epoch": 2.12, + "learning_rate": 0.00018772335794173535, + "loss": 0.0978, + "step": 6996 + }, + { + "epoch": 2.12, + "learning_rate": 0.0001876953760543075, + "loss": 0.0725, + "step": 6997 + }, + { + "epoch": 2.12, + "learning_rate": 0.00018766739276660698, + "loss": 0.0955, + "step": 6998 + }, + { + "epoch": 2.13, + "learning_rate": 0.00018763940807967324, + "loss": 0.1384, + "step": 6999 + }, + { + "epoch": 2.13, + "learning_rate": 0.00018761142199454588, + "loss": 0.044, + "step": 7000 + }, + { + "epoch": 2.13, + "learning_rate": 0.00018758343451226453, + "loss": 0.1332, + "step": 7001 + }, + { + "epoch": 2.13, + "learning_rate": 0.00018755544563386878, + "loss": 0.0971, + "step": 7002 + }, + { + "epoch": 2.13, + "learning_rate": 0.00018752745536039834, + "loss": 0.0795, + "step": 7003 + }, + { + "epoch": 2.13, + "learning_rate": 0.000187499463692893, + "loss": 0.0444, + "step": 7004 + }, + { + "epoch": 2.13, + "learning_rate": 0.0001874714706323925, + "loss": 0.1008, + "step": 7005 + }, + { + "epoch": 2.13, + "learning_rate": 0.0001874434761799368, + "loss": 0.1279, + "step": 7006 + }, + { + "epoch": 2.13, + "learning_rate": 0.00018741548033656574, + "loss": 0.0717, + "step": 7007 + }, + { + "epoch": 2.13, + "learning_rate": 0.00018738748310331933, + "loss": 0.0627, + "step": 7008 + }, + { + "epoch": 2.13, + "learning_rate": 0.00018735948448123755, + "loss": 0.1358, + "step": 7009 + }, + { + "epoch": 2.13, + "learning_rate": 0.00018733148447136046, + "loss": 0.059, + "step": 7010 + }, + { + "epoch": 2.13, + "learning_rate": 0.00018730348307472824, + "loss": 0.1735, + "step": 7011 + }, + { + "epoch": 2.13, + "learning_rate": 0.00018727548029238097, + "loss": 0.1379, + "step": 7012 + }, + { + "epoch": 2.13, + "learning_rate": 0.00018724747612535892, + "loss": 0.0877, + "step": 7013 + }, + { + "epoch": 2.13, + "learning_rate": 0.00018721947057470236, + "loss": 0.1367, + "step": 7014 + }, + { + "epoch": 2.13, + "learning_rate": 0.00018719146364145158, + "loss": 0.0954, + "step": 7015 + }, + { + "epoch": 2.13, + "learning_rate": 0.00018716345532664708, + "loss": 0.1376, + "step": 7016 + }, + { + "epoch": 2.13, + "learning_rate": 0.00018713544563132912, + "loss": 0.0703, + "step": 7017 + }, + { + "epoch": 2.13, + "learning_rate": 0.0001871074345565383, + "loss": 0.1185, + "step": 7018 + }, + { + "epoch": 2.13, + "learning_rate": 0.0001870794221033151, + "loss": 0.0987, + "step": 7019 + }, + { + "epoch": 2.13, + "learning_rate": 0.00018705140827270008, + "loss": 0.1107, + "step": 7020 + }, + { + "epoch": 2.13, + "learning_rate": 0.00018702339306573388, + "loss": 0.1261, + "step": 7021 + }, + { + "epoch": 2.13, + "learning_rate": 0.00018699537648345722, + "loss": 0.0757, + "step": 7022 + }, + { + "epoch": 2.13, + "learning_rate": 0.00018696735852691078, + "loss": 0.1283, + "step": 7023 + }, + { + "epoch": 2.13, + "learning_rate": 0.00018693933919713545, + "loss": 0.0372, + "step": 7024 + }, + { + "epoch": 2.13, + "learning_rate": 0.0001869113184951719, + "loss": 0.101, + "step": 7025 + }, + { + "epoch": 2.13, + "learning_rate": 0.00018688329642206123, + "loss": 0.109, + "step": 7026 + }, + { + "epoch": 2.13, + "learning_rate": 0.00018685527297884416, + "loss": 0.0886, + "step": 7027 + }, + { + "epoch": 2.13, + "learning_rate": 0.0001868272481665618, + "loss": 0.0949, + "step": 7028 + }, + { + "epoch": 2.13, + "learning_rate": 0.00018679922198625517, + "loss": 0.0825, + "step": 7029 + }, + { + "epoch": 2.13, + "learning_rate": 0.00018677119443896533, + "loss": 0.044, + "step": 7030 + }, + { + "epoch": 2.13, + "learning_rate": 0.0001867431655257335, + "loss": 0.1213, + "step": 7031 + }, + { + "epoch": 2.14, + "learning_rate": 0.0001867151352476008, + "loss": 0.0999, + "step": 7032 + }, + { + "epoch": 2.14, + "learning_rate": 0.00018668710360560844, + "loss": 0.0813, + "step": 7033 + }, + { + "epoch": 2.14, + "learning_rate": 0.00018665907060079785, + "loss": 0.0902, + "step": 7034 + }, + { + "epoch": 2.14, + "learning_rate": 0.00018663103623421022, + "loss": 0.1231, + "step": 7035 + }, + { + "epoch": 2.14, + "learning_rate": 0.00018660300050688707, + "loss": 0.0439, + "step": 7036 + }, + { + "epoch": 2.14, + "learning_rate": 0.00018657496341986974, + "loss": 0.1168, + "step": 7037 + }, + { + "epoch": 2.14, + "learning_rate": 0.00018654692497419977, + "loss": 0.0961, + "step": 7038 + }, + { + "epoch": 2.14, + "learning_rate": 0.00018651888517091873, + "loss": 0.0769, + "step": 7039 + }, + { + "epoch": 2.14, + "learning_rate": 0.0001864908440110682, + "loss": 0.1399, + "step": 7040 + }, + { + "epoch": 2.14, + "learning_rate": 0.00018646280149568986, + "loss": 0.1154, + "step": 7041 + }, + { + "epoch": 2.14, + "learning_rate": 0.00018643475762582533, + "loss": 0.1107, + "step": 7042 + }, + { + "epoch": 2.14, + "learning_rate": 0.0001864067124025164, + "loss": 0.1108, + "step": 7043 + }, + { + "epoch": 2.14, + "learning_rate": 0.0001863786658268049, + "loss": 0.0503, + "step": 7044 + }, + { + "epoch": 2.14, + "learning_rate": 0.0001863506178997326, + "loss": 0.0908, + "step": 7045 + }, + { + "epoch": 2.14, + "learning_rate": 0.00018632256862234143, + "loss": 0.1009, + "step": 7046 + }, + { + "epoch": 2.14, + "learning_rate": 0.00018629451799567336, + "loss": 0.0668, + "step": 7047 + }, + { + "epoch": 2.14, + "learning_rate": 0.0001862664660207704, + "loss": 0.1189, + "step": 7048 + }, + { + "epoch": 2.14, + "learning_rate": 0.00018623841269867458, + "loss": 0.1478, + "step": 7049 + }, + { + "epoch": 2.14, + "learning_rate": 0.000186210358030428, + "loss": 0.0926, + "step": 7050 + }, + { + "epoch": 2.14, + "learning_rate": 0.00018618230201707277, + "loss": 0.1227, + "step": 7051 + }, + { + "epoch": 2.14, + "learning_rate": 0.00018615424465965116, + "loss": 0.1357, + "step": 7052 + }, + { + "epoch": 2.14, + "learning_rate": 0.00018612618595920536, + "loss": 0.1055, + "step": 7053 + }, + { + "epoch": 2.14, + "learning_rate": 0.0001860981259167777, + "loss": 0.1229, + "step": 7054 + }, + { + "epoch": 2.14, + "learning_rate": 0.00018607006453341053, + "loss": 0.1215, + "step": 7055 + }, + { + "epoch": 2.14, + "learning_rate": 0.0001860420018101462, + "loss": 0.0706, + "step": 7056 + }, + { + "epoch": 2.14, + "learning_rate": 0.00018601393774802727, + "loss": 0.0737, + "step": 7057 + }, + { + "epoch": 2.14, + "learning_rate": 0.00018598587234809608, + "loss": 0.1559, + "step": 7058 + }, + { + "epoch": 2.14, + "learning_rate": 0.00018595780561139532, + "loss": 0.0647, + "step": 7059 + }, + { + "epoch": 2.14, + "learning_rate": 0.0001859297375389675, + "loss": 0.0876, + "step": 7060 + }, + { + "epoch": 2.14, + "learning_rate": 0.00018590166813185527, + "loss": 0.1107, + "step": 7061 + }, + { + "epoch": 2.14, + "learning_rate": 0.00018587359739110138, + "loss": 0.0441, + "step": 7062 + }, + { + "epoch": 2.14, + "learning_rate": 0.00018584552531774852, + "loss": 0.0721, + "step": 7063 + }, + { + "epoch": 2.14, + "learning_rate": 0.00018581745191283957, + "loss": 0.0408, + "step": 7064 + }, + { + "epoch": 2.15, + "learning_rate": 0.00018578937717741726, + "loss": 0.1254, + "step": 7065 + }, + { + "epoch": 2.15, + "learning_rate": 0.00018576130111252456, + "loss": 0.14, + "step": 7066 + }, + { + "epoch": 2.15, + "learning_rate": 0.0001857332237192044, + "loss": 0.1456, + "step": 7067 + }, + { + "epoch": 2.15, + "learning_rate": 0.00018570514499849976, + "loss": 0.1317, + "step": 7068 + }, + { + "epoch": 2.15, + "learning_rate": 0.00018567706495145364, + "loss": 0.0682, + "step": 7069 + }, + { + "epoch": 2.15, + "learning_rate": 0.00018564898357910923, + "loss": 0.0575, + "step": 7070 + }, + { + "epoch": 2.15, + "learning_rate": 0.00018562090088250957, + "loss": 0.069, + "step": 7071 + }, + { + "epoch": 2.15, + "learning_rate": 0.000185592816862698, + "loss": 0.0594, + "step": 7072 + }, + { + "epoch": 2.15, + "learning_rate": 0.0001855647315207176, + "loss": 0.1347, + "step": 7073 + }, + { + "epoch": 2.15, + "learning_rate": 0.0001855366448576117, + "loss": 0.0844, + "step": 7074 + }, + { + "epoch": 2.15, + "learning_rate": 0.0001855085568744237, + "loss": 0.0368, + "step": 7075 + }, + { + "epoch": 2.15, + "learning_rate": 0.00018548046757219685, + "loss": 0.0163, + "step": 7076 + }, + { + "epoch": 2.15, + "learning_rate": 0.0001854523769519748, + "loss": 0.1097, + "step": 7077 + }, + { + "epoch": 2.15, + "learning_rate": 0.0001854242850148008, + "loss": 0.0647, + "step": 7078 + }, + { + "epoch": 2.15, + "learning_rate": 0.00018539619176171854, + "loss": 0.1263, + "step": 7079 + }, + { + "epoch": 2.15, + "learning_rate": 0.00018536809719377158, + "loss": 0.1007, + "step": 7080 + }, + { + "epoch": 2.15, + "learning_rate": 0.00018534000131200347, + "loss": 0.063, + "step": 7081 + }, + { + "epoch": 2.15, + "learning_rate": 0.000185311904117458, + "loss": 0.1415, + "step": 7082 + }, + { + "epoch": 2.15, + "learning_rate": 0.0001852838056111788, + "loss": 0.0647, + "step": 7083 + }, + { + "epoch": 2.15, + "learning_rate": 0.00018525570579420967, + "loss": 0.0617, + "step": 7084 + }, + { + "epoch": 2.15, + "learning_rate": 0.00018522760466759455, + "loss": 0.0855, + "step": 7085 + }, + { + "epoch": 2.15, + "learning_rate": 0.00018519950223237715, + "loss": 0.0477, + "step": 7086 + }, + { + "epoch": 2.15, + "learning_rate": 0.00018517139848960146, + "loss": 0.1893, + "step": 7087 + }, + { + "epoch": 2.15, + "learning_rate": 0.00018514329344031144, + "loss": 0.1569, + "step": 7088 + }, + { + "epoch": 2.15, + "learning_rate": 0.00018511518708555117, + "loss": 0.2028, + "step": 7089 + }, + { + "epoch": 2.15, + "learning_rate": 0.0001850870794263647, + "loss": 0.0911, + "step": 7090 + }, + { + "epoch": 2.15, + "learning_rate": 0.00018505897046379606, + "loss": 0.1217, + "step": 7091 + }, + { + "epoch": 2.15, + "learning_rate": 0.0001850308601988895, + "loss": 0.1481, + "step": 7092 + }, + { + "epoch": 2.15, + "learning_rate": 0.0001850027486326892, + "loss": 0.1128, + "step": 7093 + }, + { + "epoch": 2.15, + "learning_rate": 0.00018497463576623945, + "loss": 0.0888, + "step": 7094 + }, + { + "epoch": 2.15, + "learning_rate": 0.00018494652160058452, + "loss": 0.0565, + "step": 7095 + }, + { + "epoch": 2.15, + "learning_rate": 0.0001849184061367688, + "loss": 0.1076, + "step": 7096 + }, + { + "epoch": 2.15, + "learning_rate": 0.00018489028937583674, + "loss": 0.1182, + "step": 7097 + }, + { + "epoch": 2.16, + "learning_rate": 0.00018486217131883268, + "loss": 0.1169, + "step": 7098 + }, + { + "epoch": 2.16, + "learning_rate": 0.0001848340519668012, + "loss": 0.0376, + "step": 7099 + }, + { + "epoch": 2.16, + "learning_rate": 0.00018480593132078686, + "loss": 0.0615, + "step": 7100 + }, + { + "epoch": 2.16, + "learning_rate": 0.00018477780938183424, + "loss": 0.0566, + "step": 7101 + }, + { + "epoch": 2.16, + "learning_rate": 0.00018474968615098797, + "loss": 0.1208, + "step": 7102 + }, + { + "epoch": 2.16, + "learning_rate": 0.00018472156162929276, + "loss": 0.1887, + "step": 7103 + }, + { + "epoch": 2.16, + "learning_rate": 0.00018469343581779335, + "loss": 0.0578, + "step": 7104 + }, + { + "epoch": 2.16, + "learning_rate": 0.00018466530871753456, + "loss": 0.1157, + "step": 7105 + }, + { + "epoch": 2.16, + "learning_rate": 0.0001846371803295612, + "loss": 0.0639, + "step": 7106 + }, + { + "epoch": 2.16, + "learning_rate": 0.0001846090506549182, + "loss": 0.086, + "step": 7107 + }, + { + "epoch": 2.16, + "learning_rate": 0.0001845809196946504, + "loss": 0.0792, + "step": 7108 + }, + { + "epoch": 2.16, + "learning_rate": 0.00018455278744980286, + "loss": 0.0733, + "step": 7109 + }, + { + "epoch": 2.16, + "learning_rate": 0.00018452465392142058, + "loss": 0.008, + "step": 7110 + }, + { + "epoch": 2.16, + "learning_rate": 0.00018449651911054863, + "loss": 0.1453, + "step": 7111 + }, + { + "epoch": 2.16, + "learning_rate": 0.0001844683830182322, + "loss": 0.1071, + "step": 7112 + }, + { + "epoch": 2.16, + "learning_rate": 0.00018444024564551642, + "loss": 0.1842, + "step": 7113 + }, + { + "epoch": 2.16, + "learning_rate": 0.00018441210699344647, + "loss": 0.044, + "step": 7114 + }, + { + "epoch": 2.16, + "learning_rate": 0.0001843839670630677, + "loss": 0.069, + "step": 7115 + }, + { + "epoch": 2.16, + "learning_rate": 0.00018435582585542537, + "loss": 0.0298, + "step": 7116 + }, + { + "epoch": 2.16, + "learning_rate": 0.00018432768337156485, + "loss": 0.101, + "step": 7117 + }, + { + "epoch": 2.16, + "learning_rate": 0.0001842995396125316, + "loss": 0.0961, + "step": 7118 + }, + { + "epoch": 2.16, + "learning_rate": 0.00018427139457937098, + "loss": 0.0534, + "step": 7119 + }, + { + "epoch": 2.16, + "learning_rate": 0.00018424324827312854, + "loss": 0.1083, + "step": 7120 + }, + { + "epoch": 2.16, + "learning_rate": 0.00018421510069484986, + "loss": 0.1154, + "step": 7121 + }, + { + "epoch": 2.16, + "learning_rate": 0.00018418695184558055, + "loss": 0.1, + "step": 7122 + }, + { + "epoch": 2.16, + "learning_rate": 0.00018415880172636622, + "loss": 0.1681, + "step": 7123 + }, + { + "epoch": 2.16, + "learning_rate": 0.00018413065033825258, + "loss": 0.0901, + "step": 7124 + }, + { + "epoch": 2.16, + "learning_rate": 0.0001841024976822854, + "loss": 0.0803, + "step": 7125 + }, + { + "epoch": 2.16, + "learning_rate": 0.00018407434375951039, + "loss": 0.0955, + "step": 7126 + }, + { + "epoch": 2.16, + "learning_rate": 0.00018404618857097342, + "loss": 0.186, + "step": 7127 + }, + { + "epoch": 2.16, + "learning_rate": 0.00018401803211772036, + "loss": 0.1897, + "step": 7128 + }, + { + "epoch": 2.16, + "learning_rate": 0.00018398987440079723, + "loss": 0.0583, + "step": 7129 + }, + { + "epoch": 2.16, + "learning_rate": 0.00018396171542124992, + "loss": 0.0815, + "step": 7130 + }, + { + "epoch": 2.17, + "learning_rate": 0.00018393355518012448, + "loss": 0.1221, + "step": 7131 + }, + { + "epoch": 2.17, + "learning_rate": 0.00018390539367846694, + "loss": 0.0046, + "step": 7132 + }, + { + "epoch": 2.17, + "learning_rate": 0.0001838772309173235, + "loss": 0.1271, + "step": 7133 + }, + { + "epoch": 2.17, + "learning_rate": 0.00018384906689774026, + "loss": 0.036, + "step": 7134 + }, + { + "epoch": 2.17, + "learning_rate": 0.00018382090162076344, + "loss": 0.1228, + "step": 7135 + }, + { + "epoch": 2.17, + "learning_rate": 0.0001837927350874393, + "loss": 0.0958, + "step": 7136 + }, + { + "epoch": 2.17, + "learning_rate": 0.00018376456729881412, + "loss": 0.0894, + "step": 7137 + }, + { + "epoch": 2.17, + "learning_rate": 0.00018373639825593432, + "loss": 0.1537, + "step": 7138 + }, + { + "epoch": 2.17, + "learning_rate": 0.00018370822795984627, + "loss": 0.049, + "step": 7139 + }, + { + "epoch": 2.17, + "learning_rate": 0.00018368005641159634, + "loss": 0.1177, + "step": 7140 + }, + { + "epoch": 2.17, + "learning_rate": 0.00018365188361223113, + "loss": 0.124, + "step": 7141 + }, + { + "epoch": 2.17, + "learning_rate": 0.0001836237095627971, + "loss": 0.0218, + "step": 7142 + }, + { + "epoch": 2.17, + "learning_rate": 0.00018359553426434088, + "loss": 0.0648, + "step": 7143 + }, + { + "epoch": 2.17, + "learning_rate": 0.00018356735771790905, + "loss": 0.0581, + "step": 7144 + }, + { + "epoch": 2.17, + "learning_rate": 0.00018353917992454833, + "loss": 0.0035, + "step": 7145 + }, + { + "epoch": 2.17, + "learning_rate": 0.00018351100088530547, + "loss": 0.1411, + "step": 7146 + }, + { + "epoch": 2.17, + "learning_rate": 0.00018348282060122714, + "loss": 0.0324, + "step": 7147 + }, + { + "epoch": 2.17, + "learning_rate": 0.00018345463907336028, + "loss": 0.1028, + "step": 7148 + }, + { + "epoch": 2.17, + "learning_rate": 0.00018342645630275162, + "loss": 0.044, + "step": 7149 + }, + { + "epoch": 2.17, + "learning_rate": 0.00018339827229044815, + "loss": 0.1379, + "step": 7150 + }, + { + "epoch": 2.17, + "learning_rate": 0.00018337008703749679, + "loss": 0.0743, + "step": 7151 + }, + { + "epoch": 2.17, + "learning_rate": 0.00018334190054494456, + "loss": 0.0849, + "step": 7152 + }, + { + "epoch": 2.17, + "learning_rate": 0.00018331371281383855, + "loss": 0.0911, + "step": 7153 + }, + { + "epoch": 2.17, + "learning_rate": 0.00018328552384522573, + "loss": 0.1262, + "step": 7154 + }, + { + "epoch": 2.17, + "learning_rate": 0.00018325733364015333, + "loss": 0.116, + "step": 7155 + }, + { + "epoch": 2.17, + "learning_rate": 0.00018322914219966855, + "loss": 0.0978, + "step": 7156 + }, + { + "epoch": 2.17, + "learning_rate": 0.00018320094952481853, + "loss": 0.1026, + "step": 7157 + }, + { + "epoch": 2.17, + "learning_rate": 0.00018317275561665066, + "loss": 0.0827, + "step": 7158 + }, + { + "epoch": 2.17, + "learning_rate": 0.00018314456047621212, + "loss": 0.1099, + "step": 7159 + }, + { + "epoch": 2.17, + "learning_rate": 0.0001831163641045504, + "loss": 0.1077, + "step": 7160 + }, + { + "epoch": 2.17, + "learning_rate": 0.00018308816650271283, + "loss": 0.1194, + "step": 7161 + }, + { + "epoch": 2.17, + "learning_rate": 0.00018305996767174694, + "loss": 0.1144, + "step": 7162 + }, + { + "epoch": 2.17, + "learning_rate": 0.00018303176761270019, + "loss": 0.1223, + "step": 7163 + }, + { + "epoch": 2.18, + "learning_rate": 0.00018300356632662014, + "loss": 0.0721, + "step": 7164 + }, + { + "epoch": 2.18, + "learning_rate": 0.00018297536381455435, + "loss": 0.0666, + "step": 7165 + }, + { + "epoch": 2.18, + "learning_rate": 0.00018294716007755053, + "loss": 0.1483, + "step": 7166 + }, + { + "epoch": 2.18, + "learning_rate": 0.0001829189551166563, + "loss": 0.1473, + "step": 7167 + }, + { + "epoch": 2.18, + "learning_rate": 0.00018289074893291943, + "loss": 0.0848, + "step": 7168 + }, + { + "epoch": 2.18, + "learning_rate": 0.0001828625415273877, + "loss": 0.1397, + "step": 7169 + }, + { + "epoch": 2.18, + "learning_rate": 0.00018283433290110893, + "loss": 0.1118, + "step": 7170 + }, + { + "epoch": 2.18, + "learning_rate": 0.00018280612305513098, + "loss": 0.0732, + "step": 7171 + }, + { + "epoch": 2.18, + "learning_rate": 0.00018277791199050177, + "loss": 0.0798, + "step": 7172 + }, + { + "epoch": 2.18, + "learning_rate": 0.00018274969970826922, + "loss": 0.0993, + "step": 7173 + }, + { + "epoch": 2.18, + "learning_rate": 0.0001827214862094814, + "loss": 0.1225, + "step": 7174 + }, + { + "epoch": 2.18, + "learning_rate": 0.00018269327149518627, + "loss": 0.2076, + "step": 7175 + }, + { + "epoch": 2.18, + "learning_rate": 0.000182665055566432, + "loss": 0.1226, + "step": 7176 + }, + { + "epoch": 2.18, + "learning_rate": 0.00018263683842426672, + "loss": 0.1432, + "step": 7177 + }, + { + "epoch": 2.18, + "learning_rate": 0.00018260862006973857, + "loss": 0.0661, + "step": 7178 + }, + { + "epoch": 2.18, + "learning_rate": 0.00018258040050389584, + "loss": 0.1437, + "step": 7179 + }, + { + "epoch": 2.18, + "learning_rate": 0.00018255217972778677, + "loss": 0.0709, + "step": 7180 + }, + { + "epoch": 2.18, + "learning_rate": 0.00018252395774245967, + "loss": 0.0696, + "step": 7181 + }, + { + "epoch": 2.18, + "learning_rate": 0.00018249573454896294, + "loss": 0.0792, + "step": 7182 + }, + { + "epoch": 2.18, + "learning_rate": 0.00018246751014834489, + "loss": 0.1607, + "step": 7183 + }, + { + "epoch": 2.18, + "learning_rate": 0.0001824392845416541, + "loss": 0.0564, + "step": 7184 + }, + { + "epoch": 2.18, + "learning_rate": 0.00018241105772993902, + "loss": 0.1029, + "step": 7185 + }, + { + "epoch": 2.18, + "learning_rate": 0.0001823828297142482, + "loss": 0.126, + "step": 7186 + }, + { + "epoch": 2.18, + "learning_rate": 0.00018235460049563019, + "loss": 0.1685, + "step": 7187 + }, + { + "epoch": 2.18, + "learning_rate": 0.00018232637007513367, + "loss": 0.0369, + "step": 7188 + }, + { + "epoch": 2.18, + "learning_rate": 0.0001822981384538073, + "loss": 0.1065, + "step": 7189 + }, + { + "epoch": 2.18, + "learning_rate": 0.00018226990563269979, + "loss": 0.0102, + "step": 7190 + }, + { + "epoch": 2.18, + "learning_rate": 0.00018224167161285992, + "loss": 0.0646, + "step": 7191 + }, + { + "epoch": 2.18, + "learning_rate": 0.00018221343639533653, + "loss": 0.1636, + "step": 7192 + }, + { + "epoch": 2.18, + "learning_rate": 0.00018218519998117836, + "loss": 0.1037, + "step": 7193 + }, + { + "epoch": 2.18, + "learning_rate": 0.00018215696237143446, + "loss": 0.1099, + "step": 7194 + }, + { + "epoch": 2.18, + "learning_rate": 0.00018212872356715366, + "loss": 0.0717, + "step": 7195 + }, + { + "epoch": 2.18, + "learning_rate": 0.00018210048356938504, + "loss": 0.1201, + "step": 7196 + }, + { + "epoch": 2.19, + "learning_rate": 0.0001820722423791776, + "loss": 0.0995, + "step": 7197 + }, + { + "epoch": 2.19, + "learning_rate": 0.00018204399999758035, + "loss": 0.0299, + "step": 7198 + }, + { + "epoch": 2.19, + "learning_rate": 0.00018201575642564255, + "loss": 0.1381, + "step": 7199 + }, + { + "epoch": 2.19, + "learning_rate": 0.00018198751166441322, + "loss": 0.0528, + "step": 7200 + }, + { + "epoch": 2.19, + "learning_rate": 0.00018195926571494165, + "loss": 0.1124, + "step": 7201 + }, + { + "epoch": 2.19, + "learning_rate": 0.00018193101857827705, + "loss": 0.0656, + "step": 7202 + }, + { + "epoch": 2.19, + "learning_rate": 0.00018190277025546876, + "loss": 0.0349, + "step": 7203 + }, + { + "epoch": 2.19, + "learning_rate": 0.00018187452074756614, + "loss": 0.1548, + "step": 7204 + }, + { + "epoch": 2.19, + "learning_rate": 0.0001818462700556185, + "loss": 0.105, + "step": 7205 + }, + { + "epoch": 2.19, + "learning_rate": 0.00018181801818067535, + "loss": 0.1001, + "step": 7206 + }, + { + "epoch": 2.19, + "learning_rate": 0.00018178976512378612, + "loss": 0.129, + "step": 7207 + }, + { + "epoch": 2.19, + "learning_rate": 0.00018176151088600037, + "loss": 0.158, + "step": 7208 + }, + { + "epoch": 2.19, + "learning_rate": 0.0001817332554683676, + "loss": 0.1358, + "step": 7209 + }, + { + "epoch": 2.19, + "learning_rate": 0.0001817049988719374, + "loss": 0.0791, + "step": 7210 + }, + { + "epoch": 2.19, + "learning_rate": 0.00018167674109775952, + "loss": 0.1525, + "step": 7211 + }, + { + "epoch": 2.19, + "learning_rate": 0.00018164848214688364, + "loss": 0.0971, + "step": 7212 + }, + { + "epoch": 2.19, + "learning_rate": 0.00018162022202035939, + "loss": 0.098, + "step": 7213 + }, + { + "epoch": 2.19, + "learning_rate": 0.00018159196071923668, + "loss": 0.1052, + "step": 7214 + }, + { + "epoch": 2.19, + "learning_rate": 0.00018156369824456523, + "loss": 0.1684, + "step": 7215 + }, + { + "epoch": 2.19, + "learning_rate": 0.0001815354345973949, + "loss": 0.0652, + "step": 7216 + }, + { + "epoch": 2.19, + "learning_rate": 0.00018150716977877572, + "loss": 0.091, + "step": 7217 + }, + { + "epoch": 2.19, + "learning_rate": 0.00018147890378975752, + "loss": 0.101, + "step": 7218 + }, + { + "epoch": 2.19, + "learning_rate": 0.00018145063663139045, + "loss": 0.0476, + "step": 7219 + }, + { + "epoch": 2.19, + "learning_rate": 0.00018142236830472438, + "loss": 0.1171, + "step": 7220 + }, + { + "epoch": 2.19, + "learning_rate": 0.00018139409881080947, + "loss": 0.0726, + "step": 7221 + }, + { + "epoch": 2.19, + "learning_rate": 0.00018136582815069592, + "loss": 0.1552, + "step": 7222 + }, + { + "epoch": 2.19, + "learning_rate": 0.0001813375563254338, + "loss": 0.1043, + "step": 7223 + }, + { + "epoch": 2.19, + "learning_rate": 0.00018130928333607338, + "loss": 0.1022, + "step": 7224 + }, + { + "epoch": 2.19, + "learning_rate": 0.0001812810091836648, + "loss": 0.101, + "step": 7225 + }, + { + "epoch": 2.19, + "learning_rate": 0.0001812527338692585, + "loss": 0.073, + "step": 7226 + }, + { + "epoch": 2.19, + "learning_rate": 0.00018122445739390485, + "loss": 0.0531, + "step": 7227 + }, + { + "epoch": 2.19, + "learning_rate": 0.00018119617975865415, + "loss": 0.0624, + "step": 7228 + }, + { + "epoch": 2.19, + "learning_rate": 0.00018116790096455685, + "loss": 0.0877, + "step": 7229 + }, + { + "epoch": 2.2, + "learning_rate": 0.00018113962101266341, + "loss": 0.161, + "step": 7230 + }, + { + "epoch": 2.2, + "learning_rate": 0.00018111133990402435, + "loss": 0.1724, + "step": 7231 + }, + { + "epoch": 2.2, + "learning_rate": 0.0001810830576396903, + "loss": 0.1048, + "step": 7232 + }, + { + "epoch": 2.2, + "learning_rate": 0.00018105477422071176, + "loss": 0.1724, + "step": 7233 + }, + { + "epoch": 2.2, + "learning_rate": 0.0001810264896481394, + "loss": 0.1513, + "step": 7234 + }, + { + "epoch": 2.2, + "learning_rate": 0.00018099820392302398, + "loss": 0.1141, + "step": 7235 + }, + { + "epoch": 2.2, + "learning_rate": 0.0001809699170464162, + "loss": 0.0806, + "step": 7236 + }, + { + "epoch": 2.2, + "learning_rate": 0.00018094162901936681, + "loss": 0.1499, + "step": 7237 + }, + { + "epoch": 2.2, + "learning_rate": 0.00018091333984292663, + "loss": 0.1506, + "step": 7238 + }, + { + "epoch": 2.2, + "learning_rate": 0.00018088504951814654, + "loss": 0.058, + "step": 7239 + }, + { + "epoch": 2.2, + "learning_rate": 0.0001808567580460774, + "loss": 0.1401, + "step": 7240 + }, + { + "epoch": 2.2, + "learning_rate": 0.0001808284654277702, + "loss": 0.0941, + "step": 7241 + }, + { + "epoch": 2.2, + "learning_rate": 0.00018080017166427592, + "loss": 0.1059, + "step": 7242 + }, + { + "epoch": 2.2, + "learning_rate": 0.00018077187675664556, + "loss": 0.0456, + "step": 7243 + }, + { + "epoch": 2.2, + "learning_rate": 0.00018074358070593023, + "loss": 0.0682, + "step": 7244 + }, + { + "epoch": 2.2, + "learning_rate": 0.00018071528351318105, + "loss": 0.0514, + "step": 7245 + }, + { + "epoch": 2.2, + "learning_rate": 0.00018068698517944914, + "loss": 0.1687, + "step": 7246 + }, + { + "epoch": 2.2, + "learning_rate": 0.00018065868570578574, + "loss": 0.0222, + "step": 7247 + }, + { + "epoch": 2.2, + "learning_rate": 0.00018063038509324208, + "loss": 0.1302, + "step": 7248 + }, + { + "epoch": 2.2, + "learning_rate": 0.0001806020833428694, + "loss": 0.07, + "step": 7249 + }, + { + "epoch": 2.2, + "learning_rate": 0.00018057378045571905, + "loss": 0.0817, + "step": 7250 + }, + { + "epoch": 2.2, + "learning_rate": 0.00018054547643284242, + "loss": 0.0443, + "step": 7251 + }, + { + "epoch": 2.2, + "learning_rate": 0.00018051717127529098, + "loss": 0.1039, + "step": 7252 + }, + { + "epoch": 2.2, + "learning_rate": 0.00018048886498411607, + "loss": 0.1061, + "step": 7253 + }, + { + "epoch": 2.2, + "learning_rate": 0.00018046055756036923, + "loss": 0.1345, + "step": 7254 + }, + { + "epoch": 2.2, + "learning_rate": 0.000180432249005102, + "loss": 0.0587, + "step": 7255 + }, + { + "epoch": 2.2, + "learning_rate": 0.00018040393931936598, + "loss": 0.1545, + "step": 7256 + }, + { + "epoch": 2.2, + "learning_rate": 0.00018037562850421274, + "loss": 0.0448, + "step": 7257 + }, + { + "epoch": 2.2, + "learning_rate": 0.000180347316560694, + "loss": 0.1442, + "step": 7258 + }, + { + "epoch": 2.2, + "learning_rate": 0.00018031900348986144, + "loss": 0.103, + "step": 7259 + }, + { + "epoch": 2.2, + "learning_rate": 0.00018029068929276685, + "loss": 0.1051, + "step": 7260 + }, + { + "epoch": 2.2, + "learning_rate": 0.00018026237397046193, + "loss": 0.1078, + "step": 7261 + }, + { + "epoch": 2.2, + "learning_rate": 0.00018023405752399857, + "loss": 0.1311, + "step": 7262 + }, + { + "epoch": 2.21, + "learning_rate": 0.0001802057399544286, + "loss": 0.1412, + "step": 7263 + }, + { + "epoch": 2.21, + "learning_rate": 0.00018017742126280402, + "loss": 0.0534, + "step": 7264 + }, + { + "epoch": 2.21, + "learning_rate": 0.00018014910145017674, + "loss": 0.0994, + "step": 7265 + }, + { + "epoch": 2.21, + "learning_rate": 0.0001801207805175987, + "loss": 0.0567, + "step": 7266 + }, + { + "epoch": 2.21, + "learning_rate": 0.00018009245846612202, + "loss": 0.0865, + "step": 7267 + }, + { + "epoch": 2.21, + "learning_rate": 0.00018006413529679876, + "loss": 0.0263, + "step": 7268 + }, + { + "epoch": 2.21, + "learning_rate": 0.00018003581101068105, + "loss": 0.0217, + "step": 7269 + }, + { + "epoch": 2.21, + "learning_rate": 0.00018000748560882104, + "loss": 0.0307, + "step": 7270 + }, + { + "epoch": 2.21, + "learning_rate": 0.00017997915909227088, + "loss": 0.1613, + "step": 7271 + }, + { + "epoch": 2.21, + "learning_rate": 0.00017995083146208292, + "loss": 0.0579, + "step": 7272 + }, + { + "epoch": 2.21, + "learning_rate": 0.0001799225027193094, + "loss": 0.1191, + "step": 7273 + }, + { + "epoch": 2.21, + "learning_rate": 0.00017989417286500266, + "loss": 0.0603, + "step": 7274 + }, + { + "epoch": 2.21, + "learning_rate": 0.00017986584190021504, + "loss": 0.0698, + "step": 7275 + }, + { + "epoch": 2.21, + "learning_rate": 0.00017983750982599896, + "loss": 0.1149, + "step": 7276 + }, + { + "epoch": 2.21, + "learning_rate": 0.0001798091766434069, + "loss": 0.0569, + "step": 7277 + }, + { + "epoch": 2.21, + "learning_rate": 0.0001797808423534914, + "loss": 0.1592, + "step": 7278 + }, + { + "epoch": 2.21, + "learning_rate": 0.00017975250695730487, + "loss": 0.0833, + "step": 7279 + }, + { + "epoch": 2.21, + "learning_rate": 0.0001797241704559, + "loss": 0.0974, + "step": 7280 + }, + { + "epoch": 2.21, + "learning_rate": 0.00017969583285032932, + "loss": 0.1295, + "step": 7281 + }, + { + "epoch": 2.21, + "learning_rate": 0.00017966749414164554, + "loss": 0.0977, + "step": 7282 + }, + { + "epoch": 2.21, + "learning_rate": 0.00017963915433090135, + "loss": 0.1021, + "step": 7283 + }, + { + "epoch": 2.21, + "learning_rate": 0.0001796108134191495, + "loss": 0.0822, + "step": 7284 + }, + { + "epoch": 2.21, + "learning_rate": 0.00017958247140744277, + "loss": 0.1282, + "step": 7285 + }, + { + "epoch": 2.21, + "learning_rate": 0.00017955412829683397, + "loss": 0.0761, + "step": 7286 + }, + { + "epoch": 2.21, + "learning_rate": 0.00017952578408837595, + "loss": 0.1242, + "step": 7287 + }, + { + "epoch": 2.21, + "learning_rate": 0.00017949743878312171, + "loss": 0.1494, + "step": 7288 + }, + { + "epoch": 2.21, + "learning_rate": 0.00017946909238212406, + "loss": 0.0549, + "step": 7289 + }, + { + "epoch": 2.21, + "learning_rate": 0.00017944074488643604, + "loss": 0.154, + "step": 7290 + }, + { + "epoch": 2.21, + "learning_rate": 0.00017941239629711068, + "loss": 0.1157, + "step": 7291 + }, + { + "epoch": 2.21, + "learning_rate": 0.00017938404661520108, + "loss": 0.1553, + "step": 7292 + }, + { + "epoch": 2.21, + "learning_rate": 0.00017935569584176032, + "loss": 0.0774, + "step": 7293 + }, + { + "epoch": 2.21, + "learning_rate": 0.0001793273439778415, + "loss": 0.0237, + "step": 7294 + }, + { + "epoch": 2.21, + "learning_rate": 0.00017929899102449791, + "loss": 0.1114, + "step": 7295 + }, + { + "epoch": 2.22, + "learning_rate": 0.00017927063698278267, + "loss": 0.0466, + "step": 7296 + }, + { + "epoch": 2.22, + "learning_rate": 0.0001792422818537491, + "loss": 0.1076, + "step": 7297 + }, + { + "epoch": 2.22, + "learning_rate": 0.00017921392563845055, + "loss": 0.0464, + "step": 7298 + }, + { + "epoch": 2.22, + "learning_rate": 0.00017918556833794032, + "loss": 0.0162, + "step": 7299 + }, + { + "epoch": 2.22, + "learning_rate": 0.00017915720995327177, + "loss": 0.2108, + "step": 7300 + }, + { + "epoch": 2.22, + "learning_rate": 0.00017912885048549846, + "loss": 0.1331, + "step": 7301 + }, + { + "epoch": 2.22, + "learning_rate": 0.0001791004899356737, + "loss": 0.1109, + "step": 7302 + }, + { + "epoch": 2.22, + "learning_rate": 0.00017907212830485113, + "loss": 0.24, + "step": 7303 + }, + { + "epoch": 2.22, + "learning_rate": 0.0001790437655940842, + "loss": 0.1447, + "step": 7304 + }, + { + "epoch": 2.22, + "learning_rate": 0.00017901540180442654, + "loss": 0.0637, + "step": 7305 + }, + { + "epoch": 2.22, + "learning_rate": 0.00017898703693693184, + "loss": 0.228, + "step": 7306 + }, + { + "epoch": 2.22, + "learning_rate": 0.0001789586709926537, + "loss": 0.0724, + "step": 7307 + }, + { + "epoch": 2.22, + "learning_rate": 0.00017893030397264585, + "loss": 0.0455, + "step": 7308 + }, + { + "epoch": 2.22, + "learning_rate": 0.00017890193587796204, + "loss": 0.0404, + "step": 7309 + }, + { + "epoch": 2.22, + "learning_rate": 0.00017887356670965606, + "loss": 0.1133, + "step": 7310 + }, + { + "epoch": 2.22, + "learning_rate": 0.0001788451964687818, + "loss": 0.0212, + "step": 7311 + }, + { + "epoch": 2.22, + "learning_rate": 0.000178816825156393, + "loss": 0.0617, + "step": 7312 + }, + { + "epoch": 2.22, + "learning_rate": 0.0001787884527735437, + "loss": 0.1017, + "step": 7313 + }, + { + "epoch": 2.22, + "learning_rate": 0.00017876007932128782, + "loss": 0.0565, + "step": 7314 + }, + { + "epoch": 2.22, + "learning_rate": 0.00017873170480067928, + "loss": 0.1, + "step": 7315 + }, + { + "epoch": 2.22, + "learning_rate": 0.00017870332921277214, + "loss": 0.1092, + "step": 7316 + }, + { + "epoch": 2.22, + "learning_rate": 0.00017867495255862055, + "loss": 0.0849, + "step": 7317 + }, + { + "epoch": 2.22, + "learning_rate": 0.00017864657483927858, + "loss": 0.1115, + "step": 7318 + }, + { + "epoch": 2.22, + "learning_rate": 0.0001786181960558003, + "loss": 0.2245, + "step": 7319 + }, + { + "epoch": 2.22, + "learning_rate": 0.00017858981620924, + "loss": 0.0198, + "step": 7320 + }, + { + "epoch": 2.22, + "learning_rate": 0.00017856143530065187, + "loss": 0.07, + "step": 7321 + }, + { + "epoch": 2.22, + "learning_rate": 0.00017853305333109015, + "loss": 0.0867, + "step": 7322 + }, + { + "epoch": 2.22, + "learning_rate": 0.00017850467030160915, + "loss": 0.053, + "step": 7323 + }, + { + "epoch": 2.22, + "learning_rate": 0.00017847628621326324, + "loss": 0.1252, + "step": 7324 + }, + { + "epoch": 2.22, + "learning_rate": 0.00017844790106710681, + "loss": 0.0562, + "step": 7325 + }, + { + "epoch": 2.22, + "learning_rate": 0.00017841951486419433, + "loss": 0.0243, + "step": 7326 + }, + { + "epoch": 2.22, + "learning_rate": 0.00017839112760558018, + "loss": 0.0993, + "step": 7327 + }, + { + "epoch": 2.22, + "learning_rate": 0.00017836273929231888, + "loss": 0.1307, + "step": 7328 + }, + { + "epoch": 2.23, + "learning_rate": 0.00017833434992546503, + "loss": 0.0456, + "step": 7329 + }, + { + "epoch": 2.23, + "learning_rate": 0.00017830595950607311, + "loss": 0.1082, + "step": 7330 + }, + { + "epoch": 2.23, + "learning_rate": 0.00017827756803519783, + "loss": 0.0535, + "step": 7331 + }, + { + "epoch": 2.23, + "learning_rate": 0.00017824917551389382, + "loss": 0.1511, + "step": 7332 + }, + { + "epoch": 2.23, + "learning_rate": 0.00017822078194321578, + "loss": 0.1501, + "step": 7333 + }, + { + "epoch": 2.23, + "learning_rate": 0.00017819238732421847, + "loss": 0.1088, + "step": 7334 + }, + { + "epoch": 2.23, + "learning_rate": 0.0001781639916579566, + "loss": 0.0447, + "step": 7335 + }, + { + "epoch": 2.23, + "learning_rate": 0.0001781355949454851, + "loss": 0.1506, + "step": 7336 + }, + { + "epoch": 2.23, + "learning_rate": 0.0001781071971878587, + "loss": 0.098, + "step": 7337 + }, + { + "epoch": 2.23, + "learning_rate": 0.00017807879838613235, + "loss": 0.0999, + "step": 7338 + }, + { + "epoch": 2.23, + "learning_rate": 0.00017805039854136098, + "loss": 0.0927, + "step": 7339 + }, + { + "epoch": 2.23, + "learning_rate": 0.00017802199765459958, + "loss": 0.0463, + "step": 7340 + }, + { + "epoch": 2.23, + "learning_rate": 0.00017799359572690313, + "loss": 0.0908, + "step": 7341 + }, + { + "epoch": 2.23, + "learning_rate": 0.00017796519275932666, + "loss": 0.0937, + "step": 7342 + }, + { + "epoch": 2.23, + "learning_rate": 0.00017793678875292532, + "loss": 0.0563, + "step": 7343 + }, + { + "epoch": 2.23, + "learning_rate": 0.00017790838370875423, + "loss": 0.0611, + "step": 7344 + }, + { + "epoch": 2.23, + "learning_rate": 0.00017787997762786845, + "loss": 0.1282, + "step": 7345 + }, + { + "epoch": 2.23, + "learning_rate": 0.00017785157051132333, + "loss": 0.1146, + "step": 7346 + }, + { + "epoch": 2.23, + "learning_rate": 0.00017782316236017405, + "loss": 0.0835, + "step": 7347 + }, + { + "epoch": 2.23, + "learning_rate": 0.00017779475317547576, + "loss": 0.0587, + "step": 7348 + }, + { + "epoch": 2.23, + "learning_rate": 0.000177766342958284, + "loss": 0.0595, + "step": 7349 + }, + { + "epoch": 2.23, + "learning_rate": 0.000177737931709654, + "loss": 0.0798, + "step": 7350 + }, + { + "epoch": 2.23, + "learning_rate": 0.00017770951943064123, + "loss": 0.0668, + "step": 7351 + }, + { + "epoch": 2.23, + "learning_rate": 0.00017768110612230098, + "loss": 0.1065, + "step": 7352 + }, + { + "epoch": 2.23, + "learning_rate": 0.00017765269178568886, + "loss": 0.0881, + "step": 7353 + }, + { + "epoch": 2.23, + "learning_rate": 0.00017762427642186034, + "loss": 0.077, + "step": 7354 + }, + { + "epoch": 2.23, + "learning_rate": 0.00017759586003187092, + "loss": 0.0898, + "step": 7355 + }, + { + "epoch": 2.23, + "learning_rate": 0.00017756744261677621, + "loss": 0.077, + "step": 7356 + }, + { + "epoch": 2.23, + "learning_rate": 0.0001775390241776319, + "loss": 0.1228, + "step": 7357 + }, + { + "epoch": 2.23, + "learning_rate": 0.00017751060471549355, + "loss": 0.1845, + "step": 7358 + }, + { + "epoch": 2.23, + "learning_rate": 0.00017748218423141698, + "loss": 0.1365, + "step": 7359 + }, + { + "epoch": 2.23, + "learning_rate": 0.0001774537627264578, + "loss": 0.2156, + "step": 7360 + }, + { + "epoch": 2.24, + "learning_rate": 0.00017742534020167186, + "loss": 0.1478, + "step": 7361 + }, + { + "epoch": 2.24, + "learning_rate": 0.000177396916658115, + "loss": 0.0557, + "step": 7362 + }, + { + "epoch": 2.24, + "learning_rate": 0.00017736849209684297, + "loss": 0.0627, + "step": 7363 + }, + { + "epoch": 2.24, + "learning_rate": 0.00017734006651891172, + "loss": 0.1062, + "step": 7364 + }, + { + "epoch": 2.24, + "learning_rate": 0.00017731163992537716, + "loss": 0.0772, + "step": 7365 + }, + { + "epoch": 2.24, + "learning_rate": 0.00017728321231729529, + "loss": 0.1683, + "step": 7366 + }, + { + "epoch": 2.24, + "learning_rate": 0.0001772547836957221, + "loss": 0.125, + "step": 7367 + }, + { + "epoch": 2.24, + "learning_rate": 0.0001772263540617136, + "loss": 0.1285, + "step": 7368 + }, + { + "epoch": 2.24, + "learning_rate": 0.0001771979234163259, + "loss": 0.1259, + "step": 7369 + }, + { + "epoch": 2.24, + "learning_rate": 0.0001771694917606151, + "loss": 0.0418, + "step": 7370 + }, + { + "epoch": 2.24, + "learning_rate": 0.00017714105909563734, + "loss": 0.0982, + "step": 7371 + }, + { + "epoch": 2.24, + "learning_rate": 0.00017711262542244885, + "loss": 0.0833, + "step": 7372 + }, + { + "epoch": 2.24, + "learning_rate": 0.0001770841907421058, + "loss": 0.0293, + "step": 7373 + }, + { + "epoch": 2.24, + "learning_rate": 0.00017705575505566452, + "loss": 0.0571, + "step": 7374 + }, + { + "epoch": 2.24, + "learning_rate": 0.00017702731836418127, + "loss": 0.1054, + "step": 7375 + }, + { + "epoch": 2.24, + "learning_rate": 0.0001769988806687124, + "loss": 0.1429, + "step": 7376 + }, + { + "epoch": 2.24, + "learning_rate": 0.0001769704419703143, + "loss": 0.0984, + "step": 7377 + }, + { + "epoch": 2.24, + "learning_rate": 0.00017694200227004334, + "loss": 0.0739, + "step": 7378 + }, + { + "epoch": 2.24, + "learning_rate": 0.00017691356156895603, + "loss": 0.1081, + "step": 7379 + }, + { + "epoch": 2.24, + "learning_rate": 0.0001768851198681088, + "loss": 0.142, + "step": 7380 + }, + { + "epoch": 2.24, + "learning_rate": 0.00017685667716855818, + "loss": 0.0848, + "step": 7381 + }, + { + "epoch": 2.24, + "learning_rate": 0.00017682823347136086, + "loss": 0.1254, + "step": 7382 + }, + { + "epoch": 2.24, + "learning_rate": 0.00017679978877757329, + "loss": 0.0541, + "step": 7383 + }, + { + "epoch": 2.24, + "learning_rate": 0.00017677134308825214, + "loss": 0.0747, + "step": 7384 + }, + { + "epoch": 2.24, + "learning_rate": 0.0001767428964044541, + "loss": 0.1882, + "step": 7385 + }, + { + "epoch": 2.24, + "learning_rate": 0.0001767144487272359, + "loss": 0.0923, + "step": 7386 + }, + { + "epoch": 2.24, + "learning_rate": 0.0001766860000576543, + "loss": 0.1428, + "step": 7387 + }, + { + "epoch": 2.24, + "learning_rate": 0.000176657550396766, + "loss": 0.1173, + "step": 7388 + }, + { + "epoch": 2.24, + "learning_rate": 0.00017662909974562786, + "loss": 0.0925, + "step": 7389 + }, + { + "epoch": 2.24, + "learning_rate": 0.0001766006481052968, + "loss": 0.0955, + "step": 7390 + }, + { + "epoch": 2.24, + "learning_rate": 0.00017657219547682963, + "loss": 0.044, + "step": 7391 + }, + { + "epoch": 2.24, + "learning_rate": 0.00017654374186128338, + "loss": 0.1123, + "step": 7392 + }, + { + "epoch": 2.24, + "learning_rate": 0.0001765152872597149, + "loss": 0.1822, + "step": 7393 + }, + { + "epoch": 2.25, + "learning_rate": 0.00017648683167318128, + "loss": 0.1611, + "step": 7394 + }, + { + "epoch": 2.25, + "learning_rate": 0.00017645837510273956, + "loss": 0.0521, + "step": 7395 + }, + { + "epoch": 2.25, + "learning_rate": 0.00017642991754944674, + "loss": 0.1213, + "step": 7396 + }, + { + "epoch": 2.25, + "learning_rate": 0.00017640145901436, + "loss": 0.1177, + "step": 7397 + }, + { + "epoch": 2.25, + "learning_rate": 0.0001763729994985365, + "loss": 0.1377, + "step": 7398 + }, + { + "epoch": 2.25, + "learning_rate": 0.00017634453900303339, + "loss": 0.1301, + "step": 7399 + }, + { + "epoch": 2.25, + "learning_rate": 0.00017631607752890795, + "loss": 0.1013, + "step": 7400 + }, + { + "epoch": 2.25, + "learning_rate": 0.00017628761507721736, + "loss": 0.1025, + "step": 7401 + }, + { + "epoch": 2.25, + "learning_rate": 0.00017625915164901898, + "loss": 0.0252, + "step": 7402 + }, + { + "epoch": 2.25, + "learning_rate": 0.0001762306872453701, + "loss": 0.0957, + "step": 7403 + }, + { + "epoch": 2.25, + "learning_rate": 0.00017620222186732812, + "loss": 0.1116, + "step": 7404 + }, + { + "epoch": 2.25, + "learning_rate": 0.00017617375551595043, + "loss": 0.1074, + "step": 7405 + }, + { + "epoch": 2.25, + "learning_rate": 0.00017614528819229447, + "loss": 0.1016, + "step": 7406 + }, + { + "epoch": 2.25, + "learning_rate": 0.00017611681989741776, + "loss": 0.1223, + "step": 7407 + }, + { + "epoch": 2.25, + "learning_rate": 0.00017608835063237773, + "loss": 0.1362, + "step": 7408 + }, + { + "epoch": 2.25, + "learning_rate": 0.000176059880398232, + "loss": 0.1538, + "step": 7409 + }, + { + "epoch": 2.25, + "learning_rate": 0.0001760314091960382, + "loss": 0.1097, + "step": 7410 + }, + { + "epoch": 2.25, + "learning_rate": 0.0001760029370268538, + "loss": 0.1508, + "step": 7411 + }, + { + "epoch": 2.25, + "learning_rate": 0.0001759744638917365, + "loss": 0.1765, + "step": 7412 + }, + { + "epoch": 2.25, + "learning_rate": 0.00017594598979174411, + "loss": 0.0703, + "step": 7413 + }, + { + "epoch": 2.25, + "learning_rate": 0.00017591751472793426, + "loss": 0.0528, + "step": 7414 + }, + { + "epoch": 2.25, + "learning_rate": 0.00017588903870136477, + "loss": 0.1171, + "step": 7415 + }, + { + "epoch": 2.25, + "learning_rate": 0.0001758605617130934, + "loss": 0.056, + "step": 7416 + }, + { + "epoch": 2.25, + "learning_rate": 0.000175832083764178, + "loss": 0.0544, + "step": 7417 + }, + { + "epoch": 2.25, + "learning_rate": 0.00017580360485567643, + "loss": 0.0533, + "step": 7418 + }, + { + "epoch": 2.25, + "learning_rate": 0.0001757751249886466, + "loss": 0.0663, + "step": 7419 + }, + { + "epoch": 2.25, + "learning_rate": 0.0001757466441641465, + "loss": 0.0241, + "step": 7420 + }, + { + "epoch": 2.25, + "learning_rate": 0.000175718162383234, + "loss": 0.1465, + "step": 7421 + }, + { + "epoch": 2.25, + "learning_rate": 0.0001756896796469672, + "loss": 0.0369, + "step": 7422 + }, + { + "epoch": 2.25, + "learning_rate": 0.0001756611959564042, + "loss": 0.1901, + "step": 7423 + }, + { + "epoch": 2.25, + "learning_rate": 0.000175632711312603, + "loss": 0.0848, + "step": 7424 + }, + { + "epoch": 2.25, + "learning_rate": 0.0001756042257166217, + "loss": 0.0891, + "step": 7425 + }, + { + "epoch": 2.25, + "learning_rate": 0.00017557573916951853, + "loss": 0.0697, + "step": 7426 + }, + { + "epoch": 2.26, + "learning_rate": 0.00017554725167235165, + "loss": 0.1183, + "step": 7427 + }, + { + "epoch": 2.26, + "learning_rate": 0.00017551876322617933, + "loss": 0.0574, + "step": 7428 + }, + { + "epoch": 2.26, + "learning_rate": 0.0001754902738320597, + "loss": 0.0811, + "step": 7429 + }, + { + "epoch": 2.26, + "learning_rate": 0.0001754617834910512, + "loss": 0.116, + "step": 7430 + }, + { + "epoch": 2.26, + "learning_rate": 0.00017543329220421207, + "loss": 0.131, + "step": 7431 + }, + { + "epoch": 2.26, + "learning_rate": 0.00017540479997260074, + "loss": 0.1349, + "step": 7432 + }, + { + "epoch": 2.26, + "learning_rate": 0.0001753763067972756, + "loss": 0.2279, + "step": 7433 + }, + { + "epoch": 2.26, + "learning_rate": 0.00017534781267929508, + "loss": 0.1612, + "step": 7434 + }, + { + "epoch": 2.26, + "learning_rate": 0.00017531931761971764, + "loss": 0.1242, + "step": 7435 + }, + { + "epoch": 2.26, + "learning_rate": 0.00017529082161960182, + "loss": 0.052, + "step": 7436 + }, + { + "epoch": 2.26, + "learning_rate": 0.00017526232468000608, + "loss": 0.1706, + "step": 7437 + }, + { + "epoch": 2.26, + "learning_rate": 0.00017523382680198913, + "loss": 0.0786, + "step": 7438 + }, + { + "epoch": 2.26, + "learning_rate": 0.00017520532798660947, + "loss": 0.0647, + "step": 7439 + }, + { + "epoch": 2.26, + "learning_rate": 0.00017517682823492582, + "loss": 0.0338, + "step": 7440 + }, + { + "epoch": 2.26, + "learning_rate": 0.00017514832754799678, + "loss": 0.1519, + "step": 7441 + }, + { + "epoch": 2.26, + "learning_rate": 0.00017511982592688117, + "loss": 0.0467, + "step": 7442 + }, + { + "epoch": 2.26, + "learning_rate": 0.0001750913233726377, + "loss": 0.1449, + "step": 7443 + }, + { + "epoch": 2.26, + "learning_rate": 0.0001750628198863251, + "loss": 0.1017, + "step": 7444 + }, + { + "epoch": 2.26, + "learning_rate": 0.00017503431546900229, + "loss": 0.0633, + "step": 7445 + }, + { + "epoch": 2.26, + "learning_rate": 0.000175005810121728, + "loss": 0.0699, + "step": 7446 + }, + { + "epoch": 2.26, + "learning_rate": 0.00017497730384556122, + "loss": 0.1339, + "step": 7447 + }, + { + "epoch": 2.26, + "learning_rate": 0.00017494879664156087, + "loss": 0.0364, + "step": 7448 + }, + { + "epoch": 2.26, + "learning_rate": 0.0001749202885107859, + "loss": 0.1136, + "step": 7449 + }, + { + "epoch": 2.26, + "learning_rate": 0.0001748917794542953, + "loss": 0.1001, + "step": 7450 + }, + { + "epoch": 2.26, + "learning_rate": 0.00017486326947314802, + "loss": 0.1079, + "step": 7451 + }, + { + "epoch": 2.26, + "learning_rate": 0.00017483475856840323, + "loss": 0.0829, + "step": 7452 + }, + { + "epoch": 2.26, + "learning_rate": 0.00017480624674111995, + "loss": 0.1001, + "step": 7453 + }, + { + "epoch": 2.26, + "learning_rate": 0.00017477773399235738, + "loss": 0.1413, + "step": 7454 + }, + { + "epoch": 2.26, + "learning_rate": 0.00017474922032317467, + "loss": 0.1103, + "step": 7455 + }, + { + "epoch": 2.26, + "learning_rate": 0.000174720705734631, + "loss": 0.0615, + "step": 7456 + }, + { + "epoch": 2.26, + "learning_rate": 0.00017469219022778556, + "loss": 0.1176, + "step": 7457 + }, + { + "epoch": 2.26, + "learning_rate": 0.0001746636738036977, + "loss": 0.0819, + "step": 7458 + }, + { + "epoch": 2.26, + "learning_rate": 0.00017463515646342668, + "loss": 0.1203, + "step": 7459 + }, + { + "epoch": 2.27, + "learning_rate": 0.0001746066382080318, + "loss": 0.0294, + "step": 7460 + }, + { + "epoch": 2.27, + "learning_rate": 0.00017457811903857253, + "loss": 0.1225, + "step": 7461 + }, + { + "epoch": 2.27, + "learning_rate": 0.00017454959895610818, + "loss": 0.0687, + "step": 7462 + }, + { + "epoch": 2.27, + "learning_rate": 0.00017452107796169823, + "loss": 0.0727, + "step": 7463 + }, + { + "epoch": 2.27, + "learning_rate": 0.00017449255605640212, + "loss": 0.1882, + "step": 7464 + }, + { + "epoch": 2.27, + "learning_rate": 0.00017446403324127937, + "loss": 0.1319, + "step": 7465 + }, + { + "epoch": 2.27, + "learning_rate": 0.00017443550951738956, + "loss": 0.1502, + "step": 7466 + }, + { + "epoch": 2.27, + "learning_rate": 0.00017440698488579222, + "loss": 0.1107, + "step": 7467 + }, + { + "epoch": 2.27, + "learning_rate": 0.000174378459347547, + "loss": 0.1109, + "step": 7468 + }, + { + "epoch": 2.27, + "learning_rate": 0.00017434993290371345, + "loss": 0.0049, + "step": 7469 + }, + { + "epoch": 2.27, + "learning_rate": 0.00017432140555535128, + "loss": 0.0416, + "step": 7470 + }, + { + "epoch": 2.27, + "learning_rate": 0.00017429287730352024, + "loss": 0.0294, + "step": 7471 + }, + { + "epoch": 2.27, + "learning_rate": 0.00017426434814928009, + "loss": 0.0999, + "step": 7472 + }, + { + "epoch": 2.27, + "learning_rate": 0.00017423581809369052, + "loss": 0.0335, + "step": 7473 + }, + { + "epoch": 2.27, + "learning_rate": 0.00017420728713781137, + "loss": 0.0337, + "step": 7474 + }, + { + "epoch": 2.27, + "learning_rate": 0.0001741787552827025, + "loss": 0.1248, + "step": 7475 + }, + { + "epoch": 2.27, + "learning_rate": 0.00017415022252942382, + "loss": 0.1514, + "step": 7476 + }, + { + "epoch": 2.27, + "learning_rate": 0.00017412168887903512, + "loss": 0.175, + "step": 7477 + }, + { + "epoch": 2.27, + "learning_rate": 0.00017409315433259645, + "loss": 0.1319, + "step": 7478 + }, + { + "epoch": 2.27, + "learning_rate": 0.0001740646188911677, + "loss": 0.0921, + "step": 7479 + }, + { + "epoch": 2.27, + "learning_rate": 0.000174036082555809, + "loss": 0.1473, + "step": 7480 + }, + { + "epoch": 2.27, + "learning_rate": 0.00017400754532758032, + "loss": 0.0937, + "step": 7481 + }, + { + "epoch": 2.27, + "learning_rate": 0.00017397900720754166, + "loss": 0.0685, + "step": 7482 + }, + { + "epoch": 2.27, + "learning_rate": 0.00017395046819675329, + "loss": 0.0984, + "step": 7483 + }, + { + "epoch": 2.27, + "learning_rate": 0.0001739219282962752, + "loss": 0.1125, + "step": 7484 + }, + { + "epoch": 2.27, + "learning_rate": 0.00017389338750716763, + "loss": 0.1028, + "step": 7485 + }, + { + "epoch": 2.27, + "learning_rate": 0.00017386484583049077, + "loss": 0.1104, + "step": 7486 + }, + { + "epoch": 2.27, + "learning_rate": 0.00017383630326730487, + "loss": 0.1332, + "step": 7487 + }, + { + "epoch": 2.27, + "learning_rate": 0.00017380775981867022, + "loss": 0.0849, + "step": 7488 + }, + { + "epoch": 2.27, + "learning_rate": 0.00017377921548564714, + "loss": 0.0872, + "step": 7489 + }, + { + "epoch": 2.27, + "learning_rate": 0.0001737506702692959, + "loss": 0.1089, + "step": 7490 + }, + { + "epoch": 2.27, + "learning_rate": 0.00017372212417067693, + "loss": 0.0537, + "step": 7491 + }, + { + "epoch": 2.27, + "learning_rate": 0.00017369357719085058, + "loss": 0.0798, + "step": 7492 + }, + { + "epoch": 2.28, + "learning_rate": 0.00017366502933087734, + "loss": 0.1221, + "step": 7493 + }, + { + "epoch": 2.28, + "learning_rate": 0.00017363648059181766, + "loss": 0.0825, + "step": 7494 + }, + { + "epoch": 2.28, + "learning_rate": 0.000173607930974732, + "loss": 0.0793, + "step": 7495 + }, + { + "epoch": 2.28, + "learning_rate": 0.00017357938048068102, + "loss": 0.0941, + "step": 7496 + }, + { + "epoch": 2.28, + "learning_rate": 0.0001735508291107251, + "loss": 0.2017, + "step": 7497 + }, + { + "epoch": 2.28, + "learning_rate": 0.000173522276865925, + "loss": 0.0548, + "step": 7498 + }, + { + "epoch": 2.28, + "learning_rate": 0.00017349372374734131, + "loss": 0.0346, + "step": 7499 + }, + { + "epoch": 2.28, + "learning_rate": 0.00017346516975603462, + "loss": 0.0571, + "step": 7500 + }, + { + "epoch": 2.28, + "learning_rate": 0.00017343661489306573, + "loss": 0.1006, + "step": 7501 + }, + { + "epoch": 2.28, + "learning_rate": 0.00017340805915949529, + "loss": 0.0658, + "step": 7502 + }, + { + "epoch": 2.28, + "learning_rate": 0.0001733795025563841, + "loss": 0.1136, + "step": 7503 + }, + { + "epoch": 2.28, + "learning_rate": 0.00017335094508479296, + "loss": 0.1159, + "step": 7504 + }, + { + "epoch": 2.28, + "learning_rate": 0.0001733223867457827, + "loss": 0.1198, + "step": 7505 + }, + { + "epoch": 2.28, + "learning_rate": 0.00017329382754041416, + "loss": 0.1717, + "step": 7506 + }, + { + "epoch": 2.28, + "learning_rate": 0.00017326526746974823, + "loss": 0.1192, + "step": 7507 + }, + { + "epoch": 2.28, + "learning_rate": 0.00017323670653484583, + "loss": 0.036, + "step": 7508 + }, + { + "epoch": 2.28, + "learning_rate": 0.00017320814473676795, + "loss": 0.0726, + "step": 7509 + }, + { + "epoch": 2.28, + "learning_rate": 0.00017317958207657553, + "loss": 0.0744, + "step": 7510 + }, + { + "epoch": 2.28, + "learning_rate": 0.00017315101855532958, + "loss": 0.085, + "step": 7511 + }, + { + "epoch": 2.28, + "learning_rate": 0.0001731224541740912, + "loss": 0.0308, + "step": 7512 + }, + { + "epoch": 2.28, + "learning_rate": 0.00017309388893392146, + "loss": 0.1391, + "step": 7513 + }, + { + "epoch": 2.28, + "learning_rate": 0.0001730653228358815, + "loss": 0.0842, + "step": 7514 + }, + { + "epoch": 2.28, + "learning_rate": 0.0001730367558810324, + "loss": 0.0574, + "step": 7515 + }, + { + "epoch": 2.28, + "learning_rate": 0.00017300818807043539, + "loss": 0.1, + "step": 7516 + }, + { + "epoch": 2.28, + "learning_rate": 0.00017297961940515168, + "loss": 0.1587, + "step": 7517 + }, + { + "epoch": 2.28, + "learning_rate": 0.00017295104988624245, + "loss": 0.1708, + "step": 7518 + }, + { + "epoch": 2.28, + "learning_rate": 0.00017292247951476903, + "loss": 0.1082, + "step": 7519 + }, + { + "epoch": 2.28, + "learning_rate": 0.00017289390829179272, + "loss": 0.1311, + "step": 7520 + }, + { + "epoch": 2.28, + "learning_rate": 0.00017286533621837483, + "loss": 0.0385, + "step": 7521 + }, + { + "epoch": 2.28, + "learning_rate": 0.0001728367632955768, + "loss": 0.0793, + "step": 7522 + }, + { + "epoch": 2.28, + "learning_rate": 0.00017280818952445997, + "loss": 0.11, + "step": 7523 + }, + { + "epoch": 2.28, + "learning_rate": 0.00017277961490608581, + "loss": 0.0568, + "step": 7524 + }, + { + "epoch": 2.28, + "learning_rate": 0.00017275103944151571, + "loss": 0.1595, + "step": 7525 + }, + { + "epoch": 2.29, + "learning_rate": 0.00017272246313181124, + "loss": 0.0789, + "step": 7526 + }, + { + "epoch": 2.29, + "learning_rate": 0.00017269388597803385, + "loss": 0.1255, + "step": 7527 + }, + { + "epoch": 2.29, + "learning_rate": 0.0001726653079812452, + "loss": 0.072, + "step": 7528 + }, + { + "epoch": 2.29, + "learning_rate": 0.0001726367291425068, + "loss": 0.1523, + "step": 7529 + }, + { + "epoch": 2.29, + "learning_rate": 0.0001726081494628803, + "loss": 0.0857, + "step": 7530 + }, + { + "epoch": 2.29, + "learning_rate": 0.00017257956894342733, + "loss": 0.0642, + "step": 7531 + }, + { + "epoch": 2.29, + "learning_rate": 0.00017255098758520962, + "loss": 0.1228, + "step": 7532 + }, + { + "epoch": 2.29, + "learning_rate": 0.00017252240538928883, + "loss": 0.1246, + "step": 7533 + }, + { + "epoch": 2.29, + "learning_rate": 0.00017249382235672673, + "loss": 0.1148, + "step": 7534 + }, + { + "epoch": 2.29, + "learning_rate": 0.0001724652384885851, + "loss": 0.1131, + "step": 7535 + }, + { + "epoch": 2.29, + "learning_rate": 0.0001724366537859257, + "loss": 0.0966, + "step": 7536 + }, + { + "epoch": 2.29, + "learning_rate": 0.00017240806824981043, + "loss": 0.2437, + "step": 7537 + }, + { + "epoch": 2.29, + "learning_rate": 0.00017237948188130115, + "loss": 0.106, + "step": 7538 + }, + { + "epoch": 2.29, + "learning_rate": 0.00017235089468145974, + "loss": 0.1131, + "step": 7539 + }, + { + "epoch": 2.29, + "learning_rate": 0.00017232230665134812, + "loss": 0.0369, + "step": 7540 + }, + { + "epoch": 2.29, + "learning_rate": 0.0001722937177920283, + "loss": 0.0839, + "step": 7541 + }, + { + "epoch": 2.29, + "learning_rate": 0.00017226512810456223, + "loss": 0.0808, + "step": 7542 + }, + { + "epoch": 2.29, + "learning_rate": 0.0001722365375900119, + "loss": 0.1089, + "step": 7543 + }, + { + "epoch": 2.29, + "learning_rate": 0.00017220794624943947, + "loss": 0.1371, + "step": 7544 + }, + { + "epoch": 2.29, + "learning_rate": 0.0001721793540839069, + "loss": 0.076, + "step": 7545 + }, + { + "epoch": 2.29, + "learning_rate": 0.00017215076109447638, + "loss": 0.1319, + "step": 7546 + }, + { + "epoch": 2.29, + "learning_rate": 0.0001721221672822101, + "loss": 0.1345, + "step": 7547 + }, + { + "epoch": 2.29, + "learning_rate": 0.00017209357264817013, + "loss": 0.1391, + "step": 7548 + }, + { + "epoch": 2.29, + "learning_rate": 0.0001720649771934187, + "loss": 0.0686, + "step": 7549 + }, + { + "epoch": 2.29, + "learning_rate": 0.00017203638091901812, + "loss": 0.0772, + "step": 7550 + }, + { + "epoch": 2.29, + "learning_rate": 0.0001720077838260306, + "loss": 0.093, + "step": 7551 + }, + { + "epoch": 2.29, + "learning_rate": 0.00017197918591551844, + "loss": 0.111, + "step": 7552 + }, + { + "epoch": 2.29, + "learning_rate": 0.00017195058718854394, + "loss": 0.1441, + "step": 7553 + }, + { + "epoch": 2.29, + "learning_rate": 0.00017192198764616956, + "loss": 0.1084, + "step": 7554 + }, + { + "epoch": 2.29, + "learning_rate": 0.00017189338728945764, + "loss": 0.1316, + "step": 7555 + }, + { + "epoch": 2.29, + "learning_rate": 0.00017186478611947052, + "loss": 0.1111, + "step": 7556 + }, + { + "epoch": 2.29, + "learning_rate": 0.00017183618413727075, + "loss": 0.0683, + "step": 7557 + }, + { + "epoch": 2.29, + "learning_rate": 0.00017180758134392076, + "loss": 0.1169, + "step": 7558 + }, + { + "epoch": 2.3, + "learning_rate": 0.00017177897774048304, + "loss": 0.1362, + "step": 7559 + }, + { + "epoch": 2.3, + "learning_rate": 0.0001717503733280202, + "loss": 0.0349, + "step": 7560 + }, + { + "epoch": 2.3, + "learning_rate": 0.00017172176810759476, + "loss": 0.1014, + "step": 7561 + }, + { + "epoch": 2.3, + "learning_rate": 0.00017169316208026934, + "loss": 0.0727, + "step": 7562 + }, + { + "epoch": 2.3, + "learning_rate": 0.0001716645552471066, + "loss": 0.0555, + "step": 7563 + }, + { + "epoch": 2.3, + "learning_rate": 0.0001716359476091691, + "loss": 0.1379, + "step": 7564 + }, + { + "epoch": 2.3, + "learning_rate": 0.00017160733916751964, + "loss": 0.0978, + "step": 7565 + }, + { + "epoch": 2.3, + "learning_rate": 0.00017157872992322085, + "loss": 0.1203, + "step": 7566 + }, + { + "epoch": 2.3, + "learning_rate": 0.0001715501198773356, + "loss": 0.1562, + "step": 7567 + }, + { + "epoch": 2.3, + "learning_rate": 0.00017152150903092649, + "loss": 0.1512, + "step": 7568 + }, + { + "epoch": 2.3, + "learning_rate": 0.00017149289738505649, + "loss": 0.0791, + "step": 7569 + }, + { + "epoch": 2.3, + "learning_rate": 0.00017146428494078838, + "loss": 0.1704, + "step": 7570 + }, + { + "epoch": 2.3, + "learning_rate": 0.000171435671699185, + "loss": 0.0873, + "step": 7571 + }, + { + "epoch": 2.3, + "learning_rate": 0.00017140705766130934, + "loss": 0.0642, + "step": 7572 + }, + { + "epoch": 2.3, + "learning_rate": 0.0001713784428282242, + "loss": 0.1452, + "step": 7573 + }, + { + "epoch": 2.3, + "learning_rate": 0.0001713498272009926, + "loss": 0.138, + "step": 7574 + }, + { + "epoch": 2.3, + "learning_rate": 0.00017132121078067757, + "loss": 0.0518, + "step": 7575 + }, + { + "epoch": 2.3, + "learning_rate": 0.00017129259356834207, + "loss": 0.0885, + "step": 7576 + }, + { + "epoch": 2.3, + "learning_rate": 0.00017126397556504913, + "loss": 0.148, + "step": 7577 + }, + { + "epoch": 2.3, + "learning_rate": 0.00017123535677186186, + "loss": 0.0744, + "step": 7578 + }, + { + "epoch": 2.3, + "learning_rate": 0.00017120673718984335, + "loss": 0.0561, + "step": 7579 + }, + { + "epoch": 2.3, + "learning_rate": 0.0001711781168200568, + "loss": 0.1633, + "step": 7580 + }, + { + "epoch": 2.3, + "learning_rate": 0.00017114949566356528, + "loss": 0.1815, + "step": 7581 + }, + { + "epoch": 2.3, + "learning_rate": 0.00017112087372143198, + "loss": 0.0674, + "step": 7582 + }, + { + "epoch": 2.3, + "learning_rate": 0.00017109225099472023, + "loss": 0.0972, + "step": 7583 + }, + { + "epoch": 2.3, + "learning_rate": 0.00017106362748449316, + "loss": 0.1035, + "step": 7584 + }, + { + "epoch": 2.3, + "learning_rate": 0.00017103500319181407, + "loss": 0.0801, + "step": 7585 + }, + { + "epoch": 2.3, + "learning_rate": 0.0001710063781177463, + "loss": 0.06, + "step": 7586 + }, + { + "epoch": 2.3, + "learning_rate": 0.00017097775226335323, + "loss": 0.0665, + "step": 7587 + }, + { + "epoch": 2.3, + "learning_rate": 0.0001709491256296982, + "loss": 0.0996, + "step": 7588 + }, + { + "epoch": 2.3, + "learning_rate": 0.00017092049821784455, + "loss": 0.0514, + "step": 7589 + }, + { + "epoch": 2.3, + "learning_rate": 0.00017089187002885574, + "loss": 0.0926, + "step": 7590 + }, + { + "epoch": 2.3, + "learning_rate": 0.00017086324106379522, + "loss": 0.1361, + "step": 7591 + }, + { + "epoch": 2.31, + "learning_rate": 0.0001708346113237265, + "loss": 0.0539, + "step": 7592 + }, + { + "epoch": 2.31, + "learning_rate": 0.00017080598080971304, + "loss": 0.0884, + "step": 7593 + }, + { + "epoch": 2.31, + "learning_rate": 0.0001707773495228184, + "loss": 0.1423, + "step": 7594 + }, + { + "epoch": 2.31, + "learning_rate": 0.00017074871746410616, + "loss": 0.1645, + "step": 7595 + }, + { + "epoch": 2.31, + "learning_rate": 0.00017072008463463992, + "loss": 0.1069, + "step": 7596 + }, + { + "epoch": 2.31, + "learning_rate": 0.00017069145103548332, + "loss": 0.1264, + "step": 7597 + }, + { + "epoch": 2.31, + "learning_rate": 0.00017066281666769996, + "loss": 0.0357, + "step": 7598 + }, + { + "epoch": 2.31, + "learning_rate": 0.00017063418153235354, + "loss": 0.114, + "step": 7599 + }, + { + "epoch": 2.31, + "learning_rate": 0.00017060554563050781, + "loss": 0.0763, + "step": 7600 + }, + { + "epoch": 2.31, + "learning_rate": 0.00017057690896322647, + "loss": 0.1512, + "step": 7601 + }, + { + "epoch": 2.31, + "learning_rate": 0.0001705482715315733, + "loss": 0.0574, + "step": 7602 + }, + { + "epoch": 2.31, + "learning_rate": 0.00017051963333661213, + "loss": 0.0737, + "step": 7603 + }, + { + "epoch": 2.31, + "learning_rate": 0.00017049099437940675, + "loss": 0.1357, + "step": 7604 + }, + { + "epoch": 2.31, + "learning_rate": 0.00017046235466102103, + "loss": 0.1098, + "step": 7605 + }, + { + "epoch": 2.31, + "learning_rate": 0.00017043371418251881, + "loss": 0.138, + "step": 7606 + }, + { + "epoch": 2.31, + "learning_rate": 0.00017040507294496403, + "loss": 0.1084, + "step": 7607 + }, + { + "epoch": 2.31, + "learning_rate": 0.00017037643094942065, + "loss": 0.0636, + "step": 7608 + }, + { + "epoch": 2.31, + "learning_rate": 0.00017034778819695256, + "loss": 0.1318, + "step": 7609 + }, + { + "epoch": 2.31, + "learning_rate": 0.00017031914468862381, + "loss": 0.0961, + "step": 7610 + }, + { + "epoch": 2.31, + "learning_rate": 0.00017029050042549848, + "loss": 0.0941, + "step": 7611 + }, + { + "epoch": 2.31, + "learning_rate": 0.00017026185540864054, + "loss": 0.0835, + "step": 7612 + }, + { + "epoch": 2.31, + "learning_rate": 0.00017023320963911411, + "loss": 0.0928, + "step": 7613 + }, + { + "epoch": 2.31, + "learning_rate": 0.00017020456311798323, + "loss": 0.0624, + "step": 7614 + }, + { + "epoch": 2.31, + "learning_rate": 0.00017017591584631212, + "loss": 0.119, + "step": 7615 + }, + { + "epoch": 2.31, + "learning_rate": 0.0001701472678251649, + "loss": 0.1446, + "step": 7616 + }, + { + "epoch": 2.31, + "learning_rate": 0.00017011861905560574, + "loss": 0.0531, + "step": 7617 + }, + { + "epoch": 2.31, + "learning_rate": 0.00017008996953869887, + "loss": 0.0857, + "step": 7618 + }, + { + "epoch": 2.31, + "learning_rate": 0.00017006131927550853, + "loss": 0.1239, + "step": 7619 + }, + { + "epoch": 2.31, + "learning_rate": 0.00017003266826709905, + "loss": 0.0299, + "step": 7620 + }, + { + "epoch": 2.31, + "learning_rate": 0.0001700040165145347, + "loss": 0.1017, + "step": 7621 + }, + { + "epoch": 2.31, + "learning_rate": 0.00016997536401887976, + "loss": 0.0626, + "step": 7622 + }, + { + "epoch": 2.31, + "learning_rate": 0.00016994671078119866, + "loss": 0.102, + "step": 7623 + }, + { + "epoch": 2.31, + "learning_rate": 0.0001699180568025557, + "loss": 0.0723, + "step": 7624 + }, + { + "epoch": 2.32, + "learning_rate": 0.00016988940208401536, + "loss": 0.0845, + "step": 7625 + }, + { + "epoch": 2.32, + "learning_rate": 0.00016986074662664205, + "loss": 0.0499, + "step": 7626 + }, + { + "epoch": 2.32, + "learning_rate": 0.00016983209043150023, + "loss": 0.0717, + "step": 7627 + }, + { + "epoch": 2.32, + "learning_rate": 0.00016980343349965447, + "loss": 0.1012, + "step": 7628 + }, + { + "epoch": 2.32, + "learning_rate": 0.00016977477583216918, + "loss": 0.0632, + "step": 7629 + }, + { + "epoch": 2.32, + "learning_rate": 0.00016974611743010896, + "loss": 0.1561, + "step": 7630 + }, + { + "epoch": 2.32, + "learning_rate": 0.00016971745829453842, + "loss": 0.1064, + "step": 7631 + }, + { + "epoch": 2.32, + "learning_rate": 0.0001696887984265221, + "loss": 0.0583, + "step": 7632 + }, + { + "epoch": 2.32, + "learning_rate": 0.00016966013782712463, + "loss": 0.0643, + "step": 7633 + }, + { + "epoch": 2.32, + "learning_rate": 0.00016963147649741068, + "loss": 0.1548, + "step": 7634 + }, + { + "epoch": 2.32, + "learning_rate": 0.000169602814438445, + "loss": 0.0583, + "step": 7635 + }, + { + "epoch": 2.32, + "learning_rate": 0.00016957415165129225, + "loss": 0.1223, + "step": 7636 + }, + { + "epoch": 2.32, + "learning_rate": 0.00016954548813701714, + "loss": 0.0525, + "step": 7637 + }, + { + "epoch": 2.32, + "learning_rate": 0.00016951682389668447, + "loss": 0.1064, + "step": 7638 + }, + { + "epoch": 2.32, + "learning_rate": 0.00016948815893135904, + "loss": 0.1409, + "step": 7639 + }, + { + "epoch": 2.32, + "learning_rate": 0.00016945949324210564, + "loss": 0.1284, + "step": 7640 + }, + { + "epoch": 2.32, + "learning_rate": 0.0001694308268299891, + "loss": 0.0874, + "step": 7641 + }, + { + "epoch": 2.32, + "learning_rate": 0.00016940215969607435, + "loss": 0.1361, + "step": 7642 + }, + { + "epoch": 2.32, + "learning_rate": 0.00016937349184142626, + "loss": 0.0503, + "step": 7643 + }, + { + "epoch": 2.32, + "learning_rate": 0.00016934482326710977, + "loss": 0.1085, + "step": 7644 + }, + { + "epoch": 2.32, + "learning_rate": 0.00016931615397418982, + "loss": 0.1486, + "step": 7645 + }, + { + "epoch": 2.32, + "learning_rate": 0.00016928748396373143, + "loss": 0.1267, + "step": 7646 + }, + { + "epoch": 2.32, + "learning_rate": 0.00016925881323679953, + "loss": 0.1132, + "step": 7647 + }, + { + "epoch": 2.32, + "learning_rate": 0.00016923014179445918, + "loss": 0.1704, + "step": 7648 + }, + { + "epoch": 2.32, + "learning_rate": 0.00016920146963777548, + "loss": 0.1047, + "step": 7649 + }, + { + "epoch": 2.32, + "learning_rate": 0.00016917279676781344, + "loss": 0.0858, + "step": 7650 + }, + { + "epoch": 2.32, + "learning_rate": 0.0001691441231856383, + "loss": 0.1048, + "step": 7651 + }, + { + "epoch": 2.32, + "learning_rate": 0.0001691154488923151, + "loss": 0.1159, + "step": 7652 + }, + { + "epoch": 2.32, + "learning_rate": 0.000169086773888909, + "loss": 0.1723, + "step": 7653 + }, + { + "epoch": 2.32, + "learning_rate": 0.00016905809817648529, + "loss": 0.0911, + "step": 7654 + }, + { + "epoch": 2.32, + "learning_rate": 0.00016902942175610905, + "loss": 0.0925, + "step": 7655 + }, + { + "epoch": 2.32, + "learning_rate": 0.0001690007446288456, + "loss": 0.1334, + "step": 7656 + }, + { + "epoch": 2.32, + "learning_rate": 0.00016897206679576023, + "loss": 0.1021, + "step": 7657 + }, + { + "epoch": 2.33, + "learning_rate": 0.0001689433882579182, + "loss": 0.0634, + "step": 7658 + }, + { + "epoch": 2.33, + "learning_rate": 0.00016891470901638487, + "loss": 0.1563, + "step": 7659 + }, + { + "epoch": 2.33, + "learning_rate": 0.00016888602907222554, + "loss": 0.1088, + "step": 7660 + }, + { + "epoch": 2.33, + "learning_rate": 0.00016885734842650562, + "loss": 0.0508, + "step": 7661 + }, + { + "epoch": 2.33, + "learning_rate": 0.0001688286670802905, + "loss": 0.0917, + "step": 7662 + }, + { + "epoch": 2.33, + "learning_rate": 0.00016879998503464561, + "loss": 0.1858, + "step": 7663 + }, + { + "epoch": 2.33, + "learning_rate": 0.00016877130229063644, + "loss": 0.1519, + "step": 7664 + }, + { + "epoch": 2.33, + "learning_rate": 0.0001687426188493284, + "loss": 0.1644, + "step": 7665 + }, + { + "epoch": 2.33, + "learning_rate": 0.00016871393471178705, + "loss": 0.215, + "step": 7666 + }, + { + "epoch": 2.33, + "learning_rate": 0.0001686852498790779, + "loss": 0.1966, + "step": 7667 + }, + { + "epoch": 2.33, + "learning_rate": 0.0001686565643522665, + "loss": 0.1023, + "step": 7668 + }, + { + "epoch": 2.33, + "learning_rate": 0.0001686278781324185, + "loss": 0.1157, + "step": 7669 + }, + { + "epoch": 2.33, + "learning_rate": 0.00016859919122059938, + "loss": 0.1353, + "step": 7670 + }, + { + "epoch": 2.33, + "learning_rate": 0.0001685705036178749, + "loss": 0.0952, + "step": 7671 + }, + { + "epoch": 2.33, + "learning_rate": 0.00016854181532531065, + "loss": 0.0846, + "step": 7672 + }, + { + "epoch": 2.33, + "learning_rate": 0.0001685131263439723, + "loss": 0.1018, + "step": 7673 + }, + { + "epoch": 2.33, + "learning_rate": 0.00016848443667492562, + "loss": 0.0764, + "step": 7674 + }, + { + "epoch": 2.33, + "learning_rate": 0.00016845574631923634, + "loss": 0.0552, + "step": 7675 + }, + { + "epoch": 2.33, + "learning_rate": 0.00016842705527797018, + "loss": 0.0517, + "step": 7676 + }, + { + "epoch": 2.33, + "learning_rate": 0.00016839836355219296, + "loss": 0.063, + "step": 7677 + }, + { + "epoch": 2.33, + "learning_rate": 0.0001683696711429705, + "loss": 0.0684, + "step": 7678 + }, + { + "epoch": 2.33, + "learning_rate": 0.00016834097805136863, + "loss": 0.0968, + "step": 7679 + }, + { + "epoch": 2.33, + "learning_rate": 0.00016831228427845318, + "loss": 0.0544, + "step": 7680 + }, + { + "epoch": 2.33, + "learning_rate": 0.0001682835898252901, + "loss": 0.0918, + "step": 7681 + }, + { + "epoch": 2.33, + "learning_rate": 0.00016825489469294522, + "loss": 0.1135, + "step": 7682 + }, + { + "epoch": 2.33, + "learning_rate": 0.0001682261988824846, + "loss": 0.0384, + "step": 7683 + }, + { + "epoch": 2.33, + "learning_rate": 0.00016819750239497412, + "loss": 0.0914, + "step": 7684 + }, + { + "epoch": 2.33, + "learning_rate": 0.00016816880523147975, + "loss": 0.1299, + "step": 7685 + }, + { + "epoch": 2.33, + "learning_rate": 0.00016814010739306758, + "loss": 0.1225, + "step": 7686 + }, + { + "epoch": 2.33, + "learning_rate": 0.00016811140888080365, + "loss": 0.078, + "step": 7687 + }, + { + "epoch": 2.33, + "learning_rate": 0.00016808270969575397, + "loss": 0.1375, + "step": 7688 + }, + { + "epoch": 2.33, + "learning_rate": 0.00016805400983898466, + "loss": 0.192, + "step": 7689 + }, + { + "epoch": 2.33, + "learning_rate": 0.0001680253093115618, + "loss": 0.0842, + "step": 7690 + }, + { + "epoch": 2.34, + "learning_rate": 0.00016799660811455148, + "loss": 0.1334, + "step": 7691 + }, + { + "epoch": 2.34, + "learning_rate": 0.00016796790624902007, + "loss": 0.1208, + "step": 7692 + }, + { + "epoch": 2.34, + "learning_rate": 0.00016793920371603358, + "loss": 0.0309, + "step": 7693 + }, + { + "epoch": 2.34, + "learning_rate": 0.00016791050051665832, + "loss": 0.0613, + "step": 7694 + }, + { + "epoch": 2.34, + "learning_rate": 0.00016788179665196045, + "loss": 0.03, + "step": 7695 + }, + { + "epoch": 2.34, + "learning_rate": 0.00016785309212300625, + "loss": 0.1137, + "step": 7696 + }, + { + "epoch": 2.34, + "learning_rate": 0.0001678243869308621, + "loss": 0.0837, + "step": 7697 + }, + { + "epoch": 2.34, + "learning_rate": 0.00016779568107659417, + "loss": 0.1209, + "step": 7698 + }, + { + "epoch": 2.34, + "learning_rate": 0.00016776697456126892, + "loss": 0.1347, + "step": 7699 + }, + { + "epoch": 2.34, + "learning_rate": 0.00016773826738595267, + "loss": 0.0573, + "step": 7700 + }, + { + "epoch": 2.34, + "learning_rate": 0.00016770955955171176, + "loss": 0.1207, + "step": 7701 + }, + { + "epoch": 2.34, + "learning_rate": 0.0001676808510596127, + "loss": 0.1415, + "step": 7702 + }, + { + "epoch": 2.34, + "learning_rate": 0.00016765214191072182, + "loss": 0.1336, + "step": 7703 + }, + { + "epoch": 2.34, + "learning_rate": 0.00016762343210610566, + "loss": 0.0471, + "step": 7704 + }, + { + "epoch": 2.34, + "learning_rate": 0.0001675947216468307, + "loss": 0.1532, + "step": 7705 + }, + { + "epoch": 2.34, + "learning_rate": 0.0001675660105339634, + "loss": 0.049, + "step": 7706 + }, + { + "epoch": 2.34, + "learning_rate": 0.00016753729876857031, + "loss": 0.1257, + "step": 7707 + }, + { + "epoch": 2.34, + "learning_rate": 0.00016750858635171802, + "loss": 0.1283, + "step": 7708 + }, + { + "epoch": 2.34, + "learning_rate": 0.00016747987328447306, + "loss": 0.1654, + "step": 7709 + }, + { + "epoch": 2.34, + "learning_rate": 0.00016745115956790212, + "loss": 0.1465, + "step": 7710 + }, + { + "epoch": 2.34, + "learning_rate": 0.00016742244520307173, + "loss": 0.1299, + "step": 7711 + }, + { + "epoch": 2.34, + "learning_rate": 0.00016739373019104866, + "loss": 0.0596, + "step": 7712 + }, + { + "epoch": 2.34, + "learning_rate": 0.00016736501453289943, + "loss": 0.1311, + "step": 7713 + }, + { + "epoch": 2.34, + "learning_rate": 0.00016733629822969086, + "loss": 0.0789, + "step": 7714 + }, + { + "epoch": 2.34, + "learning_rate": 0.00016730758128248967, + "loss": 0.0819, + "step": 7715 + }, + { + "epoch": 2.34, + "learning_rate": 0.00016727886369236256, + "loss": 0.1265, + "step": 7716 + }, + { + "epoch": 2.34, + "learning_rate": 0.00016725014546037636, + "loss": 0.0417, + "step": 7717 + }, + { + "epoch": 2.34, + "learning_rate": 0.00016722142658759785, + "loss": 0.0625, + "step": 7718 + }, + { + "epoch": 2.34, + "learning_rate": 0.00016719270707509382, + "loss": 0.0438, + "step": 7719 + }, + { + "epoch": 2.34, + "learning_rate": 0.00016716398692393115, + "loss": 0.0991, + "step": 7720 + }, + { + "epoch": 2.34, + "learning_rate": 0.0001671352661351767, + "loss": 0.1432, + "step": 7721 + }, + { + "epoch": 2.34, + "learning_rate": 0.00016710654470989737, + "loss": 0.1906, + "step": 7722 + }, + { + "epoch": 2.34, + "learning_rate": 0.00016707782264916001, + "loss": 0.09, + "step": 7723 + }, + { + "epoch": 2.35, + "learning_rate": 0.00016704909995403166, + "loss": 0.0977, + "step": 7724 + }, + { + "epoch": 2.35, + "learning_rate": 0.00016702037662557926, + "loss": 0.082, + "step": 7725 + }, + { + "epoch": 2.35, + "learning_rate": 0.00016699165266486978, + "loss": 0.0998, + "step": 7726 + }, + { + "epoch": 2.35, + "learning_rate": 0.00016696292807297025, + "loss": 0.0872, + "step": 7727 + }, + { + "epoch": 2.35, + "learning_rate": 0.00016693420285094764, + "loss": 0.069, + "step": 7728 + }, + { + "epoch": 2.35, + "learning_rate": 0.00016690547699986906, + "loss": 0.058, + "step": 7729 + }, + { + "epoch": 2.35, + "learning_rate": 0.00016687675052080161, + "loss": 0.0935, + "step": 7730 + }, + { + "epoch": 2.35, + "learning_rate": 0.00016684802341481234, + "loss": 0.0633, + "step": 7731 + }, + { + "epoch": 2.35, + "learning_rate": 0.0001668192956829684, + "loss": 0.0819, + "step": 7732 + }, + { + "epoch": 2.35, + "learning_rate": 0.00016679056732633697, + "loss": 0.1012, + "step": 7733 + }, + { + "epoch": 2.35, + "learning_rate": 0.0001667618383459852, + "loss": 0.0829, + "step": 7734 + }, + { + "epoch": 2.35, + "learning_rate": 0.00016673310874298032, + "loss": 0.0865, + "step": 7735 + }, + { + "epoch": 2.35, + "learning_rate": 0.00016670437851838948, + "loss": 0.1293, + "step": 7736 + }, + { + "epoch": 2.35, + "learning_rate": 0.00016667564767327992, + "loss": 0.0886, + "step": 7737 + }, + { + "epoch": 2.35, + "learning_rate": 0.00016664691620871906, + "loss": 0.1338, + "step": 7738 + }, + { + "epoch": 2.35, + "learning_rate": 0.000166618184125774, + "loss": 0.142, + "step": 7739 + }, + { + "epoch": 2.35, + "learning_rate": 0.00016658945142551212, + "loss": 0.1365, + "step": 7740 + }, + { + "epoch": 2.35, + "learning_rate": 0.0001665607181090008, + "loss": 0.0597, + "step": 7741 + }, + { + "epoch": 2.35, + "learning_rate": 0.00016653198417730734, + "loss": 0.1453, + "step": 7742 + }, + { + "epoch": 2.35, + "learning_rate": 0.00016650324963149923, + "loss": 0.1185, + "step": 7743 + }, + { + "epoch": 2.35, + "learning_rate": 0.00016647451447264372, + "loss": 0.0355, + "step": 7744 + }, + { + "epoch": 2.35, + "learning_rate": 0.00016644577870180834, + "loss": 0.1043, + "step": 7745 + }, + { + "epoch": 2.35, + "learning_rate": 0.0001664170423200605, + "loss": 0.042, + "step": 7746 + }, + { + "epoch": 2.35, + "learning_rate": 0.00016638830532846765, + "loss": 0.099, + "step": 7747 + }, + { + "epoch": 2.35, + "learning_rate": 0.00016635956772809732, + "loss": 0.115, + "step": 7748 + }, + { + "epoch": 2.35, + "learning_rate": 0.00016633082952001704, + "loss": 0.0588, + "step": 7749 + }, + { + "epoch": 2.35, + "learning_rate": 0.00016630209070529433, + "loss": 0.1148, + "step": 7750 + }, + { + "epoch": 2.35, + "learning_rate": 0.00016627335128499676, + "loss": 0.1807, + "step": 7751 + }, + { + "epoch": 2.35, + "learning_rate": 0.0001662446112601919, + "loss": 0.1903, + "step": 7752 + }, + { + "epoch": 2.35, + "learning_rate": 0.0001662158706319474, + "loss": 0.0635, + "step": 7753 + }, + { + "epoch": 2.35, + "learning_rate": 0.00016618712940133084, + "loss": 0.1124, + "step": 7754 + }, + { + "epoch": 2.35, + "learning_rate": 0.00016615838756940985, + "loss": 0.0353, + "step": 7755 + }, + { + "epoch": 2.35, + "learning_rate": 0.00016612964513725218, + "loss": 0.1283, + "step": 7756 + }, + { + "epoch": 2.36, + "learning_rate": 0.0001661009021059255, + "loss": 0.0878, + "step": 7757 + }, + { + "epoch": 2.36, + "learning_rate": 0.00016607215847649754, + "loss": 0.0805, + "step": 7758 + }, + { + "epoch": 2.36, + "learning_rate": 0.000166043414250036, + "loss": 0.0275, + "step": 7759 + }, + { + "epoch": 2.36, + "learning_rate": 0.0001660146694276087, + "loss": 0.1175, + "step": 7760 + }, + { + "epoch": 2.36, + "learning_rate": 0.00016598592401028336, + "loss": 0.07, + "step": 7761 + }, + { + "epoch": 2.36, + "learning_rate": 0.00016595717799912778, + "loss": 0.0443, + "step": 7762 + }, + { + "epoch": 2.36, + "learning_rate": 0.0001659284313952099, + "loss": 0.0953, + "step": 7763 + }, + { + "epoch": 2.36, + "learning_rate": 0.00016589968419959743, + "loss": 0.0995, + "step": 7764 + }, + { + "epoch": 2.36, + "learning_rate": 0.00016587093641335843, + "loss": 0.0399, + "step": 7765 + }, + { + "epoch": 2.36, + "learning_rate": 0.00016584218803756057, + "loss": 0.1324, + "step": 7766 + }, + { + "epoch": 2.36, + "learning_rate": 0.00016581343907327194, + "loss": 0.1038, + "step": 7767 + }, + { + "epoch": 2.36, + "learning_rate": 0.0001657846895215604, + "loss": 0.1385, + "step": 7768 + }, + { + "epoch": 2.36, + "learning_rate": 0.00016575593938349397, + "loss": 0.1286, + "step": 7769 + }, + { + "epoch": 2.36, + "learning_rate": 0.00016572718866014058, + "loss": 0.0544, + "step": 7770 + }, + { + "epoch": 2.36, + "learning_rate": 0.00016569843735256824, + "loss": 0.0684, + "step": 7771 + }, + { + "epoch": 2.36, + "learning_rate": 0.00016566968546184502, + "loss": 0.173, + "step": 7772 + }, + { + "epoch": 2.36, + "learning_rate": 0.00016564093298903886, + "loss": 0.0355, + "step": 7773 + }, + { + "epoch": 2.36, + "learning_rate": 0.00016561217993521794, + "loss": 0.1156, + "step": 7774 + }, + { + "epoch": 2.36, + "learning_rate": 0.00016558342630145032, + "loss": 0.0558, + "step": 7775 + }, + { + "epoch": 2.36, + "learning_rate": 0.00016555467208880415, + "loss": 0.1675, + "step": 7776 + }, + { + "epoch": 2.36, + "learning_rate": 0.0001655259172983475, + "loss": 0.1135, + "step": 7777 + }, + { + "epoch": 2.36, + "learning_rate": 0.00016549716193114855, + "loss": 0.1174, + "step": 7778 + }, + { + "epoch": 2.36, + "learning_rate": 0.00016546840598827546, + "loss": 0.1017, + "step": 7779 + }, + { + "epoch": 2.36, + "learning_rate": 0.00016543964947079648, + "loss": 0.1635, + "step": 7780 + }, + { + "epoch": 2.36, + "learning_rate": 0.00016541089237977975, + "loss": 0.1405, + "step": 7781 + }, + { + "epoch": 2.36, + "learning_rate": 0.0001653821347162936, + "loss": 0.144, + "step": 7782 + }, + { + "epoch": 2.36, + "learning_rate": 0.00016535337648140627, + "loss": 0.0803, + "step": 7783 + }, + { + "epoch": 2.36, + "learning_rate": 0.000165324617676186, + "loss": 0.1096, + "step": 7784 + }, + { + "epoch": 2.36, + "learning_rate": 0.00016529585830170112, + "loss": 0.068, + "step": 7785 + }, + { + "epoch": 2.36, + "learning_rate": 0.00016526709835902, + "loss": 0.0862, + "step": 7786 + }, + { + "epoch": 2.36, + "learning_rate": 0.00016523833784921088, + "loss": 0.1787, + "step": 7787 + }, + { + "epoch": 2.36, + "learning_rate": 0.00016520957677334223, + "loss": 0.0574, + "step": 7788 + }, + { + "epoch": 2.36, + "learning_rate": 0.0001651808151324824, + "loss": 0.0922, + "step": 7789 + }, + { + "epoch": 2.37, + "learning_rate": 0.00016515205292769977, + "loss": 0.0951, + "step": 7790 + }, + { + "epoch": 2.37, + "learning_rate": 0.00016512329016006287, + "loss": 0.0877, + "step": 7791 + }, + { + "epoch": 2.37, + "learning_rate": 0.00016509452683064006, + "loss": 0.1112, + "step": 7792 + }, + { + "epoch": 2.37, + "learning_rate": 0.00016506576294049986, + "loss": 0.099, + "step": 7793 + }, + { + "epoch": 2.37, + "learning_rate": 0.0001650369984907107, + "loss": 0.1127, + "step": 7794 + }, + { + "epoch": 2.37, + "learning_rate": 0.00016500823348234117, + "loss": 0.0566, + "step": 7795 + }, + { + "epoch": 2.37, + "learning_rate": 0.00016497946791645978, + "loss": 0.0411, + "step": 7796 + }, + { + "epoch": 2.37, + "learning_rate": 0.00016495070179413506, + "loss": 0.1093, + "step": 7797 + }, + { + "epoch": 2.37, + "learning_rate": 0.00016492193511643568, + "loss": 0.1518, + "step": 7798 + }, + { + "epoch": 2.37, + "learning_rate": 0.00016489316788443012, + "loss": 0.0895, + "step": 7799 + }, + { + "epoch": 2.37, + "learning_rate": 0.00016486440009918704, + "loss": 0.0604, + "step": 7800 + }, + { + "epoch": 2.37, + "learning_rate": 0.0001648356317617751, + "loss": 0.1342, + "step": 7801 + }, + { + "epoch": 2.37, + "learning_rate": 0.00016480686287326297, + "loss": 0.0807, + "step": 7802 + }, + { + "epoch": 2.37, + "learning_rate": 0.00016477809343471924, + "loss": 0.1763, + "step": 7803 + }, + { + "epoch": 2.37, + "learning_rate": 0.00016474932344721273, + "loss": 0.1888, + "step": 7804 + }, + { + "epoch": 2.37, + "learning_rate": 0.0001647205529118121, + "loss": 0.0971, + "step": 7805 + }, + { + "epoch": 2.37, + "learning_rate": 0.00016469178182958608, + "loss": 0.0747, + "step": 7806 + }, + { + "epoch": 2.37, + "learning_rate": 0.00016466301020160346, + "loss": 0.048, + "step": 7807 + }, + { + "epoch": 2.37, + "learning_rate": 0.00016463423802893297, + "loss": 0.0973, + "step": 7808 + }, + { + "epoch": 2.37, + "learning_rate": 0.0001646054653126435, + "loss": 0.0698, + "step": 7809 + }, + { + "epoch": 2.37, + "learning_rate": 0.00016457669205380382, + "loss": 0.0494, + "step": 7810 + }, + { + "epoch": 2.37, + "learning_rate": 0.00016454791825348277, + "loss": 0.1232, + "step": 7811 + }, + { + "epoch": 2.37, + "learning_rate": 0.0001645191439127492, + "loss": 0.0791, + "step": 7812 + }, + { + "epoch": 2.37, + "learning_rate": 0.000164490369032672, + "loss": 0.1269, + "step": 7813 + }, + { + "epoch": 2.37, + "learning_rate": 0.00016446159361432013, + "loss": 0.0956, + "step": 7814 + }, + { + "epoch": 2.37, + "learning_rate": 0.00016443281765876245, + "loss": 0.096, + "step": 7815 + }, + { + "epoch": 2.37, + "learning_rate": 0.0001644040411670679, + "loss": 0.0959, + "step": 7816 + }, + { + "epoch": 2.37, + "learning_rate": 0.00016437526414030546, + "loss": 0.0522, + "step": 7817 + }, + { + "epoch": 2.37, + "learning_rate": 0.00016434648657954413, + "loss": 0.083, + "step": 7818 + }, + { + "epoch": 2.37, + "learning_rate": 0.00016431770848585292, + "loss": 0.1261, + "step": 7819 + }, + { + "epoch": 2.37, + "learning_rate": 0.00016428892986030075, + "loss": 0.0856, + "step": 7820 + }, + { + "epoch": 2.37, + "learning_rate": 0.0001642601507039568, + "loss": 0.1101, + "step": 7821 + }, + { + "epoch": 2.37, + "learning_rate": 0.00016423137101789003, + "loss": 0.0986, + "step": 7822 + }, + { + "epoch": 2.38, + "learning_rate": 0.00016420259080316958, + "loss": 0.1255, + "step": 7823 + }, + { + "epoch": 2.38, + "learning_rate": 0.00016417381006086457, + "loss": 0.118, + "step": 7824 + }, + { + "epoch": 2.38, + "learning_rate": 0.000164145028792044, + "loss": 0.0906, + "step": 7825 + }, + { + "epoch": 2.38, + "learning_rate": 0.00016411624699777717, + "loss": 0.0925, + "step": 7826 + }, + { + "epoch": 2.38, + "learning_rate": 0.0001640874646791331, + "loss": 0.0607, + "step": 7827 + }, + { + "epoch": 2.38, + "learning_rate": 0.00016405868183718103, + "loss": 0.0711, + "step": 7828 + }, + { + "epoch": 2.38, + "learning_rate": 0.00016402989847299018, + "loss": 0.0288, + "step": 7829 + }, + { + "epoch": 2.38, + "learning_rate": 0.00016400111458762974, + "loss": 0.0897, + "step": 7830 + }, + { + "epoch": 2.38, + "learning_rate": 0.00016397233018216893, + "loss": 0.1182, + "step": 7831 + }, + { + "epoch": 2.38, + "learning_rate": 0.0001639435452576771, + "loss": 0.1173, + "step": 7832 + }, + { + "epoch": 2.38, + "learning_rate": 0.00016391475981522336, + "loss": 0.1189, + "step": 7833 + }, + { + "epoch": 2.38, + "learning_rate": 0.00016388597385587716, + "loss": 0.0899, + "step": 7834 + }, + { + "epoch": 2.38, + "learning_rate": 0.0001638571873807077, + "loss": 0.1302, + "step": 7835 + }, + { + "epoch": 2.38, + "learning_rate": 0.00016382840039078442, + "loss": 0.0516, + "step": 7836 + }, + { + "epoch": 2.38, + "learning_rate": 0.0001637996128871766, + "loss": 0.0635, + "step": 7837 + }, + { + "epoch": 2.38, + "learning_rate": 0.00016377082487095356, + "loss": 0.1744, + "step": 7838 + }, + { + "epoch": 2.38, + "learning_rate": 0.00016374203634318487, + "loss": 0.1013, + "step": 7839 + }, + { + "epoch": 2.38, + "learning_rate": 0.00016371324730493975, + "loss": 0.1473, + "step": 7840 + }, + { + "epoch": 2.38, + "learning_rate": 0.00016368445775728774, + "loss": 0.2416, + "step": 7841 + }, + { + "epoch": 2.38, + "learning_rate": 0.00016365566770129827, + "loss": 0.1236, + "step": 7842 + }, + { + "epoch": 2.38, + "learning_rate": 0.00016362687713804077, + "loss": 0.0648, + "step": 7843 + }, + { + "epoch": 2.38, + "learning_rate": 0.00016359808606858473, + "loss": 0.0377, + "step": 7844 + }, + { + "epoch": 2.38, + "learning_rate": 0.00016356929449399968, + "loss": 0.035, + "step": 7845 + }, + { + "epoch": 2.38, + "learning_rate": 0.0001635405024153551, + "loss": 0.0766, + "step": 7846 + }, + { + "epoch": 2.38, + "learning_rate": 0.0001635117098337206, + "loss": 0.0778, + "step": 7847 + }, + { + "epoch": 2.38, + "learning_rate": 0.0001634829167501657, + "loss": 0.0691, + "step": 7848 + }, + { + "epoch": 2.38, + "learning_rate": 0.00016345412316575998, + "loss": 0.0998, + "step": 7849 + }, + { + "epoch": 2.38, + "learning_rate": 0.00016342532908157303, + "loss": 0.1284, + "step": 7850 + }, + { + "epoch": 2.38, + "learning_rate": 0.00016339653449867447, + "loss": 0.0764, + "step": 7851 + }, + { + "epoch": 2.38, + "learning_rate": 0.000163367739418134, + "loss": 0.0856, + "step": 7852 + }, + { + "epoch": 2.38, + "learning_rate": 0.00016333894384102114, + "loss": 0.1052, + "step": 7853 + }, + { + "epoch": 2.38, + "learning_rate": 0.00016331014776840562, + "loss": 0.0202, + "step": 7854 + }, + { + "epoch": 2.39, + "learning_rate": 0.00016328135120135715, + "loss": 0.1292, + "step": 7855 + }, + { + "epoch": 2.39, + "learning_rate": 0.00016325255414094547, + "loss": 0.1442, + "step": 7856 + }, + { + "epoch": 2.39, + "learning_rate": 0.00016322375658824027, + "loss": 0.2605, + "step": 7857 + }, + { + "epoch": 2.39, + "learning_rate": 0.00016319495854431125, + "loss": 0.0947, + "step": 7858 + }, + { + "epoch": 2.39, + "learning_rate": 0.00016316616001022828, + "loss": 0.0811, + "step": 7859 + }, + { + "epoch": 2.39, + "learning_rate": 0.00016313736098706098, + "loss": 0.0264, + "step": 7860 + }, + { + "epoch": 2.39, + "learning_rate": 0.00016310856147587932, + "loss": 0.0327, + "step": 7861 + }, + { + "epoch": 2.39, + "learning_rate": 0.00016307976147775298, + "loss": 0.0784, + "step": 7862 + }, + { + "epoch": 2.39, + "learning_rate": 0.0001630509609937519, + "loss": 0.0931, + "step": 7863 + }, + { + "epoch": 2.39, + "learning_rate": 0.0001630221600249459, + "loss": 0.0864, + "step": 7864 + }, + { + "epoch": 2.39, + "learning_rate": 0.00016299335857240484, + "loss": 0.1294, + "step": 7865 + }, + { + "epoch": 2.39, + "learning_rate": 0.00016296455663719858, + "loss": 0.0763, + "step": 7866 + }, + { + "epoch": 2.39, + "learning_rate": 0.00016293575422039709, + "loss": 0.1448, + "step": 7867 + }, + { + "epoch": 2.39, + "learning_rate": 0.00016290695132307025, + "loss": 0.0951, + "step": 7868 + }, + { + "epoch": 2.39, + "learning_rate": 0.00016287814794628801, + "loss": 0.1299, + "step": 7869 + }, + { + "epoch": 2.39, + "learning_rate": 0.00016284934409112033, + "loss": 0.0938, + "step": 7870 + }, + { + "epoch": 2.39, + "learning_rate": 0.00016282053975863722, + "loss": 0.0403, + "step": 7871 + }, + { + "epoch": 2.39, + "learning_rate": 0.00016279173494990865, + "loss": 0.0914, + "step": 7872 + }, + { + "epoch": 2.39, + "learning_rate": 0.00016276292966600463, + "loss": 0.1428, + "step": 7873 + }, + { + "epoch": 2.39, + "learning_rate": 0.0001627341239079952, + "loss": 0.1533, + "step": 7874 + }, + { + "epoch": 2.39, + "learning_rate": 0.00016270531767695047, + "loss": 0.105, + "step": 7875 + }, + { + "epoch": 2.39, + "learning_rate": 0.00016267651097394035, + "loss": 0.048, + "step": 7876 + }, + { + "epoch": 2.39, + "learning_rate": 0.0001626477038000351, + "loss": 0.0443, + "step": 7877 + }, + { + "epoch": 2.39, + "learning_rate": 0.0001626188961563047, + "loss": 0.1721, + "step": 7878 + }, + { + "epoch": 2.39, + "learning_rate": 0.00016259008804381928, + "loss": 0.1121, + "step": 7879 + }, + { + "epoch": 2.39, + "learning_rate": 0.00016256127946364906, + "loss": 0.0605, + "step": 7880 + }, + { + "epoch": 2.39, + "learning_rate": 0.0001625324704168641, + "loss": 0.0633, + "step": 7881 + }, + { + "epoch": 2.39, + "learning_rate": 0.0001625036609045347, + "loss": 0.1406, + "step": 7882 + }, + { + "epoch": 2.39, + "learning_rate": 0.00016247485092773088, + "loss": 0.0791, + "step": 7883 + }, + { + "epoch": 2.39, + "learning_rate": 0.00016244604048752296, + "loss": 0.1072, + "step": 7884 + }, + { + "epoch": 2.39, + "learning_rate": 0.00016241722958498117, + "loss": 0.1667, + "step": 7885 + }, + { + "epoch": 2.39, + "learning_rate": 0.00016238841822117567, + "loss": 0.1249, + "step": 7886 + }, + { + "epoch": 2.39, + "learning_rate": 0.00016235960639717674, + "loss": 0.0745, + "step": 7887 + }, + { + "epoch": 2.4, + "learning_rate": 0.0001623307941140547, + "loss": 0.1119, + "step": 7888 + }, + { + "epoch": 2.4, + "learning_rate": 0.00016230198137287983, + "loss": 0.13, + "step": 7889 + }, + { + "epoch": 2.4, + "learning_rate": 0.00016227316817472244, + "loss": 0.0775, + "step": 7890 + }, + { + "epoch": 2.4, + "learning_rate": 0.00016224435452065283, + "loss": 0.1615, + "step": 7891 + }, + { + "epoch": 2.4, + "learning_rate": 0.00016221554041174132, + "loss": 0.129, + "step": 7892 + }, + { + "epoch": 2.4, + "learning_rate": 0.00016218672584905838, + "loss": 0.0802, + "step": 7893 + }, + { + "epoch": 2.4, + "learning_rate": 0.00016215791083367423, + "loss": 0.1316, + "step": 7894 + }, + { + "epoch": 2.4, + "learning_rate": 0.0001621290953666594, + "loss": 0.1284, + "step": 7895 + }, + { + "epoch": 2.4, + "learning_rate": 0.00016210027944908422, + "loss": 0.138, + "step": 7896 + }, + { + "epoch": 2.4, + "learning_rate": 0.00016207146308201917, + "loss": 0.17, + "step": 7897 + }, + { + "epoch": 2.4, + "learning_rate": 0.00016204264626653466, + "loss": 0.0864, + "step": 7898 + }, + { + "epoch": 2.4, + "learning_rate": 0.00016201382900370113, + "loss": 0.1416, + "step": 7899 + }, + { + "epoch": 2.4, + "learning_rate": 0.00016198501129458909, + "loss": 0.0922, + "step": 7900 + }, + { + "epoch": 2.4, + "learning_rate": 0.00016195619314026904, + "loss": 0.0783, + "step": 7901 + }, + { + "epoch": 2.4, + "learning_rate": 0.00016192737454181146, + "loss": 0.0349, + "step": 7902 + }, + { + "epoch": 2.4, + "learning_rate": 0.0001618985555002869, + "loss": 0.0838, + "step": 7903 + }, + { + "epoch": 2.4, + "learning_rate": 0.00016186973601676588, + "loss": 0.0461, + "step": 7904 + }, + { + "epoch": 2.4, + "learning_rate": 0.000161840916092319, + "loss": 0.1288, + "step": 7905 + }, + { + "epoch": 2.4, + "learning_rate": 0.00016181209572801682, + "loss": 0.0643, + "step": 7906 + }, + { + "epoch": 2.4, + "learning_rate": 0.0001617832749249299, + "loss": 0.0892, + "step": 7907 + }, + { + "epoch": 2.4, + "learning_rate": 0.00016175445368412888, + "loss": 0.1246, + "step": 7908 + }, + { + "epoch": 2.4, + "learning_rate": 0.0001617256320066844, + "loss": 0.1164, + "step": 7909 + }, + { + "epoch": 2.4, + "learning_rate": 0.00016169680989366704, + "loss": 0.094, + "step": 7910 + }, + { + "epoch": 2.4, + "learning_rate": 0.00016166798734614746, + "loss": 0.1313, + "step": 7911 + }, + { + "epoch": 2.4, + "learning_rate": 0.00016163916436519638, + "loss": 0.0669, + "step": 7912 + }, + { + "epoch": 2.4, + "learning_rate": 0.00016161034095188453, + "loss": 0.1202, + "step": 7913 + }, + { + "epoch": 2.4, + "learning_rate": 0.0001615815171072825, + "loss": 0.0674, + "step": 7914 + }, + { + "epoch": 2.4, + "learning_rate": 0.00016155269283246116, + "loss": 0.151, + "step": 7915 + }, + { + "epoch": 2.4, + "learning_rate": 0.0001615238681284911, + "loss": 0.1168, + "step": 7916 + }, + { + "epoch": 2.4, + "learning_rate": 0.00016149504299644313, + "loss": 0.0864, + "step": 7917 + }, + { + "epoch": 2.4, + "learning_rate": 0.00016146621743738803, + "loss": 0.1241, + "step": 7918 + }, + { + "epoch": 2.4, + "learning_rate": 0.00016143739145239657, + "loss": 0.1587, + "step": 7919 + }, + { + "epoch": 2.4, + "learning_rate": 0.00016140856504253954, + "loss": 0.0372, + "step": 7920 + }, + { + "epoch": 2.41, + "learning_rate": 0.0001613797382088878, + "loss": 0.1124, + "step": 7921 + }, + { + "epoch": 2.41, + "learning_rate": 0.00016135091095251212, + "loss": 0.122, + "step": 7922 + }, + { + "epoch": 2.41, + "learning_rate": 0.00016132208327448346, + "loss": 0.0994, + "step": 7923 + }, + { + "epoch": 2.41, + "learning_rate": 0.00016129325517587255, + "loss": 0.1526, + "step": 7924 + }, + { + "epoch": 2.41, + "learning_rate": 0.00016126442665775033, + "loss": 0.0389, + "step": 7925 + }, + { + "epoch": 2.41, + "learning_rate": 0.00016123559772118773, + "loss": 0.1136, + "step": 7926 + }, + { + "epoch": 2.41, + "learning_rate": 0.0001612067683672556, + "loss": 0.1143, + "step": 7927 + }, + { + "epoch": 2.41, + "learning_rate": 0.00016117793859702487, + "loss": 0.1303, + "step": 7928 + }, + { + "epoch": 2.41, + "learning_rate": 0.0001611491084115665, + "loss": 0.1009, + "step": 7929 + }, + { + "epoch": 2.41, + "learning_rate": 0.00016112027781195142, + "loss": 0.0865, + "step": 7930 + }, + { + "epoch": 2.41, + "learning_rate": 0.00016109144679925073, + "loss": 0.0859, + "step": 7931 + }, + { + "epoch": 2.41, + "learning_rate": 0.00016106261537453525, + "loss": 0.0772, + "step": 7932 + }, + { + "epoch": 2.41, + "learning_rate": 0.00016103378353887608, + "loss": 0.045, + "step": 7933 + }, + { + "epoch": 2.41, + "learning_rate": 0.00016100495129334417, + "loss": 0.1375, + "step": 7934 + }, + { + "epoch": 2.41, + "learning_rate": 0.0001609761186390106, + "loss": 0.0671, + "step": 7935 + }, + { + "epoch": 2.41, + "learning_rate": 0.0001609472855769464, + "loss": 0.0369, + "step": 7936 + }, + { + "epoch": 2.41, + "learning_rate": 0.00016091845210822265, + "loss": 0.1557, + "step": 7937 + }, + { + "epoch": 2.41, + "learning_rate": 0.0001608896182339105, + "loss": 0.0633, + "step": 7938 + }, + { + "epoch": 2.41, + "learning_rate": 0.00016086078395508084, + "loss": 0.1308, + "step": 7939 + }, + { + "epoch": 2.41, + "learning_rate": 0.00016083194927280495, + "loss": 0.1083, + "step": 7940 + }, + { + "epoch": 2.41, + "learning_rate": 0.000160803114188154, + "loss": 0.1635, + "step": 7941 + }, + { + "epoch": 2.41, + "learning_rate": 0.0001607742787021989, + "loss": 0.0546, + "step": 7942 + }, + { + "epoch": 2.41, + "learning_rate": 0.000160745442816011, + "loss": 0.09, + "step": 7943 + }, + { + "epoch": 2.41, + "learning_rate": 0.0001607166065306614, + "loss": 0.1225, + "step": 7944 + }, + { + "epoch": 2.41, + "learning_rate": 0.0001606877698472213, + "loss": 0.1088, + "step": 7945 + }, + { + "epoch": 2.41, + "learning_rate": 0.00016065893276676193, + "loss": 0.1197, + "step": 7946 + }, + { + "epoch": 2.41, + "learning_rate": 0.00016063009529035442, + "loss": 0.1327, + "step": 7947 + }, + { + "epoch": 2.41, + "learning_rate": 0.00016060125741907007, + "loss": 0.0368, + "step": 7948 + }, + { + "epoch": 2.41, + "learning_rate": 0.0001605724191539801, + "loss": 0.1118, + "step": 7949 + }, + { + "epoch": 2.41, + "learning_rate": 0.00016054358049615573, + "loss": 0.0867, + "step": 7950 + }, + { + "epoch": 2.41, + "learning_rate": 0.00016051474144666828, + "loss": 0.0287, + "step": 7951 + }, + { + "epoch": 2.41, + "learning_rate": 0.00016048590200658897, + "loss": 0.1053, + "step": 7952 + }, + { + "epoch": 2.41, + "learning_rate": 0.00016045706217698925, + "loss": 0.1202, + "step": 7953 + }, + { + "epoch": 2.42, + "learning_rate": 0.00016042822195894024, + "loss": 0.1976, + "step": 7954 + }, + { + "epoch": 2.42, + "learning_rate": 0.0001603993813535134, + "loss": 0.1219, + "step": 7955 + }, + { + "epoch": 2.42, + "learning_rate": 0.00016037054036178007, + "loss": 0.197, + "step": 7956 + }, + { + "epoch": 2.42, + "learning_rate": 0.0001603416989848115, + "loss": 0.085, + "step": 7957 + }, + { + "epoch": 2.42, + "learning_rate": 0.0001603128572236792, + "loss": 0.092, + "step": 7958 + }, + { + "epoch": 2.42, + "learning_rate": 0.00016028401507945454, + "loss": 0.0101, + "step": 7959 + }, + { + "epoch": 2.42, + "learning_rate": 0.0001602551725532088, + "loss": 0.1299, + "step": 7960 + }, + { + "epoch": 2.42, + "learning_rate": 0.0001602263296460135, + "loss": 0.0856, + "step": 7961 + }, + { + "epoch": 2.42, + "learning_rate": 0.00016019748635894, + "loss": 0.0663, + "step": 7962 + }, + { + "epoch": 2.42, + "learning_rate": 0.00016016864269305982, + "loss": 0.0641, + "step": 7963 + }, + { + "epoch": 2.42, + "learning_rate": 0.0001601397986494444, + "loss": 0.015, + "step": 7964 + }, + { + "epoch": 2.42, + "learning_rate": 0.00016011095422916518, + "loss": 0.0455, + "step": 7965 + }, + { + "epoch": 2.42, + "learning_rate": 0.00016008210943329367, + "loss": 0.0972, + "step": 7966 + }, + { + "epoch": 2.42, + "learning_rate": 0.00016005326426290136, + "loss": 0.1176, + "step": 7967 + }, + { + "epoch": 2.42, + "learning_rate": 0.00016002441871905976, + "loss": 0.2, + "step": 7968 + }, + { + "epoch": 2.42, + "learning_rate": 0.00015999557280284039, + "loss": 0.0932, + "step": 7969 + }, + { + "epoch": 2.42, + "learning_rate": 0.0001599667265153148, + "loss": 0.0707, + "step": 7970 + }, + { + "epoch": 2.42, + "learning_rate": 0.00015993787985755454, + "loss": 0.1398, + "step": 7971 + }, + { + "epoch": 2.42, + "learning_rate": 0.00015990903283063124, + "loss": 0.1552, + "step": 7972 + }, + { + "epoch": 2.42, + "learning_rate": 0.00015988018543561635, + "loss": 0.1196, + "step": 7973 + }, + { + "epoch": 2.42, + "learning_rate": 0.0001598513376735816, + "loss": 0.0841, + "step": 7974 + }, + { + "epoch": 2.42, + "learning_rate": 0.00015982248954559853, + "loss": 0.0914, + "step": 7975 + }, + { + "epoch": 2.42, + "learning_rate": 0.00015979364105273879, + "loss": 0.0578, + "step": 7976 + }, + { + "epoch": 2.42, + "learning_rate": 0.00015976479219607397, + "loss": 0.22, + "step": 7977 + }, + { + "epoch": 2.42, + "learning_rate": 0.00015973594297667576, + "loss": 0.1679, + "step": 7978 + }, + { + "epoch": 2.42, + "learning_rate": 0.00015970709339561587, + "loss": 0.0462, + "step": 7979 + }, + { + "epoch": 2.42, + "learning_rate": 0.00015967824345396585, + "loss": 0.1005, + "step": 7980 + }, + { + "epoch": 2.42, + "learning_rate": 0.00015964939315279758, + "loss": 0.0246, + "step": 7981 + }, + { + "epoch": 2.42, + "learning_rate": 0.00015962054249318253, + "loss": 0.1712, + "step": 7982 + }, + { + "epoch": 2.42, + "learning_rate": 0.0001595916914761926, + "loss": 0.1677, + "step": 7983 + }, + { + "epoch": 2.42, + "learning_rate": 0.00015956284010289944, + "loss": 0.0431, + "step": 7984 + }, + { + "epoch": 2.42, + "learning_rate": 0.0001595339883743748, + "loss": 0.2027, + "step": 7985 + }, + { + "epoch": 2.42, + "learning_rate": 0.00015950513629169047, + "loss": 0.1112, + "step": 7986 + }, + { + "epoch": 2.43, + "learning_rate": 0.0001594762838559182, + "loss": 0.0778, + "step": 7987 + }, + { + "epoch": 2.43, + "learning_rate": 0.0001594474310681298, + "loss": 0.1145, + "step": 7988 + }, + { + "epoch": 2.43, + "learning_rate": 0.000159418577929397, + "loss": 0.1637, + "step": 7989 + }, + { + "epoch": 2.43, + "learning_rate": 0.00015938972444079166, + "loss": 0.0586, + "step": 7990 + }, + { + "epoch": 2.43, + "learning_rate": 0.0001593608706033856, + "loss": 0.1182, + "step": 7991 + }, + { + "epoch": 2.43, + "learning_rate": 0.00015933201641825067, + "loss": 0.1154, + "step": 7992 + }, + { + "epoch": 2.43, + "learning_rate": 0.00015930316188645863, + "loss": 0.0738, + "step": 7993 + }, + { + "epoch": 2.43, + "learning_rate": 0.00015927430700908145, + "loss": 0.0442, + "step": 7994 + }, + { + "epoch": 2.43, + "learning_rate": 0.00015924545178719096, + "loss": 0.0936, + "step": 7995 + }, + { + "epoch": 2.43, + "learning_rate": 0.00015921659622185902, + "loss": 0.1598, + "step": 7996 + }, + { + "epoch": 2.43, + "learning_rate": 0.00015918774031415764, + "loss": 0.0541, + "step": 7997 + }, + { + "epoch": 2.43, + "learning_rate": 0.00015915888406515857, + "loss": 0.0972, + "step": 7998 + }, + { + "epoch": 2.43, + "learning_rate": 0.00015913002747593383, + "loss": 0.0383, + "step": 7999 + }, + { + "epoch": 2.43, + "learning_rate": 0.00015910117054755536, + "loss": 0.1517, + "step": 8000 + }, + { + "epoch": 2.43, + "learning_rate": 0.0001590723132810951, + "loss": 0.1274, + "step": 8001 + }, + { + "epoch": 2.43, + "learning_rate": 0.000159043455677625, + "loss": 0.0914, + "step": 8002 + }, + { + "epoch": 2.43, + "learning_rate": 0.000159014597738217, + "loss": 0.122, + "step": 8003 + }, + { + "epoch": 2.43, + "learning_rate": 0.00015898573946394317, + "loss": 0.1151, + "step": 8004 + }, + { + "epoch": 2.43, + "learning_rate": 0.00015895688085587544, + "loss": 0.111, + "step": 8005 + }, + { + "epoch": 2.43, + "learning_rate": 0.00015892802191508587, + "loss": 0.0728, + "step": 8006 + }, + { + "epoch": 2.43, + "learning_rate": 0.00015889916264264653, + "loss": 0.0595, + "step": 8007 + }, + { + "epoch": 2.43, + "learning_rate": 0.0001588703030396293, + "loss": 0.0475, + "step": 8008 + }, + { + "epoch": 2.43, + "learning_rate": 0.00015884144310710635, + "loss": 0.0897, + "step": 8009 + }, + { + "epoch": 2.43, + "learning_rate": 0.00015881258284614972, + "loss": 0.0252, + "step": 8010 + }, + { + "epoch": 2.43, + "learning_rate": 0.00015878372225783144, + "loss": 0.0459, + "step": 8011 + }, + { + "epoch": 2.43, + "learning_rate": 0.00015875486134322373, + "loss": 0.1267, + "step": 8012 + }, + { + "epoch": 2.43, + "learning_rate": 0.00015872600010339853, + "loss": 0.0952, + "step": 8013 + }, + { + "epoch": 2.43, + "learning_rate": 0.00015869713853942807, + "loss": 0.1075, + "step": 8014 + }, + { + "epoch": 2.43, + "learning_rate": 0.00015866827665238434, + "loss": 0.1485, + "step": 8015 + }, + { + "epoch": 2.43, + "learning_rate": 0.00015863941444333962, + "loss": 0.0222, + "step": 8016 + }, + { + "epoch": 2.43, + "learning_rate": 0.00015861055191336592, + "loss": 0.1269, + "step": 8017 + }, + { + "epoch": 2.43, + "learning_rate": 0.00015858168906353548, + "loss": 0.0245, + "step": 8018 + }, + { + "epoch": 2.43, + "learning_rate": 0.00015855282589492047, + "loss": 0.1304, + "step": 8019 + }, + { + "epoch": 2.44, + "learning_rate": 0.00015852396240859313, + "loss": 0.0493, + "step": 8020 + }, + { + "epoch": 2.44, + "learning_rate": 0.0001584950986056255, + "loss": 0.0435, + "step": 8021 + }, + { + "epoch": 2.44, + "learning_rate": 0.0001584662344870899, + "loss": 0.1846, + "step": 8022 + }, + { + "epoch": 2.44, + "learning_rate": 0.0001584373700540585, + "loss": 0.1347, + "step": 8023 + }, + { + "epoch": 2.44, + "learning_rate": 0.00015840850530760353, + "loss": 0.1065, + "step": 8024 + }, + { + "epoch": 2.44, + "learning_rate": 0.00015837964024879728, + "loss": 0.093, + "step": 8025 + }, + { + "epoch": 2.44, + "learning_rate": 0.00015835077487871192, + "loss": 0.0773, + "step": 8026 + }, + { + "epoch": 2.44, + "learning_rate": 0.00015832190919841982, + "loss": 0.0875, + "step": 8027 + }, + { + "epoch": 2.44, + "learning_rate": 0.00015829304320899318, + "loss": 0.0614, + "step": 8028 + }, + { + "epoch": 2.44, + "learning_rate": 0.00015826417691150427, + "loss": 0.157, + "step": 8029 + }, + { + "epoch": 2.44, + "learning_rate": 0.0001582353103070255, + "loss": 0.1642, + "step": 8030 + }, + { + "epoch": 2.44, + "learning_rate": 0.00015820644339662905, + "loss": 0.1728, + "step": 8031 + }, + { + "epoch": 2.44, + "learning_rate": 0.0001581775761813873, + "loss": 0.1033, + "step": 8032 + }, + { + "epoch": 2.44, + "learning_rate": 0.00015814870866237256, + "loss": 0.0549, + "step": 8033 + }, + { + "epoch": 2.44, + "learning_rate": 0.00015811984084065716, + "loss": 0.0474, + "step": 8034 + }, + { + "epoch": 2.44, + "learning_rate": 0.00015809097271731357, + "loss": 0.0747, + "step": 8035 + }, + { + "epoch": 2.44, + "learning_rate": 0.00015806210429341398, + "loss": 0.1344, + "step": 8036 + }, + { + "epoch": 2.44, + "learning_rate": 0.00015803323557003093, + "loss": 0.0723, + "step": 8037 + }, + { + "epoch": 2.44, + "learning_rate": 0.0001580043665482367, + "loss": 0.1011, + "step": 8038 + }, + { + "epoch": 2.44, + "learning_rate": 0.00015797549722910373, + "loss": 0.195, + "step": 8039 + }, + { + "epoch": 2.44, + "learning_rate": 0.00015794662761370445, + "loss": 0.1355, + "step": 8040 + }, + { + "epoch": 2.44, + "learning_rate": 0.0001579177577031112, + "loss": 0.0556, + "step": 8041 + }, + { + "epoch": 2.44, + "learning_rate": 0.00015788888749839651, + "loss": 0.1184, + "step": 8042 + }, + { + "epoch": 2.44, + "learning_rate": 0.0001578600170006328, + "loss": 0.0624, + "step": 8043 + }, + { + "epoch": 2.44, + "learning_rate": 0.00015783114621089245, + "loss": 0.1157, + "step": 8044 + }, + { + "epoch": 2.44, + "learning_rate": 0.00015780227513024806, + "loss": 0.1518, + "step": 8045 + }, + { + "epoch": 2.44, + "learning_rate": 0.00015777340375977198, + "loss": 0.0301, + "step": 8046 + }, + { + "epoch": 2.44, + "learning_rate": 0.00015774453210053675, + "loss": 0.0899, + "step": 8047 + }, + { + "epoch": 2.44, + "learning_rate": 0.00015771566015361488, + "loss": 0.071, + "step": 8048 + }, + { + "epoch": 2.44, + "learning_rate": 0.00015768678792007883, + "loss": 0.0802, + "step": 8049 + }, + { + "epoch": 2.44, + "learning_rate": 0.00015765791540100116, + "loss": 0.1099, + "step": 8050 + }, + { + "epoch": 2.44, + "learning_rate": 0.0001576290425974544, + "loss": 0.0981, + "step": 8051 + }, + { + "epoch": 2.44, + "learning_rate": 0.00015760016951051108, + "loss": 0.0915, + "step": 8052 + }, + { + "epoch": 2.45, + "learning_rate": 0.00015757129614124376, + "loss": 0.0814, + "step": 8053 + }, + { + "epoch": 2.45, + "learning_rate": 0.00015754242249072499, + "loss": 0.132, + "step": 8054 + }, + { + "epoch": 2.45, + "learning_rate": 0.0001575135485600273, + "loss": 0.1506, + "step": 8055 + }, + { + "epoch": 2.45, + "learning_rate": 0.00015748467435022334, + "loss": 0.1243, + "step": 8056 + }, + { + "epoch": 2.45, + "learning_rate": 0.0001574557998623857, + "loss": 0.0588, + "step": 8057 + }, + { + "epoch": 2.45, + "learning_rate": 0.00015742692509758695, + "loss": 0.1493, + "step": 8058 + }, + { + "epoch": 2.45, + "learning_rate": 0.00015739805005689968, + "loss": 0.2068, + "step": 8059 + }, + { + "epoch": 2.45, + "learning_rate": 0.0001573691747413966, + "loss": 0.039, + "step": 8060 + }, + { + "epoch": 2.45, + "learning_rate": 0.00015734029915215022, + "loss": 0.1384, + "step": 8061 + }, + { + "epoch": 2.45, + "learning_rate": 0.00015731142329023328, + "loss": 0.0756, + "step": 8062 + }, + { + "epoch": 2.45, + "learning_rate": 0.00015728254715671844, + "loss": 0.1259, + "step": 8063 + }, + { + "epoch": 2.45, + "learning_rate": 0.0001572536707526783, + "loss": 0.1249, + "step": 8064 + }, + { + "epoch": 2.45, + "learning_rate": 0.0001572247940791856, + "loss": 0.0287, + "step": 8065 + }, + { + "epoch": 2.45, + "learning_rate": 0.0001571959171373129, + "loss": 0.1134, + "step": 8066 + }, + { + "epoch": 2.45, + "learning_rate": 0.000157167039928133, + "loss": 0.1261, + "step": 8067 + }, + { + "epoch": 2.45, + "learning_rate": 0.00015713816245271868, + "loss": 0.1371, + "step": 8068 + }, + { + "epoch": 2.45, + "learning_rate": 0.00015710928471214246, + "loss": 0.0554, + "step": 8069 + }, + { + "epoch": 2.45, + "learning_rate": 0.0001570804067074772, + "loss": 0.0588, + "step": 8070 + }, + { + "epoch": 2.45, + "learning_rate": 0.0001570515284397956, + "loss": 0.082, + "step": 8071 + }, + { + "epoch": 2.45, + "learning_rate": 0.00015702264991017037, + "loss": 0.1119, + "step": 8072 + }, + { + "epoch": 2.45, + "learning_rate": 0.00015699377111967432, + "loss": 0.0633, + "step": 8073 + }, + { + "epoch": 2.45, + "learning_rate": 0.00015696489206938017, + "loss": 0.0623, + "step": 8074 + }, + { + "epoch": 2.45, + "learning_rate": 0.00015693601276036068, + "loss": 0.0916, + "step": 8075 + }, + { + "epoch": 2.45, + "learning_rate": 0.00015690713319368868, + "loss": 0.0936, + "step": 8076 + }, + { + "epoch": 2.45, + "learning_rate": 0.0001568782533704369, + "loss": 0.1154, + "step": 8077 + }, + { + "epoch": 2.45, + "learning_rate": 0.00015684937329167825, + "loss": 0.0708, + "step": 8078 + }, + { + "epoch": 2.45, + "learning_rate": 0.00015682049295848543, + "loss": 0.0547, + "step": 8079 + }, + { + "epoch": 2.45, + "learning_rate": 0.00015679161237193128, + "loss": 0.0763, + "step": 8080 + }, + { + "epoch": 2.45, + "learning_rate": 0.0001567627315330887, + "loss": 0.186, + "step": 8081 + }, + { + "epoch": 2.45, + "learning_rate": 0.0001567338504430304, + "loss": 0.1369, + "step": 8082 + }, + { + "epoch": 2.45, + "learning_rate": 0.0001567049691028293, + "loss": 0.1117, + "step": 8083 + }, + { + "epoch": 2.45, + "learning_rate": 0.00015667608751355832, + "loss": 0.1253, + "step": 8084 + }, + { + "epoch": 2.45, + "learning_rate": 0.00015664720567629023, + "loss": 0.1734, + "step": 8085 + }, + { + "epoch": 2.46, + "learning_rate": 0.000156618323592098, + "loss": 0.0799, + "step": 8086 + }, + { + "epoch": 2.46, + "learning_rate": 0.0001565894412620544, + "loss": 0.1101, + "step": 8087 + }, + { + "epoch": 2.46, + "learning_rate": 0.00015656055868723241, + "loss": 0.154, + "step": 8088 + }, + { + "epoch": 2.46, + "learning_rate": 0.00015653167586870487, + "loss": 0.0561, + "step": 8089 + }, + { + "epoch": 2.46, + "learning_rate": 0.00015650279280754475, + "loss": 0.0978, + "step": 8090 + }, + { + "epoch": 2.46, + "learning_rate": 0.00015647390950482493, + "loss": 0.1102, + "step": 8091 + }, + { + "epoch": 2.46, + "learning_rate": 0.00015644502596161836, + "loss": 0.1094, + "step": 8092 + }, + { + "epoch": 2.46, + "learning_rate": 0.000156416142178998, + "loss": 0.1248, + "step": 8093 + }, + { + "epoch": 2.46, + "learning_rate": 0.00015638725815803674, + "loss": 0.141, + "step": 8094 + }, + { + "epoch": 2.46, + "learning_rate": 0.0001563583738998076, + "loss": 0.0028, + "step": 8095 + }, + { + "epoch": 2.46, + "learning_rate": 0.0001563294894053835, + "loss": 0.0273, + "step": 8096 + }, + { + "epoch": 2.46, + "learning_rate": 0.00015630060467583743, + "loss": 0.0233, + "step": 8097 + }, + { + "epoch": 2.46, + "learning_rate": 0.00015627171971224237, + "loss": 0.1655, + "step": 8098 + }, + { + "epoch": 2.46, + "learning_rate": 0.00015624283451567131, + "loss": 0.1185, + "step": 8099 + }, + { + "epoch": 2.46, + "learning_rate": 0.00015621394908719725, + "loss": 0.0782, + "step": 8100 + }, + { + "epoch": 2.46, + "learning_rate": 0.00015618506342789324, + "loss": 0.0046, + "step": 8101 + }, + { + "epoch": 2.46, + "learning_rate": 0.00015615617753883223, + "loss": 0.1168, + "step": 8102 + }, + { + "epoch": 2.46, + "learning_rate": 0.0001561272914210873, + "loss": 0.1769, + "step": 8103 + }, + { + "epoch": 2.46, + "learning_rate": 0.00015609840507573147, + "loss": 0.0709, + "step": 8104 + }, + { + "epoch": 2.46, + "learning_rate": 0.0001560695185038377, + "loss": 0.0984, + "step": 8105 + }, + { + "epoch": 2.46, + "learning_rate": 0.0001560406317064792, + "loss": 0.0948, + "step": 8106 + }, + { + "epoch": 2.46, + "learning_rate": 0.00015601174468472887, + "loss": 0.0526, + "step": 8107 + }, + { + "epoch": 2.46, + "learning_rate": 0.00015598285743965995, + "loss": 0.0777, + "step": 8108 + }, + { + "epoch": 2.46, + "learning_rate": 0.00015595396997234533, + "loss": 0.113, + "step": 8109 + }, + { + "epoch": 2.46, + "learning_rate": 0.00015592508228385823, + "loss": 0.0697, + "step": 8110 + }, + { + "epoch": 2.46, + "learning_rate": 0.00015589619437527172, + "loss": 0.1112, + "step": 8111 + }, + { + "epoch": 2.46, + "learning_rate": 0.00015586730624765884, + "loss": 0.2121, + "step": 8112 + }, + { + "epoch": 2.46, + "learning_rate": 0.00015583841790209273, + "loss": 0.1014, + "step": 8113 + }, + { + "epoch": 2.46, + "learning_rate": 0.00015580952933964655, + "loss": 0.1105, + "step": 8114 + }, + { + "epoch": 2.46, + "learning_rate": 0.00015578064056139337, + "loss": 0.0531, + "step": 8115 + }, + { + "epoch": 2.46, + "learning_rate": 0.00015575175156840637, + "loss": 0.1064, + "step": 8116 + }, + { + "epoch": 2.46, + "learning_rate": 0.00015572286236175865, + "loss": 0.1223, + "step": 8117 + }, + { + "epoch": 2.46, + "learning_rate": 0.00015569397294252333, + "loss": 0.1339, + "step": 8118 + }, + { + "epoch": 2.47, + "learning_rate": 0.00015566508331177372, + "loss": 0.0695, + "step": 8119 + }, + { + "epoch": 2.47, + "learning_rate": 0.0001556361934705828, + "loss": 0.085, + "step": 8120 + }, + { + "epoch": 2.47, + "learning_rate": 0.00015560730342002382, + "loss": 0.1259, + "step": 8121 + }, + { + "epoch": 2.47, + "learning_rate": 0.00015557841316116996, + "loss": 0.0602, + "step": 8122 + }, + { + "epoch": 2.47, + "learning_rate": 0.00015554952269509445, + "loss": 0.1502, + "step": 8123 + }, + { + "epoch": 2.47, + "learning_rate": 0.00015552063202287043, + "loss": 0.103, + "step": 8124 + }, + { + "epoch": 2.47, + "learning_rate": 0.00015549174114557108, + "loss": 0.2196, + "step": 8125 + }, + { + "epoch": 2.47, + "learning_rate": 0.00015546285006426972, + "loss": 0.0797, + "step": 8126 + }, + { + "epoch": 2.47, + "learning_rate": 0.00015543395878003943, + "loss": 0.0377, + "step": 8127 + }, + { + "epoch": 2.47, + "learning_rate": 0.00015540506729395354, + "loss": 0.1079, + "step": 8128 + }, + { + "epoch": 2.47, + "learning_rate": 0.00015537617560708528, + "loss": 0.0964, + "step": 8129 + }, + { + "epoch": 2.47, + "learning_rate": 0.0001553472837205078, + "loss": 0.0758, + "step": 8130 + }, + { + "epoch": 2.47, + "learning_rate": 0.00015531839163529445, + "loss": 0.145, + "step": 8131 + }, + { + "epoch": 2.47, + "learning_rate": 0.0001552894993525184, + "loss": 0.1467, + "step": 8132 + }, + { + "epoch": 2.47, + "learning_rate": 0.00015526060687325299, + "loss": 0.1646, + "step": 8133 + }, + { + "epoch": 2.47, + "learning_rate": 0.0001552317141985715, + "loss": 0.053, + "step": 8134 + }, + { + "epoch": 2.47, + "learning_rate": 0.00015520282132954713, + "loss": 0.1166, + "step": 8135 + }, + { + "epoch": 2.47, + "learning_rate": 0.00015517392826725323, + "loss": 0.043, + "step": 8136 + }, + { + "epoch": 2.47, + "learning_rate": 0.00015514503501276304, + "loss": 0.0673, + "step": 8137 + }, + { + "epoch": 2.47, + "learning_rate": 0.00015511614156714989, + "loss": 0.0599, + "step": 8138 + }, + { + "epoch": 2.47, + "learning_rate": 0.00015508724793148707, + "loss": 0.1077, + "step": 8139 + }, + { + "epoch": 2.47, + "learning_rate": 0.00015505835410684792, + "loss": 0.0552, + "step": 8140 + }, + { + "epoch": 2.47, + "learning_rate": 0.0001550294600943058, + "loss": 0.0543, + "step": 8141 + }, + { + "epoch": 2.47, + "learning_rate": 0.0001550005658949339, + "loss": 0.0779, + "step": 8142 + }, + { + "epoch": 2.47, + "learning_rate": 0.00015497167150980572, + "loss": 0.1014, + "step": 8143 + }, + { + "epoch": 2.47, + "learning_rate": 0.0001549427769399945, + "loss": 0.1002, + "step": 8144 + }, + { + "epoch": 2.47, + "learning_rate": 0.0001549138821865736, + "loss": 0.0951, + "step": 8145 + }, + { + "epoch": 2.47, + "learning_rate": 0.00015488498725061637, + "loss": 0.0638, + "step": 8146 + }, + { + "epoch": 2.47, + "learning_rate": 0.00015485609213319624, + "loss": 0.1346, + "step": 8147 + }, + { + "epoch": 2.47, + "learning_rate": 0.00015482719683538646, + "loss": 0.0909, + "step": 8148 + }, + { + "epoch": 2.47, + "learning_rate": 0.0001547983013582605, + "loss": 0.1157, + "step": 8149 + }, + { + "epoch": 2.47, + "learning_rate": 0.00015476940570289171, + "loss": 0.0793, + "step": 8150 + }, + { + "epoch": 2.47, + "learning_rate": 0.00015474050987035346, + "loss": 0.0675, + "step": 8151 + }, + { + "epoch": 2.48, + "learning_rate": 0.00015471161386171922, + "loss": 0.0932, + "step": 8152 + }, + { + "epoch": 2.48, + "learning_rate": 0.00015468271767806233, + "loss": 0.0412, + "step": 8153 + }, + { + "epoch": 2.48, + "learning_rate": 0.00015465382132045624, + "loss": 0.1175, + "step": 8154 + }, + { + "epoch": 2.48, + "learning_rate": 0.0001546249247899743, + "loss": 0.1129, + "step": 8155 + }, + { + "epoch": 2.48, + "learning_rate": 0.00015459602808768995, + "loss": 0.0516, + "step": 8156 + }, + { + "epoch": 2.48, + "learning_rate": 0.00015456713121467664, + "loss": 0.0864, + "step": 8157 + }, + { + "epoch": 2.48, + "learning_rate": 0.00015453823417200778, + "loss": 0.0772, + "step": 8158 + }, + { + "epoch": 2.48, + "learning_rate": 0.00015450933696075693, + "loss": 0.048, + "step": 8159 + }, + { + "epoch": 2.48, + "learning_rate": 0.00015448043958199732, + "loss": 0.1207, + "step": 8160 + }, + { + "epoch": 2.48, + "learning_rate": 0.00015445154203680257, + "loss": 0.0328, + "step": 8161 + }, + { + "epoch": 2.48, + "learning_rate": 0.0001544226443262461, + "loss": 0.148, + "step": 8162 + }, + { + "epoch": 2.48, + "learning_rate": 0.00015439374645140135, + "loss": 0.102, + "step": 8163 + }, + { + "epoch": 2.48, + "learning_rate": 0.0001543648484133418, + "loss": 0.0569, + "step": 8164 + }, + { + "epoch": 2.48, + "learning_rate": 0.00015433595021314088, + "loss": 0.1099, + "step": 8165 + }, + { + "epoch": 2.48, + "learning_rate": 0.00015430705185187215, + "loss": 0.1601, + "step": 8166 + }, + { + "epoch": 2.48, + "learning_rate": 0.00015427815333060916, + "loss": 0.0925, + "step": 8167 + }, + { + "epoch": 2.48, + "learning_rate": 0.00015424925465042522, + "loss": 0.1062, + "step": 8168 + }, + { + "epoch": 2.48, + "learning_rate": 0.000154220355812394, + "loss": 0.0962, + "step": 8169 + }, + { + "epoch": 2.48, + "learning_rate": 0.00015419145681758888, + "loss": 0.0504, + "step": 8170 + }, + { + "epoch": 2.48, + "learning_rate": 0.00015416255766708344, + "loss": 0.1631, + "step": 8171 + }, + { + "epoch": 2.48, + "learning_rate": 0.0001541336583619512, + "loss": 0.1028, + "step": 8172 + }, + { + "epoch": 2.48, + "learning_rate": 0.00015410475890326568, + "loss": 0.0779, + "step": 8173 + }, + { + "epoch": 2.48, + "learning_rate": 0.00015407585929210038, + "loss": 0.063, + "step": 8174 + }, + { + "epoch": 2.48, + "learning_rate": 0.0001540469595295289, + "loss": 0.1603, + "step": 8175 + }, + { + "epoch": 2.48, + "learning_rate": 0.00015401805961662472, + "loss": 0.1072, + "step": 8176 + }, + { + "epoch": 2.48, + "learning_rate": 0.00015398915955446146, + "loss": 0.1471, + "step": 8177 + }, + { + "epoch": 2.48, + "learning_rate": 0.00015396025934411253, + "loss": 0.0434, + "step": 8178 + }, + { + "epoch": 2.48, + "learning_rate": 0.00015393135898665163, + "loss": 0.0655, + "step": 8179 + }, + { + "epoch": 2.48, + "learning_rate": 0.00015390245848315227, + "loss": 0.0, + "step": 8180 + }, + { + "epoch": 2.48, + "learning_rate": 0.00015387355783468798, + "loss": 0.0512, + "step": 8181 + }, + { + "epoch": 2.48, + "learning_rate": 0.00015384465704233242, + "loss": 0.1007, + "step": 8182 + }, + { + "epoch": 2.48, + "learning_rate": 0.0001538157561071591, + "loss": 0.1131, + "step": 8183 + }, + { + "epoch": 2.48, + "learning_rate": 0.00015378685503024164, + "loss": 0.1194, + "step": 8184 + }, + { + "epoch": 2.49, + "learning_rate": 0.00015375795381265367, + "loss": 0.08, + "step": 8185 + }, + { + "epoch": 2.49, + "learning_rate": 0.00015372905245546867, + "loss": 0.1126, + "step": 8186 + }, + { + "epoch": 2.49, + "learning_rate": 0.00015370015095976037, + "loss": 0.05, + "step": 8187 + }, + { + "epoch": 2.49, + "learning_rate": 0.00015367124932660225, + "loss": 0.0408, + "step": 8188 + }, + { + "epoch": 2.49, + "learning_rate": 0.00015364234755706798, + "loss": 0.115, + "step": 8189 + }, + { + "epoch": 2.49, + "learning_rate": 0.0001536134456522312, + "loss": 0.0621, + "step": 8190 + }, + { + "epoch": 2.49, + "learning_rate": 0.0001535845436131655, + "loss": 0.0638, + "step": 8191 + }, + { + "epoch": 2.49, + "learning_rate": 0.00015355564144094457, + "loss": 0.106, + "step": 8192 + }, + { + "epoch": 2.49, + "learning_rate": 0.00015352673913664194, + "loss": 0.0697, + "step": 8193 + }, + { + "epoch": 2.49, + "learning_rate": 0.00015349783670133126, + "loss": 0.2048, + "step": 8194 + }, + { + "epoch": 2.49, + "learning_rate": 0.00015346893413608628, + "loss": 0.1151, + "step": 8195 + }, + { + "epoch": 2.49, + "learning_rate": 0.00015344003144198052, + "loss": 0.1467, + "step": 8196 + }, + { + "epoch": 2.49, + "learning_rate": 0.0001534111286200877, + "loss": 0.0751, + "step": 8197 + }, + { + "epoch": 2.49, + "learning_rate": 0.00015338222567148142, + "loss": 0.1981, + "step": 8198 + }, + { + "epoch": 2.49, + "learning_rate": 0.0001533533225972354, + "loss": 0.0583, + "step": 8199 + }, + { + "epoch": 2.49, + "learning_rate": 0.0001533244193984233, + "loss": 0.1014, + "step": 8200 + }, + { + "epoch": 2.49, + "learning_rate": 0.00015329551607611873, + "loss": 0.0686, + "step": 8201 + }, + { + "epoch": 2.49, + "learning_rate": 0.00015326661263139545, + "loss": 0.0664, + "step": 8202 + }, + { + "epoch": 2.49, + "learning_rate": 0.00015323770906532707, + "loss": 0.1848, + "step": 8203 + }, + { + "epoch": 2.49, + "learning_rate": 0.00015320880537898724, + "loss": 0.0869, + "step": 8204 + }, + { + "epoch": 2.49, + "learning_rate": 0.00015317990157344977, + "loss": 0.1762, + "step": 8205 + }, + { + "epoch": 2.49, + "learning_rate": 0.00015315099764978823, + "loss": 0.0789, + "step": 8206 + }, + { + "epoch": 2.49, + "learning_rate": 0.00015312209360907638, + "loss": 0.0753, + "step": 8207 + }, + { + "epoch": 2.49, + "learning_rate": 0.00015309318945238795, + "loss": 0.0219, + "step": 8208 + }, + { + "epoch": 2.49, + "learning_rate": 0.00015306428518079655, + "loss": 0.12, + "step": 8209 + }, + { + "epoch": 2.49, + "learning_rate": 0.000153035380795376, + "loss": 0.1021, + "step": 8210 + }, + { + "epoch": 2.49, + "learning_rate": 0.00015300647629719991, + "loss": 0.0746, + "step": 8211 + }, + { + "epoch": 2.49, + "learning_rate": 0.00015297757168734205, + "loss": 0.0827, + "step": 8212 + }, + { + "epoch": 2.49, + "learning_rate": 0.0001529486669668761, + "loss": 0.0608, + "step": 8213 + }, + { + "epoch": 2.49, + "learning_rate": 0.00015291976213687586, + "loss": 0.0981, + "step": 8214 + }, + { + "epoch": 2.49, + "learning_rate": 0.00015289085719841504, + "loss": 0.1284, + "step": 8215 + }, + { + "epoch": 2.49, + "learning_rate": 0.0001528619521525673, + "loss": 0.0783, + "step": 8216 + }, + { + "epoch": 2.49, + "learning_rate": 0.00015283304700040647, + "loss": 0.105, + "step": 8217 + }, + { + "epoch": 2.5, + "learning_rate": 0.00015280414174300626, + "loss": 0.092, + "step": 8218 + }, + { + "epoch": 2.5, + "learning_rate": 0.00015277523638144037, + "loss": 0.0673, + "step": 8219 + }, + { + "epoch": 2.5, + "learning_rate": 0.00015274633091678263, + "loss": 0.1004, + "step": 8220 + }, + { + "epoch": 2.5, + "learning_rate": 0.0001527174253501067, + "loss": 0.1237, + "step": 8221 + }, + { + "epoch": 2.5, + "learning_rate": 0.00015268851968248636, + "loss": 0.0898, + "step": 8222 + }, + { + "epoch": 2.5, + "learning_rate": 0.0001526596139149955, + "loss": 0.1175, + "step": 8223 + }, + { + "epoch": 2.5, + "learning_rate": 0.0001526307080487077, + "loss": 0.091, + "step": 8224 + }, + { + "epoch": 2.5, + "learning_rate": 0.00015260180208469688, + "loss": 0.0397, + "step": 8225 + }, + { + "epoch": 2.5, + "learning_rate": 0.0001525728960240367, + "loss": 0.131, + "step": 8226 + }, + { + "epoch": 2.5, + "learning_rate": 0.00015254398986780095, + "loss": 0.1033, + "step": 8227 + }, + { + "epoch": 2.5, + "learning_rate": 0.00015251508361706348, + "loss": 0.0587, + "step": 8228 + }, + { + "epoch": 2.5, + "learning_rate": 0.000152486177272898, + "loss": 0.1142, + "step": 8229 + }, + { + "epoch": 2.5, + "learning_rate": 0.00015245727083637833, + "loss": 0.0831, + "step": 8230 + }, + { + "epoch": 2.5, + "learning_rate": 0.00015242836430857823, + "loss": 0.1701, + "step": 8231 + }, + { + "epoch": 2.5, + "learning_rate": 0.00015239945769057156, + "loss": 0.071, + "step": 8232 + }, + { + "epoch": 2.5, + "learning_rate": 0.00015237055098343212, + "loss": 0.0947, + "step": 8233 + }, + { + "epoch": 2.5, + "learning_rate": 0.00015234164418823356, + "loss": 0.1028, + "step": 8234 + }, + { + "epoch": 2.5, + "learning_rate": 0.00015231273730604986, + "loss": 0.0705, + "step": 8235 + }, + { + "epoch": 2.5, + "learning_rate": 0.00015228383033795472, + "loss": 0.0734, + "step": 8236 + }, + { + "epoch": 2.5, + "learning_rate": 0.00015225492328502197, + "loss": 0.1234, + "step": 8237 + }, + { + "epoch": 2.5, + "learning_rate": 0.00015222601614832545, + "loss": 0.1616, + "step": 8238 + }, + { + "epoch": 2.5, + "learning_rate": 0.00015219710892893891, + "loss": 0.062, + "step": 8239 + }, + { + "epoch": 2.5, + "learning_rate": 0.00015216820162793627, + "loss": 0.1176, + "step": 8240 + }, + { + "epoch": 2.5, + "learning_rate": 0.00015213929424639133, + "loss": 0.0817, + "step": 8241 + }, + { + "epoch": 2.5, + "learning_rate": 0.00015211038678537782, + "loss": 0.1483, + "step": 8242 + }, + { + "epoch": 2.5, + "learning_rate": 0.00015208147924596966, + "loss": 0.0617, + "step": 8243 + }, + { + "epoch": 2.5, + "learning_rate": 0.00015205257162924065, + "loss": 0.1071, + "step": 8244 + }, + { + "epoch": 2.5, + "learning_rate": 0.00015202366393626457, + "loss": 0.0829, + "step": 8245 + }, + { + "epoch": 2.5, + "learning_rate": 0.00015199475616811534, + "loss": 0.0868, + "step": 8246 + }, + { + "epoch": 2.5, + "learning_rate": 0.0001519658483258668, + "loss": 0.0819, + "step": 8247 + }, + { + "epoch": 2.5, + "learning_rate": 0.00015193694041059274, + "loss": 0.0776, + "step": 8248 + }, + { + "epoch": 2.5, + "learning_rate": 0.00015190803242336702, + "loss": 0.0961, + "step": 8249 + }, + { + "epoch": 2.5, + "learning_rate": 0.0001518791243652635, + "loss": 0.1136, + "step": 8250 + }, + { + "epoch": 2.51, + "learning_rate": 0.00015185021623735604, + "loss": 0.078, + "step": 8251 + }, + { + "epoch": 2.51, + "learning_rate": 0.00015182130804071843, + "loss": 0.1015, + "step": 8252 + }, + { + "epoch": 2.51, + "learning_rate": 0.0001517923997764246, + "loss": 0.0905, + "step": 8253 + }, + { + "epoch": 2.51, + "learning_rate": 0.0001517634914455483, + "loss": 0.1043, + "step": 8254 + }, + { + "epoch": 2.51, + "learning_rate": 0.00015173458304916355, + "loss": 0.1632, + "step": 8255 + }, + { + "epoch": 2.51, + "learning_rate": 0.0001517056745883441, + "loss": 0.0605, + "step": 8256 + }, + { + "epoch": 2.51, + "learning_rate": 0.00015167676606416382, + "loss": 0.1307, + "step": 8257 + }, + { + "epoch": 2.51, + "learning_rate": 0.00015164785747769663, + "loss": 0.094, + "step": 8258 + }, + { + "epoch": 2.51, + "learning_rate": 0.00015161894883001636, + "loss": 0.2, + "step": 8259 + }, + { + "epoch": 2.51, + "learning_rate": 0.00015159004012219688, + "loss": 0.0579, + "step": 8260 + }, + { + "epoch": 2.51, + "learning_rate": 0.00015156113135531208, + "loss": 0.1467, + "step": 8261 + }, + { + "epoch": 2.51, + "learning_rate": 0.00015153222253043582, + "loss": 0.0896, + "step": 8262 + }, + { + "epoch": 2.51, + "learning_rate": 0.000151503313648642, + "loss": 0.0873, + "step": 8263 + }, + { + "epoch": 2.51, + "learning_rate": 0.00015147440471100445, + "loss": 0.0515, + "step": 8264 + }, + { + "epoch": 2.51, + "learning_rate": 0.0001514454957185971, + "loss": 0.0122, + "step": 8265 + }, + { + "epoch": 2.51, + "learning_rate": 0.00015141658667249384, + "loss": 0.144, + "step": 8266 + }, + { + "epoch": 2.51, + "learning_rate": 0.00015138767757376853, + "loss": 0.0558, + "step": 8267 + }, + { + "epoch": 2.51, + "learning_rate": 0.0001513587684234951, + "loss": 0.1172, + "step": 8268 + }, + { + "epoch": 2.51, + "learning_rate": 0.00015132985922274738, + "loss": 0.0703, + "step": 8269 + }, + { + "epoch": 2.51, + "learning_rate": 0.0001513009499725993, + "loss": 0.0765, + "step": 8270 + }, + { + "epoch": 2.51, + "learning_rate": 0.00015127204067412472, + "loss": 0.0882, + "step": 8271 + }, + { + "epoch": 2.51, + "learning_rate": 0.00015124313132839755, + "loss": 0.1008, + "step": 8272 + }, + { + "epoch": 2.51, + "learning_rate": 0.00015121422193649172, + "loss": 0.1013, + "step": 8273 + }, + { + "epoch": 2.51, + "learning_rate": 0.00015118531249948112, + "loss": 0.1157, + "step": 8274 + }, + { + "epoch": 2.51, + "learning_rate": 0.0001511564030184396, + "loss": 0.062, + "step": 8275 + }, + { + "epoch": 2.51, + "learning_rate": 0.00015112749349444114, + "loss": 0.1408, + "step": 8276 + }, + { + "epoch": 2.51, + "learning_rate": 0.0001510985839285596, + "loss": 0.0495, + "step": 8277 + }, + { + "epoch": 2.51, + "learning_rate": 0.00015106967432186886, + "loss": 0.0815, + "step": 8278 + }, + { + "epoch": 2.51, + "learning_rate": 0.00015104076467544288, + "loss": 0.1041, + "step": 8279 + }, + { + "epoch": 2.51, + "learning_rate": 0.0001510118549903555, + "loss": 0.1836, + "step": 8280 + }, + { + "epoch": 2.51, + "learning_rate": 0.00015098294526768072, + "loss": 0.0927, + "step": 8281 + }, + { + "epoch": 2.51, + "learning_rate": 0.00015095403550849238, + "loss": 0.025, + "step": 8282 + }, + { + "epoch": 2.51, + "learning_rate": 0.00015092512571386444, + "loss": 0.1914, + "step": 8283 + }, + { + "epoch": 2.52, + "learning_rate": 0.0001508962158848708, + "loss": 0.067, + "step": 8284 + }, + { + "epoch": 2.52, + "learning_rate": 0.00015086730602258534, + "loss": 0.1424, + "step": 8285 + }, + { + "epoch": 2.52, + "learning_rate": 0.000150838396128082, + "loss": 0.0624, + "step": 8286 + }, + { + "epoch": 2.52, + "learning_rate": 0.00015080948620243474, + "loss": 0.0942, + "step": 8287 + }, + { + "epoch": 2.52, + "learning_rate": 0.0001507805762467174, + "loss": 0.1383, + "step": 8288 + }, + { + "epoch": 2.52, + "learning_rate": 0.00015075166626200398, + "loss": 0.1226, + "step": 8289 + }, + { + "epoch": 2.52, + "learning_rate": 0.00015072275624936836, + "loss": 0.103, + "step": 8290 + }, + { + "epoch": 2.52, + "learning_rate": 0.00015069384620988447, + "loss": 0.0967, + "step": 8291 + }, + { + "epoch": 2.52, + "learning_rate": 0.00015066493614462618, + "loss": 0.1042, + "step": 8292 + }, + { + "epoch": 2.52, + "learning_rate": 0.00015063602605466748, + "loss": 0.0567, + "step": 8293 + }, + { + "epoch": 2.52, + "learning_rate": 0.0001506071159410823, + "loss": 0.0922, + "step": 8294 + }, + { + "epoch": 2.52, + "learning_rate": 0.0001505782058049445, + "loss": 0.0463, + "step": 8295 + }, + { + "epoch": 2.52, + "learning_rate": 0.0001505492956473281, + "loss": 0.0777, + "step": 8296 + }, + { + "epoch": 2.52, + "learning_rate": 0.00015052038546930696, + "loss": 0.1003, + "step": 8297 + }, + { + "epoch": 2.52, + "learning_rate": 0.000150491475271955, + "loss": 0.1719, + "step": 8298 + }, + { + "epoch": 2.52, + "learning_rate": 0.00015046256505634622, + "loss": 0.1232, + "step": 8299 + }, + { + "epoch": 2.52, + "learning_rate": 0.00015043365482355449, + "loss": 0.0561, + "step": 8300 + }, + { + "epoch": 2.52, + "learning_rate": 0.00015040474457465373, + "loss": 0.1662, + "step": 8301 + }, + { + "epoch": 2.52, + "learning_rate": 0.00015037583431071793, + "loss": 0.1231, + "step": 8302 + }, + { + "epoch": 2.52, + "learning_rate": 0.00015034692403282098, + "loss": 0.1159, + "step": 8303 + }, + { + "epoch": 2.52, + "learning_rate": 0.00015031801374203678, + "loss": 0.1568, + "step": 8304 + }, + { + "epoch": 2.52, + "learning_rate": 0.00015028910343943932, + "loss": 0.1186, + "step": 8305 + }, + { + "epoch": 2.52, + "learning_rate": 0.00015026019312610253, + "loss": 0.0078, + "step": 8306 + }, + { + "epoch": 2.52, + "learning_rate": 0.00015023128280310035, + "loss": 0.07, + "step": 8307 + }, + { + "epoch": 2.52, + "learning_rate": 0.00015020237247150665, + "loss": 0.0689, + "step": 8308 + }, + { + "epoch": 2.52, + "learning_rate": 0.00015017346213239548, + "loss": 0.1269, + "step": 8309 + }, + { + "epoch": 2.52, + "learning_rate": 0.00015014455178684062, + "loss": 0.1231, + "step": 8310 + }, + { + "epoch": 2.52, + "learning_rate": 0.00015011564143591608, + "loss": 0.0534, + "step": 8311 + }, + { + "epoch": 2.52, + "learning_rate": 0.00015008673108069583, + "loss": 0.1056, + "step": 8312 + }, + { + "epoch": 2.52, + "learning_rate": 0.0001500578207222538, + "loss": 0.1245, + "step": 8313 + }, + { + "epoch": 2.52, + "learning_rate": 0.00015002891036166387, + "loss": 0.1042, + "step": 8314 + }, + { + "epoch": 2.52, + "learning_rate": 0.00015, + "loss": 0.1169, + "step": 8315 + }, + { + "epoch": 2.52, + "learning_rate": 0.0001499710896383361, + "loss": 0.0292, + "step": 8316 + }, + { + "epoch": 2.53, + "learning_rate": 0.00014994217927774618, + "loss": 0.0679, + "step": 8317 + }, + { + "epoch": 2.53, + "learning_rate": 0.00014991326891930417, + "loss": 0.0643, + "step": 8318 + }, + { + "epoch": 2.53, + "learning_rate": 0.0001498843585640839, + "loss": 0.0341, + "step": 8319 + }, + { + "epoch": 2.53, + "learning_rate": 0.00014985544821315938, + "loss": 0.1444, + "step": 8320 + }, + { + "epoch": 2.53, + "learning_rate": 0.00014982653786760453, + "loss": 0.0651, + "step": 8321 + }, + { + "epoch": 2.53, + "learning_rate": 0.0001497976275284933, + "loss": 0.0684, + "step": 8322 + }, + { + "epoch": 2.53, + "learning_rate": 0.00014976871719689968, + "loss": 0.0601, + "step": 8323 + }, + { + "epoch": 2.53, + "learning_rate": 0.00014973980687389745, + "loss": 0.045, + "step": 8324 + }, + { + "epoch": 2.53, + "learning_rate": 0.00014971089656056066, + "loss": 0.081, + "step": 8325 + }, + { + "epoch": 2.53, + "learning_rate": 0.0001496819862579632, + "loss": 0.1203, + "step": 8326 + }, + { + "epoch": 2.53, + "learning_rate": 0.00014965307596717902, + "loss": 0.1309, + "step": 8327 + }, + { + "epoch": 2.53, + "learning_rate": 0.0001496241656892821, + "loss": 0.0465, + "step": 8328 + }, + { + "epoch": 2.53, + "learning_rate": 0.00014959525542534625, + "loss": 0.076, + "step": 8329 + }, + { + "epoch": 2.53, + "learning_rate": 0.00014956634517644552, + "loss": 0.0957, + "step": 8330 + }, + { + "epoch": 2.53, + "learning_rate": 0.00014953743494365375, + "loss": 0.0582, + "step": 8331 + }, + { + "epoch": 2.53, + "learning_rate": 0.00014950852472804497, + "loss": 0.1264, + "step": 8332 + }, + { + "epoch": 2.53, + "learning_rate": 0.00014947961453069307, + "loss": 0.084, + "step": 8333 + }, + { + "epoch": 2.53, + "learning_rate": 0.00014945070435267192, + "loss": 0.0734, + "step": 8334 + }, + { + "epoch": 2.53, + "learning_rate": 0.00014942179419505548, + "loss": 0.1489, + "step": 8335 + }, + { + "epoch": 2.53, + "learning_rate": 0.00014939288405891766, + "loss": 0.0717, + "step": 8336 + }, + { + "epoch": 2.53, + "learning_rate": 0.00014936397394533246, + "loss": 0.0703, + "step": 8337 + }, + { + "epoch": 2.53, + "learning_rate": 0.00014933506385537385, + "loss": 0.1133, + "step": 8338 + }, + { + "epoch": 2.53, + "learning_rate": 0.00014930615379011555, + "loss": 0.0953, + "step": 8339 + }, + { + "epoch": 2.53, + "learning_rate": 0.00014927724375063165, + "loss": 0.088, + "step": 8340 + }, + { + "epoch": 2.53, + "learning_rate": 0.00014924833373799602, + "loss": 0.0734, + "step": 8341 + }, + { + "epoch": 2.53, + "learning_rate": 0.00014921942375328257, + "loss": 0.1421, + "step": 8342 + }, + { + "epoch": 2.53, + "learning_rate": 0.00014919051379756526, + "loss": 0.0444, + "step": 8343 + }, + { + "epoch": 2.53, + "learning_rate": 0.00014916160387191797, + "loss": 0.076, + "step": 8344 + }, + { + "epoch": 2.53, + "learning_rate": 0.00014913269397741463, + "loss": 0.0793, + "step": 8345 + }, + { + "epoch": 2.53, + "learning_rate": 0.00014910378411512918, + "loss": 0.1192, + "step": 8346 + }, + { + "epoch": 2.53, + "learning_rate": 0.00014907487428613553, + "loss": 0.1226, + "step": 8347 + }, + { + "epoch": 2.53, + "learning_rate": 0.00014904596449150754, + "loss": 0.1075, + "step": 8348 + }, + { + "epoch": 2.53, + "learning_rate": 0.00014901705473231928, + "loss": 0.134, + "step": 8349 + }, + { + "epoch": 2.54, + "learning_rate": 0.00014898814500964447, + "loss": 0.1262, + "step": 8350 + }, + { + "epoch": 2.54, + "learning_rate": 0.00014895923532455714, + "loss": 0.11, + "step": 8351 + }, + { + "epoch": 2.54, + "learning_rate": 0.00014893032567813114, + "loss": 0.1284, + "step": 8352 + }, + { + "epoch": 2.54, + "learning_rate": 0.0001489014160714404, + "loss": 0.0037, + "step": 8353 + }, + { + "epoch": 2.54, + "learning_rate": 0.00014887250650555886, + "loss": 0.091, + "step": 8354 + }, + { + "epoch": 2.54, + "learning_rate": 0.00014884359698156036, + "loss": 0.0528, + "step": 8355 + }, + { + "epoch": 2.54, + "learning_rate": 0.00014881468750051885, + "loss": 0.0471, + "step": 8356 + }, + { + "epoch": 2.54, + "learning_rate": 0.00014878577806350825, + "loss": 0.0944, + "step": 8357 + }, + { + "epoch": 2.54, + "learning_rate": 0.0001487568686716024, + "loss": 0.0844, + "step": 8358 + }, + { + "epoch": 2.54, + "learning_rate": 0.00014872795932587528, + "loss": 0.1344, + "step": 8359 + }, + { + "epoch": 2.54, + "learning_rate": 0.0001486990500274007, + "loss": 0.1222, + "step": 8360 + }, + { + "epoch": 2.54, + "learning_rate": 0.00014867014077725262, + "loss": 0.0751, + "step": 8361 + }, + { + "epoch": 2.54, + "learning_rate": 0.00014864123157650488, + "loss": 0.0966, + "step": 8362 + }, + { + "epoch": 2.54, + "learning_rate": 0.0001486123224262314, + "loss": 0.0696, + "step": 8363 + }, + { + "epoch": 2.54, + "learning_rate": 0.00014858341332750616, + "loss": 0.0499, + "step": 8364 + }, + { + "epoch": 2.54, + "learning_rate": 0.0001485545042814029, + "loss": 0.0237, + "step": 8365 + }, + { + "epoch": 2.54, + "learning_rate": 0.00014852559528899555, + "loss": 0.0744, + "step": 8366 + }, + { + "epoch": 2.54, + "learning_rate": 0.00014849668635135801, + "loss": 0.1715, + "step": 8367 + }, + { + "epoch": 2.54, + "learning_rate": 0.00014846777746956415, + "loss": 0.0516, + "step": 8368 + }, + { + "epoch": 2.54, + "learning_rate": 0.00014843886864468795, + "loss": 0.0918, + "step": 8369 + }, + { + "epoch": 2.54, + "learning_rate": 0.00014840995987780312, + "loss": 0.1, + "step": 8370 + }, + { + "epoch": 2.54, + "learning_rate": 0.00014838105116998364, + "loss": 0.1076, + "step": 8371 + }, + { + "epoch": 2.54, + "learning_rate": 0.00014835214252230334, + "loss": 0.066, + "step": 8372 + }, + { + "epoch": 2.54, + "learning_rate": 0.00014832323393583613, + "loss": 0.1649, + "step": 8373 + }, + { + "epoch": 2.54, + "learning_rate": 0.00014829432541165591, + "loss": 0.1018, + "step": 8374 + }, + { + "epoch": 2.54, + "learning_rate": 0.00014826541695083648, + "loss": 0.0334, + "step": 8375 + }, + { + "epoch": 2.54, + "learning_rate": 0.00014823650855445166, + "loss": 0.0614, + "step": 8376 + }, + { + "epoch": 2.54, + "learning_rate": 0.00014820760022357538, + "loss": 0.0888, + "step": 8377 + }, + { + "epoch": 2.54, + "learning_rate": 0.00014817869195928154, + "loss": 0.041, + "step": 8378 + }, + { + "epoch": 2.54, + "learning_rate": 0.000148149783762644, + "loss": 0.1663, + "step": 8379 + }, + { + "epoch": 2.54, + "learning_rate": 0.0001481208756347365, + "loss": 0.1499, + "step": 8380 + }, + { + "epoch": 2.54, + "learning_rate": 0.00014809196757663298, + "loss": 0.0835, + "step": 8381 + }, + { + "epoch": 2.55, + "learning_rate": 0.00014806305958940726, + "loss": 0.1499, + "step": 8382 + }, + { + "epoch": 2.55, + "learning_rate": 0.00014803415167413317, + "loss": 0.0293, + "step": 8383 + }, + { + "epoch": 2.55, + "learning_rate": 0.00014800524383188463, + "loss": 0.0978, + "step": 8384 + }, + { + "epoch": 2.55, + "learning_rate": 0.0001479763360637354, + "loss": 0.0728, + "step": 8385 + }, + { + "epoch": 2.55, + "learning_rate": 0.00014794742837075935, + "loss": 0.0799, + "step": 8386 + }, + { + "epoch": 2.55, + "learning_rate": 0.0001479185207540303, + "loss": 0.0355, + "step": 8387 + }, + { + "epoch": 2.55, + "learning_rate": 0.00014788961321462215, + "loss": 0.1045, + "step": 8388 + }, + { + "epoch": 2.55, + "learning_rate": 0.00014786070575360872, + "loss": 0.1185, + "step": 8389 + }, + { + "epoch": 2.55, + "learning_rate": 0.0001478317983720637, + "loss": 0.1017, + "step": 8390 + }, + { + "epoch": 2.55, + "learning_rate": 0.00014780289107106106, + "loss": 0.0903, + "step": 8391 + }, + { + "epoch": 2.55, + "learning_rate": 0.00014777398385167455, + "loss": 0.0807, + "step": 8392 + }, + { + "epoch": 2.55, + "learning_rate": 0.00014774507671497803, + "loss": 0.1017, + "step": 8393 + }, + { + "epoch": 2.55, + "learning_rate": 0.0001477161696620453, + "loss": 0.047, + "step": 8394 + }, + { + "epoch": 2.55, + "learning_rate": 0.00014768726269395014, + "loss": 0.1294, + "step": 8395 + }, + { + "epoch": 2.55, + "learning_rate": 0.00014765835581176642, + "loss": 0.0653, + "step": 8396 + }, + { + "epoch": 2.55, + "learning_rate": 0.0001476294490165679, + "loss": 0.0872, + "step": 8397 + }, + { + "epoch": 2.55, + "learning_rate": 0.00014760054230942839, + "loss": 0.1053, + "step": 8398 + }, + { + "epoch": 2.55, + "learning_rate": 0.00014757163569142177, + "loss": 0.1168, + "step": 8399 + }, + { + "epoch": 2.55, + "learning_rate": 0.00014754272916362164, + "loss": 0.0513, + "step": 8400 + }, + { + "epoch": 2.55, + "learning_rate": 0.000147513822727102, + "loss": 0.0893, + "step": 8401 + }, + { + "epoch": 2.55, + "learning_rate": 0.00014748491638293652, + "loss": 0.0362, + "step": 8402 + }, + { + "epoch": 2.55, + "learning_rate": 0.00014745601013219903, + "loss": 0.1532, + "step": 8403 + }, + { + "epoch": 2.55, + "learning_rate": 0.00014742710397596336, + "loss": 0.0566, + "step": 8404 + }, + { + "epoch": 2.55, + "learning_rate": 0.00014739819791530315, + "loss": 0.1383, + "step": 8405 + }, + { + "epoch": 2.55, + "learning_rate": 0.0001473692919512923, + "loss": 0.1079, + "step": 8406 + }, + { + "epoch": 2.55, + "learning_rate": 0.00014734038608500452, + "loss": 0.0434, + "step": 8407 + }, + { + "epoch": 2.55, + "learning_rate": 0.00014731148031751359, + "loss": 0.0749, + "step": 8408 + }, + { + "epoch": 2.55, + "learning_rate": 0.0001472825746498933, + "loss": 0.1084, + "step": 8409 + }, + { + "epoch": 2.55, + "learning_rate": 0.0001472536690832174, + "loss": 0.1076, + "step": 8410 + }, + { + "epoch": 2.55, + "learning_rate": 0.00014722476361855963, + "loss": 0.1428, + "step": 8411 + }, + { + "epoch": 2.55, + "learning_rate": 0.00014719585825699374, + "loss": 0.1021, + "step": 8412 + }, + { + "epoch": 2.55, + "learning_rate": 0.0001471669529995935, + "loss": 0.0936, + "step": 8413 + }, + { + "epoch": 2.55, + "learning_rate": 0.00014713804784743264, + "loss": 0.1185, + "step": 8414 + }, + { + "epoch": 2.56, + "learning_rate": 0.000147109142801585, + "loss": 0.0665, + "step": 8415 + }, + { + "epoch": 2.56, + "learning_rate": 0.00014708023786312412, + "loss": 0.0506, + "step": 8416 + }, + { + "epoch": 2.56, + "learning_rate": 0.00014705133303312388, + "loss": 0.1065, + "step": 8417 + }, + { + "epoch": 2.56, + "learning_rate": 0.00014702242831265795, + "loss": 0.0666, + "step": 8418 + }, + { + "epoch": 2.56, + "learning_rate": 0.00014699352370280006, + "loss": 0.0958, + "step": 8419 + }, + { + "epoch": 2.56, + "learning_rate": 0.00014696461920462401, + "loss": 0.1051, + "step": 8420 + }, + { + "epoch": 2.56, + "learning_rate": 0.00014693571481920342, + "loss": 0.1399, + "step": 8421 + }, + { + "epoch": 2.56, + "learning_rate": 0.00014690681054761205, + "loss": 0.0912, + "step": 8422 + }, + { + "epoch": 2.56, + "learning_rate": 0.0001468779063909236, + "loss": 0.0376, + "step": 8423 + }, + { + "epoch": 2.56, + "learning_rate": 0.00014684900235021172, + "loss": 0.1286, + "step": 8424 + }, + { + "epoch": 2.56, + "learning_rate": 0.00014682009842655023, + "loss": 0.0433, + "step": 8425 + }, + { + "epoch": 2.56, + "learning_rate": 0.00014679119462101273, + "loss": 0.0043, + "step": 8426 + }, + { + "epoch": 2.56, + "learning_rate": 0.00014676229093467293, + "loss": 0.0917, + "step": 8427 + }, + { + "epoch": 2.56, + "learning_rate": 0.00014673338736860453, + "loss": 0.103, + "step": 8428 + }, + { + "epoch": 2.56, + "learning_rate": 0.00014670448392388122, + "loss": 0.1009, + "step": 8429 + }, + { + "epoch": 2.56, + "learning_rate": 0.00014667558060157672, + "loss": 0.0689, + "step": 8430 + }, + { + "epoch": 2.56, + "learning_rate": 0.00014664667740276458, + "loss": 0.0839, + "step": 8431 + }, + { + "epoch": 2.56, + "learning_rate": 0.00014661777432851855, + "loss": 0.1157, + "step": 8432 + }, + { + "epoch": 2.56, + "learning_rate": 0.0001465888713799123, + "loss": 0.1487, + "step": 8433 + }, + { + "epoch": 2.56, + "learning_rate": 0.00014655996855801948, + "loss": 0.146, + "step": 8434 + }, + { + "epoch": 2.56, + "learning_rate": 0.00014653106586391375, + "loss": 0.1695, + "step": 8435 + }, + { + "epoch": 2.56, + "learning_rate": 0.0001465021632986687, + "loss": 0.1355, + "step": 8436 + }, + { + "epoch": 2.56, + "learning_rate": 0.00014647326086335806, + "loss": 0.04, + "step": 8437 + }, + { + "epoch": 2.56, + "learning_rate": 0.00014644435855905544, + "loss": 0.1231, + "step": 8438 + }, + { + "epoch": 2.56, + "learning_rate": 0.00014641545638683447, + "loss": 0.0344, + "step": 8439 + }, + { + "epoch": 2.56, + "learning_rate": 0.00014638655434776884, + "loss": 0.0378, + "step": 8440 + }, + { + "epoch": 2.56, + "learning_rate": 0.000146357652442932, + "loss": 0.0452, + "step": 8441 + }, + { + "epoch": 2.56, + "learning_rate": 0.00014632875067339775, + "loss": 0.0396, + "step": 8442 + }, + { + "epoch": 2.56, + "learning_rate": 0.00014629984904023963, + "loss": 0.0724, + "step": 8443 + }, + { + "epoch": 2.56, + "learning_rate": 0.00014627094754453128, + "loss": 0.0923, + "step": 8444 + }, + { + "epoch": 2.56, + "learning_rate": 0.00014624204618734636, + "loss": 0.0809, + "step": 8445 + }, + { + "epoch": 2.56, + "learning_rate": 0.00014621314496975833, + "loss": 0.1297, + "step": 8446 + }, + { + "epoch": 2.56, + "learning_rate": 0.00014618424389284087, + "loss": 0.16, + "step": 8447 + }, + { + "epoch": 2.57, + "learning_rate": 0.00014615534295766755, + "loss": 0.0914, + "step": 8448 + }, + { + "epoch": 2.57, + "learning_rate": 0.00014612644216531196, + "loss": 0.07, + "step": 8449 + }, + { + "epoch": 2.57, + "learning_rate": 0.00014609754151684775, + "loss": 0.0735, + "step": 8450 + }, + { + "epoch": 2.57, + "learning_rate": 0.00014606864101334837, + "loss": 0.1784, + "step": 8451 + }, + { + "epoch": 2.57, + "learning_rate": 0.00014603974065588744, + "loss": 0.0821, + "step": 8452 + }, + { + "epoch": 2.57, + "learning_rate": 0.00014601084044553854, + "loss": 0.0655, + "step": 8453 + }, + { + "epoch": 2.57, + "learning_rate": 0.00014598194038337522, + "loss": 0.0915, + "step": 8454 + }, + { + "epoch": 2.57, + "learning_rate": 0.00014595304047047112, + "loss": 0.0638, + "step": 8455 + }, + { + "epoch": 2.57, + "learning_rate": 0.0001459241407078996, + "loss": 0.0643, + "step": 8456 + }, + { + "epoch": 2.57, + "learning_rate": 0.00014589524109673432, + "loss": 0.0579, + "step": 8457 + }, + { + "epoch": 2.57, + "learning_rate": 0.00014586634163804877, + "loss": 0.1081, + "step": 8458 + }, + { + "epoch": 2.57, + "learning_rate": 0.00014583744233291653, + "loss": 0.0967, + "step": 8459 + }, + { + "epoch": 2.57, + "learning_rate": 0.00014580854318241114, + "loss": 0.0671, + "step": 8460 + }, + { + "epoch": 2.57, + "learning_rate": 0.00014577964418760602, + "loss": 0.1986, + "step": 8461 + }, + { + "epoch": 2.57, + "learning_rate": 0.00014575074534957478, + "loss": 0.1008, + "step": 8462 + }, + { + "epoch": 2.57, + "learning_rate": 0.00014572184666939084, + "loss": 0.1066, + "step": 8463 + }, + { + "epoch": 2.57, + "learning_rate": 0.00014569294814812777, + "loss": 0.0775, + "step": 8464 + }, + { + "epoch": 2.57, + "learning_rate": 0.00014566404978685912, + "loss": 0.0706, + "step": 8465 + }, + { + "epoch": 2.57, + "learning_rate": 0.00014563515158665824, + "loss": 0.0484, + "step": 8466 + }, + { + "epoch": 2.57, + "learning_rate": 0.00014560625354859865, + "loss": 0.1111, + "step": 8467 + }, + { + "epoch": 2.57, + "learning_rate": 0.00014557735567375388, + "loss": 0.0731, + "step": 8468 + }, + { + "epoch": 2.57, + "learning_rate": 0.0001455484579631974, + "loss": 0.0715, + "step": 8469 + }, + { + "epoch": 2.57, + "learning_rate": 0.0001455195604180027, + "loss": 0.0596, + "step": 8470 + }, + { + "epoch": 2.57, + "learning_rate": 0.0001454906630392431, + "loss": 0.0823, + "step": 8471 + }, + { + "epoch": 2.57, + "learning_rate": 0.0001454617658279922, + "loss": 0.0892, + "step": 8472 + }, + { + "epoch": 2.57, + "learning_rate": 0.00014543286878532336, + "loss": 0.1339, + "step": 8473 + }, + { + "epoch": 2.57, + "learning_rate": 0.00014540397191231005, + "loss": 0.0853, + "step": 8474 + }, + { + "epoch": 2.57, + "learning_rate": 0.0001453750752100257, + "loss": 0.1211, + "step": 8475 + }, + { + "epoch": 2.57, + "learning_rate": 0.0001453461786795438, + "loss": 0.0921, + "step": 8476 + }, + { + "epoch": 2.57, + "learning_rate": 0.00014531728232193764, + "loss": 0.1584, + "step": 8477 + }, + { + "epoch": 2.57, + "learning_rate": 0.00014528838613828076, + "loss": 0.1118, + "step": 8478 + }, + { + "epoch": 2.57, + "learning_rate": 0.00014525949012964646, + "loss": 0.0533, + "step": 8479 + }, + { + "epoch": 2.57, + "learning_rate": 0.00014523059429710826, + "loss": 0.0913, + "step": 8480 + }, + { + "epoch": 2.58, + "learning_rate": 0.00014520169864173953, + "loss": 0.1077, + "step": 8481 + }, + { + "epoch": 2.58, + "learning_rate": 0.00014517280316461354, + "loss": 0.1001, + "step": 8482 + }, + { + "epoch": 2.58, + "learning_rate": 0.00014514390786680376, + "loss": 0.0946, + "step": 8483 + }, + { + "epoch": 2.58, + "learning_rate": 0.0001451150127493836, + "loss": 0.0766, + "step": 8484 + }, + { + "epoch": 2.58, + "learning_rate": 0.00014508611781342639, + "loss": 0.0587, + "step": 8485 + }, + { + "epoch": 2.58, + "learning_rate": 0.00014505722306000553, + "loss": 0.1209, + "step": 8486 + }, + { + "epoch": 2.58, + "learning_rate": 0.00014502832849019428, + "loss": 0.1464, + "step": 8487 + }, + { + "epoch": 2.58, + "learning_rate": 0.00014499943410506606, + "loss": 0.1262, + "step": 8488 + }, + { + "epoch": 2.58, + "learning_rate": 0.0001449705399056942, + "loss": 0.0246, + "step": 8489 + }, + { + "epoch": 2.58, + "learning_rate": 0.00014494164589315203, + "loss": 0.0943, + "step": 8490 + }, + { + "epoch": 2.58, + "learning_rate": 0.0001449127520685129, + "loss": 0.0994, + "step": 8491 + }, + { + "epoch": 2.58, + "learning_rate": 0.0001448838584328501, + "loss": 0.0372, + "step": 8492 + }, + { + "epoch": 2.58, + "learning_rate": 0.00014485496498723696, + "loss": 0.164, + "step": 8493 + }, + { + "epoch": 2.58, + "learning_rate": 0.00014482607173274675, + "loss": 0.1862, + "step": 8494 + }, + { + "epoch": 2.58, + "learning_rate": 0.0001447971786704528, + "loss": 0.1686, + "step": 8495 + }, + { + "epoch": 2.58, + "learning_rate": 0.00014476828580142854, + "loss": 0.0633, + "step": 8496 + }, + { + "epoch": 2.58, + "learning_rate": 0.000144739393126747, + "loss": 0.0825, + "step": 8497 + }, + { + "epoch": 2.58, + "learning_rate": 0.00014471050064748158, + "loss": 0.0872, + "step": 8498 + }, + { + "epoch": 2.58, + "learning_rate": 0.00014468160836470556, + "loss": 0.1393, + "step": 8499 + }, + { + "epoch": 2.58, + "learning_rate": 0.0001446527162794922, + "loss": 0.0886, + "step": 8500 + }, + { + "epoch": 2.58, + "learning_rate": 0.00014462382439291477, + "loss": 0.0663, + "step": 8501 + }, + { + "epoch": 2.58, + "learning_rate": 0.00014459493270604646, + "loss": 0.032, + "step": 8502 + }, + { + "epoch": 2.58, + "learning_rate": 0.00014456604121996055, + "loss": 0.1549, + "step": 8503 + }, + { + "epoch": 2.58, + "learning_rate": 0.0001445371499357303, + "loss": 0.1069, + "step": 8504 + }, + { + "epoch": 2.58, + "learning_rate": 0.0001445082588544289, + "loss": 0.1584, + "step": 8505 + }, + { + "epoch": 2.58, + "learning_rate": 0.0001444793679771296, + "loss": 0.0809, + "step": 8506 + }, + { + "epoch": 2.58, + "learning_rate": 0.00014445047730490555, + "loss": 0.0563, + "step": 8507 + }, + { + "epoch": 2.58, + "learning_rate": 0.00014442158683883002, + "loss": 0.0591, + "step": 8508 + }, + { + "epoch": 2.58, + "learning_rate": 0.00014439269657997615, + "loss": 0.049, + "step": 8509 + }, + { + "epoch": 2.58, + "learning_rate": 0.00014436380652941717, + "loss": 0.0701, + "step": 8510 + }, + { + "epoch": 2.58, + "learning_rate": 0.00014433491668822633, + "loss": 0.13, + "step": 8511 + }, + { + "epoch": 2.58, + "learning_rate": 0.00014430602705747664, + "loss": 0.1144, + "step": 8512 + }, + { + "epoch": 2.58, + "learning_rate": 0.00014427713763824138, + "loss": 0.0553, + "step": 8513 + }, + { + "epoch": 2.59, + "learning_rate": 0.00014424824843159363, + "loss": 0.0645, + "step": 8514 + }, + { + "epoch": 2.59, + "learning_rate": 0.0001442193594386066, + "loss": 0.1488, + "step": 8515 + }, + { + "epoch": 2.59, + "learning_rate": 0.00014419047066035345, + "loss": 0.0647, + "step": 8516 + }, + { + "epoch": 2.59, + "learning_rate": 0.00014416158209790725, + "loss": 0.0839, + "step": 8517 + }, + { + "epoch": 2.59, + "learning_rate": 0.00014413269375234116, + "loss": 0.1176, + "step": 8518 + }, + { + "epoch": 2.59, + "learning_rate": 0.00014410380562472828, + "loss": 0.1066, + "step": 8519 + }, + { + "epoch": 2.59, + "learning_rate": 0.00014407491771614172, + "loss": 0.0981, + "step": 8520 + }, + { + "epoch": 2.59, + "learning_rate": 0.00014404603002765468, + "loss": 0.0762, + "step": 8521 + }, + { + "epoch": 2.59, + "learning_rate": 0.0001440171425603401, + "loss": 0.0656, + "step": 8522 + }, + { + "epoch": 2.59, + "learning_rate": 0.0001439882553152711, + "loss": 0.1321, + "step": 8523 + }, + { + "epoch": 2.59, + "learning_rate": 0.00014395936829352077, + "loss": 0.2076, + "step": 8524 + }, + { + "epoch": 2.59, + "learning_rate": 0.00014393048149616224, + "loss": 0.0576, + "step": 8525 + }, + { + "epoch": 2.59, + "learning_rate": 0.00014390159492426858, + "loss": 0.1309, + "step": 8526 + }, + { + "epoch": 2.59, + "learning_rate": 0.00014387270857891272, + "loss": 0.082, + "step": 8527 + }, + { + "epoch": 2.59, + "learning_rate": 0.00014384382246116775, + "loss": 0.1294, + "step": 8528 + }, + { + "epoch": 2.59, + "learning_rate": 0.00014381493657210674, + "loss": 0.0594, + "step": 8529 + }, + { + "epoch": 2.59, + "learning_rate": 0.0001437860509128027, + "loss": 0.0862, + "step": 8530 + }, + { + "epoch": 2.59, + "learning_rate": 0.00014375716548432869, + "loss": 0.08, + "step": 8531 + }, + { + "epoch": 2.59, + "learning_rate": 0.0001437282802877576, + "loss": 0.0856, + "step": 8532 + }, + { + "epoch": 2.59, + "learning_rate": 0.00014369939532416255, + "loss": 0.069, + "step": 8533 + }, + { + "epoch": 2.59, + "learning_rate": 0.00014367051059461649, + "loss": 0.1308, + "step": 8534 + }, + { + "epoch": 2.59, + "learning_rate": 0.00014364162610019238, + "loss": 0.1105, + "step": 8535 + }, + { + "epoch": 2.59, + "learning_rate": 0.0001436127418419632, + "loss": 0.1186, + "step": 8536 + }, + { + "epoch": 2.59, + "learning_rate": 0.000143583857821002, + "loss": 0.0499, + "step": 8537 + }, + { + "epoch": 2.59, + "learning_rate": 0.00014355497403838164, + "loss": 0.169, + "step": 8538 + }, + { + "epoch": 2.59, + "learning_rate": 0.00014352609049517507, + "loss": 0.1864, + "step": 8539 + }, + { + "epoch": 2.59, + "learning_rate": 0.00014349720719245525, + "loss": 0.0678, + "step": 8540 + }, + { + "epoch": 2.59, + "learning_rate": 0.0001434683241312951, + "loss": 0.1101, + "step": 8541 + }, + { + "epoch": 2.59, + "learning_rate": 0.0001434394413127676, + "loss": 0.0673, + "step": 8542 + }, + { + "epoch": 2.59, + "learning_rate": 0.0001434105587379456, + "loss": 0.0972, + "step": 8543 + }, + { + "epoch": 2.59, + "learning_rate": 0.000143381676407902, + "loss": 0.0439, + "step": 8544 + }, + { + "epoch": 2.59, + "learning_rate": 0.00014335279432370972, + "loss": 0.0896, + "step": 8545 + }, + { + "epoch": 2.59, + "learning_rate": 0.00014332391248644163, + "loss": 0.0652, + "step": 8546 + }, + { + "epoch": 2.6, + "learning_rate": 0.00014329503089717066, + "loss": 0.0556, + "step": 8547 + }, + { + "epoch": 2.6, + "learning_rate": 0.00014326614955696959, + "loss": 0.0956, + "step": 8548 + }, + { + "epoch": 2.6, + "learning_rate": 0.00014323726846691132, + "loss": 0.0508, + "step": 8549 + }, + { + "epoch": 2.6, + "learning_rate": 0.00014320838762806867, + "loss": 0.0917, + "step": 8550 + }, + { + "epoch": 2.6, + "learning_rate": 0.00014317950704151455, + "loss": 0.0706, + "step": 8551 + }, + { + "epoch": 2.6, + "learning_rate": 0.00014315062670832178, + "loss": 0.093, + "step": 8552 + }, + { + "epoch": 2.6, + "learning_rate": 0.00014312174662956307, + "loss": 0.1491, + "step": 8553 + }, + { + "epoch": 2.6, + "learning_rate": 0.00014309286680631132, + "loss": 0.081, + "step": 8554 + }, + { + "epoch": 2.6, + "learning_rate": 0.00014306398723963932, + "loss": 0.1237, + "step": 8555 + }, + { + "epoch": 2.6, + "learning_rate": 0.00014303510793061983, + "loss": 0.2036, + "step": 8556 + }, + { + "epoch": 2.6, + "learning_rate": 0.00014300622888032568, + "loss": 0.0852, + "step": 8557 + }, + { + "epoch": 2.6, + "learning_rate": 0.00014297735008982963, + "loss": 0.1092, + "step": 8558 + }, + { + "epoch": 2.6, + "learning_rate": 0.0001429484715602044, + "loss": 0.1039, + "step": 8559 + }, + { + "epoch": 2.6, + "learning_rate": 0.00014291959329252278, + "loss": 0.0687, + "step": 8560 + }, + { + "epoch": 2.6, + "learning_rate": 0.0001428907152878575, + "loss": 0.0595, + "step": 8561 + }, + { + "epoch": 2.6, + "learning_rate": 0.00014286183754728135, + "loss": 0.1074, + "step": 8562 + }, + { + "epoch": 2.6, + "learning_rate": 0.00014283296007186697, + "loss": 0.0807, + "step": 8563 + }, + { + "epoch": 2.6, + "learning_rate": 0.0001428040828626871, + "loss": 0.0828, + "step": 8564 + }, + { + "epoch": 2.6, + "learning_rate": 0.0001427752059208144, + "loss": 0.0596, + "step": 8565 + }, + { + "epoch": 2.6, + "learning_rate": 0.00014274632924732167, + "loss": 0.1098, + "step": 8566 + }, + { + "epoch": 2.6, + "learning_rate": 0.00014271745284328159, + "loss": 0.1622, + "step": 8567 + }, + { + "epoch": 2.6, + "learning_rate": 0.0001426885767097667, + "loss": 0.0832, + "step": 8568 + }, + { + "epoch": 2.6, + "learning_rate": 0.00014265970084784976, + "loss": 0.0907, + "step": 8569 + }, + { + "epoch": 2.6, + "learning_rate": 0.0001426308252586034, + "loss": 0.0762, + "step": 8570 + }, + { + "epoch": 2.6, + "learning_rate": 0.0001426019499431003, + "loss": 0.0798, + "step": 8571 + }, + { + "epoch": 2.6, + "learning_rate": 0.00014257307490241305, + "loss": 0.1212, + "step": 8572 + }, + { + "epoch": 2.6, + "learning_rate": 0.0001425442001376143, + "loss": 0.1501, + "step": 8573 + }, + { + "epoch": 2.6, + "learning_rate": 0.00014251532564977664, + "loss": 0.076, + "step": 8574 + }, + { + "epoch": 2.6, + "learning_rate": 0.00014248645143997266, + "loss": 0.1296, + "step": 8575 + }, + { + "epoch": 2.6, + "learning_rate": 0.000142457577509275, + "loss": 0.0518, + "step": 8576 + }, + { + "epoch": 2.6, + "learning_rate": 0.00014242870385875627, + "loss": 0.0848, + "step": 8577 + }, + { + "epoch": 2.6, + "learning_rate": 0.00014239983048948892, + "loss": 0.1032, + "step": 8578 + }, + { + "epoch": 2.6, + "learning_rate": 0.0001423709574025456, + "loss": 0.0785, + "step": 8579 + }, + { + "epoch": 2.61, + "learning_rate": 0.0001423420845989988, + "loss": 0.0567, + "step": 8580 + }, + { + "epoch": 2.61, + "learning_rate": 0.00014231321207992115, + "loss": 0.0708, + "step": 8581 + }, + { + "epoch": 2.61, + "learning_rate": 0.00014228433984638515, + "loss": 0.0909, + "step": 8582 + }, + { + "epoch": 2.61, + "learning_rate": 0.00014225546789946325, + "loss": 0.0762, + "step": 8583 + }, + { + "epoch": 2.61, + "learning_rate": 0.00014222659624022802, + "loss": 0.1056, + "step": 8584 + }, + { + "epoch": 2.61, + "learning_rate": 0.00014219772486975194, + "loss": 0.0766, + "step": 8585 + }, + { + "epoch": 2.61, + "learning_rate": 0.0001421688537891075, + "loss": 0.0951, + "step": 8586 + }, + { + "epoch": 2.61, + "learning_rate": 0.0001421399829993672, + "loss": 0.1261, + "step": 8587 + }, + { + "epoch": 2.61, + "learning_rate": 0.00014211111250160346, + "loss": 0.1123, + "step": 8588 + }, + { + "epoch": 2.61, + "learning_rate": 0.00014208224229688877, + "loss": 0.2047, + "step": 8589 + }, + { + "epoch": 2.61, + "learning_rate": 0.00014205337238629552, + "loss": 0.0632, + "step": 8590 + }, + { + "epoch": 2.61, + "learning_rate": 0.0001420245027708962, + "loss": 0.0927, + "step": 8591 + }, + { + "epoch": 2.61, + "learning_rate": 0.0001419956334517633, + "loss": 0.0762, + "step": 8592 + }, + { + "epoch": 2.61, + "learning_rate": 0.00014196676442996907, + "loss": 0.0512, + "step": 8593 + }, + { + "epoch": 2.61, + "learning_rate": 0.000141937895706586, + "loss": 0.1113, + "step": 8594 + }, + { + "epoch": 2.61, + "learning_rate": 0.00014190902728268643, + "loss": 0.1743, + "step": 8595 + }, + { + "epoch": 2.61, + "learning_rate": 0.00014188015915934279, + "loss": 0.1089, + "step": 8596 + }, + { + "epoch": 2.61, + "learning_rate": 0.00014185129133762744, + "loss": 0.0997, + "step": 8597 + }, + { + "epoch": 2.61, + "learning_rate": 0.00014182242381861272, + "loss": 0.0873, + "step": 8598 + }, + { + "epoch": 2.61, + "learning_rate": 0.00014179355660337095, + "loss": 0.1237, + "step": 8599 + }, + { + "epoch": 2.61, + "learning_rate": 0.00014176468969297448, + "loss": 0.069, + "step": 8600 + }, + { + "epoch": 2.61, + "learning_rate": 0.00014173582308849565, + "loss": 0.0519, + "step": 8601 + }, + { + "epoch": 2.61, + "learning_rate": 0.0001417069567910068, + "loss": 0.1292, + "step": 8602 + }, + { + "epoch": 2.61, + "learning_rate": 0.00014167809080158018, + "loss": 0.107, + "step": 8603 + }, + { + "epoch": 2.61, + "learning_rate": 0.00014164922512128805, + "loss": 0.1949, + "step": 8604 + }, + { + "epoch": 2.61, + "learning_rate": 0.00014162035975120272, + "loss": 0.049, + "step": 8605 + }, + { + "epoch": 2.61, + "learning_rate": 0.00014159149469239641, + "loss": 0.063, + "step": 8606 + }, + { + "epoch": 2.61, + "learning_rate": 0.00014156262994594146, + "loss": 0.0349, + "step": 8607 + }, + { + "epoch": 2.61, + "learning_rate": 0.00014153376551291012, + "loss": 0.0513, + "step": 8608 + }, + { + "epoch": 2.61, + "learning_rate": 0.00014150490139437449, + "loss": 0.0346, + "step": 8609 + }, + { + "epoch": 2.61, + "learning_rate": 0.0001414760375914069, + "loss": 0.0623, + "step": 8610 + }, + { + "epoch": 2.61, + "learning_rate": 0.00014144717410507948, + "loss": 0.1023, + "step": 8611 + }, + { + "epoch": 2.61, + "learning_rate": 0.00014141831093646446, + "loss": 0.1036, + "step": 8612 + }, + { + "epoch": 2.62, + "learning_rate": 0.00014138944808663408, + "loss": 0.0996, + "step": 8613 + }, + { + "epoch": 2.62, + "learning_rate": 0.0001413605855566604, + "loss": 0.0992, + "step": 8614 + }, + { + "epoch": 2.62, + "learning_rate": 0.00014133172334761563, + "loss": 0.1273, + "step": 8615 + }, + { + "epoch": 2.62, + "learning_rate": 0.00014130286146057193, + "loss": 0.0806, + "step": 8616 + }, + { + "epoch": 2.62, + "learning_rate": 0.00014127399989660142, + "loss": 0.1346, + "step": 8617 + }, + { + "epoch": 2.62, + "learning_rate": 0.0001412451386567763, + "loss": 0.0818, + "step": 8618 + }, + { + "epoch": 2.62, + "learning_rate": 0.00014121627774216854, + "loss": 0.1061, + "step": 8619 + }, + { + "epoch": 2.62, + "learning_rate": 0.00014118741715385028, + "loss": 0.1236, + "step": 8620 + }, + { + "epoch": 2.62, + "learning_rate": 0.00014115855689289365, + "loss": 0.1001, + "step": 8621 + }, + { + "epoch": 2.62, + "learning_rate": 0.0001411296969603707, + "loss": 0.0864, + "step": 8622 + }, + { + "epoch": 2.62, + "learning_rate": 0.00014110083735735353, + "loss": 0.0698, + "step": 8623 + }, + { + "epoch": 2.62, + "learning_rate": 0.00014107197808491413, + "loss": 0.1336, + "step": 8624 + }, + { + "epoch": 2.62, + "learning_rate": 0.00014104311914412454, + "loss": 0.0596, + "step": 8625 + }, + { + "epoch": 2.62, + "learning_rate": 0.00014101426053605683, + "loss": 0.1239, + "step": 8626 + }, + { + "epoch": 2.62, + "learning_rate": 0.00014098540226178296, + "loss": 0.144, + "step": 8627 + }, + { + "epoch": 2.62, + "learning_rate": 0.00014095654432237507, + "loss": 0.1035, + "step": 8628 + }, + { + "epoch": 2.62, + "learning_rate": 0.00014092768671890491, + "loss": 0.095, + "step": 8629 + }, + { + "epoch": 2.62, + "learning_rate": 0.0001408988294524446, + "loss": 0.1145, + "step": 8630 + }, + { + "epoch": 2.62, + "learning_rate": 0.00014086997252406614, + "loss": 0.0512, + "step": 8631 + }, + { + "epoch": 2.62, + "learning_rate": 0.0001408411159348414, + "loss": 0.1291, + "step": 8632 + }, + { + "epoch": 2.62, + "learning_rate": 0.00014081225968584242, + "loss": 0.0475, + "step": 8633 + }, + { + "epoch": 2.62, + "learning_rate": 0.00014078340377814095, + "loss": 0.0866, + "step": 8634 + }, + { + "epoch": 2.62, + "learning_rate": 0.00014075454821280904, + "loss": 0.1023, + "step": 8635 + }, + { + "epoch": 2.62, + "learning_rate": 0.00014072569299091852, + "loss": 0.086, + "step": 8636 + }, + { + "epoch": 2.62, + "learning_rate": 0.00014069683811354135, + "loss": 0.086, + "step": 8637 + }, + { + "epoch": 2.62, + "learning_rate": 0.00014066798358174936, + "loss": 0.0541, + "step": 8638 + }, + { + "epoch": 2.62, + "learning_rate": 0.0001406391293966144, + "loss": 0.022, + "step": 8639 + }, + { + "epoch": 2.62, + "learning_rate": 0.00014061027555920832, + "loss": 0.0607, + "step": 8640 + }, + { + "epoch": 2.62, + "learning_rate": 0.000140581422070603, + "loss": 0.108, + "step": 8641 + }, + { + "epoch": 2.62, + "learning_rate": 0.00014055256893187018, + "loss": 0.102, + "step": 8642 + }, + { + "epoch": 2.62, + "learning_rate": 0.00014052371614408182, + "loss": 0.1385, + "step": 8643 + }, + { + "epoch": 2.62, + "learning_rate": 0.00014049486370830953, + "loss": 0.0885, + "step": 8644 + }, + { + "epoch": 2.62, + "learning_rate": 0.0001404660116256252, + "loss": 0.1129, + "step": 8645 + }, + { + "epoch": 2.63, + "learning_rate": 0.00014043715989710056, + "loss": 0.1318, + "step": 8646 + }, + { + "epoch": 2.63, + "learning_rate": 0.00014040830852380737, + "loss": 0.0795, + "step": 8647 + }, + { + "epoch": 2.63, + "learning_rate": 0.00014037945750681747, + "loss": 0.0575, + "step": 8648 + }, + { + "epoch": 2.63, + "learning_rate": 0.00014035060684720248, + "loss": 0.0949, + "step": 8649 + }, + { + "epoch": 2.63, + "learning_rate": 0.00014032175654603413, + "loss": 0.0662, + "step": 8650 + }, + { + "epoch": 2.63, + "learning_rate": 0.00014029290660438413, + "loss": 0.1075, + "step": 8651 + }, + { + "epoch": 2.63, + "learning_rate": 0.0001402640570233242, + "loss": 0.0676, + "step": 8652 + }, + { + "epoch": 2.63, + "learning_rate": 0.00014023520780392603, + "loss": 0.0866, + "step": 8653 + }, + { + "epoch": 2.63, + "learning_rate": 0.00014020635894726122, + "loss": 0.2019, + "step": 8654 + }, + { + "epoch": 2.63, + "learning_rate": 0.00014017751045440144, + "loss": 0.1103, + "step": 8655 + }, + { + "epoch": 2.63, + "learning_rate": 0.00014014866232641836, + "loss": 0.1983, + "step": 8656 + }, + { + "epoch": 2.63, + "learning_rate": 0.0001401198145643836, + "loss": 0.1379, + "step": 8657 + }, + { + "epoch": 2.63, + "learning_rate": 0.0001400909671693688, + "loss": 0.0583, + "step": 8658 + }, + { + "epoch": 2.63, + "learning_rate": 0.00014006212014244546, + "loss": 0.0824, + "step": 8659 + }, + { + "epoch": 2.63, + "learning_rate": 0.0001400332734846852, + "loss": 0.0829, + "step": 8660 + }, + { + "epoch": 2.63, + "learning_rate": 0.00014000442719715962, + "loss": 0.0623, + "step": 8661 + }, + { + "epoch": 2.63, + "learning_rate": 0.00013997558128094024, + "loss": 0.0503, + "step": 8662 + }, + { + "epoch": 2.63, + "learning_rate": 0.00013994673573709864, + "loss": 0.1187, + "step": 8663 + }, + { + "epoch": 2.63, + "learning_rate": 0.00013991789056670633, + "loss": 0.0511, + "step": 8664 + }, + { + "epoch": 2.63, + "learning_rate": 0.0001398890457708348, + "loss": 0.0347, + "step": 8665 + }, + { + "epoch": 2.63, + "learning_rate": 0.0001398602013505556, + "loss": 0.1036, + "step": 8666 + }, + { + "epoch": 2.63, + "learning_rate": 0.00013983135730694012, + "loss": 0.068, + "step": 8667 + }, + { + "epoch": 2.63, + "learning_rate": 0.00013980251364105996, + "loss": 0.034, + "step": 8668 + }, + { + "epoch": 2.63, + "learning_rate": 0.00013977367035398653, + "loss": 0.0915, + "step": 8669 + }, + { + "epoch": 2.63, + "learning_rate": 0.0001397448274467912, + "loss": 0.0584, + "step": 8670 + }, + { + "epoch": 2.63, + "learning_rate": 0.00013971598492054548, + "loss": 0.0877, + "step": 8671 + }, + { + "epoch": 2.63, + "learning_rate": 0.00013968714277632076, + "loss": 0.0884, + "step": 8672 + }, + { + "epoch": 2.63, + "learning_rate": 0.0001396583010151884, + "loss": 0.0954, + "step": 8673 + }, + { + "epoch": 2.63, + "learning_rate": 0.00013962945963821996, + "loss": 0.1172, + "step": 8674 + }, + { + "epoch": 2.63, + "learning_rate": 0.0001396006186464866, + "loss": 0.1066, + "step": 8675 + }, + { + "epoch": 2.63, + "learning_rate": 0.00013957177804105976, + "loss": 0.0629, + "step": 8676 + }, + { + "epoch": 2.63, + "learning_rate": 0.00013954293782301078, + "loss": 0.1134, + "step": 8677 + }, + { + "epoch": 2.63, + "learning_rate": 0.000139514097993411, + "loss": 0.1081, + "step": 8678 + }, + { + "epoch": 2.64, + "learning_rate": 0.00013948525855333175, + "loss": 0.1106, + "step": 8679 + }, + { + "epoch": 2.64, + "learning_rate": 0.00013945641950384427, + "loss": 0.121, + "step": 8680 + }, + { + "epoch": 2.64, + "learning_rate": 0.00013942758084601993, + "loss": 0.0829, + "step": 8681 + }, + { + "epoch": 2.64, + "learning_rate": 0.0001393987425809299, + "loss": 0.1077, + "step": 8682 + }, + { + "epoch": 2.64, + "learning_rate": 0.00013936990470964552, + "loss": 0.1235, + "step": 8683 + }, + { + "epoch": 2.64, + "learning_rate": 0.0001393410672332381, + "loss": 0.1268, + "step": 8684 + }, + { + "epoch": 2.64, + "learning_rate": 0.00013931223015277867, + "loss": 0.1155, + "step": 8685 + }, + { + "epoch": 2.64, + "learning_rate": 0.00013928339346933857, + "loss": 0.0868, + "step": 8686 + }, + { + "epoch": 2.64, + "learning_rate": 0.00013925455718398898, + "loss": 0.0553, + "step": 8687 + }, + { + "epoch": 2.64, + "learning_rate": 0.0001392257212978011, + "loss": 0.0518, + "step": 8688 + }, + { + "epoch": 2.64, + "learning_rate": 0.00013919688581184606, + "loss": 0.1158, + "step": 8689 + }, + { + "epoch": 2.64, + "learning_rate": 0.00013916805072719502, + "loss": 0.0812, + "step": 8690 + }, + { + "epoch": 2.64, + "learning_rate": 0.00013913921604491913, + "loss": 0.1643, + "step": 8691 + }, + { + "epoch": 2.64, + "learning_rate": 0.00013911038176608954, + "loss": 0.1091, + "step": 8692 + }, + { + "epoch": 2.64, + "learning_rate": 0.0001390815478917773, + "loss": 0.1511, + "step": 8693 + }, + { + "epoch": 2.64, + "learning_rate": 0.0001390527144230536, + "loss": 0.0913, + "step": 8694 + }, + { + "epoch": 2.64, + "learning_rate": 0.0001390238813609894, + "loss": 0.1316, + "step": 8695 + }, + { + "epoch": 2.64, + "learning_rate": 0.0001389950487066558, + "loss": 0.1319, + "step": 8696 + }, + { + "epoch": 2.64, + "learning_rate": 0.00013896621646112392, + "loss": 0.0922, + "step": 8697 + }, + { + "epoch": 2.64, + "learning_rate": 0.00013893738462546473, + "loss": 0.0519, + "step": 8698 + }, + { + "epoch": 2.64, + "learning_rate": 0.0001389085532007493, + "loss": 0.0569, + "step": 8699 + }, + { + "epoch": 2.64, + "learning_rate": 0.00013887972218804855, + "loss": 0.0515, + "step": 8700 + }, + { + "epoch": 2.64, + "learning_rate": 0.0001388508915884335, + "loss": 0.1241, + "step": 8701 + }, + { + "epoch": 2.64, + "learning_rate": 0.00013882206140297513, + "loss": 0.1432, + "step": 8702 + }, + { + "epoch": 2.64, + "learning_rate": 0.0001387932316327444, + "loss": 0.1665, + "step": 8703 + }, + { + "epoch": 2.64, + "learning_rate": 0.0001387644022788123, + "loss": 0.0456, + "step": 8704 + }, + { + "epoch": 2.64, + "learning_rate": 0.00013873557334224965, + "loss": 0.0846, + "step": 8705 + }, + { + "epoch": 2.64, + "learning_rate": 0.00013870674482412745, + "loss": 0.1107, + "step": 8706 + }, + { + "epoch": 2.64, + "learning_rate": 0.00013867791672551654, + "loss": 0.1202, + "step": 8707 + }, + { + "epoch": 2.64, + "learning_rate": 0.00013864908904748783, + "loss": 0.0734, + "step": 8708 + }, + { + "epoch": 2.64, + "learning_rate": 0.00013862026179111222, + "loss": 0.0811, + "step": 8709 + }, + { + "epoch": 2.64, + "learning_rate": 0.00013859143495746046, + "loss": 0.0966, + "step": 8710 + }, + { + "epoch": 2.64, + "learning_rate": 0.00013856260854760343, + "loss": 0.0773, + "step": 8711 + }, + { + "epoch": 2.65, + "learning_rate": 0.00013853378256261194, + "loss": 0.072, + "step": 8712 + }, + { + "epoch": 2.65, + "learning_rate": 0.00013850495700355685, + "loss": 0.1112, + "step": 8713 + }, + { + "epoch": 2.65, + "learning_rate": 0.00013847613187150895, + "loss": 0.0509, + "step": 8714 + }, + { + "epoch": 2.65, + "learning_rate": 0.00013844730716753887, + "loss": 0.0718, + "step": 8715 + }, + { + "epoch": 2.65, + "learning_rate": 0.00013841848289271747, + "loss": 0.0266, + "step": 8716 + }, + { + "epoch": 2.65, + "learning_rate": 0.00013838965904811547, + "loss": 0.1069, + "step": 8717 + }, + { + "epoch": 2.65, + "learning_rate": 0.00013836083563480356, + "loss": 0.069, + "step": 8718 + }, + { + "epoch": 2.65, + "learning_rate": 0.00013833201265385254, + "loss": 0.094, + "step": 8719 + }, + { + "epoch": 2.65, + "learning_rate": 0.00013830319010633299, + "loss": 0.1732, + "step": 8720 + }, + { + "epoch": 2.65, + "learning_rate": 0.00013827436799331561, + "loss": 0.0639, + "step": 8721 + }, + { + "epoch": 2.65, + "learning_rate": 0.0001382455463158711, + "loss": 0.0447, + "step": 8722 + }, + { + "epoch": 2.65, + "learning_rate": 0.0001382167250750701, + "loss": 0.0903, + "step": 8723 + }, + { + "epoch": 2.65, + "learning_rate": 0.00013818790427198315, + "loss": 0.0238, + "step": 8724 + }, + { + "epoch": 2.65, + "learning_rate": 0.00013815908390768102, + "loss": 0.1188, + "step": 8725 + }, + { + "epoch": 2.65, + "learning_rate": 0.0001381302639832341, + "loss": 0.0436, + "step": 8726 + }, + { + "epoch": 2.65, + "learning_rate": 0.0001381014444997131, + "loss": 0.0947, + "step": 8727 + }, + { + "epoch": 2.65, + "learning_rate": 0.0001380726254581885, + "loss": 0.0659, + "step": 8728 + }, + { + "epoch": 2.65, + "learning_rate": 0.00013804380685973096, + "loss": 0.1221, + "step": 8729 + }, + { + "epoch": 2.65, + "learning_rate": 0.00013801498870541092, + "loss": 0.1542, + "step": 8730 + }, + { + "epoch": 2.65, + "learning_rate": 0.00013798617099629887, + "loss": 0.1684, + "step": 8731 + }, + { + "epoch": 2.65, + "learning_rate": 0.00013795735373346534, + "loss": 0.1558, + "step": 8732 + }, + { + "epoch": 2.65, + "learning_rate": 0.0001379285369179808, + "loss": 0.1198, + "step": 8733 + }, + { + "epoch": 2.65, + "learning_rate": 0.00013789972055091573, + "loss": 0.0298, + "step": 8734 + }, + { + "epoch": 2.65, + "learning_rate": 0.0001378709046333406, + "loss": 0.0411, + "step": 8735 + }, + { + "epoch": 2.65, + "learning_rate": 0.00013784208916632574, + "loss": 0.0441, + "step": 8736 + }, + { + "epoch": 2.65, + "learning_rate": 0.00013781327415094162, + "loss": 0.1121, + "step": 8737 + }, + { + "epoch": 2.65, + "learning_rate": 0.00013778445958825863, + "loss": 0.0497, + "step": 8738 + }, + { + "epoch": 2.65, + "learning_rate": 0.00013775564547934715, + "loss": 0.1522, + "step": 8739 + }, + { + "epoch": 2.65, + "learning_rate": 0.00013772683182527757, + "loss": 0.1005, + "step": 8740 + }, + { + "epoch": 2.65, + "learning_rate": 0.00013769801862712017, + "loss": 0.0981, + "step": 8741 + }, + { + "epoch": 2.65, + "learning_rate": 0.0001376692058859453, + "loss": 0.0442, + "step": 8742 + }, + { + "epoch": 2.65, + "learning_rate": 0.00013764039360282323, + "loss": 0.1026, + "step": 8743 + }, + { + "epoch": 2.65, + "learning_rate": 0.00013761158177882433, + "loss": 0.1086, + "step": 8744 + }, + { + "epoch": 2.66, + "learning_rate": 0.00013758277041501886, + "loss": 0.0983, + "step": 8745 + }, + { + "epoch": 2.66, + "learning_rate": 0.00013755395951247702, + "loss": 0.0753, + "step": 8746 + }, + { + "epoch": 2.66, + "learning_rate": 0.0001375251490722691, + "loss": 0.1317, + "step": 8747 + }, + { + "epoch": 2.66, + "learning_rate": 0.0001374963390954653, + "loss": 0.1709, + "step": 8748 + }, + { + "epoch": 2.66, + "learning_rate": 0.00013746752958313584, + "loss": 0.1247, + "step": 8749 + }, + { + "epoch": 2.66, + "learning_rate": 0.00013743872053635097, + "loss": 0.0232, + "step": 8750 + }, + { + "epoch": 2.66, + "learning_rate": 0.00013740991195618072, + "loss": 0.0303, + "step": 8751 + }, + { + "epoch": 2.66, + "learning_rate": 0.00013738110384369528, + "loss": 0.0863, + "step": 8752 + }, + { + "epoch": 2.66, + "learning_rate": 0.00013735229619996487, + "loss": 0.0697, + "step": 8753 + }, + { + "epoch": 2.66, + "learning_rate": 0.0001373234890260596, + "loss": 0.115, + "step": 8754 + }, + { + "epoch": 2.66, + "learning_rate": 0.0001372946823230496, + "loss": 0.1255, + "step": 8755 + }, + { + "epoch": 2.66, + "learning_rate": 0.0001372658760920048, + "loss": 0.1008, + "step": 8756 + }, + { + "epoch": 2.66, + "learning_rate": 0.00013723707033399534, + "loss": 0.0786, + "step": 8757 + }, + { + "epoch": 2.66, + "learning_rate": 0.00013720826505009132, + "loss": 0.0701, + "step": 8758 + }, + { + "epoch": 2.66, + "learning_rate": 0.00013717946024136276, + "loss": 0.0975, + "step": 8759 + }, + { + "epoch": 2.66, + "learning_rate": 0.00013715065590887965, + "loss": 0.1668, + "step": 8760 + }, + { + "epoch": 2.66, + "learning_rate": 0.000137121852053712, + "loss": 0.0978, + "step": 8761 + }, + { + "epoch": 2.66, + "learning_rate": 0.00013709304867692975, + "loss": 0.1195, + "step": 8762 + }, + { + "epoch": 2.66, + "learning_rate": 0.0001370642457796029, + "loss": 0.0579, + "step": 8763 + }, + { + "epoch": 2.66, + "learning_rate": 0.0001370354433628014, + "loss": 0.0911, + "step": 8764 + }, + { + "epoch": 2.66, + "learning_rate": 0.0001370066414275952, + "loss": 0.2031, + "step": 8765 + }, + { + "epoch": 2.66, + "learning_rate": 0.00013697783997505413, + "loss": 0.0895, + "step": 8766 + }, + { + "epoch": 2.66, + "learning_rate": 0.00013694903900624808, + "loss": 0.1192, + "step": 8767 + }, + { + "epoch": 2.66, + "learning_rate": 0.000136920238522247, + "loss": 0.118, + "step": 8768 + }, + { + "epoch": 2.66, + "learning_rate": 0.00013689143852412068, + "loss": 0.1029, + "step": 8769 + }, + { + "epoch": 2.66, + "learning_rate": 0.00013686263901293902, + "loss": 0.1951, + "step": 8770 + }, + { + "epoch": 2.66, + "learning_rate": 0.00013683383998977175, + "loss": 0.1271, + "step": 8771 + }, + { + "epoch": 2.66, + "learning_rate": 0.00013680504145568872, + "loss": 0.0278, + "step": 8772 + }, + { + "epoch": 2.66, + "learning_rate": 0.0001367762434117597, + "loss": 0.0466, + "step": 8773 + }, + { + "epoch": 2.66, + "learning_rate": 0.00013674744585905448, + "loss": 0.0413, + "step": 8774 + }, + { + "epoch": 2.66, + "learning_rate": 0.00013671864879864282, + "loss": 0.0744, + "step": 8775 + }, + { + "epoch": 2.66, + "learning_rate": 0.00013668985223159436, + "loss": 0.1059, + "step": 8776 + }, + { + "epoch": 2.66, + "learning_rate": 0.00013666105615897886, + "loss": 0.0732, + "step": 8777 + }, + { + "epoch": 2.67, + "learning_rate": 0.000136632260581866, + "loss": 0.0728, + "step": 8778 + }, + { + "epoch": 2.67, + "learning_rate": 0.00013660346550132548, + "loss": 0.139, + "step": 8779 + }, + { + "epoch": 2.67, + "learning_rate": 0.000136574670918427, + "loss": 0.1041, + "step": 8780 + }, + { + "epoch": 2.67, + "learning_rate": 0.00013654587683424002, + "loss": 0.1379, + "step": 8781 + }, + { + "epoch": 2.67, + "learning_rate": 0.0001365170832498343, + "loss": 0.0458, + "step": 8782 + }, + { + "epoch": 2.67, + "learning_rate": 0.0001364882901662794, + "loss": 0.1695, + "step": 8783 + }, + { + "epoch": 2.67, + "learning_rate": 0.00013645949758464486, + "loss": 0.072, + "step": 8784 + }, + { + "epoch": 2.67, + "learning_rate": 0.00013643070550600035, + "loss": 0.1458, + "step": 8785 + }, + { + "epoch": 2.67, + "learning_rate": 0.00013640191393141527, + "loss": 0.1615, + "step": 8786 + }, + { + "epoch": 2.67, + "learning_rate": 0.00013637312286195926, + "loss": 0.1142, + "step": 8787 + }, + { + "epoch": 2.67, + "learning_rate": 0.00013634433229870173, + "loss": 0.1466, + "step": 8788 + }, + { + "epoch": 2.67, + "learning_rate": 0.00013631554224271224, + "loss": 0.086, + "step": 8789 + }, + { + "epoch": 2.67, + "learning_rate": 0.0001362867526950602, + "loss": 0.0732, + "step": 8790 + }, + { + "epoch": 2.67, + "learning_rate": 0.00013625796365681516, + "loss": 0.1235, + "step": 8791 + }, + { + "epoch": 2.67, + "learning_rate": 0.0001362291751290464, + "loss": 0.0694, + "step": 8792 + }, + { + "epoch": 2.67, + "learning_rate": 0.00013620038711282338, + "loss": 0.0416, + "step": 8793 + }, + { + "epoch": 2.67, + "learning_rate": 0.00013617159960921556, + "loss": 0.0426, + "step": 8794 + }, + { + "epoch": 2.67, + "learning_rate": 0.00013614281261929224, + "loss": 0.0646, + "step": 8795 + }, + { + "epoch": 2.67, + "learning_rate": 0.00013611402614412287, + "loss": 0.1241, + "step": 8796 + }, + { + "epoch": 2.67, + "learning_rate": 0.00013608524018477662, + "loss": 0.0734, + "step": 8797 + }, + { + "epoch": 2.67, + "learning_rate": 0.00013605645474232294, + "loss": 0.0878, + "step": 8798 + }, + { + "epoch": 2.67, + "learning_rate": 0.00013602766981783102, + "loss": 0.0834, + "step": 8799 + }, + { + "epoch": 2.67, + "learning_rate": 0.00013599888541237023, + "loss": 0.1011, + "step": 8800 + }, + { + "epoch": 2.67, + "learning_rate": 0.00013597010152700982, + "loss": 0.1422, + "step": 8801 + }, + { + "epoch": 2.67, + "learning_rate": 0.00013594131816281894, + "loss": 0.1338, + "step": 8802 + }, + { + "epoch": 2.67, + "learning_rate": 0.0001359125353208669, + "loss": 0.1294, + "step": 8803 + }, + { + "epoch": 2.67, + "learning_rate": 0.00013588375300222283, + "loss": 0.0965, + "step": 8804 + }, + { + "epoch": 2.67, + "learning_rate": 0.00013585497120795594, + "loss": 0.1193, + "step": 8805 + }, + { + "epoch": 2.67, + "learning_rate": 0.0001358261899391355, + "loss": 0.0786, + "step": 8806 + }, + { + "epoch": 2.67, + "learning_rate": 0.00013579740919683042, + "loss": 0.159, + "step": 8807 + }, + { + "epoch": 2.67, + "learning_rate": 0.00013576862898210994, + "loss": 0.1764, + "step": 8808 + }, + { + "epoch": 2.67, + "learning_rate": 0.0001357398492960432, + "loss": 0.0695, + "step": 8809 + }, + { + "epoch": 2.67, + "learning_rate": 0.00013571107013969923, + "loss": 0.0982, + "step": 8810 + }, + { + "epoch": 2.68, + "learning_rate": 0.0001356822915141471, + "loss": 0.0991, + "step": 8811 + }, + { + "epoch": 2.68, + "learning_rate": 0.00013565351342045587, + "loss": 0.1356, + "step": 8812 + }, + { + "epoch": 2.68, + "learning_rate": 0.0001356247358596945, + "loss": 0.1954, + "step": 8813 + }, + { + "epoch": 2.68, + "learning_rate": 0.00013559595883293207, + "loss": 0.0503, + "step": 8814 + }, + { + "epoch": 2.68, + "learning_rate": 0.00013556718234123753, + "loss": 0.0918, + "step": 8815 + }, + { + "epoch": 2.68, + "learning_rate": 0.00013553840638567987, + "loss": 0.0628, + "step": 8816 + }, + { + "epoch": 2.68, + "learning_rate": 0.00013550963096732797, + "loss": 0.0855, + "step": 8817 + }, + { + "epoch": 2.68, + "learning_rate": 0.00013548085608725078, + "loss": 0.1181, + "step": 8818 + }, + { + "epoch": 2.68, + "learning_rate": 0.0001354520817465172, + "loss": 0.1648, + "step": 8819 + }, + { + "epoch": 2.68, + "learning_rate": 0.00013542330794619615, + "loss": 0.1647, + "step": 8820 + }, + { + "epoch": 2.68, + "learning_rate": 0.0001353945346873565, + "loss": 0.1322, + "step": 8821 + }, + { + "epoch": 2.68, + "learning_rate": 0.00013536576197106703, + "loss": 0.1305, + "step": 8822 + }, + { + "epoch": 2.68, + "learning_rate": 0.00013533698979839654, + "loss": 0.1374, + "step": 8823 + }, + { + "epoch": 2.68, + "learning_rate": 0.00013530821817041392, + "loss": 0.1566, + "step": 8824 + }, + { + "epoch": 2.68, + "learning_rate": 0.0001352794470881879, + "loss": 0.0641, + "step": 8825 + }, + { + "epoch": 2.68, + "learning_rate": 0.00013525067655278727, + "loss": 0.0996, + "step": 8826 + }, + { + "epoch": 2.68, + "learning_rate": 0.00013522190656528073, + "loss": 0.1822, + "step": 8827 + }, + { + "epoch": 2.68, + "learning_rate": 0.00013519313712673703, + "loss": 0.0881, + "step": 8828 + }, + { + "epoch": 2.68, + "learning_rate": 0.00013516436823822486, + "loss": 0.1121, + "step": 8829 + }, + { + "epoch": 2.68, + "learning_rate": 0.00013513559990081293, + "loss": 0.0574, + "step": 8830 + }, + { + "epoch": 2.68, + "learning_rate": 0.0001351068321155699, + "loss": 0.0556, + "step": 8831 + }, + { + "epoch": 2.68, + "learning_rate": 0.00013507806488356435, + "loss": 0.1771, + "step": 8832 + }, + { + "epoch": 2.68, + "learning_rate": 0.0001350492982058649, + "loss": 0.093, + "step": 8833 + }, + { + "epoch": 2.68, + "learning_rate": 0.0001350205320835402, + "loss": 0.0983, + "step": 8834 + }, + { + "epoch": 2.68, + "learning_rate": 0.00013499176651765875, + "loss": 0.1663, + "step": 8835 + }, + { + "epoch": 2.68, + "learning_rate": 0.0001349630015092893, + "loss": 0.1109, + "step": 8836 + }, + { + "epoch": 2.68, + "learning_rate": 0.00013493423705950014, + "loss": 0.1119, + "step": 8837 + }, + { + "epoch": 2.68, + "learning_rate": 0.00013490547316935995, + "loss": 0.0669, + "step": 8838 + }, + { + "epoch": 2.68, + "learning_rate": 0.00013487670983993713, + "loss": 0.2115, + "step": 8839 + }, + { + "epoch": 2.68, + "learning_rate": 0.0001348479470723002, + "loss": 0.0216, + "step": 8840 + }, + { + "epoch": 2.68, + "learning_rate": 0.00013481918486751761, + "loss": 0.046, + "step": 8841 + }, + { + "epoch": 2.68, + "learning_rate": 0.00013479042322665777, + "loss": 0.0314, + "step": 8842 + }, + { + "epoch": 2.68, + "learning_rate": 0.0001347616621507891, + "loss": 0.0871, + "step": 8843 + }, + { + "epoch": 2.69, + "learning_rate": 0.00013473290164098001, + "loss": 0.1, + "step": 8844 + }, + { + "epoch": 2.69, + "learning_rate": 0.00013470414169829885, + "loss": 0.0229, + "step": 8845 + }, + { + "epoch": 2.69, + "learning_rate": 0.00013467538232381403, + "loss": 0.1427, + "step": 8846 + }, + { + "epoch": 2.69, + "learning_rate": 0.00013464662351859376, + "loss": 0.0487, + "step": 8847 + }, + { + "epoch": 2.69, + "learning_rate": 0.0001346178652837064, + "loss": 0.1492, + "step": 8848 + }, + { + "epoch": 2.69, + "learning_rate": 0.00013458910762022023, + "loss": 0.1184, + "step": 8849 + }, + { + "epoch": 2.69, + "learning_rate": 0.00013456035052920352, + "loss": 0.1045, + "step": 8850 + }, + { + "epoch": 2.69, + "learning_rate": 0.0001345315940117245, + "loss": 0.1458, + "step": 8851 + }, + { + "epoch": 2.69, + "learning_rate": 0.00013450283806885145, + "loss": 0.1291, + "step": 8852 + }, + { + "epoch": 2.69, + "learning_rate": 0.0001344740827016525, + "loss": 0.1213, + "step": 8853 + }, + { + "epoch": 2.69, + "learning_rate": 0.00013444532791119585, + "loss": 0.1646, + "step": 8854 + }, + { + "epoch": 2.69, + "learning_rate": 0.00013441657369854963, + "loss": 0.0959, + "step": 8855 + }, + { + "epoch": 2.69, + "learning_rate": 0.000134387820064782, + "loss": 0.1112, + "step": 8856 + }, + { + "epoch": 2.69, + "learning_rate": 0.00013435906701096112, + "loss": 0.0133, + "step": 8857 + }, + { + "epoch": 2.69, + "learning_rate": 0.000134330314538155, + "loss": 0.1113, + "step": 8858 + }, + { + "epoch": 2.69, + "learning_rate": 0.00013430156264743174, + "loss": 0.0676, + "step": 8859 + }, + { + "epoch": 2.69, + "learning_rate": 0.0001342728113398594, + "loss": 0.0277, + "step": 8860 + }, + { + "epoch": 2.69, + "learning_rate": 0.00013424406061650598, + "loss": 0.0512, + "step": 8861 + }, + { + "epoch": 2.69, + "learning_rate": 0.0001342153104784396, + "loss": 0.0585, + "step": 8862 + }, + { + "epoch": 2.69, + "learning_rate": 0.00013418656092672806, + "loss": 0.0453, + "step": 8863 + }, + { + "epoch": 2.69, + "learning_rate": 0.0001341578119624394, + "loss": 0.0569, + "step": 8864 + }, + { + "epoch": 2.69, + "learning_rate": 0.00013412906358664157, + "loss": 0.0841, + "step": 8865 + }, + { + "epoch": 2.69, + "learning_rate": 0.00013410031580040252, + "loss": 0.0676, + "step": 8866 + }, + { + "epoch": 2.69, + "learning_rate": 0.0001340715686047901, + "loss": 0.0834, + "step": 8867 + }, + { + "epoch": 2.69, + "learning_rate": 0.0001340428220008722, + "loss": 0.0928, + "step": 8868 + }, + { + "epoch": 2.69, + "learning_rate": 0.00013401407598971667, + "loss": 0.0867, + "step": 8869 + }, + { + "epoch": 2.69, + "learning_rate": 0.0001339853305723913, + "loss": 0.0731, + "step": 8870 + }, + { + "epoch": 2.69, + "learning_rate": 0.00013395658574996396, + "loss": 0.0975, + "step": 8871 + }, + { + "epoch": 2.69, + "learning_rate": 0.0001339278415235025, + "loss": 0.128, + "step": 8872 + }, + { + "epoch": 2.69, + "learning_rate": 0.0001338990978940745, + "loss": 0.0154, + "step": 8873 + }, + { + "epoch": 2.69, + "learning_rate": 0.0001338703548627478, + "loss": 0.0936, + "step": 8874 + }, + { + "epoch": 2.69, + "learning_rate": 0.00013384161243059012, + "loss": 0.1169, + "step": 8875 + }, + { + "epoch": 2.7, + "learning_rate": 0.00013381287059866914, + "loss": 0.0371, + "step": 8876 + }, + { + "epoch": 2.7, + "learning_rate": 0.0001337841293680526, + "loss": 0.0755, + "step": 8877 + }, + { + "epoch": 2.7, + "learning_rate": 0.00013375538873980808, + "loss": 0.1221, + "step": 8878 + }, + { + "epoch": 2.7, + "learning_rate": 0.00013372664871500324, + "loss": 0.1613, + "step": 8879 + }, + { + "epoch": 2.7, + "learning_rate": 0.00013369790929470567, + "loss": 0.0546, + "step": 8880 + }, + { + "epoch": 2.7, + "learning_rate": 0.00013366917047998294, + "loss": 0.1254, + "step": 8881 + }, + { + "epoch": 2.7, + "learning_rate": 0.00013364043227190268, + "loss": 0.0752, + "step": 8882 + }, + { + "epoch": 2.7, + "learning_rate": 0.00013361169467153235, + "loss": 0.048, + "step": 8883 + }, + { + "epoch": 2.7, + "learning_rate": 0.00013358295767993954, + "loss": 0.0639, + "step": 8884 + }, + { + "epoch": 2.7, + "learning_rate": 0.00013355422129819166, + "loss": 0.076, + "step": 8885 + }, + { + "epoch": 2.7, + "learning_rate": 0.00013352548552735625, + "loss": 0.0563, + "step": 8886 + }, + { + "epoch": 2.7, + "learning_rate": 0.00013349675036850083, + "loss": 0.1722, + "step": 8887 + }, + { + "epoch": 2.7, + "learning_rate": 0.00013346801582269263, + "loss": 0.1626, + "step": 8888 + }, + { + "epoch": 2.7, + "learning_rate": 0.0001334392818909992, + "loss": 0.1145, + "step": 8889 + }, + { + "epoch": 2.7, + "learning_rate": 0.00013341054857448788, + "loss": 0.0842, + "step": 8890 + }, + { + "epoch": 2.7, + "learning_rate": 0.000133381815874226, + "loss": 0.0435, + "step": 8891 + }, + { + "epoch": 2.7, + "learning_rate": 0.000133353083791281, + "loss": 0.0782, + "step": 8892 + }, + { + "epoch": 2.7, + "learning_rate": 0.00013332435232672006, + "loss": 0.1359, + "step": 8893 + }, + { + "epoch": 2.7, + "learning_rate": 0.00013329562148161055, + "loss": 0.1474, + "step": 8894 + }, + { + "epoch": 2.7, + "learning_rate": 0.0001332668912570197, + "loss": 0.1108, + "step": 8895 + }, + { + "epoch": 2.7, + "learning_rate": 0.00013323816165401476, + "loss": 0.0358, + "step": 8896 + }, + { + "epoch": 2.7, + "learning_rate": 0.00013320943267366306, + "loss": 0.1197, + "step": 8897 + }, + { + "epoch": 2.7, + "learning_rate": 0.00013318070431703159, + "loss": 0.0523, + "step": 8898 + }, + { + "epoch": 2.7, + "learning_rate": 0.00013315197658518766, + "loss": 0.0862, + "step": 8899 + }, + { + "epoch": 2.7, + "learning_rate": 0.0001331232494791984, + "loss": 0.0917, + "step": 8900 + }, + { + "epoch": 2.7, + "learning_rate": 0.0001330945230001309, + "loss": 0.0729, + "step": 8901 + }, + { + "epoch": 2.7, + "learning_rate": 0.0001330657971490524, + "loss": 0.1205, + "step": 8902 + }, + { + "epoch": 2.7, + "learning_rate": 0.00013303707192702978, + "loss": 0.0357, + "step": 8903 + }, + { + "epoch": 2.7, + "learning_rate": 0.00013300834733513022, + "loss": 0.1376, + "step": 8904 + }, + { + "epoch": 2.7, + "learning_rate": 0.00013297962337442072, + "loss": 0.081, + "step": 8905 + }, + { + "epoch": 2.7, + "learning_rate": 0.0001329509000459683, + "loss": 0.0917, + "step": 8906 + }, + { + "epoch": 2.7, + "learning_rate": 0.00013292217735083999, + "loss": 0.1373, + "step": 8907 + }, + { + "epoch": 2.7, + "learning_rate": 0.00013289345529010263, + "loss": 0.1349, + "step": 8908 + }, + { + "epoch": 2.71, + "learning_rate": 0.0001328647338648233, + "loss": 0.1087, + "step": 8909 + }, + { + "epoch": 2.71, + "learning_rate": 0.00013283601307606882, + "loss": 0.1133, + "step": 8910 + }, + { + "epoch": 2.71, + "learning_rate": 0.00013280729292490615, + "loss": 0.084, + "step": 8911 + }, + { + "epoch": 2.71, + "learning_rate": 0.00013277857341240212, + "loss": 0.0928, + "step": 8912 + }, + { + "epoch": 2.71, + "learning_rate": 0.00013274985453962367, + "loss": 0.0696, + "step": 8913 + }, + { + "epoch": 2.71, + "learning_rate": 0.00013272113630763744, + "loss": 0.1525, + "step": 8914 + }, + { + "epoch": 2.71, + "learning_rate": 0.00013269241871751035, + "loss": 0.0225, + "step": 8915 + }, + { + "epoch": 2.71, + "learning_rate": 0.00013266370177030914, + "loss": 0.0264, + "step": 8916 + }, + { + "epoch": 2.71, + "learning_rate": 0.00013263498546710055, + "loss": 0.0839, + "step": 8917 + }, + { + "epoch": 2.71, + "learning_rate": 0.0001326062698089514, + "loss": 0.099, + "step": 8918 + }, + { + "epoch": 2.71, + "learning_rate": 0.00013257755479692828, + "loss": 0.0428, + "step": 8919 + }, + { + "epoch": 2.71, + "learning_rate": 0.00013254884043209788, + "loss": 0.1288, + "step": 8920 + }, + { + "epoch": 2.71, + "learning_rate": 0.0001325201267155269, + "loss": 0.1366, + "step": 8921 + }, + { + "epoch": 2.71, + "learning_rate": 0.00013249141364828196, + "loss": 0.0823, + "step": 8922 + }, + { + "epoch": 2.71, + "learning_rate": 0.0001324627012314297, + "loss": 0.2425, + "step": 8923 + }, + { + "epoch": 2.71, + "learning_rate": 0.0001324339894660366, + "loss": 0.0533, + "step": 8924 + }, + { + "epoch": 2.71, + "learning_rate": 0.00013240527835316928, + "loss": 0.118, + "step": 8925 + }, + { + "epoch": 2.71, + "learning_rate": 0.00013237656789389428, + "loss": 0.1303, + "step": 8926 + }, + { + "epoch": 2.71, + "learning_rate": 0.00013234785808927813, + "loss": 0.0483, + "step": 8927 + }, + { + "epoch": 2.71, + "learning_rate": 0.00013231914894038733, + "loss": 0.1315, + "step": 8928 + }, + { + "epoch": 2.71, + "learning_rate": 0.00013229044044828824, + "loss": 0.1284, + "step": 8929 + }, + { + "epoch": 2.71, + "learning_rate": 0.00013226173261404734, + "loss": 0.1373, + "step": 8930 + }, + { + "epoch": 2.71, + "learning_rate": 0.00013223302543873108, + "loss": 0.0269, + "step": 8931 + }, + { + "epoch": 2.71, + "learning_rate": 0.0001322043189234058, + "loss": 0.1052, + "step": 8932 + }, + { + "epoch": 2.71, + "learning_rate": 0.00013217561306913794, + "loss": 0.124, + "step": 8933 + }, + { + "epoch": 2.71, + "learning_rate": 0.00013214690787699372, + "loss": 0.0383, + "step": 8934 + }, + { + "epoch": 2.71, + "learning_rate": 0.00013211820334803955, + "loss": 0.1445, + "step": 8935 + }, + { + "epoch": 2.71, + "learning_rate": 0.00013208949948334168, + "loss": 0.118, + "step": 8936 + }, + { + "epoch": 2.71, + "learning_rate": 0.00013206079628396637, + "loss": 0.1363, + "step": 8937 + }, + { + "epoch": 2.71, + "learning_rate": 0.00013203209375097993, + "loss": 0.1442, + "step": 8938 + }, + { + "epoch": 2.71, + "learning_rate": 0.0001320033918854485, + "loss": 0.1118, + "step": 8939 + }, + { + "epoch": 2.71, + "learning_rate": 0.0001319746906884382, + "loss": 0.0137, + "step": 8940 + }, + { + "epoch": 2.71, + "learning_rate": 0.00013194599016101534, + "loss": 0.1521, + "step": 8941 + }, + { + "epoch": 2.72, + "learning_rate": 0.000131917290304246, + "loss": 0.1302, + "step": 8942 + }, + { + "epoch": 2.72, + "learning_rate": 0.00013188859111919635, + "loss": 0.0185, + "step": 8943 + }, + { + "epoch": 2.72, + "learning_rate": 0.0001318598926069324, + "loss": 0.1001, + "step": 8944 + }, + { + "epoch": 2.72, + "learning_rate": 0.00013183119476852022, + "loss": 0.1153, + "step": 8945 + }, + { + "epoch": 2.72, + "learning_rate": 0.00013180249760502588, + "loss": 0.1083, + "step": 8946 + }, + { + "epoch": 2.72, + "learning_rate": 0.00013177380111751538, + "loss": 0.1666, + "step": 8947 + }, + { + "epoch": 2.72, + "learning_rate": 0.00013174510530705475, + "loss": 0.0459, + "step": 8948 + }, + { + "epoch": 2.72, + "learning_rate": 0.0001317164101747099, + "loss": 0.1576, + "step": 8949 + }, + { + "epoch": 2.72, + "learning_rate": 0.0001316877157215468, + "loss": 0.0802, + "step": 8950 + }, + { + "epoch": 2.72, + "learning_rate": 0.00013165902194863137, + "loss": 0.0622, + "step": 8951 + }, + { + "epoch": 2.72, + "learning_rate": 0.00013163032885702948, + "loss": 0.1359, + "step": 8952 + }, + { + "epoch": 2.72, + "learning_rate": 0.00013160163644780707, + "loss": 0.0752, + "step": 8953 + }, + { + "epoch": 2.72, + "learning_rate": 0.00013157294472202985, + "loss": 0.0786, + "step": 8954 + }, + { + "epoch": 2.72, + "learning_rate": 0.0001315442536807637, + "loss": 0.0434, + "step": 8955 + }, + { + "epoch": 2.72, + "learning_rate": 0.00013151556332507438, + "loss": 0.0863, + "step": 8956 + }, + { + "epoch": 2.72, + "learning_rate": 0.0001314868736560277, + "loss": 0.0365, + "step": 8957 + }, + { + "epoch": 2.72, + "learning_rate": 0.0001314581846746894, + "loss": 0.1045, + "step": 8958 + }, + { + "epoch": 2.72, + "learning_rate": 0.00013142949638212512, + "loss": 0.1128, + "step": 8959 + }, + { + "epoch": 2.72, + "learning_rate": 0.00013140080877940063, + "loss": 0.0513, + "step": 8960 + }, + { + "epoch": 2.72, + "learning_rate": 0.00013137212186758153, + "loss": 0.1034, + "step": 8961 + }, + { + "epoch": 2.72, + "learning_rate": 0.00013134343564773346, + "loss": 0.0544, + "step": 8962 + }, + { + "epoch": 2.72, + "learning_rate": 0.00013131475012092208, + "loss": 0.0991, + "step": 8963 + }, + { + "epoch": 2.72, + "learning_rate": 0.00013128606528821292, + "loss": 0.0899, + "step": 8964 + }, + { + "epoch": 2.72, + "learning_rate": 0.00013125738115067157, + "loss": 0.0622, + "step": 8965 + }, + { + "epoch": 2.72, + "learning_rate": 0.00013122869770936353, + "loss": 0.1145, + "step": 8966 + }, + { + "epoch": 2.72, + "learning_rate": 0.00013120001496535433, + "loss": 0.0868, + "step": 8967 + }, + { + "epoch": 2.72, + "learning_rate": 0.0001311713329197095, + "loss": 0.0962, + "step": 8968 + }, + { + "epoch": 2.72, + "learning_rate": 0.00013114265157349438, + "loss": 0.1802, + "step": 8969 + }, + { + "epoch": 2.72, + "learning_rate": 0.00013111397092777446, + "loss": 0.0221, + "step": 8970 + }, + { + "epoch": 2.72, + "learning_rate": 0.00013108529098361516, + "loss": 0.1131, + "step": 8971 + }, + { + "epoch": 2.72, + "learning_rate": 0.0001310566117420818, + "loss": 0.0279, + "step": 8972 + }, + { + "epoch": 2.72, + "learning_rate": 0.00013102793320423977, + "loss": 0.1214, + "step": 8973 + }, + { + "epoch": 2.72, + "learning_rate": 0.0001309992553711544, + "loss": 0.0669, + "step": 8974 + }, + { + "epoch": 2.73, + "learning_rate": 0.00013097057824389095, + "loss": 0.0854, + "step": 8975 + }, + { + "epoch": 2.73, + "learning_rate": 0.00013094190182351472, + "loss": 0.0288, + "step": 8976 + }, + { + "epoch": 2.73, + "learning_rate": 0.00013091322611109098, + "loss": 0.1353, + "step": 8977 + }, + { + "epoch": 2.73, + "learning_rate": 0.00013088455110768488, + "loss": 0.1041, + "step": 8978 + }, + { + "epoch": 2.73, + "learning_rate": 0.0001308558768143617, + "loss": 0.0686, + "step": 8979 + }, + { + "epoch": 2.73, + "learning_rate": 0.00013082720323218653, + "loss": 0.166, + "step": 8980 + }, + { + "epoch": 2.73, + "learning_rate": 0.0001307985303622245, + "loss": 0.1382, + "step": 8981 + }, + { + "epoch": 2.73, + "learning_rate": 0.0001307698582055408, + "loss": 0.1302, + "step": 8982 + }, + { + "epoch": 2.73, + "learning_rate": 0.00013074118676320045, + "loss": 0.0982, + "step": 8983 + }, + { + "epoch": 2.73, + "learning_rate": 0.0001307125160362686, + "loss": 0.1404, + "step": 8984 + }, + { + "epoch": 2.73, + "learning_rate": 0.00013068384602581018, + "loss": 0.0557, + "step": 8985 + }, + { + "epoch": 2.73, + "learning_rate": 0.0001306551767328902, + "loss": 0.2005, + "step": 8986 + }, + { + "epoch": 2.73, + "learning_rate": 0.0001306265081585737, + "loss": 0.0987, + "step": 8987 + }, + { + "epoch": 2.73, + "learning_rate": 0.00013059784030392563, + "loss": 0.1235, + "step": 8988 + }, + { + "epoch": 2.73, + "learning_rate": 0.00013056917317001089, + "loss": 0.0538, + "step": 8989 + }, + { + "epoch": 2.73, + "learning_rate": 0.0001305405067578944, + "loss": 0.0283, + "step": 8990 + }, + { + "epoch": 2.73, + "learning_rate": 0.00013051184106864096, + "loss": 0.1161, + "step": 8991 + }, + { + "epoch": 2.73, + "learning_rate": 0.0001304831761033155, + "loss": 0.1387, + "step": 8992 + }, + { + "epoch": 2.73, + "learning_rate": 0.00013045451186298283, + "loss": 0.0916, + "step": 8993 + }, + { + "epoch": 2.73, + "learning_rate": 0.00013042584834870778, + "loss": 0.122, + "step": 8994 + }, + { + "epoch": 2.73, + "learning_rate": 0.000130397185561555, + "loss": 0.1096, + "step": 8995 + }, + { + "epoch": 2.73, + "learning_rate": 0.0001303685235025893, + "loss": 0.0695, + "step": 8996 + }, + { + "epoch": 2.73, + "learning_rate": 0.00013033986217287537, + "loss": 0.1228, + "step": 8997 + }, + { + "epoch": 2.73, + "learning_rate": 0.00013031120157347791, + "loss": 0.0611, + "step": 8998 + }, + { + "epoch": 2.73, + "learning_rate": 0.0001302825417054616, + "loss": 0.0883, + "step": 8999 + }, + { + "epoch": 2.73, + "learning_rate": 0.00013025388256989102, + "loss": 0.1008, + "step": 9000 + }, + { + "epoch": 2.73, + "learning_rate": 0.0001302252241678308, + "loss": 0.0761, + "step": 9001 + }, + { + "epoch": 2.73, + "learning_rate": 0.0001301965665003455, + "loss": 0.0431, + "step": 9002 + }, + { + "epoch": 2.73, + "learning_rate": 0.00013016790956849972, + "loss": 0.0685, + "step": 9003 + }, + { + "epoch": 2.73, + "learning_rate": 0.00013013925337335795, + "loss": 0.0458, + "step": 9004 + }, + { + "epoch": 2.73, + "learning_rate": 0.00013011059791598464, + "loss": 0.1462, + "step": 9005 + }, + { + "epoch": 2.73, + "learning_rate": 0.0001300819431974443, + "loss": 0.1006, + "step": 9006 + }, + { + "epoch": 2.73, + "learning_rate": 0.00013005328921880134, + "loss": 0.0842, + "step": 9007 + }, + { + "epoch": 2.74, + "learning_rate": 0.0001300246359811202, + "loss": 0.1054, + "step": 9008 + }, + { + "epoch": 2.74, + "learning_rate": 0.00012999598348546534, + "loss": 0.0933, + "step": 9009 + }, + { + "epoch": 2.74, + "learning_rate": 0.00012996733173290095, + "loss": 0.0643, + "step": 9010 + }, + { + "epoch": 2.74, + "learning_rate": 0.00012993868072449147, + "loss": 0.0596, + "step": 9011 + }, + { + "epoch": 2.74, + "learning_rate": 0.00012991003046130113, + "loss": 0.0992, + "step": 9012 + }, + { + "epoch": 2.74, + "learning_rate": 0.0001298813809443943, + "loss": 0.1502, + "step": 9013 + }, + { + "epoch": 2.74, + "learning_rate": 0.00012985273217483513, + "loss": 0.0618, + "step": 9014 + }, + { + "epoch": 2.74, + "learning_rate": 0.00012982408415368788, + "loss": 0.1753, + "step": 9015 + }, + { + "epoch": 2.74, + "learning_rate": 0.00012979543688201674, + "loss": 0.0529, + "step": 9016 + }, + { + "epoch": 2.74, + "learning_rate": 0.0001297667903608859, + "loss": 0.0938, + "step": 9017 + }, + { + "epoch": 2.74, + "learning_rate": 0.0001297381445913594, + "loss": 0.1322, + "step": 9018 + }, + { + "epoch": 2.74, + "learning_rate": 0.00012970949957450152, + "loss": 0.0705, + "step": 9019 + }, + { + "epoch": 2.74, + "learning_rate": 0.00012968085531137616, + "loss": 0.0829, + "step": 9020 + }, + { + "epoch": 2.74, + "learning_rate": 0.00012965221180304744, + "loss": 0.1663, + "step": 9021 + }, + { + "epoch": 2.74, + "learning_rate": 0.00012962356905057935, + "loss": 0.1355, + "step": 9022 + }, + { + "epoch": 2.74, + "learning_rate": 0.00012959492705503595, + "loss": 0.092, + "step": 9023 + }, + { + "epoch": 2.74, + "learning_rate": 0.00012956628581748124, + "loss": 0.0482, + "step": 9024 + }, + { + "epoch": 2.74, + "learning_rate": 0.000129537645338979, + "loss": 0.0914, + "step": 9025 + }, + { + "epoch": 2.74, + "learning_rate": 0.00012950900562059325, + "loss": 0.1156, + "step": 9026 + }, + { + "epoch": 2.74, + "learning_rate": 0.00012948036666338784, + "loss": 0.0809, + "step": 9027 + }, + { + "epoch": 2.74, + "learning_rate": 0.00012945172846842663, + "loss": 0.0661, + "step": 9028 + }, + { + "epoch": 2.74, + "learning_rate": 0.0001294230910367735, + "loss": 0.0689, + "step": 9029 + }, + { + "epoch": 2.74, + "learning_rate": 0.0001293944543694922, + "loss": 0.0713, + "step": 9030 + }, + { + "epoch": 2.74, + "learning_rate": 0.00012936581846764643, + "loss": 0.078, + "step": 9031 + }, + { + "epoch": 2.74, + "learning_rate": 0.0001293371833323, + "loss": 0.1209, + "step": 9032 + }, + { + "epoch": 2.74, + "learning_rate": 0.00012930854896451666, + "loss": 0.0824, + "step": 9033 + }, + { + "epoch": 2.74, + "learning_rate": 0.00012927991536536008, + "loss": 0.1311, + "step": 9034 + }, + { + "epoch": 2.74, + "learning_rate": 0.00012925128253589384, + "loss": 0.1125, + "step": 9035 + }, + { + "epoch": 2.74, + "learning_rate": 0.0001292226504771816, + "loss": 0.0886, + "step": 9036 + }, + { + "epoch": 2.74, + "learning_rate": 0.00012919401919028696, + "loss": 0.1155, + "step": 9037 + }, + { + "epoch": 2.74, + "learning_rate": 0.0001291653886762735, + "loss": 0.1374, + "step": 9038 + }, + { + "epoch": 2.74, + "learning_rate": 0.00012913675893620475, + "loss": 0.0695, + "step": 9039 + }, + { + "epoch": 2.74, + "learning_rate": 0.00012910812997114426, + "loss": 0.0653, + "step": 9040 + }, + { + "epoch": 2.75, + "learning_rate": 0.00012907950178215545, + "loss": 0.1138, + "step": 9041 + }, + { + "epoch": 2.75, + "learning_rate": 0.0001290508743703018, + "loss": 0.1053, + "step": 9042 + }, + { + "epoch": 2.75, + "learning_rate": 0.00012902224773664672, + "loss": 0.0996, + "step": 9043 + }, + { + "epoch": 2.75, + "learning_rate": 0.0001289936218822536, + "loss": 0.0754, + "step": 9044 + }, + { + "epoch": 2.75, + "learning_rate": 0.0001289649968081859, + "loss": 0.0137, + "step": 9045 + }, + { + "epoch": 2.75, + "learning_rate": 0.00012893637251550684, + "loss": 0.1353, + "step": 9046 + }, + { + "epoch": 2.75, + "learning_rate": 0.00012890774900527977, + "loss": 0.1149, + "step": 9047 + }, + { + "epoch": 2.75, + "learning_rate": 0.000128879126278568, + "loss": 0.058, + "step": 9048 + }, + { + "epoch": 2.75, + "learning_rate": 0.0001288505043364347, + "loss": 0.0818, + "step": 9049 + }, + { + "epoch": 2.75, + "learning_rate": 0.00012882188317994323, + "loss": 0.1022, + "step": 9050 + }, + { + "epoch": 2.75, + "learning_rate": 0.00012879326281015662, + "loss": 0.1265, + "step": 9051 + }, + { + "epoch": 2.75, + "learning_rate": 0.00012876464322813814, + "loss": 0.0592, + "step": 9052 + }, + { + "epoch": 2.75, + "learning_rate": 0.00012873602443495087, + "loss": 0.094, + "step": 9053 + }, + { + "epoch": 2.75, + "learning_rate": 0.00012870740643165795, + "loss": 0.0961, + "step": 9054 + }, + { + "epoch": 2.75, + "learning_rate": 0.00012867878921932245, + "loss": 0.0617, + "step": 9055 + }, + { + "epoch": 2.75, + "learning_rate": 0.00012865017279900737, + "loss": 0.068, + "step": 9056 + }, + { + "epoch": 2.75, + "learning_rate": 0.0001286215571717758, + "loss": 0.0765, + "step": 9057 + }, + { + "epoch": 2.75, + "learning_rate": 0.00012859294233869069, + "loss": 0.2714, + "step": 9058 + }, + { + "epoch": 2.75, + "learning_rate": 0.00012856432830081496, + "loss": 0.1024, + "step": 9059 + }, + { + "epoch": 2.75, + "learning_rate": 0.00012853571505921165, + "loss": 0.1242, + "step": 9060 + }, + { + "epoch": 2.75, + "learning_rate": 0.00012850710261494351, + "loss": 0.1235, + "step": 9061 + }, + { + "epoch": 2.75, + "learning_rate": 0.0001284784909690735, + "loss": 0.1811, + "step": 9062 + }, + { + "epoch": 2.75, + "learning_rate": 0.0001284498801226644, + "loss": 0.1707, + "step": 9063 + }, + { + "epoch": 2.75, + "learning_rate": 0.0001284212700767791, + "loss": 0.1073, + "step": 9064 + }, + { + "epoch": 2.75, + "learning_rate": 0.00012839266083248038, + "loss": 0.0968, + "step": 9065 + }, + { + "epoch": 2.75, + "learning_rate": 0.00012836405239083088, + "loss": 0.1104, + "step": 9066 + }, + { + "epoch": 2.75, + "learning_rate": 0.0001283354447528934, + "loss": 0.1347, + "step": 9067 + }, + { + "epoch": 2.75, + "learning_rate": 0.00012830683791973063, + "loss": 0.091, + "step": 9068 + }, + { + "epoch": 2.75, + "learning_rate": 0.0001282782318924052, + "loss": 0.0921, + "step": 9069 + }, + { + "epoch": 2.75, + "learning_rate": 0.00012824962667197978, + "loss": 0.1061, + "step": 9070 + }, + { + "epoch": 2.75, + "learning_rate": 0.00012822102225951693, + "loss": 0.0007, + "step": 9071 + }, + { + "epoch": 2.75, + "learning_rate": 0.00012819241865607924, + "loss": 0.0866, + "step": 9072 + }, + { + "epoch": 2.75, + "learning_rate": 0.00012816381586272925, + "loss": 0.0566, + "step": 9073 + }, + { + "epoch": 2.76, + "learning_rate": 0.00012813521388052945, + "loss": 0.1003, + "step": 9074 + }, + { + "epoch": 2.76, + "learning_rate": 0.00012810661271054241, + "loss": 0.0948, + "step": 9075 + }, + { + "epoch": 2.76, + "learning_rate": 0.00012807801235383044, + "loss": 0.1211, + "step": 9076 + }, + { + "epoch": 2.76, + "learning_rate": 0.00012804941281145603, + "loss": 0.0671, + "step": 9077 + }, + { + "epoch": 2.76, + "learning_rate": 0.00012802081408448156, + "loss": 0.1514, + "step": 9078 + }, + { + "epoch": 2.76, + "learning_rate": 0.00012799221617396938, + "loss": 0.1942, + "step": 9079 + }, + { + "epoch": 2.76, + "learning_rate": 0.00012796361908098188, + "loss": 0.1106, + "step": 9080 + }, + { + "epoch": 2.76, + "learning_rate": 0.00012793502280658128, + "loss": 0.0663, + "step": 9081 + }, + { + "epoch": 2.76, + "learning_rate": 0.00012790642735182987, + "loss": 0.0854, + "step": 9082 + }, + { + "epoch": 2.76, + "learning_rate": 0.0001278778327177899, + "loss": 0.0929, + "step": 9083 + }, + { + "epoch": 2.76, + "learning_rate": 0.00012784923890552357, + "loss": 0.0329, + "step": 9084 + }, + { + "epoch": 2.76, + "learning_rate": 0.00012782064591609313, + "loss": 0.0959, + "step": 9085 + }, + { + "epoch": 2.76, + "learning_rate": 0.00012779205375056056, + "loss": 0.0795, + "step": 9086 + }, + { + "epoch": 2.76, + "learning_rate": 0.00012776346240998807, + "loss": 0.0926, + "step": 9087 + }, + { + "epoch": 2.76, + "learning_rate": 0.00012773487189543777, + "loss": 0.098, + "step": 9088 + }, + { + "epoch": 2.76, + "learning_rate": 0.00012770628220797167, + "loss": 0.1088, + "step": 9089 + }, + { + "epoch": 2.76, + "learning_rate": 0.00012767769334865188, + "loss": 0.124, + "step": 9090 + }, + { + "epoch": 2.76, + "learning_rate": 0.00012764910531854026, + "loss": 0.0483, + "step": 9091 + }, + { + "epoch": 2.76, + "learning_rate": 0.00012762051811869885, + "loss": 0.0878, + "step": 9092 + }, + { + "epoch": 2.76, + "learning_rate": 0.00012759193175018954, + "loss": 0.0634, + "step": 9093 + }, + { + "epoch": 2.76, + "learning_rate": 0.00012756334621407428, + "loss": 0.0515, + "step": 9094 + }, + { + "epoch": 2.76, + "learning_rate": 0.00012753476151141494, + "loss": 0.1976, + "step": 9095 + }, + { + "epoch": 2.76, + "learning_rate": 0.0001275061776432733, + "loss": 0.1118, + "step": 9096 + }, + { + "epoch": 2.76, + "learning_rate": 0.00012747759461071117, + "loss": 0.1185, + "step": 9097 + }, + { + "epoch": 2.76, + "learning_rate": 0.00012744901241479035, + "loss": 0.0414, + "step": 9098 + }, + { + "epoch": 2.76, + "learning_rate": 0.00012742043105657262, + "loss": 0.1549, + "step": 9099 + }, + { + "epoch": 2.76, + "learning_rate": 0.00012739185053711968, + "loss": 0.0766, + "step": 9100 + }, + { + "epoch": 2.76, + "learning_rate": 0.00012736327085749322, + "loss": 0.0688, + "step": 9101 + }, + { + "epoch": 2.76, + "learning_rate": 0.00012733469201875482, + "loss": 0.0798, + "step": 9102 + }, + { + "epoch": 2.76, + "learning_rate": 0.00012730611402196612, + "loss": 0.085, + "step": 9103 + }, + { + "epoch": 2.76, + "learning_rate": 0.00012727753686818876, + "loss": 0.1092, + "step": 9104 + }, + { + "epoch": 2.76, + "learning_rate": 0.00012724896055848423, + "loss": 0.0885, + "step": 9105 + }, + { + "epoch": 2.76, + "learning_rate": 0.00012722038509391421, + "loss": 0.1103, + "step": 9106 + }, + { + "epoch": 2.77, + "learning_rate": 0.00012719181047554, + "loss": 0.0764, + "step": 9107 + }, + { + "epoch": 2.77, + "learning_rate": 0.00012716323670442317, + "loss": 0.1118, + "step": 9108 + }, + { + "epoch": 2.77, + "learning_rate": 0.0001271346637816251, + "loss": 0.0628, + "step": 9109 + }, + { + "epoch": 2.77, + "learning_rate": 0.00012710609170820726, + "loss": 0.0935, + "step": 9110 + }, + { + "epoch": 2.77, + "learning_rate": 0.00012707752048523097, + "loss": 0.097, + "step": 9111 + }, + { + "epoch": 2.77, + "learning_rate": 0.00012704895011375755, + "loss": 0.1089, + "step": 9112 + }, + { + "epoch": 2.77, + "learning_rate": 0.00012702038059484832, + "loss": 0.078, + "step": 9113 + }, + { + "epoch": 2.77, + "learning_rate": 0.0001269918119295646, + "loss": 0.0155, + "step": 9114 + }, + { + "epoch": 2.77, + "learning_rate": 0.00012696324411896756, + "loss": 0.0428, + "step": 9115 + }, + { + "epoch": 2.77, + "learning_rate": 0.00012693467716411852, + "loss": 0.0855, + "step": 9116 + }, + { + "epoch": 2.77, + "learning_rate": 0.00012690611106607852, + "loss": 0.0983, + "step": 9117 + }, + { + "epoch": 2.77, + "learning_rate": 0.00012687754582590876, + "loss": 0.1009, + "step": 9118 + }, + { + "epoch": 2.77, + "learning_rate": 0.0001268489814446704, + "loss": 0.0737, + "step": 9119 + }, + { + "epoch": 2.77, + "learning_rate": 0.00012682041792342447, + "loss": 0.1177, + "step": 9120 + }, + { + "epoch": 2.77, + "learning_rate": 0.00012679185526323207, + "loss": 0.1459, + "step": 9121 + }, + { + "epoch": 2.77, + "learning_rate": 0.00012676329346515417, + "loss": 0.0847, + "step": 9122 + }, + { + "epoch": 2.77, + "learning_rate": 0.00012673473253025177, + "loss": 0.086, + "step": 9123 + }, + { + "epoch": 2.77, + "learning_rate": 0.0001267061724595858, + "loss": 0.0875, + "step": 9124 + }, + { + "epoch": 2.77, + "learning_rate": 0.00012667761325421727, + "loss": 0.123, + "step": 9125 + }, + { + "epoch": 2.77, + "learning_rate": 0.00012664905491520707, + "loss": 0.0471, + "step": 9126 + }, + { + "epoch": 2.77, + "learning_rate": 0.00012662049744361587, + "loss": 0.1202, + "step": 9127 + }, + { + "epoch": 2.77, + "learning_rate": 0.0001265919408405047, + "loss": 0.1701, + "step": 9128 + }, + { + "epoch": 2.77, + "learning_rate": 0.00012656338510693424, + "loss": 0.1296, + "step": 9129 + }, + { + "epoch": 2.77, + "learning_rate": 0.00012653483024396533, + "loss": 0.0167, + "step": 9130 + }, + { + "epoch": 2.77, + "learning_rate": 0.00012650627625265874, + "loss": 0.1528, + "step": 9131 + }, + { + "epoch": 2.77, + "learning_rate": 0.000126477723134075, + "loss": 0.1134, + "step": 9132 + }, + { + "epoch": 2.77, + "learning_rate": 0.00012644917088927487, + "loss": 0.0888, + "step": 9133 + }, + { + "epoch": 2.77, + "learning_rate": 0.000126420619519319, + "loss": 0.1079, + "step": 9134 + }, + { + "epoch": 2.77, + "learning_rate": 0.00012639206902526794, + "loss": 0.0539, + "step": 9135 + }, + { + "epoch": 2.77, + "learning_rate": 0.00012636351940818234, + "loss": 0.0812, + "step": 9136 + }, + { + "epoch": 2.77, + "learning_rate": 0.00012633497066912266, + "loss": 0.074, + "step": 9137 + }, + { + "epoch": 2.77, + "learning_rate": 0.0001263064228091494, + "loss": 0.098, + "step": 9138 + }, + { + "epoch": 2.77, + "learning_rate": 0.00012627787582932304, + "loss": 0.1323, + "step": 9139 + }, + { + "epoch": 2.78, + "learning_rate": 0.00012624932973070405, + "loss": 0.1001, + "step": 9140 + }, + { + "epoch": 2.78, + "learning_rate": 0.0001262207845143529, + "loss": 0.0637, + "step": 9141 + }, + { + "epoch": 2.78, + "learning_rate": 0.00012619224018132975, + "loss": 0.0412, + "step": 9142 + }, + { + "epoch": 2.78, + "learning_rate": 0.0001261636967326951, + "loss": 0.2038, + "step": 9143 + }, + { + "epoch": 2.78, + "learning_rate": 0.0001261351541695092, + "loss": 0.0345, + "step": 9144 + }, + { + "epoch": 2.78, + "learning_rate": 0.00012610661249283237, + "loss": 0.1726, + "step": 9145 + }, + { + "epoch": 2.78, + "learning_rate": 0.00012607807170372483, + "loss": 0.0325, + "step": 9146 + }, + { + "epoch": 2.78, + "learning_rate": 0.00012604953180324674, + "loss": 0.1023, + "step": 9147 + }, + { + "epoch": 2.78, + "learning_rate": 0.0001260209927924583, + "loss": 0.1066, + "step": 9148 + }, + { + "epoch": 2.78, + "learning_rate": 0.0001259924546724197, + "loss": 0.087, + "step": 9149 + }, + { + "epoch": 2.78, + "learning_rate": 0.00012596391744419097, + "loss": 0.1532, + "step": 9150 + }, + { + "epoch": 2.78, + "learning_rate": 0.00012593538110883227, + "loss": 0.0787, + "step": 9151 + }, + { + "epoch": 2.78, + "learning_rate": 0.00012590684566740355, + "loss": 0.0595, + "step": 9152 + }, + { + "epoch": 2.78, + "learning_rate": 0.00012587831112096488, + "loss": 0.0134, + "step": 9153 + }, + { + "epoch": 2.78, + "learning_rate": 0.0001258497774705762, + "loss": 0.1401, + "step": 9154 + }, + { + "epoch": 2.78, + "learning_rate": 0.00012582124471729748, + "loss": 0.1362, + "step": 9155 + }, + { + "epoch": 2.78, + "learning_rate": 0.00012579271286218866, + "loss": 0.0425, + "step": 9156 + }, + { + "epoch": 2.78, + "learning_rate": 0.0001257641819063095, + "loss": 0.0256, + "step": 9157 + }, + { + "epoch": 2.78, + "learning_rate": 0.00012573565185071994, + "loss": 0.0431, + "step": 9158 + }, + { + "epoch": 2.78, + "learning_rate": 0.00012570712269647973, + "loss": 0.091, + "step": 9159 + }, + { + "epoch": 2.78, + "learning_rate": 0.0001256785944446487, + "loss": 0.0656, + "step": 9160 + }, + { + "epoch": 2.78, + "learning_rate": 0.00012565006709628658, + "loss": 0.062, + "step": 9161 + }, + { + "epoch": 2.78, + "learning_rate": 0.00012562154065245303, + "loss": 0.1101, + "step": 9162 + }, + { + "epoch": 2.78, + "learning_rate": 0.00012559301511420778, + "loss": 0.0963, + "step": 9163 + }, + { + "epoch": 2.78, + "learning_rate": 0.0001255644904826104, + "loss": 0.0109, + "step": 9164 + }, + { + "epoch": 2.78, + "learning_rate": 0.00012553596675872057, + "loss": 0.1028, + "step": 9165 + }, + { + "epoch": 2.78, + "learning_rate": 0.00012550744394359786, + "loss": 0.0651, + "step": 9166 + }, + { + "epoch": 2.78, + "learning_rate": 0.0001254789220383018, + "loss": 0.1163, + "step": 9167 + }, + { + "epoch": 2.78, + "learning_rate": 0.0001254504010438918, + "loss": 0.1105, + "step": 9168 + }, + { + "epoch": 2.78, + "learning_rate": 0.00012542188096142744, + "loss": 0.0747, + "step": 9169 + }, + { + "epoch": 2.78, + "learning_rate": 0.00012539336179196814, + "loss": 0.1377, + "step": 9170 + }, + { + "epoch": 2.78, + "learning_rate": 0.0001253648435365733, + "loss": 0.1097, + "step": 9171 + }, + { + "epoch": 2.78, + "learning_rate": 0.00012533632619630232, + "loss": 0.0764, + "step": 9172 + }, + { + "epoch": 2.79, + "learning_rate": 0.00012530780977221444, + "loss": 0.0362, + "step": 9173 + }, + { + "epoch": 2.79, + "learning_rate": 0.00012527929426536902, + "loss": 0.0676, + "step": 9174 + }, + { + "epoch": 2.79, + "learning_rate": 0.0001252507796768253, + "loss": 0.0629, + "step": 9175 + }, + { + "epoch": 2.79, + "learning_rate": 0.0001252222660076426, + "loss": 0.1121, + "step": 9176 + }, + { + "epoch": 2.79, + "learning_rate": 0.00012519375325888003, + "loss": 0.1427, + "step": 9177 + }, + { + "epoch": 2.79, + "learning_rate": 0.00012516524143159677, + "loss": 0.0701, + "step": 9178 + }, + { + "epoch": 2.79, + "learning_rate": 0.00012513673052685196, + "loss": 0.1176, + "step": 9179 + }, + { + "epoch": 2.79, + "learning_rate": 0.00012510822054570471, + "loss": 0.1029, + "step": 9180 + }, + { + "epoch": 2.79, + "learning_rate": 0.00012507971148921406, + "loss": 0.141, + "step": 9181 + }, + { + "epoch": 2.79, + "learning_rate": 0.00012505120335843913, + "loss": 0.1183, + "step": 9182 + }, + { + "epoch": 2.79, + "learning_rate": 0.00012502269615443878, + "loss": 0.1246, + "step": 9183 + }, + { + "epoch": 2.79, + "learning_rate": 0.000124994189878272, + "loss": 0.1172, + "step": 9184 + }, + { + "epoch": 2.79, + "learning_rate": 0.00012496568453099774, + "loss": 0.0808, + "step": 9185 + }, + { + "epoch": 2.79, + "learning_rate": 0.00012493718011367488, + "loss": 0.0718, + "step": 9186 + }, + { + "epoch": 2.79, + "learning_rate": 0.0001249086766273623, + "loss": 0.0888, + "step": 9187 + }, + { + "epoch": 2.79, + "learning_rate": 0.0001248801740731188, + "loss": 0.1393, + "step": 9188 + }, + { + "epoch": 2.79, + "learning_rate": 0.0001248516724520032, + "loss": 0.0677, + "step": 9189 + }, + { + "epoch": 2.79, + "learning_rate": 0.00012482317176507418, + "loss": 0.0462, + "step": 9190 + }, + { + "epoch": 2.79, + "learning_rate": 0.0001247946720133905, + "loss": 0.118, + "step": 9191 + }, + { + "epoch": 2.79, + "learning_rate": 0.00012476617319801087, + "loss": 0.0935, + "step": 9192 + }, + { + "epoch": 2.79, + "learning_rate": 0.0001247376753199939, + "loss": 0.047, + "step": 9193 + }, + { + "epoch": 2.79, + "learning_rate": 0.00012470917838039818, + "loss": 0.0807, + "step": 9194 + }, + { + "epoch": 2.79, + "learning_rate": 0.00012468068238028233, + "loss": 0.1409, + "step": 9195 + }, + { + "epoch": 2.79, + "learning_rate": 0.00012465218732070487, + "loss": 0.0653, + "step": 9196 + }, + { + "epoch": 2.79, + "learning_rate": 0.00012462369320272442, + "loss": 0.0752, + "step": 9197 + }, + { + "epoch": 2.79, + "learning_rate": 0.00012459520002739926, + "loss": 0.0404, + "step": 9198 + }, + { + "epoch": 2.79, + "learning_rate": 0.0001245667077957879, + "loss": 0.114, + "step": 9199 + }, + { + "epoch": 2.79, + "learning_rate": 0.0001245382165089488, + "loss": 0.0483, + "step": 9200 + }, + { + "epoch": 2.79, + "learning_rate": 0.00012450972616794028, + "loss": 0.0453, + "step": 9201 + }, + { + "epoch": 2.79, + "learning_rate": 0.00012448123677382073, + "loss": 0.0784, + "step": 9202 + }, + { + "epoch": 2.79, + "learning_rate": 0.00012445274832764835, + "loss": 0.1049, + "step": 9203 + }, + { + "epoch": 2.79, + "learning_rate": 0.00012442426083048144, + "loss": 0.0534, + "step": 9204 + }, + { + "epoch": 2.79, + "learning_rate": 0.00012439577428337826, + "loss": 0.0719, + "step": 9205 + }, + { + "epoch": 2.8, + "learning_rate": 0.00012436728868739697, + "loss": 0.0564, + "step": 9206 + }, + { + "epoch": 2.8, + "learning_rate": 0.00012433880404359582, + "loss": 0.0839, + "step": 9207 + }, + { + "epoch": 2.8, + "learning_rate": 0.00012431032035303276, + "loss": 0.0834, + "step": 9208 + }, + { + "epoch": 2.8, + "learning_rate": 0.000124281837616766, + "loss": 0.1224, + "step": 9209 + }, + { + "epoch": 2.8, + "learning_rate": 0.0001242533558358535, + "loss": 0.0584, + "step": 9210 + }, + { + "epoch": 2.8, + "learning_rate": 0.00012422487501135337, + "loss": 0.0906, + "step": 9211 + }, + { + "epoch": 2.8, + "learning_rate": 0.0001241963951443236, + "loss": 0.0343, + "step": 9212 + }, + { + "epoch": 2.8, + "learning_rate": 0.000124167916235822, + "loss": 0.1074, + "step": 9213 + }, + { + "epoch": 2.8, + "learning_rate": 0.0001241394382869066, + "loss": 0.0569, + "step": 9214 + }, + { + "epoch": 2.8, + "learning_rate": 0.0001241109612986352, + "loss": 0.0579, + "step": 9215 + }, + { + "epoch": 2.8, + "learning_rate": 0.0001240824852720657, + "loss": 0.1121, + "step": 9216 + }, + { + "epoch": 2.8, + "learning_rate": 0.0001240540102082559, + "loss": 0.1244, + "step": 9217 + }, + { + "epoch": 2.8, + "learning_rate": 0.00012402553610826346, + "loss": 0.0498, + "step": 9218 + }, + { + "epoch": 2.8, + "learning_rate": 0.00012399706297314621, + "loss": 0.1701, + "step": 9219 + }, + { + "epoch": 2.8, + "learning_rate": 0.00012396859080396184, + "loss": 0.1196, + "step": 9220 + }, + { + "epoch": 2.8, + "learning_rate": 0.00012394011960176795, + "loss": 0.0661, + "step": 9221 + }, + { + "epoch": 2.8, + "learning_rate": 0.00012391164936762227, + "loss": 0.0512, + "step": 9222 + }, + { + "epoch": 2.8, + "learning_rate": 0.00012388318010258227, + "loss": 0.0215, + "step": 9223 + }, + { + "epoch": 2.8, + "learning_rate": 0.00012385471180770553, + "loss": 0.222, + "step": 9224 + }, + { + "epoch": 2.8, + "learning_rate": 0.00012382624448404957, + "loss": 0.1087, + "step": 9225 + }, + { + "epoch": 2.8, + "learning_rate": 0.00012379777813267188, + "loss": 0.0585, + "step": 9226 + }, + { + "epoch": 2.8, + "learning_rate": 0.0001237693127546299, + "loss": 0.1226, + "step": 9227 + }, + { + "epoch": 2.8, + "learning_rate": 0.00012374084835098102, + "loss": 0.1265, + "step": 9228 + }, + { + "epoch": 2.8, + "learning_rate": 0.00012371238492278264, + "loss": 0.0818, + "step": 9229 + }, + { + "epoch": 2.8, + "learning_rate": 0.00012368392247109205, + "loss": 0.1857, + "step": 9230 + }, + { + "epoch": 2.8, + "learning_rate": 0.00012365546099696656, + "loss": 0.1027, + "step": 9231 + }, + { + "epoch": 2.8, + "learning_rate": 0.00012362700050146347, + "loss": 0.0937, + "step": 9232 + }, + { + "epoch": 2.8, + "learning_rate": 0.00012359854098563998, + "loss": 0.0407, + "step": 9233 + }, + { + "epoch": 2.8, + "learning_rate": 0.00012357008245055323, + "loss": 0.0761, + "step": 9234 + }, + { + "epoch": 2.8, + "learning_rate": 0.00012354162489726044, + "loss": 0.1082, + "step": 9235 + }, + { + "epoch": 2.8, + "learning_rate": 0.0001235131683268187, + "loss": 0.0885, + "step": 9236 + }, + { + "epoch": 2.8, + "learning_rate": 0.00012348471274028504, + "loss": 0.0356, + "step": 9237 + }, + { + "epoch": 2.8, + "learning_rate": 0.00012345625813871665, + "loss": 0.0708, + "step": 9238 + }, + { + "epoch": 2.81, + "learning_rate": 0.00012342780452317034, + "loss": 0.0515, + "step": 9239 + }, + { + "epoch": 2.81, + "learning_rate": 0.00012339935189470318, + "loss": 0.0992, + "step": 9240 + }, + { + "epoch": 2.81, + "learning_rate": 0.00012337090025437211, + "loss": 0.1097, + "step": 9241 + }, + { + "epoch": 2.81, + "learning_rate": 0.000123342449603234, + "loss": 0.1306, + "step": 9242 + }, + { + "epoch": 2.81, + "learning_rate": 0.00012331399994234575, + "loss": 0.0958, + "step": 9243 + }, + { + "epoch": 2.81, + "learning_rate": 0.0001232855512727641, + "loss": 0.1111, + "step": 9244 + }, + { + "epoch": 2.81, + "learning_rate": 0.00012325710359554587, + "loss": 0.1846, + "step": 9245 + }, + { + "epoch": 2.81, + "learning_rate": 0.00012322865691174784, + "loss": 0.1092, + "step": 9246 + }, + { + "epoch": 2.81, + "learning_rate": 0.0001232002112224267, + "loss": 0.1775, + "step": 9247 + }, + { + "epoch": 2.81, + "learning_rate": 0.0001231717665286392, + "loss": 0.1003, + "step": 9248 + }, + { + "epoch": 2.81, + "learning_rate": 0.0001231433228314418, + "loss": 0.1704, + "step": 9249 + }, + { + "epoch": 2.81, + "learning_rate": 0.0001231148801318912, + "loss": 0.0884, + "step": 9250 + }, + { + "epoch": 2.81, + "learning_rate": 0.00012308643843104395, + "loss": 0.0527, + "step": 9251 + }, + { + "epoch": 2.81, + "learning_rate": 0.00012305799772995663, + "loss": 0.0807, + "step": 9252 + }, + { + "epoch": 2.81, + "learning_rate": 0.00012302955802968572, + "loss": 0.1003, + "step": 9253 + }, + { + "epoch": 2.81, + "learning_rate": 0.0001230011193312876, + "loss": 0.0766, + "step": 9254 + }, + { + "epoch": 2.81, + "learning_rate": 0.00012297268163581873, + "loss": 0.0602, + "step": 9255 + }, + { + "epoch": 2.81, + "learning_rate": 0.00012294424494433545, + "loss": 0.1373, + "step": 9256 + }, + { + "epoch": 2.81, + "learning_rate": 0.00012291580925789417, + "loss": 0.0768, + "step": 9257 + }, + { + "epoch": 2.81, + "learning_rate": 0.00012288737457755115, + "loss": 0.0983, + "step": 9258 + }, + { + "epoch": 2.81, + "learning_rate": 0.00012285894090436264, + "loss": 0.1582, + "step": 9259 + }, + { + "epoch": 2.81, + "learning_rate": 0.0001228305082393849, + "loss": 0.1226, + "step": 9260 + }, + { + "epoch": 2.81, + "learning_rate": 0.00012280207658367406, + "loss": 0.1072, + "step": 9261 + }, + { + "epoch": 2.81, + "learning_rate": 0.00012277364593828638, + "loss": 0.0307, + "step": 9262 + }, + { + "epoch": 2.81, + "learning_rate": 0.0001227452163042779, + "loss": 0.0671, + "step": 9263 + }, + { + "epoch": 2.81, + "learning_rate": 0.00012271678768270471, + "loss": 0.056, + "step": 9264 + }, + { + "epoch": 2.81, + "learning_rate": 0.00012268836007462284, + "loss": 0.1091, + "step": 9265 + }, + { + "epoch": 2.81, + "learning_rate": 0.00012265993348108828, + "loss": 0.0114, + "step": 9266 + }, + { + "epoch": 2.81, + "learning_rate": 0.00012263150790315703, + "loss": 0.1329, + "step": 9267 + }, + { + "epoch": 2.81, + "learning_rate": 0.00012260308334188503, + "loss": 0.0859, + "step": 9268 + }, + { + "epoch": 2.81, + "learning_rate": 0.00012257465979832814, + "loss": 0.1631, + "step": 9269 + }, + { + "epoch": 2.81, + "learning_rate": 0.00012254623727354217, + "loss": 0.0406, + "step": 9270 + }, + { + "epoch": 2.81, + "learning_rate": 0.000122517815768583, + "loss": 0.065, + "step": 9271 + }, + { + "epoch": 2.82, + "learning_rate": 0.00012248939528450637, + "loss": 0.0964, + "step": 9272 + }, + { + "epoch": 2.82, + "learning_rate": 0.00012246097582236808, + "loss": 0.1051, + "step": 9273 + }, + { + "epoch": 2.82, + "learning_rate": 0.00012243255738322376, + "loss": 0.0325, + "step": 9274 + }, + { + "epoch": 2.82, + "learning_rate": 0.00012240413996812905, + "loss": 0.168, + "step": 9275 + }, + { + "epoch": 2.82, + "learning_rate": 0.00012237572357813966, + "loss": 0.075, + "step": 9276 + }, + { + "epoch": 2.82, + "learning_rate": 0.00012234730821431111, + "loss": 0.0882, + "step": 9277 + }, + { + "epoch": 2.82, + "learning_rate": 0.00012231889387769905, + "loss": 0.0857, + "step": 9278 + }, + { + "epoch": 2.82, + "learning_rate": 0.00012229048056935883, + "loss": 0.0489, + "step": 9279 + }, + { + "epoch": 2.82, + "learning_rate": 0.000122262068290346, + "loss": 0.1088, + "step": 9280 + }, + { + "epoch": 2.82, + "learning_rate": 0.00012223365704171598, + "loss": 0.0963, + "step": 9281 + }, + { + "epoch": 2.82, + "learning_rate": 0.00012220524682452418, + "loss": 0.1039, + "step": 9282 + }, + { + "epoch": 2.82, + "learning_rate": 0.000122176837639826, + "loss": 0.0774, + "step": 9283 + }, + { + "epoch": 2.82, + "learning_rate": 0.00012214842948867668, + "loss": 0.0953, + "step": 9284 + }, + { + "epoch": 2.82, + "learning_rate": 0.00012212002237213152, + "loss": 0.0586, + "step": 9285 + }, + { + "epoch": 2.82, + "learning_rate": 0.00012209161629124577, + "loss": 0.0919, + "step": 9286 + }, + { + "epoch": 2.82, + "learning_rate": 0.00012206321124707464, + "loss": 0.0822, + "step": 9287 + }, + { + "epoch": 2.82, + "learning_rate": 0.00012203480724067329, + "loss": 0.1363, + "step": 9288 + }, + { + "epoch": 2.82, + "learning_rate": 0.0001220064042730969, + "loss": 0.1177, + "step": 9289 + }, + { + "epoch": 2.82, + "learning_rate": 0.00012197800234540043, + "loss": 0.1516, + "step": 9290 + }, + { + "epoch": 2.82, + "learning_rate": 0.00012194960145863902, + "loss": 0.1698, + "step": 9291 + }, + { + "epoch": 2.82, + "learning_rate": 0.00012192120161386762, + "loss": 0.0713, + "step": 9292 + }, + { + "epoch": 2.82, + "learning_rate": 0.00012189280281214126, + "loss": 0.0877, + "step": 9293 + }, + { + "epoch": 2.82, + "learning_rate": 0.00012186440505451492, + "loss": 0.0692, + "step": 9294 + }, + { + "epoch": 2.82, + "learning_rate": 0.00012183600834204337, + "loss": 0.0574, + "step": 9295 + }, + { + "epoch": 2.82, + "learning_rate": 0.00012180761267578152, + "loss": 0.0828, + "step": 9296 + }, + { + "epoch": 2.82, + "learning_rate": 0.00012177921805678419, + "loss": 0.0657, + "step": 9297 + }, + { + "epoch": 2.82, + "learning_rate": 0.00012175082448610614, + "loss": 0.1741, + "step": 9298 + }, + { + "epoch": 2.82, + "learning_rate": 0.00012172243196480214, + "loss": 0.0947, + "step": 9299 + }, + { + "epoch": 2.82, + "learning_rate": 0.00012169404049392686, + "loss": 0.0779, + "step": 9300 + }, + { + "epoch": 2.82, + "learning_rate": 0.00012166565007453497, + "loss": 0.1707, + "step": 9301 + }, + { + "epoch": 2.82, + "learning_rate": 0.00012163726070768107, + "loss": 0.082, + "step": 9302 + }, + { + "epoch": 2.82, + "learning_rate": 0.00012160887239441977, + "loss": 0.1009, + "step": 9303 + }, + { + "epoch": 2.82, + "learning_rate": 0.00012158048513580569, + "loss": 0.0494, + "step": 9304 + }, + { + "epoch": 2.83, + "learning_rate": 0.00012155209893289316, + "loss": 0.1289, + "step": 9305 + }, + { + "epoch": 2.83, + "learning_rate": 0.00012152371378673673, + "loss": 0.08, + "step": 9306 + }, + { + "epoch": 2.83, + "learning_rate": 0.00012149532969839085, + "loss": 0.0662, + "step": 9307 + }, + { + "epoch": 2.83, + "learning_rate": 0.00012146694666890987, + "loss": 0.1107, + "step": 9308 + }, + { + "epoch": 2.83, + "learning_rate": 0.00012143856469934817, + "loss": 0.064, + "step": 9309 + }, + { + "epoch": 2.83, + "learning_rate": 0.00012141018379076001, + "loss": 0.0123, + "step": 9310 + }, + { + "epoch": 2.83, + "learning_rate": 0.0001213818039441997, + "loss": 0.0764, + "step": 9311 + }, + { + "epoch": 2.83, + "learning_rate": 0.00012135342516072142, + "loss": 0.0912, + "step": 9312 + }, + { + "epoch": 2.83, + "learning_rate": 0.0001213250474413794, + "loss": 0.1201, + "step": 9313 + }, + { + "epoch": 2.83, + "learning_rate": 0.00012129667078722786, + "loss": 0.0689, + "step": 9314 + }, + { + "epoch": 2.83, + "learning_rate": 0.00012126829519932071, + "loss": 0.118, + "step": 9315 + }, + { + "epoch": 2.83, + "learning_rate": 0.00012123992067871219, + "loss": 0.1134, + "step": 9316 + }, + { + "epoch": 2.83, + "learning_rate": 0.00012121154722645626, + "loss": 0.1101, + "step": 9317 + }, + { + "epoch": 2.83, + "learning_rate": 0.00012118317484360694, + "loss": 0.0316, + "step": 9318 + }, + { + "epoch": 2.83, + "learning_rate": 0.00012115480353121825, + "loss": 0.1492, + "step": 9319 + }, + { + "epoch": 2.83, + "learning_rate": 0.00012112643329034393, + "loss": 0.053, + "step": 9320 + }, + { + "epoch": 2.83, + "learning_rate": 0.00012109806412203796, + "loss": 0.0952, + "step": 9321 + }, + { + "epoch": 2.83, + "learning_rate": 0.00012106969602735414, + "loss": 0.1715, + "step": 9322 + }, + { + "epoch": 2.83, + "learning_rate": 0.00012104132900734629, + "loss": 0.0765, + "step": 9323 + }, + { + "epoch": 2.83, + "learning_rate": 0.00012101296306306816, + "loss": 0.0341, + "step": 9324 + }, + { + "epoch": 2.83, + "learning_rate": 0.00012098459819557342, + "loss": 0.1117, + "step": 9325 + }, + { + "epoch": 2.83, + "learning_rate": 0.00012095623440591579, + "loss": 0.1268, + "step": 9326 + }, + { + "epoch": 2.83, + "learning_rate": 0.00012092787169514887, + "loss": 0.0707, + "step": 9327 + }, + { + "epoch": 2.83, + "learning_rate": 0.00012089951006432625, + "loss": 0.0535, + "step": 9328 + }, + { + "epoch": 2.83, + "learning_rate": 0.00012087114951450158, + "loss": 0.1677, + "step": 9329 + }, + { + "epoch": 2.83, + "learning_rate": 0.0001208427900467282, + "loss": 0.0995, + "step": 9330 + }, + { + "epoch": 2.83, + "learning_rate": 0.00012081443166205969, + "loss": 0.109, + "step": 9331 + }, + { + "epoch": 2.83, + "learning_rate": 0.00012078607436154943, + "loss": 0.0946, + "step": 9332 + }, + { + "epoch": 2.83, + "learning_rate": 0.00012075771814625082, + "loss": 0.1141, + "step": 9333 + }, + { + "epoch": 2.83, + "learning_rate": 0.00012072936301721735, + "loss": 0.092, + "step": 9334 + }, + { + "epoch": 2.83, + "learning_rate": 0.00012070100897550211, + "loss": 0.0429, + "step": 9335 + }, + { + "epoch": 2.83, + "learning_rate": 0.00012067265602215848, + "loss": 0.1219, + "step": 9336 + }, + { + "epoch": 2.83, + "learning_rate": 0.00012064430415823968, + "loss": 0.1044, + "step": 9337 + }, + { + "epoch": 2.84, + "learning_rate": 0.0001206159533847989, + "loss": 0.0899, + "step": 9338 + }, + { + "epoch": 2.84, + "learning_rate": 0.0001205876037028893, + "loss": 0.0599, + "step": 9339 + }, + { + "epoch": 2.84, + "learning_rate": 0.00012055925511356395, + "loss": 0.0942, + "step": 9340 + }, + { + "epoch": 2.84, + "learning_rate": 0.00012053090761787594, + "loss": 0.109, + "step": 9341 + }, + { + "epoch": 2.84, + "learning_rate": 0.00012050256121687829, + "loss": 0.0379, + "step": 9342 + }, + { + "epoch": 2.84, + "learning_rate": 0.00012047421591162399, + "loss": 0.0593, + "step": 9343 + }, + { + "epoch": 2.84, + "learning_rate": 0.00012044587170316605, + "loss": 0.0489, + "step": 9344 + }, + { + "epoch": 2.84, + "learning_rate": 0.00012041752859255725, + "loss": 0.0917, + "step": 9345 + }, + { + "epoch": 2.84, + "learning_rate": 0.0001203891865808505, + "loss": 0.0954, + "step": 9346 + }, + { + "epoch": 2.84, + "learning_rate": 0.00012036084566909864, + "loss": 0.1041, + "step": 9347 + }, + { + "epoch": 2.84, + "learning_rate": 0.00012033250585835446, + "loss": 0.1082, + "step": 9348 + }, + { + "epoch": 2.84, + "learning_rate": 0.00012030416714967071, + "loss": 0.17, + "step": 9349 + }, + { + "epoch": 2.84, + "learning_rate": 0.00012027582954410005, + "loss": 0.0137, + "step": 9350 + }, + { + "epoch": 2.84, + "learning_rate": 0.00012024749304269513, + "loss": 0.0438, + "step": 9351 + }, + { + "epoch": 2.84, + "learning_rate": 0.00012021915764650861, + "loss": 0.0149, + "step": 9352 + }, + { + "epoch": 2.84, + "learning_rate": 0.00012019082335659304, + "loss": 0.0802, + "step": 9353 + }, + { + "epoch": 2.84, + "learning_rate": 0.00012016249017400099, + "loss": 0.074, + "step": 9354 + }, + { + "epoch": 2.84, + "learning_rate": 0.00012013415809978498, + "loss": 0.0695, + "step": 9355 + }, + { + "epoch": 2.84, + "learning_rate": 0.00012010582713499732, + "loss": 0.0674, + "step": 9356 + }, + { + "epoch": 2.84, + "learning_rate": 0.00012007749728069057, + "loss": 0.0516, + "step": 9357 + }, + { + "epoch": 2.84, + "learning_rate": 0.00012004916853791704, + "loss": 0.0785, + "step": 9358 + }, + { + "epoch": 2.84, + "learning_rate": 0.00012002084090772906, + "loss": 0.0425, + "step": 9359 + }, + { + "epoch": 2.84, + "learning_rate": 0.000119992514391179, + "loss": 0.0643, + "step": 9360 + }, + { + "epoch": 2.84, + "learning_rate": 0.00011996418898931895, + "loss": 0.1169, + "step": 9361 + }, + { + "epoch": 2.84, + "learning_rate": 0.00011993586470320121, + "loss": 0.1347, + "step": 9362 + }, + { + "epoch": 2.84, + "learning_rate": 0.00011990754153387794, + "loss": 0.057, + "step": 9363 + }, + { + "epoch": 2.84, + "learning_rate": 0.00011987921948240126, + "loss": 0.0855, + "step": 9364 + }, + { + "epoch": 2.84, + "learning_rate": 0.00011985089854982328, + "loss": 0.1224, + "step": 9365 + }, + { + "epoch": 2.84, + "learning_rate": 0.00011982257873719596, + "loss": 0.0695, + "step": 9366 + }, + { + "epoch": 2.84, + "learning_rate": 0.00011979426004557135, + "loss": 0.0219, + "step": 9367 + }, + { + "epoch": 2.84, + "learning_rate": 0.00011976594247600142, + "loss": 0.0655, + "step": 9368 + }, + { + "epoch": 2.84, + "learning_rate": 0.00011973762602953805, + "loss": 0.1116, + "step": 9369 + }, + { + "epoch": 2.84, + "learning_rate": 0.00011970931070723319, + "loss": 0.0732, + "step": 9370 + }, + { + "epoch": 2.85, + "learning_rate": 0.00011968099651013856, + "loss": 0.0886, + "step": 9371 + }, + { + "epoch": 2.85, + "learning_rate": 0.000119652683439306, + "loss": 0.0768, + "step": 9372 + }, + { + "epoch": 2.85, + "learning_rate": 0.00011962437149578724, + "loss": 0.1005, + "step": 9373 + }, + { + "epoch": 2.85, + "learning_rate": 0.00011959606068063402, + "loss": 0.1846, + "step": 9374 + }, + { + "epoch": 2.85, + "learning_rate": 0.00011956775099489801, + "loss": 0.0895, + "step": 9375 + }, + { + "epoch": 2.85, + "learning_rate": 0.00011953944243963077, + "loss": 0.0733, + "step": 9376 + }, + { + "epoch": 2.85, + "learning_rate": 0.00011951113501588393, + "loss": 0.1413, + "step": 9377 + }, + { + "epoch": 2.85, + "learning_rate": 0.00011948282872470902, + "loss": 0.1287, + "step": 9378 + }, + { + "epoch": 2.85, + "learning_rate": 0.00011945452356715751, + "loss": 0.1778, + "step": 9379 + }, + { + "epoch": 2.85, + "learning_rate": 0.00011942621954428094, + "loss": 0.074, + "step": 9380 + }, + { + "epoch": 2.85, + "learning_rate": 0.00011939791665713061, + "loss": 0.074, + "step": 9381 + }, + { + "epoch": 2.85, + "learning_rate": 0.00011936961490675793, + "loss": 0.0477, + "step": 9382 + }, + { + "epoch": 2.85, + "learning_rate": 0.00011934131429421422, + "loss": 0.0862, + "step": 9383 + }, + { + "epoch": 2.85, + "learning_rate": 0.00011931301482055081, + "loss": 0.0863, + "step": 9384 + }, + { + "epoch": 2.85, + "learning_rate": 0.00011928471648681896, + "loss": 0.0987, + "step": 9385 + }, + { + "epoch": 2.85, + "learning_rate": 0.00011925641929406977, + "loss": 0.083, + "step": 9386 + }, + { + "epoch": 2.85, + "learning_rate": 0.00011922812324335442, + "loss": 0.1546, + "step": 9387 + }, + { + "epoch": 2.85, + "learning_rate": 0.00011919982833572408, + "loss": 0.0231, + "step": 9388 + }, + { + "epoch": 2.85, + "learning_rate": 0.00011917153457222977, + "loss": 0.1108, + "step": 9389 + }, + { + "epoch": 2.85, + "learning_rate": 0.0001191432419539226, + "loss": 0.0732, + "step": 9390 + }, + { + "epoch": 2.85, + "learning_rate": 0.00011911495048185347, + "loss": 0.1212, + "step": 9391 + }, + { + "epoch": 2.85, + "learning_rate": 0.00011908666015707337, + "loss": 0.0791, + "step": 9392 + }, + { + "epoch": 2.85, + "learning_rate": 0.00011905837098063316, + "loss": 0.0787, + "step": 9393 + }, + { + "epoch": 2.85, + "learning_rate": 0.00011903008295358377, + "loss": 0.1032, + "step": 9394 + }, + { + "epoch": 2.85, + "learning_rate": 0.00011900179607697602, + "loss": 0.146, + "step": 9395 + }, + { + "epoch": 2.85, + "learning_rate": 0.00011897351035186059, + "loss": 0.0933, + "step": 9396 + }, + { + "epoch": 2.85, + "learning_rate": 0.00011894522577928823, + "loss": 0.1028, + "step": 9397 + }, + { + "epoch": 2.85, + "learning_rate": 0.00011891694236030971, + "loss": 0.0816, + "step": 9398 + }, + { + "epoch": 2.85, + "learning_rate": 0.0001188886600959756, + "loss": 0.0539, + "step": 9399 + }, + { + "epoch": 2.85, + "learning_rate": 0.00011886037898733664, + "loss": 0.0667, + "step": 9400 + }, + { + "epoch": 2.85, + "learning_rate": 0.00011883209903544318, + "loss": 0.1306, + "step": 9401 + }, + { + "epoch": 2.85, + "learning_rate": 0.00011880382024134585, + "loss": 0.0217, + "step": 9402 + }, + { + "epoch": 2.86, + "learning_rate": 0.00011877554260609513, + "loss": 0.164, + "step": 9403 + }, + { + "epoch": 2.86, + "learning_rate": 0.00011874726613074145, + "loss": 0.1174, + "step": 9404 + }, + { + "epoch": 2.86, + "learning_rate": 0.00011871899081633518, + "loss": 0.1073, + "step": 9405 + }, + { + "epoch": 2.86, + "learning_rate": 0.00011869071666392668, + "loss": 0.0871, + "step": 9406 + }, + { + "epoch": 2.86, + "learning_rate": 0.00011866244367456621, + "loss": 0.0606, + "step": 9407 + }, + { + "epoch": 2.86, + "learning_rate": 0.00011863417184930406, + "loss": 0.0067, + "step": 9408 + }, + { + "epoch": 2.86, + "learning_rate": 0.00011860590118919046, + "loss": 0.1008, + "step": 9409 + }, + { + "epoch": 2.86, + "learning_rate": 0.00011857763169527564, + "loss": 0.1404, + "step": 9410 + }, + { + "epoch": 2.86, + "learning_rate": 0.00011854936336860958, + "loss": 0.1211, + "step": 9411 + }, + { + "epoch": 2.86, + "learning_rate": 0.00011852109621024244, + "loss": 0.1259, + "step": 9412 + }, + { + "epoch": 2.86, + "learning_rate": 0.00011849283022122428, + "loss": 0.0502, + "step": 9413 + }, + { + "epoch": 2.86, + "learning_rate": 0.00011846456540260506, + "loss": 0.1134, + "step": 9414 + }, + { + "epoch": 2.86, + "learning_rate": 0.00011843630175543478, + "loss": 0.1232, + "step": 9415 + }, + { + "epoch": 2.86, + "learning_rate": 0.00011840803928076336, + "loss": 0.0585, + "step": 9416 + }, + { + "epoch": 2.86, + "learning_rate": 0.00011837977797964059, + "loss": 0.0883, + "step": 9417 + }, + { + "epoch": 2.86, + "learning_rate": 0.00011835151785311635, + "loss": 0.0976, + "step": 9418 + }, + { + "epoch": 2.86, + "learning_rate": 0.00011832325890224041, + "loss": 0.1419, + "step": 9419 + }, + { + "epoch": 2.86, + "learning_rate": 0.00011829500112806253, + "loss": 0.1712, + "step": 9420 + }, + { + "epoch": 2.86, + "learning_rate": 0.0001182667445316324, + "loss": 0.1149, + "step": 9421 + }, + { + "epoch": 2.86, + "learning_rate": 0.00011823848911399963, + "loss": 0.0648, + "step": 9422 + }, + { + "epoch": 2.86, + "learning_rate": 0.00011821023487621384, + "loss": 0.1491, + "step": 9423 + }, + { + "epoch": 2.86, + "learning_rate": 0.00011818198181932461, + "loss": 0.0843, + "step": 9424 + }, + { + "epoch": 2.86, + "learning_rate": 0.00011815372994438145, + "loss": 0.0479, + "step": 9425 + }, + { + "epoch": 2.86, + "learning_rate": 0.00011812547925243388, + "loss": 0.0907, + "step": 9426 + }, + { + "epoch": 2.86, + "learning_rate": 0.00011809722974453124, + "loss": 0.1877, + "step": 9427 + }, + { + "epoch": 2.86, + "learning_rate": 0.00011806898142172295, + "loss": 0.0735, + "step": 9428 + }, + { + "epoch": 2.86, + "learning_rate": 0.00011804073428505836, + "loss": 0.1049, + "step": 9429 + }, + { + "epoch": 2.86, + "learning_rate": 0.00011801248833558679, + "loss": 0.1432, + "step": 9430 + }, + { + "epoch": 2.86, + "learning_rate": 0.00011798424357435749, + "loss": 0.0487, + "step": 9431 + }, + { + "epoch": 2.86, + "learning_rate": 0.00011795600000241965, + "loss": 0.076, + "step": 9432 + }, + { + "epoch": 2.86, + "learning_rate": 0.00011792775762082241, + "loss": 0.1092, + "step": 9433 + }, + { + "epoch": 2.86, + "learning_rate": 0.00011789951643061492, + "loss": 0.0832, + "step": 9434 + }, + { + "epoch": 2.86, + "learning_rate": 0.00011787127643284628, + "loss": 0.0681, + "step": 9435 + }, + { + "epoch": 2.87, + "learning_rate": 0.00011784303762856559, + "loss": 0.0866, + "step": 9436 + }, + { + "epoch": 2.87, + "learning_rate": 0.00011781480001882165, + "loss": 0.0485, + "step": 9437 + }, + { + "epoch": 2.87, + "learning_rate": 0.00011778656360466348, + "loss": 0.1024, + "step": 9438 + }, + { + "epoch": 2.87, + "learning_rate": 0.00011775832838714006, + "loss": 0.0423, + "step": 9439 + }, + { + "epoch": 2.87, + "learning_rate": 0.00011773009436730017, + "loss": 0.0778, + "step": 9440 + }, + { + "epoch": 2.87, + "learning_rate": 0.00011770186154619272, + "loss": 0.1551, + "step": 9441 + }, + { + "epoch": 2.87, + "learning_rate": 0.00011767362992486635, + "loss": 0.0928, + "step": 9442 + }, + { + "epoch": 2.87, + "learning_rate": 0.00011764539950436981, + "loss": 0.1667, + "step": 9443 + }, + { + "epoch": 2.87, + "learning_rate": 0.0001176171702857518, + "loss": 0.0608, + "step": 9444 + }, + { + "epoch": 2.87, + "learning_rate": 0.00011758894227006095, + "loss": 0.1304, + "step": 9445 + }, + { + "epoch": 2.87, + "learning_rate": 0.0001175607154583459, + "loss": 0.0612, + "step": 9446 + }, + { + "epoch": 2.87, + "learning_rate": 0.00011753248985165509, + "loss": 0.0615, + "step": 9447 + }, + { + "epoch": 2.87, + "learning_rate": 0.00011750426545103708, + "loss": 0.0903, + "step": 9448 + }, + { + "epoch": 2.87, + "learning_rate": 0.00011747604225754032, + "loss": 0.1937, + "step": 9449 + }, + { + "epoch": 2.87, + "learning_rate": 0.0001174478202722132, + "loss": 0.097, + "step": 9450 + }, + { + "epoch": 2.87, + "learning_rate": 0.00011741959949610419, + "loss": 0.0632, + "step": 9451 + }, + { + "epoch": 2.87, + "learning_rate": 0.00011739137993026145, + "loss": 0.1419, + "step": 9452 + }, + { + "epoch": 2.87, + "learning_rate": 0.0001173631615757333, + "loss": 0.0852, + "step": 9453 + }, + { + "epoch": 2.87, + "learning_rate": 0.000117334944433568, + "loss": 0.1384, + "step": 9454 + }, + { + "epoch": 2.87, + "learning_rate": 0.0001173067285048137, + "loss": 0.0618, + "step": 9455 + }, + { + "epoch": 2.87, + "learning_rate": 0.00011727851379051865, + "loss": 0.0888, + "step": 9456 + }, + { + "epoch": 2.87, + "learning_rate": 0.00011725030029173078, + "loss": 0.0554, + "step": 9457 + }, + { + "epoch": 2.87, + "learning_rate": 0.00011722208800949823, + "loss": 0.0604, + "step": 9458 + }, + { + "epoch": 2.87, + "learning_rate": 0.000117193876944869, + "loss": 0.146, + "step": 9459 + }, + { + "epoch": 2.87, + "learning_rate": 0.00011716566709889104, + "loss": 0.1275, + "step": 9460 + }, + { + "epoch": 2.87, + "learning_rate": 0.00011713745847261226, + "loss": 0.0528, + "step": 9461 + }, + { + "epoch": 2.87, + "learning_rate": 0.00011710925106708053, + "loss": 0.1236, + "step": 9462 + }, + { + "epoch": 2.87, + "learning_rate": 0.00011708104488334366, + "loss": 0.077, + "step": 9463 + }, + { + "epoch": 2.87, + "learning_rate": 0.00011705283992244947, + "loss": 0.0847, + "step": 9464 + }, + { + "epoch": 2.87, + "learning_rate": 0.0001170246361854456, + "loss": 0.1147, + "step": 9465 + }, + { + "epoch": 2.87, + "learning_rate": 0.00011699643367337991, + "loss": 0.075, + "step": 9466 + }, + { + "epoch": 2.87, + "learning_rate": 0.00011696823238729984, + "loss": 0.1069, + "step": 9467 + }, + { + "epoch": 2.87, + "learning_rate": 0.00011694003232825307, + "loss": 0.0705, + "step": 9468 + }, + { + "epoch": 2.88, + "learning_rate": 0.00011691183349728717, + "loss": 0.0376, + "step": 9469 + }, + { + "epoch": 2.88, + "learning_rate": 0.0001168836358954496, + "loss": 0.1203, + "step": 9470 + }, + { + "epoch": 2.88, + "learning_rate": 0.0001168554395237879, + "loss": 0.0673, + "step": 9471 + }, + { + "epoch": 2.88, + "learning_rate": 0.00011682724438334938, + "loss": 0.1529, + "step": 9472 + }, + { + "epoch": 2.88, + "learning_rate": 0.00011679905047518147, + "loss": 0.105, + "step": 9473 + }, + { + "epoch": 2.88, + "learning_rate": 0.00011677085780033145, + "loss": 0.1357, + "step": 9474 + }, + { + "epoch": 2.88, + "learning_rate": 0.00011674266635984662, + "loss": 0.0898, + "step": 9475 + }, + { + "epoch": 2.88, + "learning_rate": 0.00011671447615477423, + "loss": 0.2044, + "step": 9476 + }, + { + "epoch": 2.88, + "learning_rate": 0.00011668628718616149, + "loss": 0.1138, + "step": 9477 + }, + { + "epoch": 2.88, + "learning_rate": 0.00011665809945505543, + "loss": 0.0477, + "step": 9478 + }, + { + "epoch": 2.88, + "learning_rate": 0.00011662991296250319, + "loss": 0.1457, + "step": 9479 + }, + { + "epoch": 2.88, + "learning_rate": 0.00011660172770955181, + "loss": 0.1411, + "step": 9480 + }, + { + "epoch": 2.88, + "learning_rate": 0.00011657354369724834, + "loss": 0.1055, + "step": 9481 + }, + { + "epoch": 2.88, + "learning_rate": 0.00011654536092663975, + "loss": 0.0906, + "step": 9482 + }, + { + "epoch": 2.88, + "learning_rate": 0.00011651717939877282, + "loss": 0.064, + "step": 9483 + }, + { + "epoch": 2.88, + "learning_rate": 0.00011648899911469452, + "loss": 0.0923, + "step": 9484 + }, + { + "epoch": 2.88, + "learning_rate": 0.00011646082007545162, + "loss": 0.0619, + "step": 9485 + }, + { + "epoch": 2.88, + "learning_rate": 0.0001164326422820909, + "loss": 0.1102, + "step": 9486 + }, + { + "epoch": 2.88, + "learning_rate": 0.00011640446573565912, + "loss": 0.0438, + "step": 9487 + }, + { + "epoch": 2.88, + "learning_rate": 0.00011637629043720288, + "loss": 0.1299, + "step": 9488 + }, + { + "epoch": 2.88, + "learning_rate": 0.00011634811638776884, + "loss": 0.1022, + "step": 9489 + }, + { + "epoch": 2.88, + "learning_rate": 0.0001163199435884036, + "loss": 0.1133, + "step": 9490 + }, + { + "epoch": 2.88, + "learning_rate": 0.00011629177204015372, + "loss": 0.092, + "step": 9491 + }, + { + "epoch": 2.88, + "learning_rate": 0.00011626360174406569, + "loss": 0.1211, + "step": 9492 + }, + { + "epoch": 2.88, + "learning_rate": 0.00011623543270118588, + "loss": 0.0644, + "step": 9493 + }, + { + "epoch": 2.88, + "learning_rate": 0.00011620726491256071, + "loss": 0.0433, + "step": 9494 + }, + { + "epoch": 2.88, + "learning_rate": 0.00011617909837923657, + "loss": 0.1283, + "step": 9495 + }, + { + "epoch": 2.88, + "learning_rate": 0.00011615093310225974, + "loss": 0.042, + "step": 9496 + }, + { + "epoch": 2.88, + "learning_rate": 0.00011612276908267652, + "loss": 0.1738, + "step": 9497 + }, + { + "epoch": 2.88, + "learning_rate": 0.00011609460632153304, + "loss": 0.107, + "step": 9498 + }, + { + "epoch": 2.88, + "learning_rate": 0.00011606644481987552, + "loss": 0.0502, + "step": 9499 + }, + { + "epoch": 2.88, + "learning_rate": 0.00011603828457875005, + "loss": 0.0547, + "step": 9500 + }, + { + "epoch": 2.88, + "learning_rate": 0.00011601012559920275, + "loss": 0.063, + "step": 9501 + }, + { + "epoch": 2.89, + "learning_rate": 0.0001159819678822796, + "loss": 0.0678, + "step": 9502 + }, + { + "epoch": 2.89, + "learning_rate": 0.00011595381142902658, + "loss": 0.1077, + "step": 9503 + }, + { + "epoch": 2.89, + "learning_rate": 0.00011592565624048963, + "loss": 0.079, + "step": 9504 + }, + { + "epoch": 2.89, + "learning_rate": 0.0001158975023177146, + "loss": 0.1749, + "step": 9505 + }, + { + "epoch": 2.89, + "learning_rate": 0.00011586934966174736, + "loss": 0.0323, + "step": 9506 + }, + { + "epoch": 2.89, + "learning_rate": 0.00011584119827363379, + "loss": 0.022, + "step": 9507 + }, + { + "epoch": 2.89, + "learning_rate": 0.00011581304815441944, + "loss": 0.1005, + "step": 9508 + }, + { + "epoch": 2.89, + "learning_rate": 0.0001157848993051501, + "loss": 0.0821, + "step": 9509 + }, + { + "epoch": 2.89, + "learning_rate": 0.00011575675172687143, + "loss": 0.1381, + "step": 9510 + }, + { + "epoch": 2.89, + "learning_rate": 0.00011572860542062902, + "loss": 0.095, + "step": 9511 + }, + { + "epoch": 2.89, + "learning_rate": 0.00011570046038746843, + "loss": 0.1895, + "step": 9512 + }, + { + "epoch": 2.89, + "learning_rate": 0.00011567231662843514, + "loss": 0.011, + "step": 9513 + }, + { + "epoch": 2.89, + "learning_rate": 0.00011564417414457461, + "loss": 0.0576, + "step": 9514 + }, + { + "epoch": 2.89, + "learning_rate": 0.00011561603293693227, + "loss": 0.0644, + "step": 9515 + }, + { + "epoch": 2.89, + "learning_rate": 0.00011558789300655347, + "loss": 0.0753, + "step": 9516 + }, + { + "epoch": 2.89, + "learning_rate": 0.00011555975435448359, + "loss": 0.013, + "step": 9517 + }, + { + "epoch": 2.89, + "learning_rate": 0.0001155316169817678, + "loss": 0.0972, + "step": 9518 + }, + { + "epoch": 2.89, + "learning_rate": 0.00011550348088945134, + "loss": 0.1827, + "step": 9519 + }, + { + "epoch": 2.89, + "learning_rate": 0.00011547534607857942, + "loss": 0.0411, + "step": 9520 + }, + { + "epoch": 2.89, + "learning_rate": 0.00011544721255019711, + "loss": 0.1347, + "step": 9521 + }, + { + "epoch": 2.89, + "learning_rate": 0.00011541908030534964, + "loss": 0.1005, + "step": 9522 + }, + { + "epoch": 2.89, + "learning_rate": 0.00011539094934508184, + "loss": 0.1055, + "step": 9523 + }, + { + "epoch": 2.89, + "learning_rate": 0.00011536281967043878, + "loss": 0.0451, + "step": 9524 + }, + { + "epoch": 2.89, + "learning_rate": 0.00011533469128246541, + "loss": 0.056, + "step": 9525 + }, + { + "epoch": 2.89, + "learning_rate": 0.0001153065641822066, + "loss": 0.0112, + "step": 9526 + }, + { + "epoch": 2.89, + "learning_rate": 0.00011527843837070724, + "loss": 0.1152, + "step": 9527 + }, + { + "epoch": 2.89, + "learning_rate": 0.00011525031384901201, + "loss": 0.1322, + "step": 9528 + }, + { + "epoch": 2.89, + "learning_rate": 0.00011522219061816576, + "loss": 0.0991, + "step": 9529 + }, + { + "epoch": 2.89, + "learning_rate": 0.00011519406867921311, + "loss": 0.1381, + "step": 9530 + }, + { + "epoch": 2.89, + "learning_rate": 0.00011516594803319874, + "loss": 0.0912, + "step": 9531 + }, + { + "epoch": 2.89, + "learning_rate": 0.00011513782868116734, + "loss": 0.0675, + "step": 9532 + }, + { + "epoch": 2.89, + "learning_rate": 0.0001151097106241633, + "loss": 0.1579, + "step": 9533 + }, + { + "epoch": 2.89, + "learning_rate": 0.00011508159386323119, + "loss": 0.066, + "step": 9534 + }, + { + "epoch": 2.9, + "learning_rate": 0.00011505347839941546, + "loss": 0.1491, + "step": 9535 + }, + { + "epoch": 2.9, + "learning_rate": 0.00011502536423376053, + "loss": 0.0525, + "step": 9536 + }, + { + "epoch": 2.9, + "learning_rate": 0.0001149972513673108, + "loss": 0.0147, + "step": 9537 + }, + { + "epoch": 2.9, + "learning_rate": 0.0001149691398011105, + "loss": 0.083, + "step": 9538 + }, + { + "epoch": 2.9, + "learning_rate": 0.00011494102953620395, + "loss": 0.1462, + "step": 9539 + }, + { + "epoch": 2.9, + "learning_rate": 0.0001149129205736353, + "loss": 0.02, + "step": 9540 + }, + { + "epoch": 2.9, + "learning_rate": 0.00011488481291444879, + "loss": 0.0413, + "step": 9541 + }, + { + "epoch": 2.9, + "learning_rate": 0.0001148567065596885, + "loss": 0.0167, + "step": 9542 + }, + { + "epoch": 2.9, + "learning_rate": 0.00011482860151039854, + "loss": 0.0853, + "step": 9543 + }, + { + "epoch": 2.9, + "learning_rate": 0.00011480049776762285, + "loss": 0.1283, + "step": 9544 + }, + { + "epoch": 2.9, + "learning_rate": 0.00011477239533240546, + "loss": 0.0837, + "step": 9545 + }, + { + "epoch": 2.9, + "learning_rate": 0.00011474429420579026, + "loss": 0.1008, + "step": 9546 + }, + { + "epoch": 2.9, + "learning_rate": 0.00011471619438882117, + "loss": 0.0823, + "step": 9547 + }, + { + "epoch": 2.9, + "learning_rate": 0.00011468809588254204, + "loss": 0.0993, + "step": 9548 + }, + { + "epoch": 2.9, + "learning_rate": 0.00011465999868799653, + "loss": 0.0442, + "step": 9549 + }, + { + "epoch": 2.9, + "learning_rate": 0.00011463190280622844, + "loss": 0.1344, + "step": 9550 + }, + { + "epoch": 2.9, + "learning_rate": 0.00011460380823828145, + "loss": 0.1183, + "step": 9551 + }, + { + "epoch": 2.9, + "learning_rate": 0.00011457571498519917, + "loss": 0.0829, + "step": 9552 + }, + { + "epoch": 2.9, + "learning_rate": 0.00011454762304802523, + "loss": 0.048, + "step": 9553 + }, + { + "epoch": 2.9, + "learning_rate": 0.0001145195324278031, + "loss": 0.1089, + "step": 9554 + }, + { + "epoch": 2.9, + "learning_rate": 0.00011449144312557631, + "loss": 0.0408, + "step": 9555 + }, + { + "epoch": 2.9, + "learning_rate": 0.00011446335514238828, + "loss": 0.0922, + "step": 9556 + }, + { + "epoch": 2.9, + "learning_rate": 0.00011443526847928238, + "loss": 0.1775, + "step": 9557 + }, + { + "epoch": 2.9, + "learning_rate": 0.00011440718313730204, + "loss": 0.0777, + "step": 9558 + }, + { + "epoch": 2.9, + "learning_rate": 0.00011437909911749039, + "loss": 0.0699, + "step": 9559 + }, + { + "epoch": 2.9, + "learning_rate": 0.00011435101642089077, + "loss": 0.078, + "step": 9560 + }, + { + "epoch": 2.9, + "learning_rate": 0.00011432293504854634, + "loss": 0.0654, + "step": 9561 + }, + { + "epoch": 2.9, + "learning_rate": 0.00011429485500150022, + "loss": 0.1007, + "step": 9562 + }, + { + "epoch": 2.9, + "learning_rate": 0.00011426677628079563, + "loss": 0.079, + "step": 9563 + }, + { + "epoch": 2.9, + "learning_rate": 0.00011423869888747544, + "loss": 0.0753, + "step": 9564 + }, + { + "epoch": 2.9, + "learning_rate": 0.00011421062282258274, + "loss": 0.0166, + "step": 9565 + }, + { + "epoch": 2.9, + "learning_rate": 0.00011418254808716043, + "loss": 0.0898, + "step": 9566 + }, + { + "epoch": 2.9, + "learning_rate": 0.00011415447468225144, + "loss": 0.1411, + "step": 9567 + }, + { + "epoch": 2.91, + "learning_rate": 0.00011412640260889862, + "loss": 0.0936, + "step": 9568 + }, + { + "epoch": 2.91, + "learning_rate": 0.00011409833186814472, + "loss": 0.0679, + "step": 9569 + }, + { + "epoch": 2.91, + "learning_rate": 0.00011407026246103249, + "loss": 0.16, + "step": 9570 + }, + { + "epoch": 2.91, + "learning_rate": 0.00011404219438860468, + "loss": 0.1453, + "step": 9571 + }, + { + "epoch": 2.91, + "learning_rate": 0.00011401412765190388, + "loss": 0.0853, + "step": 9572 + }, + { + "epoch": 2.91, + "learning_rate": 0.00011398606225197277, + "loss": 0.0998, + "step": 9573 + }, + { + "epoch": 2.91, + "learning_rate": 0.00011395799818985378, + "loss": 0.084, + "step": 9574 + }, + { + "epoch": 2.91, + "learning_rate": 0.00011392993546658949, + "loss": 0.0597, + "step": 9575 + }, + { + "epoch": 2.91, + "learning_rate": 0.0001139018740832223, + "loss": 0.1176, + "step": 9576 + }, + { + "epoch": 2.91, + "learning_rate": 0.00011387381404079464, + "loss": 0.0792, + "step": 9577 + }, + { + "epoch": 2.91, + "learning_rate": 0.00011384575534034885, + "loss": 0.0466, + "step": 9578 + }, + { + "epoch": 2.91, + "learning_rate": 0.00011381769798292722, + "loss": 0.0638, + "step": 9579 + }, + { + "epoch": 2.91, + "learning_rate": 0.00011378964196957202, + "loss": 0.0407, + "step": 9580 + }, + { + "epoch": 2.91, + "learning_rate": 0.00011376158730132541, + "loss": 0.069, + "step": 9581 + }, + { + "epoch": 2.91, + "learning_rate": 0.00011373353397922957, + "loss": 0.0574, + "step": 9582 + }, + { + "epoch": 2.91, + "learning_rate": 0.00011370548200432665, + "loss": 0.0848, + "step": 9583 + }, + { + "epoch": 2.91, + "learning_rate": 0.00011367743137765857, + "loss": 0.1611, + "step": 9584 + }, + { + "epoch": 2.91, + "learning_rate": 0.00011364938210026741, + "loss": 0.0654, + "step": 9585 + }, + { + "epoch": 2.91, + "learning_rate": 0.00011362133417319513, + "loss": 0.1006, + "step": 9586 + }, + { + "epoch": 2.91, + "learning_rate": 0.00011359328759748357, + "loss": 0.0884, + "step": 9587 + }, + { + "epoch": 2.91, + "learning_rate": 0.0001135652423741747, + "loss": 0.0742, + "step": 9588 + }, + { + "epoch": 2.91, + "learning_rate": 0.00011353719850431016, + "loss": 0.0885, + "step": 9589 + }, + { + "epoch": 2.91, + "learning_rate": 0.00011350915598893177, + "loss": 0.115, + "step": 9590 + }, + { + "epoch": 2.91, + "learning_rate": 0.00011348111482908123, + "loss": 0.1233, + "step": 9591 + }, + { + "epoch": 2.91, + "learning_rate": 0.00011345307502580019, + "loss": 0.0638, + "step": 9592 + }, + { + "epoch": 2.91, + "learning_rate": 0.00011342503658013026, + "loss": 0.1214, + "step": 9593 + }, + { + "epoch": 2.91, + "learning_rate": 0.00011339699949311295, + "loss": 0.028, + "step": 9594 + }, + { + "epoch": 2.91, + "learning_rate": 0.00011336896376578975, + "loss": 0.0874, + "step": 9595 + }, + { + "epoch": 2.91, + "learning_rate": 0.00011334092939920215, + "loss": 0.0814, + "step": 9596 + }, + { + "epoch": 2.91, + "learning_rate": 0.00011331289639439151, + "loss": 0.0845, + "step": 9597 + }, + { + "epoch": 2.91, + "learning_rate": 0.00011328486475239925, + "loss": 0.0482, + "step": 9598 + }, + { + "epoch": 2.91, + "learning_rate": 0.00011325683447426653, + "loss": 0.0911, + "step": 9599 + }, + { + "epoch": 2.91, + "learning_rate": 0.00011322880556103466, + "loss": 0.1003, + "step": 9600 + }, + { + "epoch": 2.92, + "learning_rate": 0.00011320077801374485, + "loss": 0.0381, + "step": 9601 + }, + { + "epoch": 2.92, + "learning_rate": 0.0001131727518334382, + "loss": 0.1239, + "step": 9602 + }, + { + "epoch": 2.92, + "learning_rate": 0.00011314472702115579, + "loss": 0.0947, + "step": 9603 + }, + { + "epoch": 2.92, + "learning_rate": 0.00011311670357793881, + "loss": 0.1043, + "step": 9604 + }, + { + "epoch": 2.92, + "learning_rate": 0.00011308868150482805, + "loss": 0.0548, + "step": 9605 + }, + { + "epoch": 2.92, + "learning_rate": 0.00011306066080286455, + "loss": 0.1316, + "step": 9606 + }, + { + "epoch": 2.92, + "learning_rate": 0.00011303264147308916, + "loss": 0.0699, + "step": 9607 + }, + { + "epoch": 2.92, + "learning_rate": 0.00011300462351654274, + "loss": 0.0765, + "step": 9608 + }, + { + "epoch": 2.92, + "learning_rate": 0.00011297660693426611, + "loss": 0.043, + "step": 9609 + }, + { + "epoch": 2.92, + "learning_rate": 0.00011294859172729992, + "loss": 0.0794, + "step": 9610 + }, + { + "epoch": 2.92, + "learning_rate": 0.0001129205778966849, + "loss": 0.064, + "step": 9611 + }, + { + "epoch": 2.92, + "learning_rate": 0.00011289256544346168, + "loss": 0.0692, + "step": 9612 + }, + { + "epoch": 2.92, + "learning_rate": 0.00011286455436867082, + "loss": 0.1108, + "step": 9613 + }, + { + "epoch": 2.92, + "learning_rate": 0.00011283654467335294, + "loss": 0.1079, + "step": 9614 + }, + { + "epoch": 2.92, + "learning_rate": 0.00011280853635854839, + "loss": 0.0389, + "step": 9615 + }, + { + "epoch": 2.92, + "learning_rate": 0.00011278052942529764, + "loss": 0.0684, + "step": 9616 + }, + { + "epoch": 2.92, + "learning_rate": 0.00011275252387464108, + "loss": 0.0444, + "step": 9617 + }, + { + "epoch": 2.92, + "learning_rate": 0.00011272451970761904, + "loss": 0.0467, + "step": 9618 + }, + { + "epoch": 2.92, + "learning_rate": 0.0001126965169252718, + "loss": 0.0744, + "step": 9619 + }, + { + "epoch": 2.92, + "learning_rate": 0.00011266851552863954, + "loss": 0.1311, + "step": 9620 + }, + { + "epoch": 2.92, + "learning_rate": 0.00011264051551876245, + "loss": 0.08, + "step": 9621 + }, + { + "epoch": 2.92, + "learning_rate": 0.00011261251689668065, + "loss": 0.0914, + "step": 9622 + }, + { + "epoch": 2.92, + "learning_rate": 0.00011258451966343422, + "loss": 0.1285, + "step": 9623 + }, + { + "epoch": 2.92, + "learning_rate": 0.00011255652382006321, + "loss": 0.0893, + "step": 9624 + }, + { + "epoch": 2.92, + "learning_rate": 0.00011252852936760749, + "loss": 0.0795, + "step": 9625 + }, + { + "epoch": 2.92, + "learning_rate": 0.000112500536307107, + "loss": 0.0505, + "step": 9626 + }, + { + "epoch": 2.92, + "learning_rate": 0.00011247254463960162, + "loss": 0.0781, + "step": 9627 + }, + { + "epoch": 2.92, + "learning_rate": 0.00011244455436613119, + "loss": 0.1433, + "step": 9628 + }, + { + "epoch": 2.92, + "learning_rate": 0.00011241656548773547, + "loss": 0.1688, + "step": 9629 + }, + { + "epoch": 2.92, + "learning_rate": 0.00011238857800545408, + "loss": 0.0719, + "step": 9630 + }, + { + "epoch": 2.92, + "learning_rate": 0.00011236059192032672, + "loss": 0.1159, + "step": 9631 + }, + { + "epoch": 2.92, + "learning_rate": 0.00011233260723339301, + "loss": 0.0902, + "step": 9632 + }, + { + "epoch": 2.92, + "learning_rate": 0.00011230462394569246, + "loss": 0.0757, + "step": 9633 + }, + { + "epoch": 2.93, + "learning_rate": 0.00011227664205826465, + "loss": 0.1169, + "step": 9634 + }, + { + "epoch": 2.93, + "learning_rate": 0.0001122486615721489, + "loss": 0.1011, + "step": 9635 + }, + { + "epoch": 2.93, + "learning_rate": 0.00011222068248838469, + "loss": 0.0556, + "step": 9636 + }, + { + "epoch": 2.93, + "learning_rate": 0.00011219270480801132, + "loss": 0.0705, + "step": 9637 + }, + { + "epoch": 2.93, + "learning_rate": 0.0001121647285320681, + "loss": 0.0735, + "step": 9638 + }, + { + "epoch": 2.93, + "learning_rate": 0.00011213675366159434, + "loss": 0.0905, + "step": 9639 + }, + { + "epoch": 2.93, + "learning_rate": 0.0001121087801976291, + "loss": 0.1827, + "step": 9640 + }, + { + "epoch": 2.93, + "learning_rate": 0.00011208080814121151, + "loss": 0.0892, + "step": 9641 + }, + { + "epoch": 2.93, + "learning_rate": 0.00011205283749338073, + "loss": 0.153, + "step": 9642 + }, + { + "epoch": 2.93, + "learning_rate": 0.00011202486825517577, + "loss": 0.0609, + "step": 9643 + }, + { + "epoch": 2.93, + "learning_rate": 0.0001119969004276356, + "loss": 0.0724, + "step": 9644 + }, + { + "epoch": 2.93, + "learning_rate": 0.00011196893401179913, + "loss": 0.051, + "step": 9645 + }, + { + "epoch": 2.93, + "learning_rate": 0.00011194096900870523, + "loss": 0.0911, + "step": 9646 + }, + { + "epoch": 2.93, + "learning_rate": 0.00011191300541939272, + "loss": 0.0592, + "step": 9647 + }, + { + "epoch": 2.93, + "learning_rate": 0.00011188504324490035, + "loss": 0.0819, + "step": 9648 + }, + { + "epoch": 2.93, + "learning_rate": 0.00011185708248626692, + "loss": 0.0783, + "step": 9649 + }, + { + "epoch": 2.93, + "learning_rate": 0.00011182912314453097, + "loss": 0.1216, + "step": 9650 + }, + { + "epoch": 2.93, + "learning_rate": 0.00011180116522073115, + "loss": 0.1173, + "step": 9651 + }, + { + "epoch": 2.93, + "learning_rate": 0.00011177320871590605, + "loss": 0.1917, + "step": 9652 + }, + { + "epoch": 2.93, + "learning_rate": 0.00011174525363109414, + "loss": 0.0312, + "step": 9653 + }, + { + "epoch": 2.93, + "learning_rate": 0.00011171729996733393, + "loss": 0.0492, + "step": 9654 + }, + { + "epoch": 2.93, + "learning_rate": 0.0001116893477256637, + "loss": 0.1009, + "step": 9655 + }, + { + "epoch": 2.93, + "learning_rate": 0.00011166139690712186, + "loss": 0.0646, + "step": 9656 + }, + { + "epoch": 2.93, + "learning_rate": 0.0001116334475127467, + "loss": 0.083, + "step": 9657 + }, + { + "epoch": 2.93, + "learning_rate": 0.00011160549954357647, + "loss": 0.1586, + "step": 9658 + }, + { + "epoch": 2.93, + "learning_rate": 0.00011157755300064935, + "loss": 0.1298, + "step": 9659 + }, + { + "epoch": 2.93, + "learning_rate": 0.00011154960788500343, + "loss": 0.1532, + "step": 9660 + }, + { + "epoch": 2.93, + "learning_rate": 0.00011152166419767683, + "loss": 0.2237, + "step": 9661 + }, + { + "epoch": 2.93, + "learning_rate": 0.00011149372193970757, + "loss": 0.0393, + "step": 9662 + }, + { + "epoch": 2.93, + "learning_rate": 0.00011146578111213361, + "loss": 0.112, + "step": 9663 + }, + { + "epoch": 2.93, + "learning_rate": 0.0001114378417159929, + "loss": 0.0707, + "step": 9664 + }, + { + "epoch": 2.93, + "learning_rate": 0.00011140990375232335, + "loss": 0.144, + "step": 9665 + }, + { + "epoch": 2.93, + "learning_rate": 0.00011138196722216263, + "loss": 0.1197, + "step": 9666 + }, + { + "epoch": 2.94, + "learning_rate": 0.00011135403212654856, + "loss": 0.0932, + "step": 9667 + }, + { + "epoch": 2.94, + "learning_rate": 0.0001113260984665189, + "loss": 0.1305, + "step": 9668 + }, + { + "epoch": 2.94, + "learning_rate": 0.00011129816624311127, + "loss": 0.1042, + "step": 9669 + }, + { + "epoch": 2.94, + "learning_rate": 0.00011127023545736333, + "loss": 0.1095, + "step": 9670 + }, + { + "epoch": 2.94, + "learning_rate": 0.0001112423061103125, + "loss": 0.0357, + "step": 9671 + }, + { + "epoch": 2.94, + "learning_rate": 0.00011121437820299638, + "loss": 0.0674, + "step": 9672 + }, + { + "epoch": 2.94, + "learning_rate": 0.00011118645173645234, + "loss": 0.1259, + "step": 9673 + }, + { + "epoch": 2.94, + "learning_rate": 0.0001111585267117178, + "loss": 0.0387, + "step": 9674 + }, + { + "epoch": 2.94, + "learning_rate": 0.00011113060312983012, + "loss": 0.0994, + "step": 9675 + }, + { + "epoch": 2.94, + "learning_rate": 0.00011110268099182652, + "loss": 0.072, + "step": 9676 + }, + { + "epoch": 2.94, + "learning_rate": 0.00011107476029874426, + "loss": 0.0301, + "step": 9677 + }, + { + "epoch": 2.94, + "learning_rate": 0.00011104684105162053, + "loss": 0.0508, + "step": 9678 + }, + { + "epoch": 2.94, + "learning_rate": 0.0001110189232514924, + "loss": 0.1278, + "step": 9679 + }, + { + "epoch": 2.94, + "learning_rate": 0.00011099100689939702, + "loss": 0.1775, + "step": 9680 + }, + { + "epoch": 2.94, + "learning_rate": 0.00011096309199637128, + "loss": 0.0389, + "step": 9681 + }, + { + "epoch": 2.94, + "learning_rate": 0.00011093517854345222, + "loss": 0.0657, + "step": 9682 + }, + { + "epoch": 2.94, + "learning_rate": 0.0001109072665416767, + "loss": 0.0969, + "step": 9683 + }, + { + "epoch": 2.94, + "learning_rate": 0.00011087935599208158, + "loss": 0.1294, + "step": 9684 + }, + { + "epoch": 2.94, + "learning_rate": 0.00011085144689570372, + "loss": 0.0434, + "step": 9685 + }, + { + "epoch": 2.94, + "learning_rate": 0.00011082353925357976, + "loss": 0.1195, + "step": 9686 + }, + { + "epoch": 2.94, + "learning_rate": 0.00011079563306674643, + "loss": 0.0911, + "step": 9687 + }, + { + "epoch": 2.94, + "learning_rate": 0.00011076772833624038, + "loss": 0.0832, + "step": 9688 + }, + { + "epoch": 2.94, + "learning_rate": 0.00011073982506309815, + "loss": 0.1815, + "step": 9689 + }, + { + "epoch": 2.94, + "learning_rate": 0.00011071192324835634, + "loss": 0.0502, + "step": 9690 + }, + { + "epoch": 2.94, + "learning_rate": 0.00011068402289305133, + "loss": 0.1703, + "step": 9691 + }, + { + "epoch": 2.94, + "learning_rate": 0.00011065612399821959, + "loss": 0.1204, + "step": 9692 + }, + { + "epoch": 2.94, + "learning_rate": 0.00011062822656489746, + "loss": 0.1478, + "step": 9693 + }, + { + "epoch": 2.94, + "learning_rate": 0.00011060033059412126, + "loss": 0.0509, + "step": 9694 + }, + { + "epoch": 2.94, + "learning_rate": 0.0001105724360869273, + "loss": 0.1534, + "step": 9695 + }, + { + "epoch": 2.94, + "learning_rate": 0.00011054454304435165, + "loss": 0.069, + "step": 9696 + }, + { + "epoch": 2.94, + "learning_rate": 0.00011051665146743054, + "loss": 0.0907, + "step": 9697 + }, + { + "epoch": 2.94, + "learning_rate": 0.00011048876135720001, + "loss": 0.0841, + "step": 9698 + }, + { + "epoch": 2.94, + "learning_rate": 0.00011046087271469616, + "loss": 0.1221, + "step": 9699 + }, + { + "epoch": 2.95, + "learning_rate": 0.00011043298554095497, + "loss": 0.0306, + "step": 9700 + }, + { + "epoch": 2.95, + "learning_rate": 0.00011040509983701231, + "loss": 0.1196, + "step": 9701 + }, + { + "epoch": 2.95, + "learning_rate": 0.00011037721560390406, + "loss": 0.0979, + "step": 9702 + }, + { + "epoch": 2.95, + "learning_rate": 0.00011034933284266608, + "loss": 0.1048, + "step": 9703 + }, + { + "epoch": 2.95, + "learning_rate": 0.00011032145155433409, + "loss": 0.0676, + "step": 9704 + }, + { + "epoch": 2.95, + "learning_rate": 0.00011029357173994389, + "loss": 0.1245, + "step": 9705 + }, + { + "epoch": 2.95, + "learning_rate": 0.00011026569340053099, + "loss": 0.0885, + "step": 9706 + }, + { + "epoch": 2.95, + "learning_rate": 0.00011023781653713109, + "loss": 0.0751, + "step": 9707 + }, + { + "epoch": 2.95, + "learning_rate": 0.00011020994115077964, + "loss": 0.0755, + "step": 9708 + }, + { + "epoch": 2.95, + "learning_rate": 0.00011018206724251225, + "loss": 0.1219, + "step": 9709 + }, + { + "epoch": 2.95, + "learning_rate": 0.00011015419481336435, + "loss": 0.1103, + "step": 9710 + }, + { + "epoch": 2.95, + "learning_rate": 0.00011012632386437118, + "loss": 0.0884, + "step": 9711 + }, + { + "epoch": 2.95, + "learning_rate": 0.00011009845439656819, + "loss": 0.1238, + "step": 9712 + }, + { + "epoch": 2.95, + "learning_rate": 0.00011007058641099058, + "loss": 0.1147, + "step": 9713 + }, + { + "epoch": 2.95, + "learning_rate": 0.0001100427199086736, + "loss": 0.0779, + "step": 9714 + }, + { + "epoch": 2.95, + "learning_rate": 0.00011001485489065243, + "loss": 0.0351, + "step": 9715 + }, + { + "epoch": 2.95, + "learning_rate": 0.00010998699135796213, + "loss": 0.0639, + "step": 9716 + }, + { + "epoch": 2.95, + "learning_rate": 0.00010995912931163774, + "loss": 0.0673, + "step": 9717 + }, + { + "epoch": 2.95, + "learning_rate": 0.0001099312687527143, + "loss": 0.1311, + "step": 9718 + }, + { + "epoch": 2.95, + "learning_rate": 0.00010990340968222671, + "loss": 0.0697, + "step": 9719 + }, + { + "epoch": 2.95, + "learning_rate": 0.00010987555210120992, + "loss": 0.0646, + "step": 9720 + }, + { + "epoch": 2.95, + "learning_rate": 0.00010984769601069867, + "loss": 0.0901, + "step": 9721 + }, + { + "epoch": 2.95, + "learning_rate": 0.00010981984141172775, + "loss": 0.0732, + "step": 9722 + }, + { + "epoch": 2.95, + "learning_rate": 0.00010979198830533191, + "loss": 0.1415, + "step": 9723 + }, + { + "epoch": 2.95, + "learning_rate": 0.00010976413669254577, + "loss": 0.1281, + "step": 9724 + }, + { + "epoch": 2.95, + "learning_rate": 0.00010973628657440401, + "loss": 0.0648, + "step": 9725 + }, + { + "epoch": 2.95, + "learning_rate": 0.0001097084379519411, + "loss": 0.1024, + "step": 9726 + }, + { + "epoch": 2.95, + "learning_rate": 0.00010968059082619157, + "loss": 0.1819, + "step": 9727 + }, + { + "epoch": 2.95, + "learning_rate": 0.00010965274519818984, + "loss": 0.0855, + "step": 9728 + }, + { + "epoch": 2.95, + "learning_rate": 0.00010962490106897033, + "loss": 0.0972, + "step": 9729 + }, + { + "epoch": 2.95, + "learning_rate": 0.00010959705843956733, + "loss": 0.1275, + "step": 9730 + }, + { + "epoch": 2.95, + "learning_rate": 0.00010956921731101515, + "loss": 0.1003, + "step": 9731 + }, + { + "epoch": 2.95, + "learning_rate": 0.00010954137768434799, + "loss": 0.1364, + "step": 9732 + }, + { + "epoch": 2.96, + "learning_rate": 0.00010951353956059997, + "loss": 0.1367, + "step": 9733 + }, + { + "epoch": 2.96, + "learning_rate": 0.00010948570294080524, + "loss": 0.1025, + "step": 9734 + }, + { + "epoch": 2.96, + "learning_rate": 0.00010945786782599784, + "loss": 0.1022, + "step": 9735 + }, + { + "epoch": 2.96, + "learning_rate": 0.00010943003421721183, + "loss": 0.0491, + "step": 9736 + }, + { + "epoch": 2.96, + "learning_rate": 0.000109402202115481, + "loss": 0.0996, + "step": 9737 + }, + { + "epoch": 2.96, + "learning_rate": 0.00010937437152183932, + "loss": 0.0649, + "step": 9738 + }, + { + "epoch": 2.96, + "learning_rate": 0.0001093465424373206, + "loss": 0.1654, + "step": 9739 + }, + { + "epoch": 2.96, + "learning_rate": 0.00010931871486295862, + "loss": 0.0742, + "step": 9740 + }, + { + "epoch": 2.96, + "learning_rate": 0.00010929088879978711, + "loss": 0.1431, + "step": 9741 + }, + { + "epoch": 2.96, + "learning_rate": 0.00010926306424883969, + "loss": 0.0687, + "step": 9742 + }, + { + "epoch": 2.96, + "learning_rate": 0.00010923524121114994, + "loss": 0.0261, + "step": 9743 + }, + { + "epoch": 2.96, + "learning_rate": 0.00010920741968775146, + "loss": 0.0994, + "step": 9744 + }, + { + "epoch": 2.96, + "learning_rate": 0.00010917959967967772, + "loss": 0.0922, + "step": 9745 + }, + { + "epoch": 2.96, + "learning_rate": 0.00010915178118796219, + "loss": 0.1192, + "step": 9746 + }, + { + "epoch": 2.96, + "learning_rate": 0.00010912396421363815, + "loss": 0.0728, + "step": 9747 + }, + { + "epoch": 2.96, + "learning_rate": 0.00010909614875773897, + "loss": 0.0715, + "step": 9748 + }, + { + "epoch": 2.96, + "learning_rate": 0.00010906833482129789, + "loss": 0.0648, + "step": 9749 + }, + { + "epoch": 2.96, + "learning_rate": 0.00010904052240534817, + "loss": 0.0678, + "step": 9750 + }, + { + "epoch": 2.96, + "learning_rate": 0.00010901271151092298, + "loss": 0.0301, + "step": 9751 + }, + { + "epoch": 2.96, + "learning_rate": 0.00010898490213905531, + "loss": 0.1694, + "step": 9752 + }, + { + "epoch": 2.96, + "learning_rate": 0.00010895709429077825, + "loss": 0.1831, + "step": 9753 + }, + { + "epoch": 2.96, + "learning_rate": 0.0001089292879671248, + "loss": 0.0848, + "step": 9754 + }, + { + "epoch": 2.96, + "learning_rate": 0.00010890148316912783, + "loss": 0.1132, + "step": 9755 + }, + { + "epoch": 2.96, + "learning_rate": 0.0001088736798978203, + "loss": 0.1191, + "step": 9756 + }, + { + "epoch": 2.96, + "learning_rate": 0.0001088458781542349, + "loss": 0.1269, + "step": 9757 + }, + { + "epoch": 2.96, + "learning_rate": 0.00010881807793940444, + "loss": 0.1215, + "step": 9758 + }, + { + "epoch": 2.96, + "learning_rate": 0.00010879027925436164, + "loss": 0.0798, + "step": 9759 + }, + { + "epoch": 2.96, + "learning_rate": 0.00010876248210013912, + "loss": 0.1149, + "step": 9760 + }, + { + "epoch": 2.96, + "learning_rate": 0.0001087346864777695, + "loss": 0.0607, + "step": 9761 + }, + { + "epoch": 2.96, + "learning_rate": 0.00010870689238828526, + "loss": 0.0719, + "step": 9762 + }, + { + "epoch": 2.96, + "learning_rate": 0.00010867909983271883, + "loss": 0.0216, + "step": 9763 + }, + { + "epoch": 2.96, + "learning_rate": 0.00010865130881210268, + "loss": 0.0364, + "step": 9764 + }, + { + "epoch": 2.96, + "learning_rate": 0.00010862351932746914, + "loss": 0.0731, + "step": 9765 + }, + { + "epoch": 2.97, + "learning_rate": 0.00010859573137985058, + "loss": 0.1182, + "step": 9766 + }, + { + "epoch": 2.97, + "learning_rate": 0.00010856794497027913, + "loss": 0.0348, + "step": 9767 + }, + { + "epoch": 2.97, + "learning_rate": 0.00010854016009978703, + "loss": 0.0558, + "step": 9768 + }, + { + "epoch": 2.97, + "learning_rate": 0.0001085123767694064, + "loss": 0.0438, + "step": 9769 + }, + { + "epoch": 2.97, + "learning_rate": 0.00010848459498016931, + "loss": 0.074, + "step": 9770 + }, + { + "epoch": 2.97, + "learning_rate": 0.00010845681473310785, + "loss": 0.1336, + "step": 9771 + }, + { + "epoch": 2.97, + "learning_rate": 0.00010842903602925377, + "loss": 0.1011, + "step": 9772 + }, + { + "epoch": 2.97, + "learning_rate": 0.00010840125886963918, + "loss": 0.1173, + "step": 9773 + }, + { + "epoch": 2.97, + "learning_rate": 0.00010837348325529581, + "loss": 0.1409, + "step": 9774 + }, + { + "epoch": 2.97, + "learning_rate": 0.00010834570918725546, + "loss": 0.0363, + "step": 9775 + }, + { + "epoch": 2.97, + "learning_rate": 0.00010831793666654993, + "loss": 0.0923, + "step": 9776 + }, + { + "epoch": 2.97, + "learning_rate": 0.00010829016569421077, + "loss": 0.1557, + "step": 9777 + }, + { + "epoch": 2.97, + "learning_rate": 0.00010826239627126964, + "loss": 0.1162, + "step": 9778 + }, + { + "epoch": 2.97, + "learning_rate": 0.00010823462839875809, + "loss": 0.1079, + "step": 9779 + }, + { + "epoch": 2.97, + "learning_rate": 0.0001082068620777076, + "loss": 0.2043, + "step": 9780 + }, + { + "epoch": 2.97, + "learning_rate": 0.00010817909730914968, + "loss": 0.0908, + "step": 9781 + }, + { + "epoch": 2.97, + "learning_rate": 0.00010815133409411562, + "loss": 0.1446, + "step": 9782 + }, + { + "epoch": 2.97, + "learning_rate": 0.00010812357243363678, + "loss": 0.1188, + "step": 9783 + }, + { + "epoch": 2.97, + "learning_rate": 0.00010809581232874442, + "loss": 0.0745, + "step": 9784 + }, + { + "epoch": 2.97, + "learning_rate": 0.00010806805378046974, + "loss": 0.1104, + "step": 9785 + }, + { + "epoch": 2.97, + "learning_rate": 0.00010804029678984398, + "loss": 0.0795, + "step": 9786 + }, + { + "epoch": 2.97, + "learning_rate": 0.00010801254135789808, + "loss": 0.0891, + "step": 9787 + }, + { + "epoch": 2.97, + "learning_rate": 0.00010798478748566313, + "loss": 0.0743, + "step": 9788 + }, + { + "epoch": 2.97, + "learning_rate": 0.00010795703517417012, + "loss": 0.0499, + "step": 9789 + }, + { + "epoch": 2.97, + "learning_rate": 0.00010792928442444996, + "loss": 0.0792, + "step": 9790 + }, + { + "epoch": 2.97, + "learning_rate": 0.0001079015352375335, + "loss": 0.0659, + "step": 9791 + }, + { + "epoch": 2.97, + "learning_rate": 0.00010787378761445163, + "loss": 0.0341, + "step": 9792 + }, + { + "epoch": 2.97, + "learning_rate": 0.00010784604155623496, + "loss": 0.1517, + "step": 9793 + }, + { + "epoch": 2.97, + "learning_rate": 0.00010781829706391423, + "loss": 0.0776, + "step": 9794 + }, + { + "epoch": 2.97, + "learning_rate": 0.00010779055413852005, + "loss": 0.1678, + "step": 9795 + }, + { + "epoch": 2.97, + "learning_rate": 0.00010776281278108303, + "loss": 0.1096, + "step": 9796 + }, + { + "epoch": 2.97, + "learning_rate": 0.00010773507299263369, + "loss": 0.128, + "step": 9797 + }, + { + "epoch": 2.97, + "learning_rate": 0.0001077073347742024, + "loss": 0.1069, + "step": 9798 + }, + { + "epoch": 2.98, + "learning_rate": 0.0001076795981268196, + "loss": 0.047, + "step": 9799 + }, + { + "epoch": 2.98, + "learning_rate": 0.00010765186305151562, + "loss": 0.1281, + "step": 9800 + }, + { + "epoch": 2.98, + "learning_rate": 0.00010762412954932076, + "loss": 0.1576, + "step": 9801 + }, + { + "epoch": 2.98, + "learning_rate": 0.00010759639762126529, + "loss": 0.1416, + "step": 9802 + }, + { + "epoch": 2.98, + "learning_rate": 0.0001075686672683792, + "loss": 0.1058, + "step": 9803 + }, + { + "epoch": 2.98, + "learning_rate": 0.00010754093849169273, + "loss": 0.1348, + "step": 9804 + }, + { + "epoch": 2.98, + "learning_rate": 0.00010751321129223586, + "loss": 0.136, + "step": 9805 + }, + { + "epoch": 2.98, + "learning_rate": 0.00010748548567103863, + "loss": 0.1399, + "step": 9806 + }, + { + "epoch": 2.98, + "learning_rate": 0.00010745776162913095, + "loss": 0.0538, + "step": 9807 + }, + { + "epoch": 2.98, + "learning_rate": 0.00010743003916754263, + "loss": 0.06, + "step": 9808 + }, + { + "epoch": 2.98, + "learning_rate": 0.00010740231828730354, + "loss": 0.0439, + "step": 9809 + }, + { + "epoch": 2.98, + "learning_rate": 0.00010737459898944339, + "loss": 0.053, + "step": 9810 + }, + { + "epoch": 2.98, + "learning_rate": 0.00010734688127499188, + "loss": 0.1314, + "step": 9811 + }, + { + "epoch": 2.98, + "learning_rate": 0.00010731916514497875, + "loss": 0.1487, + "step": 9812 + }, + { + "epoch": 2.98, + "learning_rate": 0.00010729145060043335, + "loss": 0.1744, + "step": 9813 + }, + { + "epoch": 2.98, + "learning_rate": 0.00010726373764238536, + "loss": 0.1271, + "step": 9814 + }, + { + "epoch": 2.98, + "learning_rate": 0.00010723602627186419, + "loss": 0.1208, + "step": 9815 + }, + { + "epoch": 2.98, + "learning_rate": 0.00010720831648989925, + "loss": 0.0713, + "step": 9816 + }, + { + "epoch": 2.98, + "learning_rate": 0.00010718060829751993, + "loss": 0.0524, + "step": 9817 + }, + { + "epoch": 2.98, + "learning_rate": 0.00010715290169575535, + "loss": 0.1148, + "step": 9818 + }, + { + "epoch": 2.98, + "learning_rate": 0.00010712519668563486, + "loss": 0.1067, + "step": 9819 + }, + { + "epoch": 2.98, + "learning_rate": 0.00010709749326818758, + "loss": 0.0487, + "step": 9820 + }, + { + "epoch": 2.98, + "learning_rate": 0.0001070697914444426, + "loss": 0.1306, + "step": 9821 + }, + { + "epoch": 2.98, + "learning_rate": 0.00010704209121542902, + "loss": 0.1062, + "step": 9822 + }, + { + "epoch": 2.98, + "learning_rate": 0.00010701439258217574, + "loss": 0.0861, + "step": 9823 + }, + { + "epoch": 2.98, + "learning_rate": 0.00010698669554571173, + "loss": 0.1051, + "step": 9824 + }, + { + "epoch": 2.98, + "learning_rate": 0.00010695900010706584, + "loss": 0.1471, + "step": 9825 + }, + { + "epoch": 2.98, + "learning_rate": 0.00010693130626726686, + "loss": 0.0585, + "step": 9826 + }, + { + "epoch": 2.98, + "learning_rate": 0.00010690361402734365, + "loss": 0.1421, + "step": 9827 + }, + { + "epoch": 2.98, + "learning_rate": 0.00010687592338832472, + "loss": 0.0974, + "step": 9828 + }, + { + "epoch": 2.98, + "learning_rate": 0.00010684823435123878, + "loss": 0.0686, + "step": 9829 + }, + { + "epoch": 2.98, + "learning_rate": 0.00010682054691711438, + "loss": 0.1139, + "step": 9830 + }, + { + "epoch": 2.98, + "learning_rate": 0.00010679286108698006, + "loss": 0.0127, + "step": 9831 + }, + { + "epoch": 2.99, + "learning_rate": 0.00010676517686186426, + "loss": 0.0814, + "step": 9832 + }, + { + "epoch": 2.99, + "learning_rate": 0.00010673749424279534, + "loss": 0.125, + "step": 9833 + }, + { + "epoch": 2.99, + "learning_rate": 0.00010670981323080163, + "loss": 0.0979, + "step": 9834 + }, + { + "epoch": 2.99, + "learning_rate": 0.00010668213382691139, + "loss": 0.0858, + "step": 9835 + }, + { + "epoch": 2.99, + "learning_rate": 0.00010665445603215289, + "loss": 0.1403, + "step": 9836 + }, + { + "epoch": 2.99, + "learning_rate": 0.00010662677984755424, + "loss": 0.1151, + "step": 9837 + }, + { + "epoch": 2.99, + "learning_rate": 0.00010659910527414352, + "loss": 0.1019, + "step": 9838 + }, + { + "epoch": 2.99, + "learning_rate": 0.00010657143231294875, + "loss": 0.1006, + "step": 9839 + }, + { + "epoch": 2.99, + "learning_rate": 0.00010654376096499794, + "loss": 0.0527, + "step": 9840 + }, + { + "epoch": 2.99, + "learning_rate": 0.00010651609123131896, + "loss": 0.1036, + "step": 9841 + }, + { + "epoch": 2.99, + "learning_rate": 0.00010648842311293972, + "loss": 0.0484, + "step": 9842 + }, + { + "epoch": 2.99, + "learning_rate": 0.00010646075661088794, + "loss": 0.0366, + "step": 9843 + }, + { + "epoch": 2.99, + "learning_rate": 0.00010643309172619134, + "loss": 0.0849, + "step": 9844 + }, + { + "epoch": 2.99, + "learning_rate": 0.00010640542845987764, + "loss": 0.1308, + "step": 9845 + }, + { + "epoch": 2.99, + "learning_rate": 0.00010637776681297443, + "loss": 0.0648, + "step": 9846 + }, + { + "epoch": 2.99, + "learning_rate": 0.00010635010678650929, + "loss": 0.0457, + "step": 9847 + }, + { + "epoch": 2.99, + "learning_rate": 0.00010632244838150964, + "loss": 0.0454, + "step": 9848 + }, + { + "epoch": 2.99, + "learning_rate": 0.00010629479159900298, + "loss": 0.0691, + "step": 9849 + }, + { + "epoch": 2.99, + "learning_rate": 0.00010626713644001661, + "loss": 0.1018, + "step": 9850 + }, + { + "epoch": 2.99, + "learning_rate": 0.0001062394829055779, + "loss": 0.0834, + "step": 9851 + }, + { + "epoch": 2.99, + "learning_rate": 0.00010621183099671407, + "loss": 0.0651, + "step": 9852 + }, + { + "epoch": 2.99, + "learning_rate": 0.00010618418071445238, + "loss": 0.1154, + "step": 9853 + }, + { + "epoch": 2.99, + "learning_rate": 0.00010615653205981978, + "loss": 0.09, + "step": 9854 + }, + { + "epoch": 2.99, + "learning_rate": 0.00010612888503384348, + "loss": 0.0387, + "step": 9855 + }, + { + "epoch": 2.99, + "learning_rate": 0.00010610123963755044, + "loss": 0.0741, + "step": 9856 + }, + { + "epoch": 2.99, + "learning_rate": 0.00010607359587196764, + "loss": 0.0764, + "step": 9857 + }, + { + "epoch": 2.99, + "learning_rate": 0.00010604595373812199, + "loss": 0.1064, + "step": 9858 + }, + { + "epoch": 2.99, + "learning_rate": 0.0001060183132370402, + "loss": 0.104, + "step": 9859 + }, + { + "epoch": 2.99, + "learning_rate": 0.00010599067436974908, + "loss": 0.0469, + "step": 9860 + }, + { + "epoch": 2.99, + "learning_rate": 0.00010596303713727538, + "loss": 0.0536, + "step": 9861 + }, + { + "epoch": 2.99, + "learning_rate": 0.00010593540154064571, + "loss": 0.1591, + "step": 9862 + }, + { + "epoch": 2.99, + "learning_rate": 0.00010590776758088668, + "loss": 0.0582, + "step": 9863 + }, + { + "epoch": 2.99, + "learning_rate": 0.00010588013525902473, + "loss": 0.0677, + "step": 9864 + }, + { + "epoch": 3.0, + "learning_rate": 0.00010585250457608641, + "loss": 0.1071, + "step": 9865 + }, + { + "epoch": 3.0, + "learning_rate": 0.00010582487553309807, + "loss": 0.035, + "step": 9866 + }, + { + "epoch": 3.0, + "learning_rate": 0.00010579724813108605, + "loss": 0.0788, + "step": 9867 + }, + { + "epoch": 3.0, + "learning_rate": 0.00010576962237107674, + "loss": 0.1164, + "step": 9868 + }, + { + "epoch": 3.0, + "learning_rate": 0.00010574199825409614, + "loss": 0.0507, + "step": 9869 + }, + { + "epoch": 3.0, + "learning_rate": 0.00010571437578117054, + "loss": 0.1154, + "step": 9870 + }, + { + "epoch": 3.0, + "learning_rate": 0.00010568675495332604, + "loss": 0.0903, + "step": 9871 + }, + { + "epoch": 3.0, + "learning_rate": 0.0001056591357715886, + "loss": 0.1689, + "step": 9872 + }, + { + "epoch": 3.0, + "learning_rate": 0.00010563151823698431, + "loss": 0.0416, + "step": 9873 + }, + { + "epoch": 3.0, + "learning_rate": 0.00010560390235053896, + "loss": 0.0869, + "step": 9874 + }, + { + "epoch": 3.0, + "learning_rate": 0.00010557628811327843, + "loss": 0.1278, + "step": 9875 + }, + { + "epoch": 3.0, + "learning_rate": 0.00010554867552622854, + "loss": 0.0748, + "step": 9876 + }, + { + "epoch": 3.0, + "learning_rate": 0.00010552106459041502, + "loss": 0.0948, + "step": 9877 + }, + { + "epoch": 3.0, + "learning_rate": 0.00010549345530686353, + "loss": 0.0832, + "step": 9878 + }, + { + "epoch": 3.0, + "learning_rate": 0.00010546584767659962, + "loss": 0.1374, + "step": 9879 + }, + { + "epoch": 3.0, + "learning_rate": 0.00010543824170064888, + "loss": 0.0728, + "step": 9880 + }, + { + "epoch": 3.0, + "learning_rate": 0.00010541063738003679, + "loss": 0.111, + "step": 9881 + }, + { + "epoch": 3.0, + "learning_rate": 0.00010538303471578877, + "loss": 0.0364, + "step": 9882 + }, + { + "epoch": 3.0, + "learning_rate": 0.00010535543370893022, + "loss": 0.0594, + "step": 9883 + }, + { + "epoch": 3.0, + "learning_rate": 0.00010532783436048633, + "loss": 0.0658, + "step": 9884 + }, + { + "epoch": 3.0, + "learning_rate": 0.00010530023667148239, + "loss": 0.079, + "step": 9885 + }, + { + "epoch": 3.0, + "learning_rate": 0.00010527264064294359, + "loss": 0.0157, + "step": 9886 + }, + { + "epoch": 3.0, + "learning_rate": 0.00010524504627589503, + "loss": 0.02, + "step": 9887 + }, + { + "epoch": 3.0, + "learning_rate": 0.00010521745357136178, + "loss": 0.1038, + "step": 9888 + }, + { + "epoch": 3.0, + "learning_rate": 0.00010518986253036878, + "loss": 0.0819, + "step": 9889 + }, + { + "epoch": 3.0, + "learning_rate": 0.00010516227315394097, + "loss": 0.087, + "step": 9890 + }, + { + "epoch": 3.0, + "learning_rate": 0.00010513468544310324, + "loss": 0.0555, + "step": 9891 + }, + { + "epoch": 3.0, + "learning_rate": 0.00010510709939888038, + "loss": 0.0266, + "step": 9892 + }, + { + "epoch": 3.0, + "learning_rate": 0.00010507951502229718, + "loss": 0.0139, + "step": 9893 + }, + { + "epoch": 3.0, + "learning_rate": 0.00010505193231437821, + "loss": 0.0246, + "step": 9894 + }, + { + "epoch": 3.0, + "learning_rate": 0.00010502435127614815, + "loss": 0.0345, + "step": 9895 + }, + { + "epoch": 3.0, + "learning_rate": 0.00010499677190863152, + "loss": 0.0588, + "step": 9896 + }, + { + "epoch": 3.01, + "learning_rate": 0.00010496919421285288, + "loss": 0.0475, + "step": 9897 + }, + { + "epoch": 3.01, + "learning_rate": 0.00010494161818983667, + "loss": 0.0604, + "step": 9898 + }, + { + "epoch": 3.01, + "learning_rate": 0.00010491404384060716, + "loss": 0.0469, + "step": 9899 + }, + { + "epoch": 3.01, + "learning_rate": 0.0001048864711661887, + "loss": 0.0385, + "step": 9900 + }, + { + "epoch": 3.01, + "learning_rate": 0.00010485890016760554, + "loss": 0.0455, + "step": 9901 + }, + { + "epoch": 3.01, + "learning_rate": 0.00010483133084588187, + "loss": 0.0968, + "step": 9902 + }, + { + "epoch": 3.01, + "learning_rate": 0.00010480376320204184, + "loss": 0.0316, + "step": 9903 + }, + { + "epoch": 3.01, + "learning_rate": 0.00010477619723710943, + "loss": 0.0619, + "step": 9904 + }, + { + "epoch": 3.01, + "learning_rate": 0.00010474863295210867, + "loss": 0.0084, + "step": 9905 + }, + { + "epoch": 3.01, + "learning_rate": 0.00010472107034806352, + "loss": 0.0429, + "step": 9906 + }, + { + "epoch": 3.01, + "learning_rate": 0.00010469350942599781, + "loss": 0.0202, + "step": 9907 + }, + { + "epoch": 3.01, + "learning_rate": 0.00010466595018693541, + "loss": 0.0182, + "step": 9908 + }, + { + "epoch": 3.01, + "learning_rate": 0.00010463839263189998, + "loss": 0.0663, + "step": 9909 + }, + { + "epoch": 3.01, + "learning_rate": 0.00010461083676191525, + "loss": 0.0354, + "step": 9910 + }, + { + "epoch": 3.01, + "learning_rate": 0.00010458328257800482, + "loss": 0.0132, + "step": 9911 + }, + { + "epoch": 3.01, + "learning_rate": 0.00010455573008119227, + "loss": 0.023, + "step": 9912 + }, + { + "epoch": 3.01, + "learning_rate": 0.0001045281792725011, + "loss": 0.0411, + "step": 9913 + }, + { + "epoch": 3.01, + "learning_rate": 0.00010450063015295468, + "loss": 0.059, + "step": 9914 + }, + { + "epoch": 3.01, + "learning_rate": 0.00010447308272357646, + "loss": 0.0596, + "step": 9915 + }, + { + "epoch": 3.01, + "learning_rate": 0.00010444553698538969, + "loss": 0.0656, + "step": 9916 + }, + { + "epoch": 3.01, + "learning_rate": 0.00010441799293941762, + "loss": 0.0514, + "step": 9917 + }, + { + "epoch": 3.01, + "learning_rate": 0.00010439045058668347, + "loss": 0.0471, + "step": 9918 + }, + { + "epoch": 3.01, + "learning_rate": 0.00010436290992821035, + "loss": 0.0562, + "step": 9919 + }, + { + "epoch": 3.01, + "learning_rate": 0.00010433537096502126, + "loss": 0.0308, + "step": 9920 + }, + { + "epoch": 3.01, + "learning_rate": 0.00010430783369813923, + "loss": 0.0366, + "step": 9921 + }, + { + "epoch": 3.01, + "learning_rate": 0.0001042802981285872, + "loss": 0.0294, + "step": 9922 + }, + { + "epoch": 3.01, + "learning_rate": 0.00010425276425738802, + "loss": 0.0456, + "step": 9923 + }, + { + "epoch": 3.01, + "learning_rate": 0.00010422523208556455, + "loss": 0.037, + "step": 9924 + }, + { + "epoch": 3.01, + "learning_rate": 0.0001041977016141394, + "loss": 0.0706, + "step": 9925 + }, + { + "epoch": 3.01, + "learning_rate": 0.00010417017284413534, + "loss": 0.0646, + "step": 9926 + }, + { + "epoch": 3.01, + "learning_rate": 0.00010414264577657498, + "loss": 0.0563, + "step": 9927 + }, + { + "epoch": 3.01, + "learning_rate": 0.00010411512041248084, + "loss": 0.0561, + "step": 9928 + }, + { + "epoch": 3.01, + "learning_rate": 0.00010408759675287544, + "loss": 0.0397, + "step": 9929 + }, + { + "epoch": 3.02, + "learning_rate": 0.00010406007479878116, + "loss": 0.0636, + "step": 9930 + }, + { + "epoch": 3.02, + "learning_rate": 0.00010403255455122037, + "loss": 0.0172, + "step": 9931 + }, + { + "epoch": 3.02, + "learning_rate": 0.00010400503601121539, + "loss": 0.0397, + "step": 9932 + }, + { + "epoch": 3.02, + "learning_rate": 0.00010397751917978843, + "loss": 0.0482, + "step": 9933 + }, + { + "epoch": 3.02, + "learning_rate": 0.00010395000405796176, + "loss": 0.0266, + "step": 9934 + }, + { + "epoch": 3.02, + "learning_rate": 0.00010392249064675734, + "loss": 0.0234, + "step": 9935 + }, + { + "epoch": 3.02, + "learning_rate": 0.00010389497894719724, + "loss": 0.0366, + "step": 9936 + }, + { + "epoch": 3.02, + "learning_rate": 0.00010386746896030347, + "loss": 0.0503, + "step": 9937 + }, + { + "epoch": 3.02, + "learning_rate": 0.00010383996068709794, + "loss": 0.0258, + "step": 9938 + }, + { + "epoch": 3.02, + "learning_rate": 0.00010381245412860262, + "loss": 0.0374, + "step": 9939 + }, + { + "epoch": 3.02, + "learning_rate": 0.00010378494928583908, + "loss": 0.0235, + "step": 9940 + }, + { + "epoch": 3.02, + "learning_rate": 0.00010375744615982916, + "loss": 0.046, + "step": 9941 + }, + { + "epoch": 3.02, + "learning_rate": 0.00010372994475159452, + "loss": 0.049, + "step": 9942 + }, + { + "epoch": 3.02, + "learning_rate": 0.00010370244506215673, + "loss": 0.1063, + "step": 9943 + }, + { + "epoch": 3.02, + "learning_rate": 0.00010367494709253738, + "loss": 0.0537, + "step": 9944 + }, + { + "epoch": 3.02, + "learning_rate": 0.0001036474508437579, + "loss": 0.0539, + "step": 9945 + }, + { + "epoch": 3.02, + "learning_rate": 0.00010361995631683965, + "loss": 0.0579, + "step": 9946 + }, + { + "epoch": 3.02, + "learning_rate": 0.00010359246351280402, + "loss": 0.0288, + "step": 9947 + }, + { + "epoch": 3.02, + "learning_rate": 0.00010356497243267228, + "loss": 0.0364, + "step": 9948 + }, + { + "epoch": 3.02, + "learning_rate": 0.00010353748307746572, + "loss": 0.0579, + "step": 9949 + }, + { + "epoch": 3.02, + "learning_rate": 0.00010350999544820535, + "loss": 0.0378, + "step": 9950 + }, + { + "epoch": 3.02, + "learning_rate": 0.00010348250954591232, + "loss": 0.057, + "step": 9951 + }, + { + "epoch": 3.02, + "learning_rate": 0.00010345502537160765, + "loss": 0.0235, + "step": 9952 + }, + { + "epoch": 3.02, + "learning_rate": 0.00010342754292631228, + "loss": 0.0394, + "step": 9953 + }, + { + "epoch": 3.02, + "learning_rate": 0.00010340006221104718, + "loss": 0.0487, + "step": 9954 + }, + { + "epoch": 3.02, + "learning_rate": 0.00010337258322683305, + "loss": 0.0213, + "step": 9955 + }, + { + "epoch": 3.02, + "learning_rate": 0.00010334510597469073, + "loss": 0.0655, + "step": 9956 + }, + { + "epoch": 3.02, + "learning_rate": 0.00010331763045564091, + "loss": 0.0506, + "step": 9957 + }, + { + "epoch": 3.02, + "learning_rate": 0.00010329015667070424, + "loss": 0.0472, + "step": 9958 + }, + { + "epoch": 3.02, + "learning_rate": 0.0001032626846209013, + "loss": 0.0731, + "step": 9959 + }, + { + "epoch": 3.02, + "learning_rate": 0.00010323521430725251, + "loss": 0.0355, + "step": 9960 + }, + { + "epoch": 3.02, + "learning_rate": 0.00010320774573077839, + "loss": 0.0671, + "step": 9961 + }, + { + "epoch": 3.02, + "learning_rate": 0.0001031802788924993, + "loss": 0.0461, + "step": 9962 + }, + { + "epoch": 3.03, + "learning_rate": 0.00010315281379343554, + "loss": 0.0507, + "step": 9963 + }, + { + "epoch": 3.03, + "learning_rate": 0.00010312535043460745, + "loss": 0.0551, + "step": 9964 + }, + { + "epoch": 3.03, + "learning_rate": 0.00010309788881703507, + "loss": 0.0377, + "step": 9965 + }, + { + "epoch": 3.03, + "learning_rate": 0.00010307042894173857, + "loss": 0.03, + "step": 9966 + }, + { + "epoch": 3.03, + "learning_rate": 0.00010304297080973801, + "loss": 0.0462, + "step": 9967 + }, + { + "epoch": 3.03, + "learning_rate": 0.0001030155144220534, + "loss": 0.0359, + "step": 9968 + }, + { + "epoch": 3.03, + "learning_rate": 0.00010298805977970468, + "loss": 0.0314, + "step": 9969 + }, + { + "epoch": 3.03, + "learning_rate": 0.00010296060688371165, + "loss": 0.0599, + "step": 9970 + }, + { + "epoch": 3.03, + "learning_rate": 0.00010293315573509412, + "loss": 0.0605, + "step": 9971 + }, + { + "epoch": 3.03, + "learning_rate": 0.00010290570633487182, + "loss": 0.0275, + "step": 9972 + }, + { + "epoch": 3.03, + "learning_rate": 0.00010287825868406444, + "loss": 0.0987, + "step": 9973 + }, + { + "epoch": 3.03, + "learning_rate": 0.00010285081278369163, + "loss": 0.0286, + "step": 9974 + }, + { + "epoch": 3.03, + "learning_rate": 0.00010282336863477281, + "loss": 0.0165, + "step": 9975 + }, + { + "epoch": 3.03, + "learning_rate": 0.0001027959262383275, + "loss": 0.0138, + "step": 9976 + }, + { + "epoch": 3.03, + "learning_rate": 0.0001027684855953751, + "loss": 0.0561, + "step": 9977 + }, + { + "epoch": 3.03, + "learning_rate": 0.0001027410467069349, + "loss": 0.0522, + "step": 9978 + }, + { + "epoch": 3.03, + "learning_rate": 0.00010271360957402628, + "loss": 0.0679, + "step": 9979 + }, + { + "epoch": 3.03, + "learning_rate": 0.00010268617419766847, + "loss": 0.0525, + "step": 9980 + }, + { + "epoch": 3.03, + "learning_rate": 0.00010265874057888045, + "loss": 0.0612, + "step": 9981 + }, + { + "epoch": 3.03, + "learning_rate": 0.00010263130871868142, + "loss": 0.0304, + "step": 9982 + }, + { + "epoch": 3.03, + "learning_rate": 0.00010260387861809034, + "loss": 0.0369, + "step": 9983 + }, + { + "epoch": 3.03, + "learning_rate": 0.00010257645027812622, + "loss": 0.0508, + "step": 9984 + }, + { + "epoch": 3.03, + "learning_rate": 0.0001025490236998079, + "loss": 0.0187, + "step": 9985 + }, + { + "epoch": 3.03, + "learning_rate": 0.00010252159888415417, + "loss": 0.0398, + "step": 9986 + }, + { + "epoch": 3.03, + "learning_rate": 0.00010249417583218384, + "loss": 0.0249, + "step": 9987 + }, + { + "epoch": 3.03, + "learning_rate": 0.00010246675454491554, + "loss": 0.0441, + "step": 9988 + }, + { + "epoch": 3.03, + "learning_rate": 0.00010243933502336792, + "loss": 0.0255, + "step": 9989 + }, + { + "epoch": 3.03, + "learning_rate": 0.00010241191726855962, + "loss": 0.0259, + "step": 9990 + }, + { + "epoch": 3.03, + "learning_rate": 0.00010238450128150895, + "loss": 0.0358, + "step": 9991 + }, + { + "epoch": 3.03, + "learning_rate": 0.00010235708706323444, + "loss": 0.0317, + "step": 9992 + }, + { + "epoch": 3.03, + "learning_rate": 0.00010232967461475443, + "loss": 0.0206, + "step": 9993 + }, + { + "epoch": 3.03, + "learning_rate": 0.00010230226393708722, + "loss": 0.0351, + "step": 9994 + }, + { + "epoch": 3.03, + "learning_rate": 0.00010227485503125107, + "loss": 0.0539, + "step": 9995 + }, + { + "epoch": 3.04, + "learning_rate": 0.00010224744789826406, + "loss": 0.0538, + "step": 9996 + }, + { + "epoch": 3.04, + "learning_rate": 0.00010222004253914433, + "loss": 0.0565, + "step": 9997 + }, + { + "epoch": 3.04, + "learning_rate": 0.00010219263895490992, + "loss": 0.0585, + "step": 9998 + }, + { + "epoch": 3.04, + "learning_rate": 0.00010216523714657878, + "loss": 0.071, + "step": 9999 + }, + { + "epoch": 3.04, + "learning_rate": 0.00010213783711516882, + "loss": 0.083, + "step": 10000 + }, + { + "epoch": 3.04, + "learning_rate": 0.00010211043886169782, + "loss": 0.0698, + "step": 10001 + }, + { + "epoch": 3.04, + "learning_rate": 0.00010208304238718358, + "loss": 0.0224, + "step": 10002 + }, + { + "epoch": 3.04, + "learning_rate": 0.00010205564769264383, + "loss": 0.0354, + "step": 10003 + }, + { + "epoch": 3.04, + "learning_rate": 0.00010202825477909616, + "loss": 0.0607, + "step": 10004 + }, + { + "epoch": 3.04, + "learning_rate": 0.00010200086364755819, + "loss": 0.0477, + "step": 10005 + }, + { + "epoch": 3.04, + "learning_rate": 0.00010197347429904734, + "loss": 0.039, + "step": 10006 + }, + { + "epoch": 3.04, + "learning_rate": 0.00010194608673458107, + "loss": 0.0508, + "step": 10007 + }, + { + "epoch": 3.04, + "learning_rate": 0.00010191870095517676, + "loss": 0.0688, + "step": 10008 + }, + { + "epoch": 3.04, + "learning_rate": 0.00010189131696185172, + "loss": 0.0405, + "step": 10009 + }, + { + "epoch": 3.04, + "learning_rate": 0.0001018639347556232, + "loss": 0.0525, + "step": 10010 + }, + { + "epoch": 3.04, + "learning_rate": 0.00010183655433750834, + "loss": 0.035, + "step": 10011 + }, + { + "epoch": 3.04, + "learning_rate": 0.00010180917570852423, + "loss": 0.0406, + "step": 10012 + }, + { + "epoch": 3.04, + "learning_rate": 0.00010178179886968791, + "loss": 0.0278, + "step": 10013 + }, + { + "epoch": 3.04, + "learning_rate": 0.00010175442382201638, + "loss": 0.041, + "step": 10014 + }, + { + "epoch": 3.04, + "learning_rate": 0.00010172705056652659, + "loss": 0.0217, + "step": 10015 + }, + { + "epoch": 3.04, + "learning_rate": 0.00010169967910423524, + "loss": 0.0481, + "step": 10016 + }, + { + "epoch": 3.04, + "learning_rate": 0.00010167230943615918, + "loss": 0.0504, + "step": 10017 + }, + { + "epoch": 3.04, + "learning_rate": 0.0001016449415633151, + "loss": 0.0625, + "step": 10018 + }, + { + "epoch": 3.04, + "learning_rate": 0.00010161757548671961, + "loss": 0.0399, + "step": 10019 + }, + { + "epoch": 3.04, + "learning_rate": 0.00010159021120738941, + "loss": 0.0155, + "step": 10020 + }, + { + "epoch": 3.04, + "learning_rate": 0.00010156284872634082, + "loss": 0.0124, + "step": 10021 + }, + { + "epoch": 3.04, + "learning_rate": 0.00010153548804459037, + "loss": 0.0555, + "step": 10022 + }, + { + "epoch": 3.04, + "learning_rate": 0.00010150812916315443, + "loss": 0.0373, + "step": 10023 + }, + { + "epoch": 3.04, + "learning_rate": 0.0001014807720830493, + "loss": 0.0357, + "step": 10024 + }, + { + "epoch": 3.04, + "learning_rate": 0.00010145341680529121, + "loss": 0.0465, + "step": 10025 + }, + { + "epoch": 3.04, + "learning_rate": 0.00010142606333089631, + "loss": 0.0417, + "step": 10026 + }, + { + "epoch": 3.04, + "learning_rate": 0.00010139871166088069, + "loss": 0.0194, + "step": 10027 + }, + { + "epoch": 3.04, + "learning_rate": 0.00010137136179626045, + "loss": 0.0392, + "step": 10028 + }, + { + "epoch": 3.05, + "learning_rate": 0.00010134401373805148, + "loss": 0.0127, + "step": 10029 + }, + { + "epoch": 3.05, + "learning_rate": 0.0001013166674872698, + "loss": 0.0215, + "step": 10030 + }, + { + "epoch": 3.05, + "learning_rate": 0.0001012893230449311, + "loss": 0.0366, + "step": 10031 + }, + { + "epoch": 3.05, + "learning_rate": 0.00010126198041205118, + "loss": 0.0295, + "step": 10032 + }, + { + "epoch": 3.05, + "learning_rate": 0.0001012346395896458, + "loss": 0.0336, + "step": 10033 + }, + { + "epoch": 3.05, + "learning_rate": 0.00010120730057873053, + "loss": 0.017, + "step": 10034 + }, + { + "epoch": 3.05, + "learning_rate": 0.00010117996338032102, + "loss": 0.0349, + "step": 10035 + }, + { + "epoch": 3.05, + "learning_rate": 0.00010115262799543264, + "loss": 0.0289, + "step": 10036 + }, + { + "epoch": 3.05, + "learning_rate": 0.00010112529442508089, + "loss": 0.0543, + "step": 10037 + }, + { + "epoch": 3.05, + "learning_rate": 0.00010109796267028115, + "loss": 0.0723, + "step": 10038 + }, + { + "epoch": 3.05, + "learning_rate": 0.00010107063273204869, + "loss": 0.0158, + "step": 10039 + }, + { + "epoch": 3.05, + "learning_rate": 0.00010104330461139873, + "loss": 0.0326, + "step": 10040 + }, + { + "epoch": 3.05, + "learning_rate": 0.0001010159783093465, + "loss": 0.0537, + "step": 10041 + }, + { + "epoch": 3.05, + "learning_rate": 0.00010098865382690695, + "loss": 0.0606, + "step": 10042 + }, + { + "epoch": 3.05, + "learning_rate": 0.00010096133116509522, + "loss": 0.0554, + "step": 10043 + }, + { + "epoch": 3.05, + "learning_rate": 0.00010093401032492621, + "loss": 0.0733, + "step": 10044 + }, + { + "epoch": 3.05, + "learning_rate": 0.00010090669130741485, + "loss": 0.0458, + "step": 10045 + }, + { + "epoch": 3.05, + "learning_rate": 0.00010087937411357598, + "loss": 0.0437, + "step": 10046 + }, + { + "epoch": 3.05, + "learning_rate": 0.00010085205874442429, + "loss": 0.0625, + "step": 10047 + }, + { + "epoch": 3.05, + "learning_rate": 0.00010082474520097446, + "loss": 0.0599, + "step": 10048 + }, + { + "epoch": 3.05, + "learning_rate": 0.00010079743348424117, + "loss": 0.0331, + "step": 10049 + }, + { + "epoch": 3.05, + "learning_rate": 0.00010077012359523893, + "loss": 0.051, + "step": 10050 + }, + { + "epoch": 3.05, + "learning_rate": 0.00010074281553498224, + "loss": 0.0669, + "step": 10051 + }, + { + "epoch": 3.05, + "learning_rate": 0.00010071550930448551, + "loss": 0.0517, + "step": 10052 + }, + { + "epoch": 3.05, + "learning_rate": 0.00010068820490476307, + "loss": 0.0938, + "step": 10053 + }, + { + "epoch": 3.05, + "learning_rate": 0.00010066090233682921, + "loss": 0.033, + "step": 10054 + }, + { + "epoch": 3.05, + "learning_rate": 0.00010063360160169814, + "loss": 0.0208, + "step": 10055 + }, + { + "epoch": 3.05, + "learning_rate": 0.0001006063027003841, + "loss": 0.0426, + "step": 10056 + }, + { + "epoch": 3.05, + "learning_rate": 0.00010057900563390098, + "loss": 0.0461, + "step": 10057 + }, + { + "epoch": 3.05, + "learning_rate": 0.00010055171040326286, + "loss": 0.0243, + "step": 10058 + }, + { + "epoch": 3.05, + "learning_rate": 0.00010052441700948371, + "loss": 0.0342, + "step": 10059 + }, + { + "epoch": 3.05, + "learning_rate": 0.00010049712545357739, + "loss": 0.0653, + "step": 10060 + }, + { + "epoch": 3.05, + "learning_rate": 0.0001004698357365577, + "loss": 0.087, + "step": 10061 + }, + { + "epoch": 3.06, + "learning_rate": 0.00010044254785943836, + "loss": 0.066, + "step": 10062 + }, + { + "epoch": 3.06, + "learning_rate": 0.00010041526182323302, + "loss": 0.0426, + "step": 10063 + }, + { + "epoch": 3.06, + "learning_rate": 0.00010038797762895531, + "loss": 0.0641, + "step": 10064 + }, + { + "epoch": 3.06, + "learning_rate": 0.00010036069527761874, + "loss": 0.0245, + "step": 10065 + }, + { + "epoch": 3.06, + "learning_rate": 0.00010033341477023681, + "loss": 0.054, + "step": 10066 + }, + { + "epoch": 3.06, + "learning_rate": 0.00010030613610782283, + "loss": 0.0432, + "step": 10067 + }, + { + "epoch": 3.06, + "learning_rate": 0.0001002788592913902, + "loss": 0.0601, + "step": 10068 + }, + { + "epoch": 3.06, + "learning_rate": 0.00010025158432195211, + "loss": 0.0472, + "step": 10069 + }, + { + "epoch": 3.06, + "learning_rate": 0.0001002243112005218, + "loss": 0.0247, + "step": 10070 + }, + { + "epoch": 3.06, + "learning_rate": 0.00010019703992811241, + "loss": 0.0477, + "step": 10071 + }, + { + "epoch": 3.06, + "learning_rate": 0.00010016977050573687, + "loss": 0.0321, + "step": 10072 + }, + { + "epoch": 3.06, + "learning_rate": 0.00010014250293440825, + "loss": 0.0552, + "step": 10073 + }, + { + "epoch": 3.06, + "learning_rate": 0.00010011523721513942, + "loss": 0.0681, + "step": 10074 + }, + { + "epoch": 3.06, + "learning_rate": 0.00010008797334894325, + "loss": 0.0408, + "step": 10075 + }, + { + "epoch": 3.06, + "learning_rate": 0.00010006071133683254, + "loss": 0.0463, + "step": 10076 + }, + { + "epoch": 3.06, + "learning_rate": 0.00010003345117981993, + "loss": 0.0579, + "step": 10077 + }, + { + "epoch": 3.06, + "learning_rate": 0.00010000619287891808, + "loss": 0.0453, + "step": 10078 + }, + { + "epoch": 3.06, + "learning_rate": 9.997893643513955e-05, + "loss": 0.0478, + "step": 10079 + }, + { + "epoch": 3.06, + "learning_rate": 9.995168184949685e-05, + "loss": 0.057, + "step": 10080 + }, + { + "epoch": 3.06, + "learning_rate": 9.992442912300247e-05, + "loss": 0.0639, + "step": 10081 + }, + { + "epoch": 3.06, + "learning_rate": 9.989717825666866e-05, + "loss": 0.0386, + "step": 10082 + }, + { + "epoch": 3.06, + "learning_rate": 9.986992925150768e-05, + "loss": 0.0461, + "step": 10083 + }, + { + "epoch": 3.06, + "learning_rate": 9.984268210853187e-05, + "loss": 0.037, + "step": 10084 + }, + { + "epoch": 3.06, + "learning_rate": 9.981543682875334e-05, + "loss": 0.0309, + "step": 10085 + }, + { + "epoch": 3.06, + "learning_rate": 9.978819341318423e-05, + "loss": 0.0375, + "step": 10086 + }, + { + "epoch": 3.06, + "learning_rate": 9.976095186283643e-05, + "loss": 0.0406, + "step": 10087 + }, + { + "epoch": 3.06, + "learning_rate": 9.973371217872195e-05, + "loss": 0.0082, + "step": 10088 + }, + { + "epoch": 3.06, + "learning_rate": 9.970647436185264e-05, + "loss": 0.014, + "step": 10089 + }, + { + "epoch": 3.06, + "learning_rate": 9.967923841324031e-05, + "loss": 0.0349, + "step": 10090 + }, + { + "epoch": 3.06, + "learning_rate": 9.965200433389678e-05, + "loss": 0.017, + "step": 10091 + }, + { + "epoch": 3.06, + "learning_rate": 9.962477212483359e-05, + "loss": 0.0735, + "step": 10092 + }, + { + "epoch": 3.06, + "learning_rate": 9.959754178706239e-05, + "loss": 0.0704, + "step": 10093 + }, + { + "epoch": 3.06, + "learning_rate": 9.957031332159471e-05, + "loss": 0.053, + "step": 10094 + }, + { + "epoch": 3.07, + "learning_rate": 9.954308672944202e-05, + "loss": 0.0383, + "step": 10095 + }, + { + "epoch": 3.07, + "learning_rate": 9.951586201161575e-05, + "loss": 0.0629, + "step": 10096 + }, + { + "epoch": 3.07, + "learning_rate": 9.948863916912708e-05, + "loss": 0.0261, + "step": 10097 + }, + { + "epoch": 3.07, + "learning_rate": 9.946141820298738e-05, + "loss": 0.0469, + "step": 10098 + }, + { + "epoch": 3.07, + "learning_rate": 9.943419911420774e-05, + "loss": 0.0508, + "step": 10099 + }, + { + "epoch": 3.07, + "learning_rate": 9.940698190379936e-05, + "loss": 0.0247, + "step": 10100 + }, + { + "epoch": 3.07, + "learning_rate": 9.937976657277326e-05, + "loss": 0.0573, + "step": 10101 + }, + { + "epoch": 3.07, + "learning_rate": 9.935255312214035e-05, + "loss": 0.025, + "step": 10102 + }, + { + "epoch": 3.07, + "learning_rate": 9.932534155291159e-05, + "loss": 0.039, + "step": 10103 + }, + { + "epoch": 3.07, + "learning_rate": 9.929813186609778e-05, + "loss": 0.0617, + "step": 10104 + }, + { + "epoch": 3.07, + "learning_rate": 9.927092406270967e-05, + "loss": 0.0354, + "step": 10105 + }, + { + "epoch": 3.07, + "learning_rate": 9.9243718143758e-05, + "loss": 0.048, + "step": 10106 + }, + { + "epoch": 3.07, + "learning_rate": 9.921651411025336e-05, + "loss": 0.0476, + "step": 10107 + }, + { + "epoch": 3.07, + "learning_rate": 9.918931196320629e-05, + "loss": 0.0418, + "step": 10108 + }, + { + "epoch": 3.07, + "learning_rate": 9.916211170362727e-05, + "loss": 0.0406, + "step": 10109 + }, + { + "epoch": 3.07, + "learning_rate": 9.913491333252673e-05, + "loss": 0.0439, + "step": 10110 + }, + { + "epoch": 3.07, + "learning_rate": 9.910771685091497e-05, + "loss": 0.0405, + "step": 10111 + }, + { + "epoch": 3.07, + "learning_rate": 9.908052225980237e-05, + "loss": 0.0494, + "step": 10112 + }, + { + "epoch": 3.07, + "learning_rate": 9.9053329560199e-05, + "loss": 0.0833, + "step": 10113 + }, + { + "epoch": 3.07, + "learning_rate": 9.902613875311502e-05, + "loss": 0.0459, + "step": 10114 + }, + { + "epoch": 3.07, + "learning_rate": 9.899894983956051e-05, + "loss": 0.0534, + "step": 10115 + }, + { + "epoch": 3.07, + "learning_rate": 9.897176282054546e-05, + "loss": 0.0473, + "step": 10116 + }, + { + "epoch": 3.07, + "learning_rate": 9.894457769707982e-05, + "loss": 0.0169, + "step": 10117 + }, + { + "epoch": 3.07, + "learning_rate": 9.891739447017334e-05, + "loss": 0.0383, + "step": 10118 + }, + { + "epoch": 3.07, + "learning_rate": 9.889021314083587e-05, + "loss": 0.0525, + "step": 10119 + }, + { + "epoch": 3.07, + "learning_rate": 9.886303371007711e-05, + "loss": 0.0546, + "step": 10120 + }, + { + "epoch": 3.07, + "learning_rate": 9.883585617890669e-05, + "loss": 0.0238, + "step": 10121 + }, + { + "epoch": 3.07, + "learning_rate": 9.880868054833425e-05, + "loss": 0.0715, + "step": 10122 + }, + { + "epoch": 3.07, + "learning_rate": 9.878150681936912e-05, + "loss": 0.0634, + "step": 10123 + }, + { + "epoch": 3.07, + "learning_rate": 9.875433499302079e-05, + "loss": 0.0312, + "step": 10124 + }, + { + "epoch": 3.07, + "learning_rate": 9.872716507029869e-05, + "loss": 0.0412, + "step": 10125 + }, + { + "epoch": 3.07, + "learning_rate": 9.869999705221203e-05, + "loss": 0.0705, + "step": 10126 + }, + { + "epoch": 3.07, + "learning_rate": 9.867283093977011e-05, + "loss": 0.0533, + "step": 10127 + }, + { + "epoch": 3.08, + "learning_rate": 9.864566673398194e-05, + "loss": 0.0947, + "step": 10128 + }, + { + "epoch": 3.08, + "learning_rate": 9.861850443585666e-05, + "loss": 0.0598, + "step": 10129 + }, + { + "epoch": 3.08, + "learning_rate": 9.859134404640327e-05, + "loss": 0.019, + "step": 10130 + }, + { + "epoch": 3.08, + "learning_rate": 9.856418556663068e-05, + "loss": 0.0447, + "step": 10131 + }, + { + "epoch": 3.08, + "learning_rate": 9.853702899754781e-05, + "loss": 0.0604, + "step": 10132 + }, + { + "epoch": 3.08, + "learning_rate": 9.850987434016337e-05, + "loss": 0.0814, + "step": 10133 + }, + { + "epoch": 3.08, + "learning_rate": 9.848272159548608e-05, + "loss": 0.0703, + "step": 10134 + }, + { + "epoch": 3.08, + "learning_rate": 9.845557076452461e-05, + "loss": 0.0376, + "step": 10135 + }, + { + "epoch": 3.08, + "learning_rate": 9.842842184828757e-05, + "loss": 0.042, + "step": 10136 + }, + { + "epoch": 3.08, + "learning_rate": 9.840127484778346e-05, + "loss": 0.0805, + "step": 10137 + }, + { + "epoch": 3.08, + "learning_rate": 9.837412976402062e-05, + "loss": 0.0405, + "step": 10138 + }, + { + "epoch": 3.08, + "learning_rate": 9.834698659800748e-05, + "loss": 0.01, + "step": 10139 + }, + { + "epoch": 3.08, + "learning_rate": 9.83198453507523e-05, + "loss": 0.034, + "step": 10140 + }, + { + "epoch": 3.08, + "learning_rate": 9.829270602326332e-05, + "loss": 0.0681, + "step": 10141 + }, + { + "epoch": 3.08, + "learning_rate": 9.826556861654871e-05, + "loss": 0.0906, + "step": 10142 + }, + { + "epoch": 3.08, + "learning_rate": 9.823843313161649e-05, + "loss": 0.027, + "step": 10143 + }, + { + "epoch": 3.08, + "learning_rate": 9.821129956947468e-05, + "loss": 0.0163, + "step": 10144 + }, + { + "epoch": 3.08, + "learning_rate": 9.818416793113123e-05, + "loss": 0.0349, + "step": 10145 + }, + { + "epoch": 3.08, + "learning_rate": 9.8157038217594e-05, + "loss": 0.0794, + "step": 10146 + }, + { + "epoch": 3.08, + "learning_rate": 9.81299104298708e-05, + "loss": 0.0497, + "step": 10147 + }, + { + "epoch": 3.08, + "learning_rate": 9.810278456896927e-05, + "loss": 0.0315, + "step": 10148 + }, + { + "epoch": 3.08, + "learning_rate": 9.807566063589711e-05, + "loss": 0.0208, + "step": 10149 + }, + { + "epoch": 3.08, + "learning_rate": 9.80485386316619e-05, + "loss": 0.0455, + "step": 10150 + }, + { + "epoch": 3.08, + "learning_rate": 9.802141855727113e-05, + "loss": 0.0293, + "step": 10151 + }, + { + "epoch": 3.08, + "learning_rate": 9.799430041373231e-05, + "loss": 0.0485, + "step": 10152 + }, + { + "epoch": 3.08, + "learning_rate": 9.796718420205265e-05, + "loss": 0.0448, + "step": 10153 + }, + { + "epoch": 3.08, + "learning_rate": 9.794006992323951e-05, + "loss": 0.0539, + "step": 10154 + }, + { + "epoch": 3.08, + "learning_rate": 9.791295757830012e-05, + "loss": 0.0302, + "step": 10155 + }, + { + "epoch": 3.08, + "learning_rate": 9.788584716824159e-05, + "loss": 0.0509, + "step": 10156 + }, + { + "epoch": 3.08, + "learning_rate": 9.785873869407106e-05, + "loss": 0.0385, + "step": 10157 + }, + { + "epoch": 3.08, + "learning_rate": 9.783163215679546e-05, + "loss": 0.0681, + "step": 10158 + }, + { + "epoch": 3.08, + "learning_rate": 9.780452755742174e-05, + "loss": 0.0439, + "step": 10159 + }, + { + "epoch": 3.08, + "learning_rate": 9.777742489695675e-05, + "loss": 0.0608, + "step": 10160 + }, + { + "epoch": 3.09, + "learning_rate": 9.775032417640728e-05, + "loss": 0.0499, + "step": 10161 + }, + { + "epoch": 3.09, + "learning_rate": 9.772322539678011e-05, + "loss": 0.0603, + "step": 10162 + }, + { + "epoch": 3.09, + "learning_rate": 9.769612855908178e-05, + "loss": 0.0201, + "step": 10163 + }, + { + "epoch": 3.09, + "learning_rate": 9.766903366431885e-05, + "loss": 0.061, + "step": 10164 + }, + { + "epoch": 3.09, + "learning_rate": 9.76419407134979e-05, + "loss": 0.0393, + "step": 10165 + }, + { + "epoch": 3.09, + "learning_rate": 9.761484970762526e-05, + "loss": 0.0191, + "step": 10166 + }, + { + "epoch": 3.09, + "learning_rate": 9.758776064770736e-05, + "loss": 0.0342, + "step": 10167 + }, + { + "epoch": 3.09, + "learning_rate": 9.756067353475055e-05, + "loss": 0.0322, + "step": 10168 + }, + { + "epoch": 3.09, + "learning_rate": 9.753358836976086e-05, + "loss": 0.0609, + "step": 10169 + }, + { + "epoch": 3.09, + "learning_rate": 9.75065051537445e-05, + "loss": 0.046, + "step": 10170 + }, + { + "epoch": 3.09, + "learning_rate": 9.747942388770755e-05, + "loss": 0.0232, + "step": 10171 + }, + { + "epoch": 3.09, + "learning_rate": 9.745234457265597e-05, + "loss": 0.0667, + "step": 10172 + }, + { + "epoch": 3.09, + "learning_rate": 9.742526720959574e-05, + "loss": 0.043, + "step": 10173 + }, + { + "epoch": 3.09, + "learning_rate": 9.739819179953261e-05, + "loss": 0.0174, + "step": 10174 + }, + { + "epoch": 3.09, + "learning_rate": 9.737111834347242e-05, + "loss": 0.0533, + "step": 10175 + }, + { + "epoch": 3.09, + "learning_rate": 9.734404684242085e-05, + "loss": 0.0434, + "step": 10176 + }, + { + "epoch": 3.09, + "learning_rate": 9.731697729738352e-05, + "loss": 0.0211, + "step": 10177 + }, + { + "epoch": 3.09, + "learning_rate": 9.728990970936606e-05, + "loss": 0.0496, + "step": 10178 + }, + { + "epoch": 3.09, + "learning_rate": 9.726284407937381e-05, + "loss": 0.0369, + "step": 10179 + }, + { + "epoch": 3.09, + "learning_rate": 9.723578040841226e-05, + "loss": 0.0508, + "step": 10180 + }, + { + "epoch": 3.09, + "learning_rate": 9.720871869748673e-05, + "loss": 0.0477, + "step": 10181 + }, + { + "epoch": 3.09, + "learning_rate": 9.71816589476025e-05, + "loss": 0.0311, + "step": 10182 + }, + { + "epoch": 3.09, + "learning_rate": 9.715460115976478e-05, + "loss": 0.065, + "step": 10183 + }, + { + "epoch": 3.09, + "learning_rate": 9.712754533497864e-05, + "loss": 0.0272, + "step": 10184 + }, + { + "epoch": 3.09, + "learning_rate": 9.710049147424912e-05, + "loss": 0.0317, + "step": 10185 + }, + { + "epoch": 3.09, + "learning_rate": 9.707343957858123e-05, + "loss": 0.0201, + "step": 10186 + }, + { + "epoch": 3.09, + "learning_rate": 9.704638964897985e-05, + "loss": 0.0551, + "step": 10187 + }, + { + "epoch": 3.09, + "learning_rate": 9.701934168644985e-05, + "loss": 0.0552, + "step": 10188 + }, + { + "epoch": 3.09, + "learning_rate": 9.699229569199591e-05, + "loss": 0.0287, + "step": 10189 + }, + { + "epoch": 3.09, + "learning_rate": 9.696525166662271e-05, + "loss": 0.0309, + "step": 10190 + }, + { + "epoch": 3.09, + "learning_rate": 9.693820961133492e-05, + "loss": 0.0484, + "step": 10191 + }, + { + "epoch": 3.09, + "learning_rate": 9.691116952713705e-05, + "loss": 0.0458, + "step": 10192 + }, + { + "epoch": 3.09, + "learning_rate": 9.688413141503358e-05, + "loss": 0.0191, + "step": 10193 + }, + { + "epoch": 3.1, + "learning_rate": 9.685709527602883e-05, + "loss": 0.0507, + "step": 10194 + }, + { + "epoch": 3.1, + "learning_rate": 9.683006111112712e-05, + "loss": 0.0274, + "step": 10195 + }, + { + "epoch": 3.1, + "learning_rate": 9.680302892133274e-05, + "loss": 0.0455, + "step": 10196 + }, + { + "epoch": 3.1, + "learning_rate": 9.677599870764983e-05, + "loss": 0.0422, + "step": 10197 + }, + { + "epoch": 3.1, + "learning_rate": 9.674897047108254e-05, + "loss": 0.0655, + "step": 10198 + }, + { + "epoch": 3.1, + "learning_rate": 9.67219442126348e-05, + "loss": 0.0298, + "step": 10199 + }, + { + "epoch": 3.1, + "learning_rate": 9.669491993331059e-05, + "loss": 0.0564, + "step": 10200 + }, + { + "epoch": 3.1, + "learning_rate": 9.666789763411378e-05, + "loss": 0.041, + "step": 10201 + }, + { + "epoch": 3.1, + "learning_rate": 9.66408773160482e-05, + "loss": 0.0557, + "step": 10202 + }, + { + "epoch": 3.1, + "learning_rate": 9.66138589801176e-05, + "loss": 0.1123, + "step": 10203 + }, + { + "epoch": 3.1, + "learning_rate": 9.658684262732554e-05, + "loss": 0.0598, + "step": 10204 + }, + { + "epoch": 3.1, + "learning_rate": 9.655982825867562e-05, + "loss": 0.0354, + "step": 10205 + }, + { + "epoch": 3.1, + "learning_rate": 9.653281587517138e-05, + "loss": 0.0363, + "step": 10206 + }, + { + "epoch": 3.1, + "learning_rate": 9.65058054778162e-05, + "loss": 0.0262, + "step": 10207 + }, + { + "epoch": 3.1, + "learning_rate": 9.647879706761359e-05, + "loss": 0.0692, + "step": 10208 + }, + { + "epoch": 3.1, + "learning_rate": 9.645179064556664e-05, + "loss": 0.0671, + "step": 10209 + }, + { + "epoch": 3.1, + "learning_rate": 9.642478621267863e-05, + "loss": 0.0331, + "step": 10210 + }, + { + "epoch": 3.1, + "learning_rate": 9.639778376995271e-05, + "loss": 0.065, + "step": 10211 + }, + { + "epoch": 3.1, + "learning_rate": 9.637078331839195e-05, + "loss": 0.0308, + "step": 10212 + }, + { + "epoch": 3.1, + "learning_rate": 9.634378485899935e-05, + "loss": 0.066, + "step": 10213 + }, + { + "epoch": 3.1, + "learning_rate": 9.631678839277776e-05, + "loss": 0.0774, + "step": 10214 + }, + { + "epoch": 3.1, + "learning_rate": 9.628979392073006e-05, + "loss": 0.0337, + "step": 10215 + }, + { + "epoch": 3.1, + "learning_rate": 9.626280144385901e-05, + "loss": 0.0371, + "step": 10216 + }, + { + "epoch": 3.1, + "learning_rate": 9.623581096316731e-05, + "loss": 0.0485, + "step": 10217 + }, + { + "epoch": 3.1, + "learning_rate": 9.620882247965762e-05, + "loss": 0.0714, + "step": 10218 + }, + { + "epoch": 3.1, + "learning_rate": 9.618183599433237e-05, + "loss": 0.0364, + "step": 10219 + }, + { + "epoch": 3.1, + "learning_rate": 9.61548515081941e-05, + "loss": 0.0437, + "step": 10220 + }, + { + "epoch": 3.1, + "learning_rate": 9.61278690222452e-05, + "loss": 0.0273, + "step": 10221 + }, + { + "epoch": 3.1, + "learning_rate": 9.610088853748799e-05, + "loss": 0.0308, + "step": 10222 + }, + { + "epoch": 3.1, + "learning_rate": 9.607391005492474e-05, + "loss": 0.0534, + "step": 10223 + }, + { + "epoch": 3.1, + "learning_rate": 9.604693357555755e-05, + "loss": 0.0304, + "step": 10224 + }, + { + "epoch": 3.1, + "learning_rate": 9.601995910038858e-05, + "loss": 0.0448, + "step": 10225 + }, + { + "epoch": 3.1, + "learning_rate": 9.599298663041982e-05, + "loss": 0.0446, + "step": 10226 + }, + { + "epoch": 3.11, + "learning_rate": 9.596601616665323e-05, + "loss": 0.024, + "step": 10227 + }, + { + "epoch": 3.11, + "learning_rate": 9.593904771009072e-05, + "loss": 0.1044, + "step": 10228 + }, + { + "epoch": 3.11, + "learning_rate": 9.591208126173406e-05, + "loss": 0.0765, + "step": 10229 + }, + { + "epoch": 3.11, + "learning_rate": 9.588511682258493e-05, + "loss": 0.034, + "step": 10230 + }, + { + "epoch": 3.11, + "learning_rate": 9.585815439364505e-05, + "loss": 0.0383, + "step": 10231 + }, + { + "epoch": 3.11, + "learning_rate": 9.583119397591593e-05, + "loss": 0.0559, + "step": 10232 + }, + { + "epoch": 3.11, + "learning_rate": 9.580423557039912e-05, + "loss": 0.0259, + "step": 10233 + }, + { + "epoch": 3.11, + "learning_rate": 9.577727917809609e-05, + "loss": 0.0345, + "step": 10234 + }, + { + "epoch": 3.11, + "learning_rate": 9.575032480000808e-05, + "loss": 0.0291, + "step": 10235 + }, + { + "epoch": 3.11, + "learning_rate": 9.57233724371364e-05, + "loss": 0.0351, + "step": 10236 + }, + { + "epoch": 3.11, + "learning_rate": 9.569642209048229e-05, + "loss": 0.0532, + "step": 10237 + }, + { + "epoch": 3.11, + "learning_rate": 9.566947376104683e-05, + "loss": 0.0385, + "step": 10238 + }, + { + "epoch": 3.11, + "learning_rate": 9.564252744983116e-05, + "loss": 0.0778, + "step": 10239 + }, + { + "epoch": 3.11, + "learning_rate": 9.561558315783615e-05, + "loss": 0.0485, + "step": 10240 + }, + { + "epoch": 3.11, + "learning_rate": 9.558864088606273e-05, + "loss": 0.0492, + "step": 10241 + }, + { + "epoch": 3.11, + "learning_rate": 9.556170063551174e-05, + "loss": 0.0241, + "step": 10242 + }, + { + "epoch": 3.11, + "learning_rate": 9.553476240718396e-05, + "loss": 0.0493, + "step": 10243 + }, + { + "epoch": 3.11, + "learning_rate": 9.550782620208008e-05, + "loss": 0.0302, + "step": 10244 + }, + { + "epoch": 3.11, + "learning_rate": 9.548089202120061e-05, + "loss": 0.0764, + "step": 10245 + }, + { + "epoch": 3.11, + "learning_rate": 9.54539598655461e-05, + "loss": 0.0398, + "step": 10246 + }, + { + "epoch": 3.11, + "learning_rate": 9.542702973611706e-05, + "loss": 0.0698, + "step": 10247 + }, + { + "epoch": 3.11, + "learning_rate": 9.540010163391376e-05, + "loss": 0.015, + "step": 10248 + }, + { + "epoch": 3.11, + "learning_rate": 9.537317555993669e-05, + "loss": 0.0534, + "step": 10249 + }, + { + "epoch": 3.11, + "learning_rate": 9.53462515151859e-05, + "loss": 0.0204, + "step": 10250 + }, + { + "epoch": 3.11, + "learning_rate": 9.53193295006616e-05, + "loss": 0.0735, + "step": 10251 + }, + { + "epoch": 3.11, + "learning_rate": 9.529240951736384e-05, + "loss": 0.0465, + "step": 10252 + }, + { + "epoch": 3.11, + "learning_rate": 9.526549156629262e-05, + "loss": 0.0253, + "step": 10253 + }, + { + "epoch": 3.11, + "learning_rate": 9.523857564844796e-05, + "loss": 0.055, + "step": 10254 + }, + { + "epoch": 3.11, + "learning_rate": 9.521166176482957e-05, + "loss": 0.0279, + "step": 10255 + }, + { + "epoch": 3.11, + "learning_rate": 9.518474991643727e-05, + "loss": 0.0312, + "step": 10256 + }, + { + "epoch": 3.11, + "learning_rate": 9.51578401042708e-05, + "loss": 0.0557, + "step": 10257 + }, + { + "epoch": 3.11, + "learning_rate": 9.513093232932971e-05, + "loss": 0.0361, + "step": 10258 + }, + { + "epoch": 3.11, + "learning_rate": 9.510402659261367e-05, + "loss": 0.0372, + "step": 10259 + }, + { + "epoch": 3.12, + "learning_rate": 9.507712289512199e-05, + "loss": 0.0256, + "step": 10260 + }, + { + "epoch": 3.12, + "learning_rate": 9.505022123785412e-05, + "loss": 0.0184, + "step": 10261 + }, + { + "epoch": 3.12, + "learning_rate": 9.502332162180943e-05, + "loss": 0.0706, + "step": 10262 + }, + { + "epoch": 3.12, + "learning_rate": 9.499642404798708e-05, + "loss": 0.0647, + "step": 10263 + }, + { + "epoch": 3.12, + "learning_rate": 9.496952851738634e-05, + "loss": 0.0384, + "step": 10264 + }, + { + "epoch": 3.12, + "learning_rate": 9.494263503100619e-05, + "loss": 0.046, + "step": 10265 + }, + { + "epoch": 3.12, + "learning_rate": 9.491574358984571e-05, + "loss": 0.0483, + "step": 10266 + }, + { + "epoch": 3.12, + "learning_rate": 9.488885419490382e-05, + "loss": 0.0116, + "step": 10267 + }, + { + "epoch": 3.12, + "learning_rate": 9.486196684717938e-05, + "loss": 0.0546, + "step": 10268 + }, + { + "epoch": 3.12, + "learning_rate": 9.483508154767124e-05, + "loss": 0.0557, + "step": 10269 + }, + { + "epoch": 3.12, + "learning_rate": 9.480819829737798e-05, + "loss": 0.0127, + "step": 10270 + }, + { + "epoch": 3.12, + "learning_rate": 9.47813170972983e-05, + "loss": 0.0677, + "step": 10271 + }, + { + "epoch": 3.12, + "learning_rate": 9.475443794843078e-05, + "loss": 0.0248, + "step": 10272 + }, + { + "epoch": 3.12, + "learning_rate": 9.47275608517739e-05, + "loss": 0.0336, + "step": 10273 + }, + { + "epoch": 3.12, + "learning_rate": 9.470068580832609e-05, + "loss": 0.0495, + "step": 10274 + }, + { + "epoch": 3.12, + "learning_rate": 9.467381281908556e-05, + "loss": 0.0439, + "step": 10275 + }, + { + "epoch": 3.12, + "learning_rate": 9.464694188505065e-05, + "loss": 0.0092, + "step": 10276 + }, + { + "epoch": 3.12, + "learning_rate": 9.462007300721957e-05, + "loss": 0.06, + "step": 10277 + }, + { + "epoch": 3.12, + "learning_rate": 9.459320618659032e-05, + "loss": 0.043, + "step": 10278 + }, + { + "epoch": 3.12, + "learning_rate": 9.456634142416105e-05, + "loss": 0.0481, + "step": 10279 + }, + { + "epoch": 3.12, + "learning_rate": 9.45394787209296e-05, + "loss": 0.0455, + "step": 10280 + }, + { + "epoch": 3.12, + "learning_rate": 9.451261807789387e-05, + "loss": 0.071, + "step": 10281 + }, + { + "epoch": 3.12, + "learning_rate": 9.448575949605166e-05, + "loss": 0.0354, + "step": 10282 + }, + { + "epoch": 3.12, + "learning_rate": 9.445890297640072e-05, + "loss": 0.0414, + "step": 10283 + }, + { + "epoch": 3.12, + "learning_rate": 9.44320485199387e-05, + "loss": 0.0444, + "step": 10284 + }, + { + "epoch": 3.12, + "learning_rate": 9.440519612766306e-05, + "loss": 0.0285, + "step": 10285 + }, + { + "epoch": 3.12, + "learning_rate": 9.437834580057135e-05, + "loss": 0.0785, + "step": 10286 + }, + { + "epoch": 3.12, + "learning_rate": 9.4351497539661e-05, + "loss": 0.0298, + "step": 10287 + }, + { + "epoch": 3.12, + "learning_rate": 9.432465134592931e-05, + "loss": 0.0554, + "step": 10288 + }, + { + "epoch": 3.12, + "learning_rate": 9.42978072203736e-05, + "loss": 0.0396, + "step": 10289 + }, + { + "epoch": 3.12, + "learning_rate": 9.427096516399098e-05, + "loss": 0.0141, + "step": 10290 + }, + { + "epoch": 3.12, + "learning_rate": 9.424412517777857e-05, + "loss": 0.044, + "step": 10291 + }, + { + "epoch": 3.12, + "learning_rate": 9.421728726273343e-05, + "loss": 0.0254, + "step": 10292 + }, + { + "epoch": 3.13, + "learning_rate": 9.419045141985246e-05, + "loss": 0.0398, + "step": 10293 + }, + { + "epoch": 3.13, + "learning_rate": 9.416361765013258e-05, + "loss": 0.0297, + "step": 10294 + }, + { + "epoch": 3.13, + "learning_rate": 9.413678595457056e-05, + "loss": 0.0722, + "step": 10295 + }, + { + "epoch": 3.13, + "learning_rate": 9.410995633416312e-05, + "loss": 0.0404, + "step": 10296 + }, + { + "epoch": 3.13, + "learning_rate": 9.408312878990691e-05, + "loss": 0.069, + "step": 10297 + }, + { + "epoch": 3.13, + "learning_rate": 9.405630332279847e-05, + "loss": 0.0513, + "step": 10298 + }, + { + "epoch": 3.13, + "learning_rate": 9.402947993383432e-05, + "loss": 0.054, + "step": 10299 + }, + { + "epoch": 3.13, + "learning_rate": 9.400265862401095e-05, + "loss": 0.0283, + "step": 10300 + }, + { + "epoch": 3.13, + "learning_rate": 9.39758393943245e-05, + "loss": 0.0626, + "step": 10301 + }, + { + "epoch": 3.13, + "learning_rate": 9.394902224577134e-05, + "loss": 0.0704, + "step": 10302 + }, + { + "epoch": 3.13, + "learning_rate": 9.392220717934762e-05, + "loss": 0.072, + "step": 10303 + }, + { + "epoch": 3.13, + "learning_rate": 9.389539419604948e-05, + "loss": 0.092, + "step": 10304 + }, + { + "epoch": 3.13, + "learning_rate": 9.386858329687294e-05, + "loss": 0.0266, + "step": 10305 + }, + { + "epoch": 3.13, + "learning_rate": 9.38417744828139e-05, + "loss": 0.0417, + "step": 10306 + }, + { + "epoch": 3.13, + "learning_rate": 9.381496775486826e-05, + "loss": 0.0448, + "step": 10307 + }, + { + "epoch": 3.13, + "learning_rate": 9.37881631140318e-05, + "loss": 0.0001, + "step": 10308 + }, + { + "epoch": 3.13, + "learning_rate": 9.376136056130024e-05, + "loss": 0.0217, + "step": 10309 + }, + { + "epoch": 3.13, + "learning_rate": 9.373456009766926e-05, + "loss": 0.0354, + "step": 10310 + }, + { + "epoch": 3.13, + "learning_rate": 9.370776172413435e-05, + "loss": 0.0301, + "step": 10311 + }, + { + "epoch": 3.13, + "learning_rate": 9.368096544169096e-05, + "loss": 0.0354, + "step": 10312 + }, + { + "epoch": 3.13, + "learning_rate": 9.365417125133459e-05, + "loss": 0.0309, + "step": 10313 + }, + { + "epoch": 3.13, + "learning_rate": 9.362737915406053e-05, + "loss": 0.027, + "step": 10314 + }, + { + "epoch": 3.13, + "learning_rate": 9.360058915086407e-05, + "loss": 0.0328, + "step": 10315 + }, + { + "epoch": 3.13, + "learning_rate": 9.357380124274028e-05, + "loss": 0.0879, + "step": 10316 + }, + { + "epoch": 3.13, + "learning_rate": 9.354701543068431e-05, + "loss": 0.0076, + "step": 10317 + }, + { + "epoch": 3.13, + "learning_rate": 9.352023171569115e-05, + "loss": 0.0245, + "step": 10318 + }, + { + "epoch": 3.13, + "learning_rate": 9.349345009875577e-05, + "loss": 0.0255, + "step": 10319 + }, + { + "epoch": 3.13, + "learning_rate": 9.346667058087306e-05, + "loss": 0.0476, + "step": 10320 + }, + { + "epoch": 3.13, + "learning_rate": 9.343989316303772e-05, + "loss": 0.013, + "step": 10321 + }, + { + "epoch": 3.13, + "learning_rate": 9.341311784624448e-05, + "loss": 0.0583, + "step": 10322 + }, + { + "epoch": 3.13, + "learning_rate": 9.338634463148796e-05, + "loss": 0.0381, + "step": 10323 + }, + { + "epoch": 3.13, + "learning_rate": 9.335957351976273e-05, + "loss": 0.0529, + "step": 10324 + }, + { + "epoch": 3.13, + "learning_rate": 9.33328045120633e-05, + "loss": 0.0457, + "step": 10325 + }, + { + "epoch": 3.14, + "learning_rate": 9.330603760938395e-05, + "loss": 0.0385, + "step": 10326 + }, + { + "epoch": 3.14, + "learning_rate": 9.327927281271906e-05, + "loss": 0.0352, + "step": 10327 + }, + { + "epoch": 3.14, + "learning_rate": 9.325251012306284e-05, + "loss": 0.0674, + "step": 10328 + }, + { + "epoch": 3.14, + "learning_rate": 9.322574954140948e-05, + "loss": 0.0684, + "step": 10329 + }, + { + "epoch": 3.14, + "learning_rate": 9.319899106875304e-05, + "loss": 0.0423, + "step": 10330 + }, + { + "epoch": 3.14, + "learning_rate": 9.31722347060875e-05, + "loss": 0.0366, + "step": 10331 + }, + { + "epoch": 3.14, + "learning_rate": 9.314548045440679e-05, + "loss": 0.0173, + "step": 10332 + }, + { + "epoch": 3.14, + "learning_rate": 9.311872831470475e-05, + "loss": 0.0198, + "step": 10333 + }, + { + "epoch": 3.14, + "learning_rate": 9.309197828797517e-05, + "loss": 0.0315, + "step": 10334 + }, + { + "epoch": 3.14, + "learning_rate": 9.306523037521171e-05, + "loss": 0.0711, + "step": 10335 + }, + { + "epoch": 3.14, + "learning_rate": 9.303848457740798e-05, + "loss": 0.036, + "step": 10336 + }, + { + "epoch": 3.14, + "learning_rate": 9.30117408955575e-05, + "loss": 0.0323, + "step": 10337 + }, + { + "epoch": 3.14, + "learning_rate": 9.298499933065373e-05, + "loss": 0.0159, + "step": 10338 + }, + { + "epoch": 3.14, + "learning_rate": 9.295825988369003e-05, + "loss": 0.0568, + "step": 10339 + }, + { + "epoch": 3.14, + "learning_rate": 9.29315225556598e-05, + "loss": 0.1114, + "step": 10340 + }, + { + "epoch": 3.14, + "learning_rate": 9.290478734755607e-05, + "loss": 0.0166, + "step": 10341 + }, + { + "epoch": 3.14, + "learning_rate": 9.287805426037203e-05, + "loss": 0.0655, + "step": 10342 + }, + { + "epoch": 3.14, + "learning_rate": 9.285132329510081e-05, + "loss": 0.0791, + "step": 10343 + }, + { + "epoch": 3.14, + "learning_rate": 9.282459445273532e-05, + "loss": 0.0235, + "step": 10344 + }, + { + "epoch": 3.14, + "learning_rate": 9.279786773426851e-05, + "loss": 0.056, + "step": 10345 + }, + { + "epoch": 3.14, + "learning_rate": 9.277114314069316e-05, + "loss": 0.0226, + "step": 10346 + }, + { + "epoch": 3.14, + "learning_rate": 9.2744420673002e-05, + "loss": 0.0519, + "step": 10347 + }, + { + "epoch": 3.14, + "learning_rate": 9.271770033218772e-05, + "loss": 0.0556, + "step": 10348 + }, + { + "epoch": 3.14, + "learning_rate": 9.269098211924289e-05, + "loss": 0.0167, + "step": 10349 + }, + { + "epoch": 3.14, + "learning_rate": 9.266426603516007e-05, + "loss": 0.0725, + "step": 10350 + }, + { + "epoch": 3.14, + "learning_rate": 9.26375520809316e-05, + "loss": 0.0306, + "step": 10351 + }, + { + "epoch": 3.14, + "learning_rate": 9.261084025754983e-05, + "loss": 0.0281, + "step": 10352 + }, + { + "epoch": 3.14, + "learning_rate": 9.258413056600701e-05, + "loss": 0.0482, + "step": 10353 + }, + { + "epoch": 3.14, + "learning_rate": 9.255742300729543e-05, + "loss": 0.0587, + "step": 10354 + }, + { + "epoch": 3.14, + "learning_rate": 9.253071758240712e-05, + "loss": 0.0965, + "step": 10355 + }, + { + "epoch": 3.14, + "learning_rate": 9.250401429233418e-05, + "loss": 0.0625, + "step": 10356 + }, + { + "epoch": 3.14, + "learning_rate": 9.247731313806846e-05, + "loss": 0.0727, + "step": 10357 + }, + { + "epoch": 3.14, + "learning_rate": 9.245061412060188e-05, + "loss": 0.0481, + "step": 10358 + }, + { + "epoch": 3.15, + "learning_rate": 9.24239172409262e-05, + "loss": 0.0334, + "step": 10359 + }, + { + "epoch": 3.15, + "learning_rate": 9.239722250003316e-05, + "loss": 0.0396, + "step": 10360 + }, + { + "epoch": 3.15, + "learning_rate": 9.23705298989144e-05, + "loss": 0.0193, + "step": 10361 + }, + { + "epoch": 3.15, + "learning_rate": 9.234383943856146e-05, + "loss": 0.0501, + "step": 10362 + }, + { + "epoch": 3.15, + "learning_rate": 9.231715111996579e-05, + "loss": 0.0469, + "step": 10363 + }, + { + "epoch": 3.15, + "learning_rate": 9.22904649441188e-05, + "loss": 0.044, + "step": 10364 + }, + { + "epoch": 3.15, + "learning_rate": 9.226378091201182e-05, + "loss": 0.0619, + "step": 10365 + }, + { + "epoch": 3.15, + "learning_rate": 9.223709902463611e-05, + "loss": 0.0304, + "step": 10366 + }, + { + "epoch": 3.15, + "learning_rate": 9.221041928298272e-05, + "loss": 0.061, + "step": 10367 + }, + { + "epoch": 3.15, + "learning_rate": 9.218374168804282e-05, + "loss": 0.0319, + "step": 10368 + }, + { + "epoch": 3.15, + "learning_rate": 9.215706624080736e-05, + "loss": 0.0319, + "step": 10369 + }, + { + "epoch": 3.15, + "learning_rate": 9.213039294226724e-05, + "loss": 0.0423, + "step": 10370 + }, + { + "epoch": 3.15, + "learning_rate": 9.210372179341338e-05, + "loss": 0.059, + "step": 10371 + }, + { + "epoch": 3.15, + "learning_rate": 9.207705279523644e-05, + "loss": 0.052, + "step": 10372 + }, + { + "epoch": 3.15, + "learning_rate": 9.205038594872712e-05, + "loss": 0.0338, + "step": 10373 + }, + { + "epoch": 3.15, + "learning_rate": 9.202372125487602e-05, + "loss": 0.031, + "step": 10374 + }, + { + "epoch": 3.15, + "learning_rate": 9.199705871467369e-05, + "loss": 0.0449, + "step": 10375 + }, + { + "epoch": 3.15, + "learning_rate": 9.197039832911056e-05, + "loss": 0.0317, + "step": 10376 + }, + { + "epoch": 3.15, + "learning_rate": 9.19437400991769e-05, + "loss": 0.0099, + "step": 10377 + }, + { + "epoch": 3.15, + "learning_rate": 9.191708402586307e-05, + "loss": 0.0257, + "step": 10378 + }, + { + "epoch": 3.15, + "learning_rate": 9.189043011015926e-05, + "loss": 0.0155, + "step": 10379 + }, + { + "epoch": 3.15, + "learning_rate": 9.186377835305552e-05, + "loss": 0.0552, + "step": 10380 + }, + { + "epoch": 3.15, + "learning_rate": 9.183712875554203e-05, + "loss": 0.0558, + "step": 10381 + }, + { + "epoch": 3.15, + "learning_rate": 9.181048131860858e-05, + "loss": 0.0169, + "step": 10382 + }, + { + "epoch": 3.15, + "learning_rate": 9.178383604324509e-05, + "loss": 0.0118, + "step": 10383 + }, + { + "epoch": 3.15, + "learning_rate": 9.175719293044138e-05, + "loss": 0.0509, + "step": 10384 + }, + { + "epoch": 3.15, + "learning_rate": 9.173055198118718e-05, + "loss": 0.0132, + "step": 10385 + }, + { + "epoch": 3.15, + "learning_rate": 9.170391319647211e-05, + "loss": 0.0345, + "step": 10386 + }, + { + "epoch": 3.15, + "learning_rate": 9.167727657728567e-05, + "loss": 0.0511, + "step": 10387 + }, + { + "epoch": 3.15, + "learning_rate": 9.16506421246174e-05, + "loss": 0.0899, + "step": 10388 + }, + { + "epoch": 3.15, + "learning_rate": 9.162400983945667e-05, + "loss": 0.0155, + "step": 10389 + }, + { + "epoch": 3.15, + "learning_rate": 9.159737972279278e-05, + "loss": 0.0321, + "step": 10390 + }, + { + "epoch": 3.16, + "learning_rate": 9.157075177561502e-05, + "loss": 0.0222, + "step": 10391 + }, + { + "epoch": 3.16, + "learning_rate": 9.154412599891244e-05, + "loss": 0.029, + "step": 10392 + }, + { + "epoch": 3.16, + "learning_rate": 9.151750239367415e-05, + "loss": 0.044, + "step": 10393 + }, + { + "epoch": 3.16, + "learning_rate": 9.149088096088911e-05, + "loss": 0.042, + "step": 10394 + }, + { + "epoch": 3.16, + "learning_rate": 9.146426170154632e-05, + "loss": 0.0618, + "step": 10395 + }, + { + "epoch": 3.16, + "learning_rate": 9.143764461663459e-05, + "loss": 0.0648, + "step": 10396 + }, + { + "epoch": 3.16, + "learning_rate": 9.141102970714259e-05, + "loss": 0.0529, + "step": 10397 + }, + { + "epoch": 3.16, + "learning_rate": 9.138441697405903e-05, + "loss": 0.02, + "step": 10398 + }, + { + "epoch": 3.16, + "learning_rate": 9.135780641837247e-05, + "loss": 0.0286, + "step": 10399 + }, + { + "epoch": 3.16, + "learning_rate": 9.133119804107144e-05, + "loss": 0.0397, + "step": 10400 + }, + { + "epoch": 3.16, + "learning_rate": 9.130459184314438e-05, + "loss": 0.0427, + "step": 10401 + }, + { + "epoch": 3.16, + "learning_rate": 9.12779878255796e-05, + "loss": 0.0212, + "step": 10402 + }, + { + "epoch": 3.16, + "learning_rate": 9.125138598936535e-05, + "loss": 0.0277, + "step": 10403 + }, + { + "epoch": 3.16, + "learning_rate": 9.122478633548985e-05, + "loss": 0.0431, + "step": 10404 + }, + { + "epoch": 3.16, + "learning_rate": 9.119818886494117e-05, + "loss": 0.0832, + "step": 10405 + }, + { + "epoch": 3.16, + "learning_rate": 9.11715935787074e-05, + "loss": 0.0631, + "step": 10406 + }, + { + "epoch": 3.16, + "learning_rate": 9.114500047777637e-05, + "loss": 0.0199, + "step": 10407 + }, + { + "epoch": 3.16, + "learning_rate": 9.111840956313596e-05, + "loss": 0.036, + "step": 10408 + }, + { + "epoch": 3.16, + "learning_rate": 9.109182083577397e-05, + "loss": 0.0655, + "step": 10409 + }, + { + "epoch": 3.16, + "learning_rate": 9.10652342966781e-05, + "loss": 0.0118, + "step": 10410 + }, + { + "epoch": 3.16, + "learning_rate": 9.103864994683597e-05, + "loss": 0.067, + "step": 10411 + }, + { + "epoch": 3.16, + "learning_rate": 9.101206778723507e-05, + "loss": 0.0305, + "step": 10412 + }, + { + "epoch": 3.16, + "learning_rate": 9.098548781886287e-05, + "loss": 0.0532, + "step": 10413 + }, + { + "epoch": 3.16, + "learning_rate": 9.095891004270674e-05, + "loss": 0.0574, + "step": 10414 + }, + { + "epoch": 3.16, + "learning_rate": 9.093233445975396e-05, + "loss": 0.0384, + "step": 10415 + }, + { + "epoch": 3.16, + "learning_rate": 9.090576107099174e-05, + "loss": 0.0534, + "step": 10416 + }, + { + "epoch": 3.16, + "learning_rate": 9.087918987740724e-05, + "loss": 0.0478, + "step": 10417 + }, + { + "epoch": 3.16, + "learning_rate": 9.085262087998743e-05, + "loss": 0.0193, + "step": 10418 + }, + { + "epoch": 3.16, + "learning_rate": 9.08260540797193e-05, + "loss": 0.0217, + "step": 10419 + }, + { + "epoch": 3.16, + "learning_rate": 9.079948947758974e-05, + "loss": 0.038, + "step": 10420 + }, + { + "epoch": 3.16, + "learning_rate": 9.077292707458555e-05, + "loss": 0.0173, + "step": 10421 + }, + { + "epoch": 3.16, + "learning_rate": 9.07463668716935e-05, + "loss": 0.0203, + "step": 10422 + }, + { + "epoch": 3.16, + "learning_rate": 9.071980886990008e-05, + "loss": 0.0203, + "step": 10423 + }, + { + "epoch": 3.17, + "learning_rate": 9.069325307019195e-05, + "loss": 0.0589, + "step": 10424 + }, + { + "epoch": 3.17, + "learning_rate": 9.066669947355554e-05, + "loss": 0.0455, + "step": 10425 + }, + { + "epoch": 3.17, + "learning_rate": 9.064014808097725e-05, + "loss": 0.073, + "step": 10426 + }, + { + "epoch": 3.17, + "learning_rate": 9.061359889344342e-05, + "loss": 0.0205, + "step": 10427 + }, + { + "epoch": 3.17, + "learning_rate": 9.05870519119402e-05, + "loss": 0.0526, + "step": 10428 + }, + { + "epoch": 3.17, + "learning_rate": 9.056050713745378e-05, + "loss": 0.0267, + "step": 10429 + }, + { + "epoch": 3.17, + "learning_rate": 9.053396457097022e-05, + "loss": 0.0418, + "step": 10430 + }, + { + "epoch": 3.17, + "learning_rate": 9.05074242134755e-05, + "loss": 0.0533, + "step": 10431 + }, + { + "epoch": 3.17, + "learning_rate": 9.048088606595555e-05, + "loss": 0.0608, + "step": 10432 + }, + { + "epoch": 3.17, + "learning_rate": 9.04543501293961e-05, + "loss": 0.0828, + "step": 10433 + }, + { + "epoch": 3.17, + "learning_rate": 9.042781640478291e-05, + "loss": 0.056, + "step": 10434 + }, + { + "epoch": 3.17, + "learning_rate": 9.040128489310164e-05, + "loss": 0.0396, + "step": 10435 + }, + { + "epoch": 3.17, + "learning_rate": 9.037475559533788e-05, + "loss": 0.0424, + "step": 10436 + }, + { + "epoch": 3.17, + "learning_rate": 9.034822851247717e-05, + "loss": 0.0337, + "step": 10437 + }, + { + "epoch": 3.17, + "learning_rate": 9.032170364550477e-05, + "loss": 0.0176, + "step": 10438 + }, + { + "epoch": 3.17, + "learning_rate": 9.029518099540608e-05, + "loss": 0.0127, + "step": 10439 + }, + { + "epoch": 3.17, + "learning_rate": 9.026866056316635e-05, + "loss": 0.0156, + "step": 10440 + }, + { + "epoch": 3.17, + "learning_rate": 9.024214234977069e-05, + "loss": 0.044, + "step": 10441 + }, + { + "epoch": 3.17, + "learning_rate": 9.021562635620425e-05, + "loss": 0.0364, + "step": 10442 + }, + { + "epoch": 3.17, + "learning_rate": 9.018911258345193e-05, + "loss": 0.0359, + "step": 10443 + }, + { + "epoch": 3.17, + "learning_rate": 9.016260103249871e-05, + "loss": 0.0227, + "step": 10444 + }, + { + "epoch": 3.17, + "learning_rate": 9.013609170432937e-05, + "loss": 0.0388, + "step": 10445 + }, + { + "epoch": 3.17, + "learning_rate": 9.010958459992869e-05, + "loss": 0.0301, + "step": 10446 + }, + { + "epoch": 3.17, + "learning_rate": 9.008307972028136e-05, + "loss": 0.0467, + "step": 10447 + }, + { + "epoch": 3.17, + "learning_rate": 9.005657706637187e-05, + "loss": 0.0411, + "step": 10448 + }, + { + "epoch": 3.17, + "learning_rate": 9.003007663918473e-05, + "loss": 0.0605, + "step": 10449 + }, + { + "epoch": 3.17, + "learning_rate": 9.000357843970439e-05, + "loss": 0.0968, + "step": 10450 + }, + { + "epoch": 3.17, + "learning_rate": 8.997708246891518e-05, + "loss": 0.0561, + "step": 10451 + }, + { + "epoch": 3.17, + "learning_rate": 8.995058872780139e-05, + "loss": 0.0712, + "step": 10452 + }, + { + "epoch": 3.17, + "learning_rate": 8.992409721734708e-05, + "loss": 0.0589, + "step": 10453 + }, + { + "epoch": 3.17, + "learning_rate": 8.989760793853641e-05, + "loss": 0.0462, + "step": 10454 + }, + { + "epoch": 3.17, + "learning_rate": 8.987112089235333e-05, + "loss": 0.0498, + "step": 10455 + }, + { + "epoch": 3.17, + "learning_rate": 8.98446360797818e-05, + "loss": 0.0486, + "step": 10456 + }, + { + "epoch": 3.18, + "learning_rate": 8.981815350180568e-05, + "loss": 0.0586, + "step": 10457 + }, + { + "epoch": 3.18, + "learning_rate": 8.97916731594086e-05, + "loss": 0.0556, + "step": 10458 + }, + { + "epoch": 3.18, + "learning_rate": 8.976519505357432e-05, + "loss": 0.0449, + "step": 10459 + }, + { + "epoch": 3.18, + "learning_rate": 8.973871918528642e-05, + "loss": 0.0518, + "step": 10460 + }, + { + "epoch": 3.18, + "learning_rate": 8.97122455555284e-05, + "loss": 0.0768, + "step": 10461 + }, + { + "epoch": 3.18, + "learning_rate": 8.968577416528368e-05, + "loss": 0.0497, + "step": 10462 + }, + { + "epoch": 3.18, + "learning_rate": 8.965930501553556e-05, + "loss": 0.0599, + "step": 10463 + }, + { + "epoch": 3.18, + "learning_rate": 8.963283810726731e-05, + "loss": 0.0379, + "step": 10464 + }, + { + "epoch": 3.18, + "learning_rate": 8.960637344146208e-05, + "loss": 0.0606, + "step": 10465 + }, + { + "epoch": 3.18, + "learning_rate": 8.957991101910297e-05, + "loss": 0.0552, + "step": 10466 + }, + { + "epoch": 3.18, + "learning_rate": 8.955345084117303e-05, + "loss": 0.0431, + "step": 10467 + }, + { + "epoch": 3.18, + "learning_rate": 8.95269929086551e-05, + "loss": 0.0554, + "step": 10468 + }, + { + "epoch": 3.18, + "learning_rate": 8.950053722253205e-05, + "loss": 0.0224, + "step": 10469 + }, + { + "epoch": 3.18, + "learning_rate": 8.947408378378663e-05, + "loss": 0.0491, + "step": 10470 + }, + { + "epoch": 3.18, + "learning_rate": 8.94476325934015e-05, + "loss": 0.0499, + "step": 10471 + }, + { + "epoch": 3.18, + "learning_rate": 8.942118365235931e-05, + "loss": 0.0376, + "step": 10472 + }, + { + "epoch": 3.18, + "learning_rate": 8.939473696164245e-05, + "loss": 0.0506, + "step": 10473 + }, + { + "epoch": 3.18, + "learning_rate": 8.936829252223338e-05, + "loss": 0.0184, + "step": 10474 + }, + { + "epoch": 3.18, + "learning_rate": 8.934185033511444e-05, + "loss": 0.0206, + "step": 10475 + }, + { + "epoch": 3.18, + "learning_rate": 8.931541040126788e-05, + "loss": 0.0462, + "step": 10476 + }, + { + "epoch": 3.18, + "learning_rate": 8.928897272167592e-05, + "loss": 0.0268, + "step": 10477 + }, + { + "epoch": 3.18, + "learning_rate": 8.926253729732054e-05, + "loss": 0.0706, + "step": 10478 + }, + { + "epoch": 3.18, + "learning_rate": 8.92361041291838e-05, + "loss": 0.0006, + "step": 10479 + }, + { + "epoch": 3.18, + "learning_rate": 8.920967321824761e-05, + "loss": 0.0174, + "step": 10480 + }, + { + "epoch": 3.18, + "learning_rate": 8.918324456549377e-05, + "loss": 0.0362, + "step": 10481 + }, + { + "epoch": 3.18, + "learning_rate": 8.915681817190404e-05, + "loss": 0.0365, + "step": 10482 + }, + { + "epoch": 3.18, + "learning_rate": 8.913039403846017e-05, + "loss": 0.0784, + "step": 10483 + }, + { + "epoch": 3.18, + "learning_rate": 8.910397216614358e-05, + "loss": 0.0652, + "step": 10484 + }, + { + "epoch": 3.18, + "learning_rate": 8.907755255593589e-05, + "loss": 0.0414, + "step": 10485 + }, + { + "epoch": 3.18, + "learning_rate": 8.905113520881846e-05, + "loss": 0.0381, + "step": 10486 + }, + { + "epoch": 3.18, + "learning_rate": 8.90247201257726e-05, + "loss": 0.0596, + "step": 10487 + }, + { + "epoch": 3.18, + "learning_rate": 8.899830730777966e-05, + "loss": 0.0304, + "step": 10488 + }, + { + "epoch": 3.18, + "learning_rate": 8.897189675582066e-05, + "loss": 0.055, + "step": 10489 + }, + { + "epoch": 3.19, + "learning_rate": 8.894548847087674e-05, + "loss": 0.0341, + "step": 10490 + }, + { + "epoch": 3.19, + "learning_rate": 8.891908245392886e-05, + "loss": 0.0715, + "step": 10491 + }, + { + "epoch": 3.19, + "learning_rate": 8.889267870595796e-05, + "loss": 0.0291, + "step": 10492 + }, + { + "epoch": 3.19, + "learning_rate": 8.886627722794488e-05, + "loss": 0.0247, + "step": 10493 + }, + { + "epoch": 3.19, + "learning_rate": 8.88398780208703e-05, + "loss": 0.0544, + "step": 10494 + }, + { + "epoch": 3.19, + "learning_rate": 8.881348108571489e-05, + "loss": 0.0484, + "step": 10495 + }, + { + "epoch": 3.19, + "learning_rate": 8.878708642345924e-05, + "loss": 0.0424, + "step": 10496 + }, + { + "epoch": 3.19, + "learning_rate": 8.876069403508381e-05, + "loss": 0.0666, + "step": 10497 + }, + { + "epoch": 3.19, + "learning_rate": 8.873430392156909e-05, + "loss": 0.0467, + "step": 10498 + }, + { + "epoch": 3.19, + "learning_rate": 8.870791608389524e-05, + "loss": 0.0624, + "step": 10499 + }, + { + "epoch": 3.19, + "learning_rate": 8.868153052304257e-05, + "loss": 0.0294, + "step": 10500 + }, + { + "epoch": 3.19, + "learning_rate": 8.865514723999125e-05, + "loss": 0.0529, + "step": 10501 + }, + { + "epoch": 3.19, + "learning_rate": 8.86287662357213e-05, + "loss": 0.0475, + "step": 10502 + }, + { + "epoch": 3.19, + "learning_rate": 8.860238751121279e-05, + "loss": 0.044, + "step": 10503 + }, + { + "epoch": 3.19, + "learning_rate": 8.857601106744546e-05, + "loss": 0.0438, + "step": 10504 + }, + { + "epoch": 3.19, + "learning_rate": 8.854963690539921e-05, + "loss": 0.0299, + "step": 10505 + }, + { + "epoch": 3.19, + "learning_rate": 8.852326502605373e-05, + "loss": 0.0491, + "step": 10506 + }, + { + "epoch": 3.19, + "learning_rate": 8.849689543038869e-05, + "loss": 0.0298, + "step": 10507 + }, + { + "epoch": 3.19, + "learning_rate": 8.847052811938367e-05, + "loss": 0.0727, + "step": 10508 + }, + { + "epoch": 3.19, + "learning_rate": 8.844416309401805e-05, + "loss": 0.0394, + "step": 10509 + }, + { + "epoch": 3.19, + "learning_rate": 8.841780035527125e-05, + "loss": 0.0281, + "step": 10510 + }, + { + "epoch": 3.19, + "learning_rate": 8.839143990412258e-05, + "loss": 0.0374, + "step": 10511 + }, + { + "epoch": 3.19, + "learning_rate": 8.836508174155128e-05, + "loss": 0.0979, + "step": 10512 + }, + { + "epoch": 3.19, + "learning_rate": 8.833872586853648e-05, + "loss": 0.0379, + "step": 10513 + }, + { + "epoch": 3.19, + "learning_rate": 8.831237228605716e-05, + "loss": 0.0807, + "step": 10514 + }, + { + "epoch": 3.19, + "learning_rate": 8.82860209950923e-05, + "loss": 0.0693, + "step": 10515 + }, + { + "epoch": 3.19, + "learning_rate": 8.825967199662077e-05, + "loss": 0.0398, + "step": 10516 + }, + { + "epoch": 3.19, + "learning_rate": 8.823332529162139e-05, + "loss": 0.0633, + "step": 10517 + }, + { + "epoch": 3.19, + "learning_rate": 8.820698088107287e-05, + "loss": 0.0644, + "step": 10518 + }, + { + "epoch": 3.19, + "learning_rate": 8.818063876595379e-05, + "loss": 0.0254, + "step": 10519 + }, + { + "epoch": 3.19, + "learning_rate": 8.815429894724268e-05, + "loss": 0.0335, + "step": 10520 + }, + { + "epoch": 3.19, + "learning_rate": 8.812796142591801e-05, + "loss": 0.02, + "step": 10521 + }, + { + "epoch": 3.19, + "learning_rate": 8.810162620295814e-05, + "loss": 0.0252, + "step": 10522 + }, + { + "epoch": 3.2, + "learning_rate": 8.807529327934136e-05, + "loss": 0.0422, + "step": 10523 + }, + { + "epoch": 3.2, + "learning_rate": 8.804896265604581e-05, + "loss": 0.0441, + "step": 10524 + }, + { + "epoch": 3.2, + "learning_rate": 8.802263433404962e-05, + "loss": 0.0496, + "step": 10525 + }, + { + "epoch": 3.2, + "learning_rate": 8.799630831433083e-05, + "loss": 0.0554, + "step": 10526 + }, + { + "epoch": 3.2, + "learning_rate": 8.796998459786736e-05, + "loss": 0.0267, + "step": 10527 + }, + { + "epoch": 3.2, + "learning_rate": 8.794366318563712e-05, + "loss": 0.0469, + "step": 10528 + }, + { + "epoch": 3.2, + "learning_rate": 8.791734407861776e-05, + "loss": 0.1201, + "step": 10529 + }, + { + "epoch": 3.2, + "learning_rate": 8.7891027277787e-05, + "loss": 0.0722, + "step": 10530 + }, + { + "epoch": 3.2, + "learning_rate": 8.786471278412245e-05, + "loss": 0.0192, + "step": 10531 + }, + { + "epoch": 3.2, + "learning_rate": 8.783840059860162e-05, + "loss": 0.0331, + "step": 10532 + }, + { + "epoch": 3.2, + "learning_rate": 8.781209072220195e-05, + "loss": 0.0373, + "step": 10533 + }, + { + "epoch": 3.2, + "learning_rate": 8.778578315590071e-05, + "loss": 0.0252, + "step": 10534 + }, + { + "epoch": 3.2, + "learning_rate": 8.775947790067518e-05, + "loss": 0.0439, + "step": 10535 + }, + { + "epoch": 3.2, + "learning_rate": 8.773317495750253e-05, + "loss": 0.0661, + "step": 10536 + }, + { + "epoch": 3.2, + "learning_rate": 8.770687432735981e-05, + "loss": 0.0139, + "step": 10537 + }, + { + "epoch": 3.2, + "learning_rate": 8.768057601122412e-05, + "loss": 0.0424, + "step": 10538 + }, + { + "epoch": 3.2, + "learning_rate": 8.765428001007222e-05, + "loss": 0.0387, + "step": 10539 + }, + { + "epoch": 3.2, + "learning_rate": 8.762798632488096e-05, + "loss": 0.0217, + "step": 10540 + }, + { + "epoch": 3.2, + "learning_rate": 8.760169495662713e-05, + "loss": 0.0669, + "step": 10541 + }, + { + "epoch": 3.2, + "learning_rate": 8.757540590628736e-05, + "loss": 0.0344, + "step": 10542 + }, + { + "epoch": 3.2, + "learning_rate": 8.754911917483824e-05, + "loss": 0.0319, + "step": 10543 + }, + { + "epoch": 3.2, + "learning_rate": 8.752283476325618e-05, + "loss": 0.0623, + "step": 10544 + }, + { + "epoch": 3.2, + "learning_rate": 8.749655267251754e-05, + "loss": 0.0663, + "step": 10545 + }, + { + "epoch": 3.2, + "learning_rate": 8.747027290359875e-05, + "loss": 0.0232, + "step": 10546 + }, + { + "epoch": 3.2, + "learning_rate": 8.744399545747589e-05, + "loss": 0.0555, + "step": 10547 + }, + { + "epoch": 3.2, + "learning_rate": 8.741772033512521e-05, + "loss": 0.0397, + "step": 10548 + }, + { + "epoch": 3.2, + "learning_rate": 8.739144753752271e-05, + "loss": 0.072, + "step": 10549 + }, + { + "epoch": 3.2, + "learning_rate": 8.736517706564435e-05, + "loss": 0.0396, + "step": 10550 + }, + { + "epoch": 3.2, + "learning_rate": 8.733890892046589e-05, + "loss": 0.046, + "step": 10551 + }, + { + "epoch": 3.2, + "learning_rate": 8.731264310296329e-05, + "loss": 0.0128, + "step": 10552 + }, + { + "epoch": 3.2, + "learning_rate": 8.728637961411211e-05, + "loss": 0.0402, + "step": 10553 + }, + { + "epoch": 3.2, + "learning_rate": 8.726011845488807e-05, + "loss": 0.0624, + "step": 10554 + }, + { + "epoch": 3.2, + "learning_rate": 8.723385962626666e-05, + "loss": 0.0925, + "step": 10555 + }, + { + "epoch": 3.21, + "learning_rate": 8.720760312922325e-05, + "loss": 0.0363, + "step": 10556 + }, + { + "epoch": 3.21, + "learning_rate": 8.71813489647333e-05, + "loss": 0.0429, + "step": 10557 + }, + { + "epoch": 3.21, + "learning_rate": 8.715509713377195e-05, + "loss": 0.0231, + "step": 10558 + }, + { + "epoch": 3.21, + "learning_rate": 8.712884763731458e-05, + "loss": 0.051, + "step": 10559 + }, + { + "epoch": 3.21, + "learning_rate": 8.710260047633602e-05, + "loss": 0.0188, + "step": 10560 + }, + { + "epoch": 3.21, + "learning_rate": 8.707635565181143e-05, + "loss": 0.0449, + "step": 10561 + }, + { + "epoch": 3.21, + "learning_rate": 8.705011316471576e-05, + "loss": 0.0392, + "step": 10562 + }, + { + "epoch": 3.21, + "learning_rate": 8.702387301602371e-05, + "loss": 0.11, + "step": 10563 + }, + { + "epoch": 3.21, + "learning_rate": 8.699763520671024e-05, + "loss": 0.0557, + "step": 10564 + }, + { + "epoch": 3.21, + "learning_rate": 8.697139973774974e-05, + "loss": 0.0373, + "step": 10565 + }, + { + "epoch": 3.21, + "learning_rate": 8.694516661011698e-05, + "loss": 0.0545, + "step": 10566 + }, + { + "epoch": 3.21, + "learning_rate": 8.69189358247863e-05, + "loss": 0.0173, + "step": 10567 + }, + { + "epoch": 3.21, + "learning_rate": 8.689270738273222e-05, + "loss": 0.0638, + "step": 10568 + }, + { + "epoch": 3.21, + "learning_rate": 8.686648128492903e-05, + "loss": 0.0602, + "step": 10569 + }, + { + "epoch": 3.21, + "learning_rate": 8.684025753235083e-05, + "loss": 0.0274, + "step": 10570 + }, + { + "epoch": 3.21, + "learning_rate": 8.681403612597194e-05, + "loss": 0.0227, + "step": 10571 + }, + { + "epoch": 3.21, + "learning_rate": 8.678781706676625e-05, + "loss": 0.0628, + "step": 10572 + }, + { + "epoch": 3.21, + "learning_rate": 8.676160035570783e-05, + "loss": 0.0631, + "step": 10573 + }, + { + "epoch": 3.21, + "learning_rate": 8.673538599377052e-05, + "loss": 0.0261, + "step": 10574 + }, + { + "epoch": 3.21, + "learning_rate": 8.67091739819281e-05, + "loss": 0.0564, + "step": 10575 + }, + { + "epoch": 3.21, + "learning_rate": 8.668296432115421e-05, + "loss": 0.0457, + "step": 10576 + }, + { + "epoch": 3.21, + "learning_rate": 8.665675701242257e-05, + "loss": 0.0362, + "step": 10577 + }, + { + "epoch": 3.21, + "learning_rate": 8.663055205670663e-05, + "loss": 0.0618, + "step": 10578 + }, + { + "epoch": 3.21, + "learning_rate": 8.660434945497986e-05, + "loss": 0.0628, + "step": 10579 + }, + { + "epoch": 3.21, + "learning_rate": 8.657814920821563e-05, + "loss": 0.0303, + "step": 10580 + }, + { + "epoch": 3.21, + "learning_rate": 8.655195131738712e-05, + "loss": 0.0456, + "step": 10581 + }, + { + "epoch": 3.21, + "learning_rate": 8.65257557834676e-05, + "loss": 0.0157, + "step": 10582 + }, + { + "epoch": 3.21, + "learning_rate": 8.649956260743006e-05, + "loss": 0.0353, + "step": 10583 + }, + { + "epoch": 3.21, + "learning_rate": 8.647337179024763e-05, + "loss": 0.0457, + "step": 10584 + }, + { + "epoch": 3.21, + "learning_rate": 8.644718333289315e-05, + "loss": 0.0403, + "step": 10585 + }, + { + "epoch": 3.21, + "learning_rate": 8.642099723633935e-05, + "loss": 0.0475, + "step": 10586 + }, + { + "epoch": 3.21, + "learning_rate": 8.639481350155915e-05, + "loss": 0.0445, + "step": 10587 + }, + { + "epoch": 3.21, + "learning_rate": 8.636863212952503e-05, + "loss": 0.0503, + "step": 10588 + }, + { + "epoch": 3.22, + "learning_rate": 8.634245312120976e-05, + "loss": 0.0372, + "step": 10589 + }, + { + "epoch": 3.22, + "learning_rate": 8.631627647758555e-05, + "loss": 0.0485, + "step": 10590 + }, + { + "epoch": 3.22, + "learning_rate": 8.6290102199625e-05, + "loss": 0.0668, + "step": 10591 + }, + { + "epoch": 3.22, + "learning_rate": 8.626393028830025e-05, + "loss": 0.0541, + "step": 10592 + }, + { + "epoch": 3.22, + "learning_rate": 8.623776074458364e-05, + "loss": 0.0339, + "step": 10593 + }, + { + "epoch": 3.22, + "learning_rate": 8.621159356944726e-05, + "loss": 0.0169, + "step": 10594 + }, + { + "epoch": 3.22, + "learning_rate": 8.618542876386304e-05, + "loss": 0.073, + "step": 10595 + }, + { + "epoch": 3.22, + "learning_rate": 8.615926632880307e-05, + "loss": 0.0376, + "step": 10596 + }, + { + "epoch": 3.22, + "learning_rate": 8.613310626523909e-05, + "loss": 0.0238, + "step": 10597 + }, + { + "epoch": 3.22, + "learning_rate": 8.610694857414297e-05, + "loss": 0.0455, + "step": 10598 + }, + { + "epoch": 3.22, + "learning_rate": 8.608079325648637e-05, + "loss": 0.0982, + "step": 10599 + }, + { + "epoch": 3.22, + "learning_rate": 8.605464031324082e-05, + "loss": 0.0791, + "step": 10600 + }, + { + "epoch": 3.22, + "learning_rate": 8.602848974537785e-05, + "loss": 0.039, + "step": 10601 + }, + { + "epoch": 3.22, + "learning_rate": 8.600234155386887e-05, + "loss": 0.0569, + "step": 10602 + }, + { + "epoch": 3.22, + "learning_rate": 8.597619573968529e-05, + "loss": 0.0297, + "step": 10603 + }, + { + "epoch": 3.22, + "learning_rate": 8.595005230379822e-05, + "loss": 0.0513, + "step": 10604 + }, + { + "epoch": 3.22, + "learning_rate": 8.592391124717902e-05, + "loss": 0.0685, + "step": 10605 + }, + { + "epoch": 3.22, + "learning_rate": 8.589777257079847e-05, + "loss": 0.0614, + "step": 10606 + }, + { + "epoch": 3.22, + "learning_rate": 8.587163627562779e-05, + "loss": 0.0704, + "step": 10607 + }, + { + "epoch": 3.22, + "learning_rate": 8.584550236263768e-05, + "loss": 0.0532, + "step": 10608 + }, + { + "epoch": 3.22, + "learning_rate": 8.581937083279909e-05, + "loss": 0.026, + "step": 10609 + }, + { + "epoch": 3.22, + "learning_rate": 8.579324168708267e-05, + "loss": 0.0315, + "step": 10610 + }, + { + "epoch": 3.22, + "learning_rate": 8.576711492645897e-05, + "loss": 0.051, + "step": 10611 + }, + { + "epoch": 3.22, + "learning_rate": 8.574099055189865e-05, + "loss": 0.0826, + "step": 10612 + }, + { + "epoch": 3.22, + "learning_rate": 8.571486856437204e-05, + "loss": 0.0538, + "step": 10613 + }, + { + "epoch": 3.22, + "learning_rate": 8.568874896484961e-05, + "loss": 0.0358, + "step": 10614 + }, + { + "epoch": 3.22, + "learning_rate": 8.566263175430155e-05, + "loss": 0.0584, + "step": 10615 + }, + { + "epoch": 3.22, + "learning_rate": 8.563651693369805e-05, + "loss": 0.041, + "step": 10616 + }, + { + "epoch": 3.22, + "learning_rate": 8.561040450400916e-05, + "loss": 0.0498, + "step": 10617 + }, + { + "epoch": 3.22, + "learning_rate": 8.558429446620497e-05, + "loss": 0.0639, + "step": 10618 + }, + { + "epoch": 3.22, + "learning_rate": 8.55581868212553e-05, + "loss": 0.04, + "step": 10619 + }, + { + "epoch": 3.22, + "learning_rate": 8.553208157013007e-05, + "loss": 0.0075, + "step": 10620 + }, + { + "epoch": 3.22, + "learning_rate": 8.550597871379894e-05, + "loss": 0.0562, + "step": 10621 + }, + { + "epoch": 3.23, + "learning_rate": 8.547987825323153e-05, + "loss": 0.0325, + "step": 10622 + }, + { + "epoch": 3.23, + "learning_rate": 8.545378018939749e-05, + "loss": 0.0273, + "step": 10623 + }, + { + "epoch": 3.23, + "learning_rate": 8.542768452326619e-05, + "loss": 0.0397, + "step": 10624 + }, + { + "epoch": 3.23, + "learning_rate": 8.54015912558071e-05, + "loss": 0.0611, + "step": 10625 + }, + { + "epoch": 3.23, + "learning_rate": 8.537550038798945e-05, + "loss": 0.0381, + "step": 10626 + }, + { + "epoch": 3.23, + "learning_rate": 8.534941192078244e-05, + "loss": 0.0687, + "step": 10627 + }, + { + "epoch": 3.23, + "learning_rate": 8.53233258551552e-05, + "loss": 0.0186, + "step": 10628 + }, + { + "epoch": 3.23, + "learning_rate": 8.52972421920767e-05, + "loss": 0.0628, + "step": 10629 + }, + { + "epoch": 3.23, + "learning_rate": 8.527116093251603e-05, + "loss": 0.0795, + "step": 10630 + }, + { + "epoch": 3.23, + "learning_rate": 8.524508207744181e-05, + "loss": 0.0237, + "step": 10631 + }, + { + "epoch": 3.23, + "learning_rate": 8.521900562782294e-05, + "loss": 0.0435, + "step": 10632 + }, + { + "epoch": 3.23, + "learning_rate": 8.519293158462798e-05, + "loss": 0.0182, + "step": 10633 + }, + { + "epoch": 3.23, + "learning_rate": 8.516685994882563e-05, + "loss": 0.0181, + "step": 10634 + }, + { + "epoch": 3.23, + "learning_rate": 8.514079072138431e-05, + "loss": 0.0297, + "step": 10635 + }, + { + "epoch": 3.23, + "learning_rate": 8.511472390327237e-05, + "loss": 0.0893, + "step": 10636 + }, + { + "epoch": 3.23, + "learning_rate": 8.508865949545821e-05, + "loss": 0.0511, + "step": 10637 + }, + { + "epoch": 3.23, + "learning_rate": 8.506259749890995e-05, + "loss": 0.0405, + "step": 10638 + }, + { + "epoch": 3.23, + "learning_rate": 8.503653791459583e-05, + "loss": 0.0889, + "step": 10639 + }, + { + "epoch": 3.23, + "learning_rate": 8.501048074348381e-05, + "loss": 0.0533, + "step": 10640 + }, + { + "epoch": 3.23, + "learning_rate": 8.498442598654186e-05, + "loss": 0.0494, + "step": 10641 + }, + { + "epoch": 3.23, + "learning_rate": 8.495837364473777e-05, + "loss": 0.0571, + "step": 10642 + }, + { + "epoch": 3.23, + "learning_rate": 8.49323237190394e-05, + "loss": 0.0235, + "step": 10643 + }, + { + "epoch": 3.23, + "learning_rate": 8.490627621041435e-05, + "loss": 0.0311, + "step": 10644 + }, + { + "epoch": 3.23, + "learning_rate": 8.48802311198303e-05, + "loss": 0.0322, + "step": 10645 + }, + { + "epoch": 3.23, + "learning_rate": 8.485418844825473e-05, + "loss": 0.0199, + "step": 10646 + }, + { + "epoch": 3.23, + "learning_rate": 8.482814819665495e-05, + "loss": 0.0094, + "step": 10647 + }, + { + "epoch": 3.23, + "learning_rate": 8.48021103659984e-05, + "loss": 0.0407, + "step": 10648 + }, + { + "epoch": 3.23, + "learning_rate": 8.477607495725221e-05, + "loss": 0.0525, + "step": 10649 + }, + { + "epoch": 3.23, + "learning_rate": 8.475004197138362e-05, + "loss": 0.0557, + "step": 10650 + }, + { + "epoch": 3.23, + "learning_rate": 8.472401140935963e-05, + "loss": 0.0346, + "step": 10651 + }, + { + "epoch": 3.23, + "learning_rate": 8.469798327214714e-05, + "loss": 0.0362, + "step": 10652 + }, + { + "epoch": 3.23, + "learning_rate": 8.467195756071312e-05, + "loss": 0.0201, + "step": 10653 + }, + { + "epoch": 3.23, + "learning_rate": 8.464593427602427e-05, + "loss": 0.0571, + "step": 10654 + }, + { + "epoch": 3.24, + "learning_rate": 8.461991341904741e-05, + "loss": 0.0432, + "step": 10655 + }, + { + "epoch": 3.24, + "learning_rate": 8.459389499074893e-05, + "loss": 0.0282, + "step": 10656 + }, + { + "epoch": 3.24, + "learning_rate": 8.456787899209549e-05, + "loss": 0.007, + "step": 10657 + }, + { + "epoch": 3.24, + "learning_rate": 8.454186542405344e-05, + "loss": 0.0247, + "step": 10658 + }, + { + "epoch": 3.24, + "learning_rate": 8.451585428758918e-05, + "loss": 0.034, + "step": 10659 + }, + { + "epoch": 3.24, + "learning_rate": 8.448984558366889e-05, + "loss": 0.0494, + "step": 10660 + }, + { + "epoch": 3.24, + "learning_rate": 8.44638393132587e-05, + "loss": 0.0469, + "step": 10661 + }, + { + "epoch": 3.24, + "learning_rate": 8.443783547732473e-05, + "loss": 0.0377, + "step": 10662 + }, + { + "epoch": 3.24, + "learning_rate": 8.441183407683286e-05, + "loss": 0.047, + "step": 10663 + }, + { + "epoch": 3.24, + "learning_rate": 8.438583511274908e-05, + "loss": 0.0251, + "step": 10664 + }, + { + "epoch": 3.24, + "learning_rate": 8.435983858603912e-05, + "loss": 0.0182, + "step": 10665 + }, + { + "epoch": 3.24, + "learning_rate": 8.433384449766859e-05, + "loss": 0.0847, + "step": 10666 + }, + { + "epoch": 3.24, + "learning_rate": 8.430785284860325e-05, + "loss": 0.0321, + "step": 10667 + }, + { + "epoch": 3.24, + "learning_rate": 8.428186363980847e-05, + "loss": 0.0207, + "step": 10668 + }, + { + "epoch": 3.24, + "learning_rate": 8.42558768722498e-05, + "loss": 0.026, + "step": 10669 + }, + { + "epoch": 3.24, + "learning_rate": 8.422989254689244e-05, + "loss": 0.0557, + "step": 10670 + }, + { + "epoch": 3.24, + "learning_rate": 8.420391066470184e-05, + "loss": 0.0144, + "step": 10671 + }, + { + "epoch": 3.24, + "learning_rate": 8.41779312266429e-05, + "loss": 0.0586, + "step": 10672 + }, + { + "epoch": 3.24, + "learning_rate": 8.415195423368083e-05, + "loss": 0.0366, + "step": 10673 + }, + { + "epoch": 3.24, + "learning_rate": 8.412597968678051e-05, + "loss": 0.0012, + "step": 10674 + }, + { + "epoch": 3.24, + "learning_rate": 8.410000758690693e-05, + "loss": 0.0312, + "step": 10675 + }, + { + "epoch": 3.24, + "learning_rate": 8.407403793502481e-05, + "loss": 0.0261, + "step": 10676 + }, + { + "epoch": 3.24, + "learning_rate": 8.404807073209881e-05, + "loss": 0.0486, + "step": 10677 + }, + { + "epoch": 3.24, + "learning_rate": 8.402210597909364e-05, + "loss": 0.0476, + "step": 10678 + }, + { + "epoch": 3.24, + "learning_rate": 8.399614367697369e-05, + "loss": 0.0313, + "step": 10679 + }, + { + "epoch": 3.24, + "learning_rate": 8.397018382670349e-05, + "loss": 0.0484, + "step": 10680 + }, + { + "epoch": 3.24, + "learning_rate": 8.394422642924735e-05, + "loss": 0.0428, + "step": 10681 + }, + { + "epoch": 3.24, + "learning_rate": 8.391827148556949e-05, + "loss": 0.0706, + "step": 10682 + }, + { + "epoch": 3.24, + "learning_rate": 8.3892318996634e-05, + "loss": 0.01, + "step": 10683 + }, + { + "epoch": 3.24, + "learning_rate": 8.386636896340505e-05, + "loss": 0.0314, + "step": 10684 + }, + { + "epoch": 3.24, + "learning_rate": 8.384042138684652e-05, + "loss": 0.0487, + "step": 10685 + }, + { + "epoch": 3.24, + "learning_rate": 8.381447626792237e-05, + "loss": 0.0299, + "step": 10686 + }, + { + "epoch": 3.24, + "learning_rate": 8.378853360759633e-05, + "loss": 0.0608, + "step": 10687 + }, + { + "epoch": 3.25, + "learning_rate": 8.376259340683206e-05, + "loss": 0.0423, + "step": 10688 + }, + { + "epoch": 3.25, + "learning_rate": 8.373665566659326e-05, + "loss": 0.0211, + "step": 10689 + }, + { + "epoch": 3.25, + "learning_rate": 8.371072038784333e-05, + "loss": 0.0318, + "step": 10690 + }, + { + "epoch": 3.25, + "learning_rate": 8.368478757154579e-05, + "loss": 0.035, + "step": 10691 + }, + { + "epoch": 3.25, + "learning_rate": 8.365885721866395e-05, + "loss": 0.04, + "step": 10692 + }, + { + "epoch": 3.25, + "learning_rate": 8.363292933016095e-05, + "loss": 0.0516, + "step": 10693 + }, + { + "epoch": 3.25, + "learning_rate": 8.360700390700005e-05, + "loss": 0.0431, + "step": 10694 + }, + { + "epoch": 3.25, + "learning_rate": 8.358108095014421e-05, + "loss": 0.0574, + "step": 10695 + }, + { + "epoch": 3.25, + "learning_rate": 8.355516046055655e-05, + "loss": 0.0454, + "step": 10696 + }, + { + "epoch": 3.25, + "learning_rate": 8.352924243919973e-05, + "loss": 0.0526, + "step": 10697 + }, + { + "epoch": 3.25, + "learning_rate": 8.350332688703668e-05, + "loss": 0.0642, + "step": 10698 + }, + { + "epoch": 3.25, + "learning_rate": 8.347741380502998e-05, + "loss": 0.0373, + "step": 10699 + }, + { + "epoch": 3.25, + "learning_rate": 8.345150319414232e-05, + "loss": 0.0406, + "step": 10700 + }, + { + "epoch": 3.25, + "learning_rate": 8.34255950553362e-05, + "loss": 0.0335, + "step": 10701 + }, + { + "epoch": 3.25, + "learning_rate": 8.339968938957392e-05, + "loss": 0.0357, + "step": 10702 + }, + { + "epoch": 3.25, + "learning_rate": 8.337378619781794e-05, + "loss": 0.0313, + "step": 10703 + }, + { + "epoch": 3.25, + "learning_rate": 8.334788548103035e-05, + "loss": 0.0637, + "step": 10704 + }, + { + "epoch": 3.25, + "learning_rate": 8.332198724017342e-05, + "loss": 0.0421, + "step": 10705 + }, + { + "epoch": 3.25, + "learning_rate": 8.329609147620915e-05, + "loss": 0.0597, + "step": 10706 + }, + { + "epoch": 3.25, + "learning_rate": 8.327019819009939e-05, + "loss": 0.036, + "step": 10707 + }, + { + "epoch": 3.25, + "learning_rate": 8.324430738280616e-05, + "loss": 0.0574, + "step": 10708 + }, + { + "epoch": 3.25, + "learning_rate": 8.321841905529108e-05, + "loss": 0.038, + "step": 10709 + }, + { + "epoch": 3.25, + "learning_rate": 8.319253320851598e-05, + "loss": 0.0974, + "step": 10710 + }, + { + "epoch": 3.25, + "learning_rate": 8.316664984344232e-05, + "loss": 0.0527, + "step": 10711 + }, + { + "epoch": 3.25, + "learning_rate": 8.314076896103168e-05, + "loss": 0.0337, + "step": 10712 + }, + { + "epoch": 3.25, + "learning_rate": 8.31148905622453e-05, + "loss": 0.0656, + "step": 10713 + }, + { + "epoch": 3.25, + "learning_rate": 8.30890146480447e-05, + "loss": 0.0261, + "step": 10714 + }, + { + "epoch": 3.25, + "learning_rate": 8.30631412193909e-05, + "loss": 0.0522, + "step": 10715 + }, + { + "epoch": 3.25, + "learning_rate": 8.30372702772452e-05, + "loss": 0.0522, + "step": 10716 + }, + { + "epoch": 3.25, + "learning_rate": 8.301140182256854e-05, + "loss": 0.0481, + "step": 10717 + }, + { + "epoch": 3.25, + "learning_rate": 8.298553585632176e-05, + "loss": 0.0239, + "step": 10718 + }, + { + "epoch": 3.25, + "learning_rate": 8.295967237946591e-05, + "loss": 0.0606, + "step": 10719 + }, + { + "epoch": 3.25, + "learning_rate": 8.293381139296155e-05, + "loss": 0.0478, + "step": 10720 + }, + { + "epoch": 3.26, + "learning_rate": 8.290795289776956e-05, + "loss": 0.0317, + "step": 10721 + }, + { + "epoch": 3.26, + "learning_rate": 8.288209689485025e-05, + "loss": 0.0495, + "step": 10722 + }, + { + "epoch": 3.26, + "learning_rate": 8.285624338516428e-05, + "loss": 0.0347, + "step": 10723 + }, + { + "epoch": 3.26, + "learning_rate": 8.283039236967191e-05, + "loss": 0.0425, + "step": 10724 + }, + { + "epoch": 3.26, + "learning_rate": 8.280454384933356e-05, + "loss": 0.0574, + "step": 10725 + }, + { + "epoch": 3.26, + "learning_rate": 8.277869782510936e-05, + "loss": 0.0067, + "step": 10726 + }, + { + "epoch": 3.26, + "learning_rate": 8.275285429795933e-05, + "loss": 0.06, + "step": 10727 + }, + { + "epoch": 3.26, + "learning_rate": 8.272701326884361e-05, + "loss": 0.052, + "step": 10728 + }, + { + "epoch": 3.26, + "learning_rate": 8.270117473872205e-05, + "loss": 0.0654, + "step": 10729 + }, + { + "epoch": 3.26, + "learning_rate": 8.267533870855453e-05, + "loss": 0.0806, + "step": 10730 + }, + { + "epoch": 3.26, + "learning_rate": 8.26495051793007e-05, + "loss": 0.0769, + "step": 10731 + }, + { + "epoch": 3.26, + "learning_rate": 8.26236741519203e-05, + "loss": 0.0118, + "step": 10732 + }, + { + "epoch": 3.26, + "learning_rate": 8.259784562737282e-05, + "loss": 0.043, + "step": 10733 + }, + { + "epoch": 3.26, + "learning_rate": 8.257201960661769e-05, + "loss": 0.0316, + "step": 10734 + }, + { + "epoch": 3.26, + "learning_rate": 8.254619609061435e-05, + "loss": 0.0689, + "step": 10735 + }, + { + "epoch": 3.26, + "learning_rate": 8.252037508032193e-05, + "loss": 0.0307, + "step": 10736 + }, + { + "epoch": 3.26, + "learning_rate": 8.249455657669984e-05, + "loss": 0.0003, + "step": 10737 + }, + { + "epoch": 3.26, + "learning_rate": 8.246874058070688e-05, + "loss": 0.0236, + "step": 10738 + }, + { + "epoch": 3.26, + "learning_rate": 8.244292709330224e-05, + "loss": 0.0511, + "step": 10739 + }, + { + "epoch": 3.26, + "learning_rate": 8.241711611544471e-05, + "loss": 0.0293, + "step": 10740 + }, + { + "epoch": 3.26, + "learning_rate": 8.239130764809316e-05, + "loss": 0.0605, + "step": 10741 + }, + { + "epoch": 3.26, + "learning_rate": 8.23655016922063e-05, + "loss": 0.067, + "step": 10742 + }, + { + "epoch": 3.26, + "learning_rate": 8.233969824874264e-05, + "loss": 0.0497, + "step": 10743 + }, + { + "epoch": 3.26, + "learning_rate": 8.231389731866082e-05, + "loss": 0.0199, + "step": 10744 + }, + { + "epoch": 3.26, + "learning_rate": 8.22880989029192e-05, + "loss": 0.0307, + "step": 10745 + }, + { + "epoch": 3.26, + "learning_rate": 8.226230300247618e-05, + "loss": 0.0606, + "step": 10746 + }, + { + "epoch": 3.26, + "learning_rate": 8.223650961828997e-05, + "loss": 0.0258, + "step": 10747 + }, + { + "epoch": 3.26, + "learning_rate": 8.221071875131872e-05, + "loss": 0.0382, + "step": 10748 + }, + { + "epoch": 3.26, + "learning_rate": 8.218493040252039e-05, + "loss": 0.0213, + "step": 10749 + }, + { + "epoch": 3.26, + "learning_rate": 8.215914457285305e-05, + "loss": 0.0111, + "step": 10750 + }, + { + "epoch": 3.26, + "learning_rate": 8.213336126327462e-05, + "loss": 0.0318, + "step": 10751 + }, + { + "epoch": 3.26, + "learning_rate": 8.21075804747428e-05, + "loss": 0.053, + "step": 10752 + }, + { + "epoch": 3.26, + "learning_rate": 8.208180220821524e-05, + "loss": 0.06, + "step": 10753 + }, + { + "epoch": 3.27, + "learning_rate": 8.205602646464952e-05, + "loss": 0.0427, + "step": 10754 + }, + { + "epoch": 3.27, + "learning_rate": 8.203025324500325e-05, + "loss": 0.0284, + "step": 10755 + }, + { + "epoch": 3.27, + "learning_rate": 8.200448255023367e-05, + "loss": 0.0721, + "step": 10756 + }, + { + "epoch": 3.27, + "learning_rate": 8.197871438129822e-05, + "loss": 0.0721, + "step": 10757 + }, + { + "epoch": 3.27, + "learning_rate": 8.195294873915408e-05, + "loss": 0.0412, + "step": 10758 + }, + { + "epoch": 3.27, + "learning_rate": 8.192718562475828e-05, + "loss": 0.0349, + "step": 10759 + }, + { + "epoch": 3.27, + "learning_rate": 8.190142503906798e-05, + "loss": 0.0435, + "step": 10760 + }, + { + "epoch": 3.27, + "learning_rate": 8.187566698303998e-05, + "loss": 0.0115, + "step": 10761 + }, + { + "epoch": 3.27, + "learning_rate": 8.18499114576313e-05, + "loss": 0.0439, + "step": 10762 + }, + { + "epoch": 3.27, + "learning_rate": 8.182415846379843e-05, + "loss": 0.0336, + "step": 10763 + }, + { + "epoch": 3.27, + "learning_rate": 8.179840800249822e-05, + "loss": 0.0632, + "step": 10764 + }, + { + "epoch": 3.27, + "learning_rate": 8.177266007468709e-05, + "loss": 0.0411, + "step": 10765 + }, + { + "epoch": 3.27, + "learning_rate": 8.174691468132162e-05, + "loss": 0.0263, + "step": 10766 + }, + { + "epoch": 3.27, + "learning_rate": 8.172117182335811e-05, + "loss": 0.0477, + "step": 10767 + }, + { + "epoch": 3.27, + "learning_rate": 8.16954315017528e-05, + "loss": 0.0463, + "step": 10768 + }, + { + "epoch": 3.27, + "learning_rate": 8.166969371746195e-05, + "loss": 0.045, + "step": 10769 + }, + { + "epoch": 3.27, + "learning_rate": 8.164395847144156e-05, + "loss": 0.0666, + "step": 10770 + }, + { + "epoch": 3.27, + "learning_rate": 8.161822576464773e-05, + "loss": 0.1385, + "step": 10771 + }, + { + "epoch": 3.27, + "learning_rate": 8.159249559803626e-05, + "loss": 0.0237, + "step": 10772 + }, + { + "epoch": 3.27, + "learning_rate": 8.156676797256291e-05, + "loss": 0.0604, + "step": 10773 + }, + { + "epoch": 3.27, + "learning_rate": 8.154104288918352e-05, + "loss": 0.018, + "step": 10774 + }, + { + "epoch": 3.27, + "learning_rate": 8.151532034885359e-05, + "loss": 0.0864, + "step": 10775 + }, + { + "epoch": 3.27, + "learning_rate": 8.148960035252872e-05, + "loss": 0.0634, + "step": 10776 + }, + { + "epoch": 3.27, + "learning_rate": 8.146388290116429e-05, + "loss": 0.0823, + "step": 10777 + }, + { + "epoch": 3.27, + "learning_rate": 8.143816799571565e-05, + "loss": 0.0291, + "step": 10778 + }, + { + "epoch": 3.27, + "learning_rate": 8.141245563713794e-05, + "loss": 0.0368, + "step": 10779 + }, + { + "epoch": 3.27, + "learning_rate": 8.138674582638642e-05, + "loss": 0.0603, + "step": 10780 + }, + { + "epoch": 3.27, + "learning_rate": 8.136103856441605e-05, + "loss": 0.0457, + "step": 10781 + }, + { + "epoch": 3.27, + "learning_rate": 8.133533385218185e-05, + "loss": 0.0883, + "step": 10782 + }, + { + "epoch": 3.27, + "learning_rate": 8.130963169063866e-05, + "loss": 0.0149, + "step": 10783 + }, + { + "epoch": 3.27, + "learning_rate": 8.128393208074115e-05, + "loss": 0.029, + "step": 10784 + }, + { + "epoch": 3.27, + "learning_rate": 8.12582350234441e-05, + "loss": 0.019, + "step": 10785 + }, + { + "epoch": 3.27, + "learning_rate": 8.123254051970202e-05, + "loss": 0.0498, + "step": 10786 + }, + { + "epoch": 3.28, + "learning_rate": 8.120684857046947e-05, + "loss": 0.0779, + "step": 10787 + }, + { + "epoch": 3.28, + "learning_rate": 8.118115917670068e-05, + "loss": 0.0145, + "step": 10788 + }, + { + "epoch": 3.28, + "learning_rate": 8.115547233935005e-05, + "loss": 0.0358, + "step": 10789 + }, + { + "epoch": 3.28, + "learning_rate": 8.112978805937171e-05, + "loss": 0.033, + "step": 10790 + }, + { + "epoch": 3.28, + "learning_rate": 8.110410633771975e-05, + "loss": 0.065, + "step": 10791 + }, + { + "epoch": 3.28, + "learning_rate": 8.107842717534827e-05, + "loss": 0.0469, + "step": 10792 + }, + { + "epoch": 3.28, + "learning_rate": 8.105275057321112e-05, + "loss": 0.0504, + "step": 10793 + }, + { + "epoch": 3.28, + "learning_rate": 8.10270765322621e-05, + "loss": 0.0436, + "step": 10794 + }, + { + "epoch": 3.28, + "learning_rate": 8.100140505345485e-05, + "loss": 0.0392, + "step": 10795 + }, + { + "epoch": 3.28, + "learning_rate": 8.097573613774315e-05, + "loss": 0.016, + "step": 10796 + }, + { + "epoch": 3.28, + "learning_rate": 8.095006978608037e-05, + "loss": 0.1005, + "step": 10797 + }, + { + "epoch": 3.28, + "learning_rate": 8.092440599942005e-05, + "loss": 0.0135, + "step": 10798 + }, + { + "epoch": 3.28, + "learning_rate": 8.089874477871552e-05, + "loss": 0.0424, + "step": 10799 + }, + { + "epoch": 3.28, + "learning_rate": 8.08730861249199e-05, + "loss": 0.0178, + "step": 10800 + }, + { + "epoch": 3.28, + "learning_rate": 8.084743003898648e-05, + "loss": 0.0348, + "step": 10801 + }, + { + "epoch": 3.28, + "learning_rate": 8.08217765218682e-05, + "loss": 0.0354, + "step": 10802 + }, + { + "epoch": 3.28, + "learning_rate": 8.079612557451817e-05, + "loss": 0.0477, + "step": 10803 + }, + { + "epoch": 3.28, + "learning_rate": 8.0770477197889e-05, + "loss": 0.0469, + "step": 10804 + }, + { + "epoch": 3.28, + "learning_rate": 8.074483139293366e-05, + "loss": 0.0626, + "step": 10805 + }, + { + "epoch": 3.28, + "learning_rate": 8.071918816060469e-05, + "loss": 0.0272, + "step": 10806 + }, + { + "epoch": 3.28, + "learning_rate": 8.069354750185477e-05, + "loss": 0.0319, + "step": 10807 + }, + { + "epoch": 3.28, + "learning_rate": 8.066790941763635e-05, + "loss": 0.0153, + "step": 10808 + }, + { + "epoch": 3.28, + "learning_rate": 8.064227390890168e-05, + "loss": 0.0635, + "step": 10809 + }, + { + "epoch": 3.28, + "learning_rate": 8.06166409766032e-05, + "loss": 0.0438, + "step": 10810 + }, + { + "epoch": 3.28, + "learning_rate": 8.059101062169301e-05, + "loss": 0.0422, + "step": 10811 + }, + { + "epoch": 3.28, + "learning_rate": 8.05653828451233e-05, + "loss": 0.0189, + "step": 10812 + }, + { + "epoch": 3.28, + "learning_rate": 8.053975764784599e-05, + "loss": 0.0638, + "step": 10813 + }, + { + "epoch": 3.28, + "learning_rate": 8.051413503081293e-05, + "loss": 0.0859, + "step": 10814 + }, + { + "epoch": 3.28, + "learning_rate": 8.048851499497605e-05, + "loss": 0.0265, + "step": 10815 + }, + { + "epoch": 3.28, + "learning_rate": 8.046289754128693e-05, + "loss": 0.0351, + "step": 10816 + }, + { + "epoch": 3.28, + "learning_rate": 8.043728267069732e-05, + "loss": 0.0447, + "step": 10817 + }, + { + "epoch": 3.28, + "learning_rate": 8.041167038415865e-05, + "loss": 0.0367, + "step": 10818 + }, + { + "epoch": 3.28, + "learning_rate": 8.03860606826224e-05, + "loss": 0.0474, + "step": 10819 + }, + { + "epoch": 3.29, + "learning_rate": 8.036045356703976e-05, + "loss": 0.0442, + "step": 10820 + }, + { + "epoch": 3.29, + "learning_rate": 8.033484903836211e-05, + "loss": 0.047, + "step": 10821 + }, + { + "epoch": 3.29, + "learning_rate": 8.03092470975405e-05, + "loss": 0.0612, + "step": 10822 + }, + { + "epoch": 3.29, + "learning_rate": 8.028364774552602e-05, + "loss": 0.0369, + "step": 10823 + }, + { + "epoch": 3.29, + "learning_rate": 8.025805098326961e-05, + "loss": 0.0556, + "step": 10824 + }, + { + "epoch": 3.29, + "learning_rate": 8.023245681172201e-05, + "loss": 0.0474, + "step": 10825 + }, + { + "epoch": 3.29, + "learning_rate": 8.020686523183411e-05, + "loss": 0.0501, + "step": 10826 + }, + { + "epoch": 3.29, + "learning_rate": 8.018127624455646e-05, + "loss": 0.0538, + "step": 10827 + }, + { + "epoch": 3.29, + "learning_rate": 8.015568985083975e-05, + "loss": 0.0062, + "step": 10828 + }, + { + "epoch": 3.29, + "learning_rate": 8.013010605163424e-05, + "loss": 0.0399, + "step": 10829 + }, + { + "epoch": 3.29, + "learning_rate": 8.010452484789044e-05, + "loss": 0.0386, + "step": 10830 + }, + { + "epoch": 3.29, + "learning_rate": 8.007894624055854e-05, + "loss": 0.0389, + "step": 10831 + }, + { + "epoch": 3.29, + "learning_rate": 8.005337023058874e-05, + "loss": 0.0558, + "step": 10832 + }, + { + "epoch": 3.29, + "learning_rate": 8.002779681893123e-05, + "loss": 0.024, + "step": 10833 + }, + { + "epoch": 3.29, + "learning_rate": 8.000222600653576e-05, + "loss": 0.0203, + "step": 10834 + }, + { + "epoch": 3.29, + "learning_rate": 7.997665779435239e-05, + "loss": 0.0655, + "step": 10835 + }, + { + "epoch": 3.29, + "learning_rate": 7.995109218333079e-05, + "loss": 0.0246, + "step": 10836 + }, + { + "epoch": 3.29, + "learning_rate": 7.992552917442073e-05, + "loss": 0.0665, + "step": 10837 + }, + { + "epoch": 3.29, + "learning_rate": 7.98999687685718e-05, + "loss": 0.0398, + "step": 10838 + }, + { + "epoch": 3.29, + "learning_rate": 7.98744109667334e-05, + "loss": 0.0261, + "step": 10839 + }, + { + "epoch": 3.29, + "learning_rate": 7.984885576985503e-05, + "loss": 0.0442, + "step": 10840 + }, + { + "epoch": 3.29, + "learning_rate": 7.98233031788859e-05, + "loss": 0.0348, + "step": 10841 + }, + { + "epoch": 3.29, + "learning_rate": 7.979775319477533e-05, + "loss": 0.036, + "step": 10842 + }, + { + "epoch": 3.29, + "learning_rate": 7.977220581847234e-05, + "loss": 0.008, + "step": 10843 + }, + { + "epoch": 3.29, + "learning_rate": 7.974666105092601e-05, + "loss": 0.0522, + "step": 10844 + }, + { + "epoch": 3.29, + "learning_rate": 7.972111889308512e-05, + "loss": 0.0338, + "step": 10845 + }, + { + "epoch": 3.29, + "learning_rate": 7.969557934589864e-05, + "loss": 0.0537, + "step": 10846 + }, + { + "epoch": 3.29, + "learning_rate": 7.967004241031514e-05, + "loss": 0.0352, + "step": 10847 + }, + { + "epoch": 3.29, + "learning_rate": 7.964450808728339e-05, + "loss": 0.0558, + "step": 10848 + }, + { + "epoch": 3.29, + "learning_rate": 7.961897637775185e-05, + "loss": 0.0129, + "step": 10849 + }, + { + "epoch": 3.29, + "learning_rate": 7.95934472826689e-05, + "loss": 0.0516, + "step": 10850 + }, + { + "epoch": 3.29, + "learning_rate": 7.956792080298297e-05, + "loss": 0.0127, + "step": 10851 + }, + { + "epoch": 3.29, + "learning_rate": 7.954239693964217e-05, + "loss": 0.0555, + "step": 10852 + }, + { + "epoch": 3.3, + "learning_rate": 7.951687569359479e-05, + "loss": 0.0192, + "step": 10853 + }, + { + "epoch": 3.3, + "learning_rate": 7.949135706578878e-05, + "loss": 0.0406, + "step": 10854 + }, + { + "epoch": 3.3, + "learning_rate": 7.946584105717203e-05, + "loss": 0.0334, + "step": 10855 + }, + { + "epoch": 3.3, + "learning_rate": 7.94403276686925e-05, + "loss": 0.0491, + "step": 10856 + }, + { + "epoch": 3.3, + "learning_rate": 7.941481690129784e-05, + "loss": 0.0451, + "step": 10857 + }, + { + "epoch": 3.3, + "learning_rate": 7.938930875593578e-05, + "loss": 0.0183, + "step": 10858 + }, + { + "epoch": 3.3, + "learning_rate": 7.936380323355384e-05, + "loss": 0.0548, + "step": 10859 + }, + { + "epoch": 3.3, + "learning_rate": 7.933830033509947e-05, + "loss": 0.0196, + "step": 10860 + }, + { + "epoch": 3.3, + "learning_rate": 7.931280006151998e-05, + "loss": 0.0726, + "step": 10861 + }, + { + "epoch": 3.3, + "learning_rate": 7.928730241376273e-05, + "loss": 0.0355, + "step": 10862 + }, + { + "epoch": 3.3, + "learning_rate": 7.926180739277476e-05, + "loss": 0.0624, + "step": 10863 + }, + { + "epoch": 3.3, + "learning_rate": 7.923631499950331e-05, + "loss": 0.0498, + "step": 10864 + }, + { + "epoch": 3.3, + "learning_rate": 7.921082523489521e-05, + "loss": 0.0481, + "step": 10865 + }, + { + "epoch": 3.3, + "learning_rate": 7.918533809989732e-05, + "loss": 0.046, + "step": 10866 + }, + { + "epoch": 3.3, + "learning_rate": 7.915985359545652e-05, + "loss": 0.0604, + "step": 10867 + }, + { + "epoch": 3.3, + "learning_rate": 7.913437172251933e-05, + "loss": 0.0584, + "step": 10868 + }, + { + "epoch": 3.3, + "learning_rate": 7.910889248203257e-05, + "loss": 0.0253, + "step": 10869 + }, + { + "epoch": 3.3, + "learning_rate": 7.908341587494244e-05, + "loss": 0.0153, + "step": 10870 + }, + { + "epoch": 3.3, + "learning_rate": 7.905794190219549e-05, + "loss": 0.0422, + "step": 10871 + }, + { + "epoch": 3.3, + "learning_rate": 7.903247056473794e-05, + "loss": 0.0335, + "step": 10872 + }, + { + "epoch": 3.3, + "learning_rate": 7.900700186351597e-05, + "loss": 0.0236, + "step": 10873 + }, + { + "epoch": 3.3, + "learning_rate": 7.898153579947581e-05, + "loss": 0.0256, + "step": 10874 + }, + { + "epoch": 3.3, + "learning_rate": 7.895607237356325e-05, + "loss": 0.0383, + "step": 10875 + }, + { + "epoch": 3.3, + "learning_rate": 7.893061158672429e-05, + "loss": 0.0477, + "step": 10876 + }, + { + "epoch": 3.3, + "learning_rate": 7.890515343990467e-05, + "loss": 0.0228, + "step": 10877 + }, + { + "epoch": 3.3, + "learning_rate": 7.887969793405014e-05, + "loss": 0.0041, + "step": 10878 + }, + { + "epoch": 3.3, + "learning_rate": 7.88542450701063e-05, + "loss": 0.0312, + "step": 10879 + }, + { + "epoch": 3.3, + "learning_rate": 7.882879484901857e-05, + "loss": 0.0512, + "step": 10880 + }, + { + "epoch": 3.3, + "learning_rate": 7.880334727173245e-05, + "loss": 0.0786, + "step": 10881 + }, + { + "epoch": 3.3, + "learning_rate": 7.877790233919313e-05, + "loss": 0.0626, + "step": 10882 + }, + { + "epoch": 3.3, + "learning_rate": 7.875246005234594e-05, + "loss": 0.0097, + "step": 10883 + }, + { + "epoch": 3.3, + "learning_rate": 7.872702041213594e-05, + "loss": 0.0492, + "step": 10884 + }, + { + "epoch": 3.3, + "learning_rate": 7.870158341950813e-05, + "loss": 0.0446, + "step": 10885 + }, + { + "epoch": 3.31, + "learning_rate": 7.867614907540735e-05, + "loss": 0.0471, + "step": 10886 + }, + { + "epoch": 3.31, + "learning_rate": 7.865071738077853e-05, + "loss": 0.0412, + "step": 10887 + }, + { + "epoch": 3.31, + "learning_rate": 7.862528833656628e-05, + "loss": 0.0586, + "step": 10888 + }, + { + "epoch": 3.31, + "learning_rate": 7.85998619437153e-05, + "loss": 0.0628, + "step": 10889 + }, + { + "epoch": 3.31, + "learning_rate": 7.857443820317009e-05, + "loss": 0.0613, + "step": 10890 + }, + { + "epoch": 3.31, + "learning_rate": 7.854901711587498e-05, + "loss": 0.0772, + "step": 10891 + }, + { + "epoch": 3.31, + "learning_rate": 7.852359868277441e-05, + "loss": 0.0128, + "step": 10892 + }, + { + "epoch": 3.31, + "learning_rate": 7.84981829048125e-05, + "loss": 0.0174, + "step": 10893 + }, + { + "epoch": 3.31, + "learning_rate": 7.847276978293354e-05, + "loss": 0.0425, + "step": 10894 + }, + { + "epoch": 3.31, + "learning_rate": 7.84473593180813e-05, + "loss": 0.0305, + "step": 10895 + }, + { + "epoch": 3.31, + "learning_rate": 7.842195151119984e-05, + "loss": 0.059, + "step": 10896 + }, + { + "epoch": 3.31, + "learning_rate": 7.839654636323303e-05, + "loss": 0.0398, + "step": 10897 + }, + { + "epoch": 3.31, + "learning_rate": 7.837114387512451e-05, + "loss": 0.0506, + "step": 10898 + }, + { + "epoch": 3.31, + "learning_rate": 7.834574404781805e-05, + "loss": 0.0462, + "step": 10899 + }, + { + "epoch": 3.31, + "learning_rate": 7.832034688225695e-05, + "loss": 0.0768, + "step": 10900 + }, + { + "epoch": 3.31, + "learning_rate": 7.829495237938481e-05, + "loss": 0.0506, + "step": 10901 + }, + { + "epoch": 3.31, + "learning_rate": 7.826956054014489e-05, + "loss": 0.0483, + "step": 10902 + }, + { + "epoch": 3.31, + "learning_rate": 7.82441713654805e-05, + "loss": 0.047, + "step": 10903 + }, + { + "epoch": 3.31, + "learning_rate": 7.821878485633471e-05, + "loss": 0.0455, + "step": 10904 + }, + { + "epoch": 3.31, + "learning_rate": 7.819340101365051e-05, + "loss": 0.0447, + "step": 10905 + }, + { + "epoch": 3.31, + "learning_rate": 7.816801983837094e-05, + "loss": 0.0347, + "step": 10906 + }, + { + "epoch": 3.31, + "learning_rate": 7.814264133143875e-05, + "loss": 0.0711, + "step": 10907 + }, + { + "epoch": 3.31, + "learning_rate": 7.811726549379676e-05, + "loss": 0.051, + "step": 10908 + }, + { + "epoch": 3.31, + "learning_rate": 7.809189232638757e-05, + "loss": 0.0346, + "step": 10909 + }, + { + "epoch": 3.31, + "learning_rate": 7.806652183015372e-05, + "loss": 0.0384, + "step": 10910 + }, + { + "epoch": 3.31, + "learning_rate": 7.804115400603756e-05, + "loss": 0.0416, + "step": 10911 + }, + { + "epoch": 3.31, + "learning_rate": 7.801578885498156e-05, + "loss": 0.0613, + "step": 10912 + }, + { + "epoch": 3.31, + "learning_rate": 7.79904263779279e-05, + "loss": 0.0529, + "step": 10913 + }, + { + "epoch": 3.31, + "learning_rate": 7.796506657581875e-05, + "loss": 0.0363, + "step": 10914 + }, + { + "epoch": 3.31, + "learning_rate": 7.793970944959614e-05, + "loss": 0.0299, + "step": 10915 + }, + { + "epoch": 3.31, + "learning_rate": 7.791435500020196e-05, + "loss": 0.0428, + "step": 10916 + }, + { + "epoch": 3.31, + "learning_rate": 7.788900322857817e-05, + "loss": 0.0259, + "step": 10917 + }, + { + "epoch": 3.32, + "learning_rate": 7.786365413566639e-05, + "loss": 0.0208, + "step": 10918 + }, + { + "epoch": 3.32, + "learning_rate": 7.783830772240836e-05, + "loss": 0.044, + "step": 10919 + }, + { + "epoch": 3.32, + "learning_rate": 7.781296398974559e-05, + "loss": 0.0138, + "step": 10920 + }, + { + "epoch": 3.32, + "learning_rate": 7.778762293861949e-05, + "loss": 0.0434, + "step": 10921 + }, + { + "epoch": 3.32, + "learning_rate": 7.776228456997146e-05, + "loss": 0.0545, + "step": 10922 + }, + { + "epoch": 3.32, + "learning_rate": 7.773694888474267e-05, + "loss": 0.0284, + "step": 10923 + }, + { + "epoch": 3.32, + "learning_rate": 7.77116158838744e-05, + "loss": 0.0681, + "step": 10924 + }, + { + "epoch": 3.32, + "learning_rate": 7.768628556830761e-05, + "loss": 0.0455, + "step": 10925 + }, + { + "epoch": 3.32, + "learning_rate": 7.766095793898324e-05, + "loss": 0.0539, + "step": 10926 + }, + { + "epoch": 3.32, + "learning_rate": 7.763563299684207e-05, + "loss": 0.023, + "step": 10927 + }, + { + "epoch": 3.32, + "learning_rate": 7.761031074282502e-05, + "loss": 0.0448, + "step": 10928 + }, + { + "epoch": 3.32, + "learning_rate": 7.758499117787257e-05, + "loss": 0.0355, + "step": 10929 + }, + { + "epoch": 3.32, + "learning_rate": 7.755967430292542e-05, + "loss": 0.0698, + "step": 10930 + }, + { + "epoch": 3.32, + "learning_rate": 7.753436011892392e-05, + "loss": 0.0739, + "step": 10931 + }, + { + "epoch": 3.32, + "learning_rate": 7.75090486268084e-05, + "loss": 0.0573, + "step": 10932 + }, + { + "epoch": 3.32, + "learning_rate": 7.748373982751918e-05, + "loss": 0.0852, + "step": 10933 + }, + { + "epoch": 3.32, + "learning_rate": 7.745843372199632e-05, + "loss": 0.0297, + "step": 10934 + }, + { + "epoch": 3.32, + "learning_rate": 7.743313031118006e-05, + "loss": 0.0435, + "step": 10935 + }, + { + "epoch": 3.32, + "learning_rate": 7.740782959601008e-05, + "loss": 0.0706, + "step": 10936 + }, + { + "epoch": 3.32, + "learning_rate": 7.738253157742635e-05, + "loss": 0.0163, + "step": 10937 + }, + { + "epoch": 3.32, + "learning_rate": 7.735723625636869e-05, + "loss": 0.0323, + "step": 10938 + }, + { + "epoch": 3.32, + "learning_rate": 7.733194363377661e-05, + "loss": 0.0199, + "step": 10939 + }, + { + "epoch": 3.32, + "learning_rate": 7.730665371058985e-05, + "loss": 0.0464, + "step": 10940 + }, + { + "epoch": 3.32, + "learning_rate": 7.728136648774761e-05, + "loss": 0.0395, + "step": 10941 + }, + { + "epoch": 3.32, + "learning_rate": 7.725608196618942e-05, + "loss": 0.0242, + "step": 10942 + }, + { + "epoch": 3.32, + "learning_rate": 7.723080014685442e-05, + "loss": 0.0339, + "step": 10943 + }, + { + "epoch": 3.32, + "learning_rate": 7.720552103068185e-05, + "loss": 0.0389, + "step": 10944 + }, + { + "epoch": 3.32, + "learning_rate": 7.718024461861069e-05, + "loss": 0.0494, + "step": 10945 + }, + { + "epoch": 3.32, + "learning_rate": 7.715497091157987e-05, + "loss": 0.0512, + "step": 10946 + }, + { + "epoch": 3.32, + "learning_rate": 7.71296999105283e-05, + "loss": 0.0157, + "step": 10947 + }, + { + "epoch": 3.32, + "learning_rate": 7.710443161639464e-05, + "loss": 0.026, + "step": 10948 + }, + { + "epoch": 3.32, + "learning_rate": 7.707916603011764e-05, + "loss": 0.0373, + "step": 10949 + }, + { + "epoch": 3.32, + "learning_rate": 7.705390315263578e-05, + "loss": 0.0503, + "step": 10950 + }, + { + "epoch": 3.33, + "learning_rate": 7.702864298488751e-05, + "loss": 0.0562, + "step": 10951 + }, + { + "epoch": 3.33, + "learning_rate": 7.70033855278111e-05, + "loss": 0.0518, + "step": 10952 + }, + { + "epoch": 3.33, + "learning_rate": 7.697813078234492e-05, + "loss": 0.0295, + "step": 10953 + }, + { + "epoch": 3.33, + "learning_rate": 7.695287874942699e-05, + "loss": 0.0125, + "step": 10954 + }, + { + "epoch": 3.33, + "learning_rate": 7.692762942999545e-05, + "loss": 0.0088, + "step": 10955 + }, + { + "epoch": 3.33, + "learning_rate": 7.690238282498821e-05, + "loss": 0.0513, + "step": 10956 + }, + { + "epoch": 3.33, + "learning_rate": 7.687713893534302e-05, + "loss": 0.0404, + "step": 10957 + }, + { + "epoch": 3.33, + "learning_rate": 7.685189776199777e-05, + "loss": 0.0682, + "step": 10958 + }, + { + "epoch": 3.33, + "learning_rate": 7.682665930588993e-05, + "loss": 0.0485, + "step": 10959 + }, + { + "epoch": 3.33, + "learning_rate": 7.680142356795718e-05, + "loss": 0.0662, + "step": 10960 + }, + { + "epoch": 3.33, + "learning_rate": 7.67761905491369e-05, + "loss": 0.0225, + "step": 10961 + }, + { + "epoch": 3.33, + "learning_rate": 7.675096025036636e-05, + "loss": 0.0437, + "step": 10962 + }, + { + "epoch": 3.33, + "learning_rate": 7.672573267258289e-05, + "loss": 0.0483, + "step": 10963 + }, + { + "epoch": 3.33, + "learning_rate": 7.670050781672352e-05, + "loss": 0.0334, + "step": 10964 + }, + { + "epoch": 3.33, + "learning_rate": 7.667528568372545e-05, + "loss": 0.0201, + "step": 10965 + }, + { + "epoch": 3.33, + "learning_rate": 7.66500662745254e-05, + "loss": 0.0263, + "step": 10966 + }, + { + "epoch": 3.33, + "learning_rate": 7.662484959006032e-05, + "loss": 0.0488, + "step": 10967 + }, + { + "epoch": 3.33, + "learning_rate": 7.659963563126686e-05, + "loss": 0.0609, + "step": 10968 + }, + { + "epoch": 3.33, + "learning_rate": 7.657442439908176e-05, + "loss": 0.018, + "step": 10969 + }, + { + "epoch": 3.33, + "learning_rate": 7.654921589444147e-05, + "loss": 0.07, + "step": 10970 + }, + { + "epoch": 3.33, + "learning_rate": 7.652401011828235e-05, + "loss": 0.0805, + "step": 10971 + }, + { + "epoch": 3.33, + "learning_rate": 7.649880707154085e-05, + "loss": 0.0297, + "step": 10972 + }, + { + "epoch": 3.33, + "learning_rate": 7.647360675515308e-05, + "loss": 0.0375, + "step": 10973 + }, + { + "epoch": 3.33, + "learning_rate": 7.644840917005524e-05, + "loss": 0.0364, + "step": 10974 + }, + { + "epoch": 3.33, + "learning_rate": 7.642321431718333e-05, + "loss": 0.0446, + "step": 10975 + }, + { + "epoch": 3.33, + "learning_rate": 7.639802219747324e-05, + "loss": 0.0627, + "step": 10976 + }, + { + "epoch": 3.33, + "learning_rate": 7.637283281186073e-05, + "loss": 0.0499, + "step": 10977 + }, + { + "epoch": 3.33, + "learning_rate": 7.634764616128158e-05, + "loss": 0.0262, + "step": 10978 + }, + { + "epoch": 3.33, + "learning_rate": 7.632246224667143e-05, + "loss": 0.013, + "step": 10979 + }, + { + "epoch": 3.33, + "learning_rate": 7.62972810689657e-05, + "loss": 0.0406, + "step": 10980 + }, + { + "epoch": 3.33, + "learning_rate": 7.627210262909997e-05, + "loss": 0.031, + "step": 10981 + }, + { + "epoch": 3.33, + "learning_rate": 7.62469269280093e-05, + "loss": 0.0553, + "step": 10982 + }, + { + "epoch": 3.33, + "learning_rate": 7.622175396662907e-05, + "loss": 0.034, + "step": 10983 + }, + { + "epoch": 3.34, + "learning_rate": 7.619658374589427e-05, + "loss": 0.0441, + "step": 10984 + }, + { + "epoch": 3.34, + "learning_rate": 7.617141626674002e-05, + "loss": 0.0708, + "step": 10985 + }, + { + "epoch": 3.34, + "learning_rate": 7.614625153010115e-05, + "loss": 0.0607, + "step": 10986 + }, + { + "epoch": 3.34, + "learning_rate": 7.612108953691243e-05, + "loss": 0.0271, + "step": 10987 + }, + { + "epoch": 3.34, + "learning_rate": 7.609593028810861e-05, + "loss": 0.0542, + "step": 10988 + }, + { + "epoch": 3.34, + "learning_rate": 7.607077378462422e-05, + "loss": 0.0198, + "step": 10989 + }, + { + "epoch": 3.34, + "learning_rate": 7.604562002739384e-05, + "loss": 0.0428, + "step": 10990 + }, + { + "epoch": 3.34, + "learning_rate": 7.60204690173518e-05, + "loss": 0.0472, + "step": 10991 + }, + { + "epoch": 3.34, + "learning_rate": 7.59953207554324e-05, + "loss": 0.0314, + "step": 10992 + }, + { + "epoch": 3.34, + "learning_rate": 7.597017524256974e-05, + "loss": 0.044, + "step": 10993 + }, + { + "epoch": 3.34, + "learning_rate": 7.594503247969805e-05, + "loss": 0.0326, + "step": 10994 + }, + { + "epoch": 3.34, + "learning_rate": 7.59198924677512e-05, + "loss": 0.0488, + "step": 10995 + }, + { + "epoch": 3.34, + "learning_rate": 7.589475520766314e-05, + "loss": 0.022, + "step": 10996 + }, + { + "epoch": 3.34, + "learning_rate": 7.586962070036762e-05, + "loss": 0.0331, + "step": 10997 + }, + { + "epoch": 3.34, + "learning_rate": 7.584448894679824e-05, + "loss": 0.0642, + "step": 10998 + }, + { + "epoch": 3.34, + "learning_rate": 7.581935994788873e-05, + "loss": 0.0274, + "step": 10999 + }, + { + "epoch": 3.34, + "learning_rate": 7.579423370457236e-05, + "loss": 0.0339, + "step": 11000 + }, + { + "epoch": 3.34, + "learning_rate": 7.576911021778268e-05, + "loss": 0.0419, + "step": 11001 + }, + { + "epoch": 3.34, + "learning_rate": 7.574398948845287e-05, + "loss": 0.0685, + "step": 11002 + }, + { + "epoch": 3.34, + "learning_rate": 7.571887151751605e-05, + "loss": 0.0391, + "step": 11003 + }, + { + "epoch": 3.34, + "learning_rate": 7.569375630590538e-05, + "loss": 0.0475, + "step": 11004 + }, + { + "epoch": 3.34, + "learning_rate": 7.566864385455369e-05, + "loss": 0.0366, + "step": 11005 + }, + { + "epoch": 3.34, + "learning_rate": 7.564353416439404e-05, + "loss": 0.0148, + "step": 11006 + }, + { + "epoch": 3.34, + "learning_rate": 7.561842723635891e-05, + "loss": 0.0499, + "step": 11007 + }, + { + "epoch": 3.34, + "learning_rate": 7.559332307138116e-05, + "loss": 0.0349, + "step": 11008 + }, + { + "epoch": 3.34, + "learning_rate": 7.556822167039323e-05, + "loss": 0.0349, + "step": 11009 + }, + { + "epoch": 3.34, + "learning_rate": 7.554312303432762e-05, + "loss": 0.0431, + "step": 11010 + }, + { + "epoch": 3.34, + "learning_rate": 7.551802716411665e-05, + "loss": 0.0439, + "step": 11011 + }, + { + "epoch": 3.34, + "learning_rate": 7.549293406069248e-05, + "loss": 0.0222, + "step": 11012 + }, + { + "epoch": 3.34, + "learning_rate": 7.546784372498739e-05, + "loss": 0.0439, + "step": 11013 + }, + { + "epoch": 3.34, + "learning_rate": 7.544275615793329e-05, + "loss": 0.0431, + "step": 11014 + }, + { + "epoch": 3.34, + "learning_rate": 7.541767136046223e-05, + "loss": 0.055, + "step": 11015 + }, + { + "epoch": 3.34, + "learning_rate": 7.539258933350596e-05, + "loss": 0.0453, + "step": 11016 + }, + { + "epoch": 3.35, + "learning_rate": 7.536751007799621e-05, + "loss": 0.0824, + "step": 11017 + }, + { + "epoch": 3.35, + "learning_rate": 7.534243359486455e-05, + "loss": 0.0353, + "step": 11018 + }, + { + "epoch": 3.35, + "learning_rate": 7.531735988504254e-05, + "loss": 0.0348, + "step": 11019 + }, + { + "epoch": 3.35, + "learning_rate": 7.529228894946167e-05, + "loss": 0.0127, + "step": 11020 + }, + { + "epoch": 3.35, + "learning_rate": 7.526722078905319e-05, + "loss": 0.0446, + "step": 11021 + }, + { + "epoch": 3.35, + "learning_rate": 7.524215540474828e-05, + "loss": 0.0603, + "step": 11022 + }, + { + "epoch": 3.35, + "learning_rate": 7.521709279747803e-05, + "loss": 0.0633, + "step": 11023 + }, + { + "epoch": 3.35, + "learning_rate": 7.519203296817354e-05, + "loss": 0.061, + "step": 11024 + }, + { + "epoch": 3.35, + "learning_rate": 7.516697591776562e-05, + "loss": 0.065, + "step": 11025 + }, + { + "epoch": 3.35, + "learning_rate": 7.514192164718512e-05, + "loss": 0.0676, + "step": 11026 + }, + { + "epoch": 3.35, + "learning_rate": 7.511687015736274e-05, + "loss": 0.035, + "step": 11027 + }, + { + "epoch": 3.35, + "learning_rate": 7.509182144922897e-05, + "loss": 0.0567, + "step": 11028 + }, + { + "epoch": 3.35, + "learning_rate": 7.506677552371441e-05, + "loss": 0.0388, + "step": 11029 + }, + { + "epoch": 3.35, + "learning_rate": 7.504173238174936e-05, + "loss": 0.0626, + "step": 11030 + }, + { + "epoch": 3.35, + "learning_rate": 7.501669202426426e-05, + "loss": 0.0573, + "step": 11031 + }, + { + "epoch": 3.35, + "learning_rate": 7.499165445218903e-05, + "loss": 0.0258, + "step": 11032 + }, + { + "epoch": 3.35, + "learning_rate": 7.496661966645393e-05, + "loss": 0.0418, + "step": 11033 + }, + { + "epoch": 3.35, + "learning_rate": 7.494158766798884e-05, + "loss": 0.0344, + "step": 11034 + }, + { + "epoch": 3.35, + "learning_rate": 7.491655845772372e-05, + "loss": 0.0514, + "step": 11035 + }, + { + "epoch": 3.35, + "learning_rate": 7.489153203658826e-05, + "loss": 0.0545, + "step": 11036 + }, + { + "epoch": 3.35, + "learning_rate": 7.486650840551208e-05, + "loss": 0.0584, + "step": 11037 + }, + { + "epoch": 3.35, + "learning_rate": 7.484148756542484e-05, + "loss": 0.0302, + "step": 11038 + }, + { + "epoch": 3.35, + "learning_rate": 7.481646951725588e-05, + "loss": 0.0312, + "step": 11039 + }, + { + "epoch": 3.35, + "learning_rate": 7.479145426193466e-05, + "loss": 0.0448, + "step": 11040 + }, + { + "epoch": 3.35, + "learning_rate": 7.476644180039036e-05, + "loss": 0.0216, + "step": 11041 + }, + { + "epoch": 3.35, + "learning_rate": 7.474143213355208e-05, + "loss": 0.0533, + "step": 11042 + }, + { + "epoch": 3.35, + "learning_rate": 7.471642526234897e-05, + "loss": 0.0236, + "step": 11043 + }, + { + "epoch": 3.35, + "learning_rate": 7.469142118770981e-05, + "loss": 0.0653, + "step": 11044 + }, + { + "epoch": 3.35, + "learning_rate": 7.466641991056359e-05, + "loss": 0.0317, + "step": 11045 + }, + { + "epoch": 3.35, + "learning_rate": 7.464142143183888e-05, + "loss": 0.082, + "step": 11046 + }, + { + "epoch": 3.35, + "learning_rate": 7.461642575246451e-05, + "loss": 0.068, + "step": 11047 + }, + { + "epoch": 3.35, + "learning_rate": 7.459143287336875e-05, + "loss": 0.0295, + "step": 11048 + }, + { + "epoch": 3.35, + "learning_rate": 7.456644279548016e-05, + "loss": 0.0348, + "step": 11049 + }, + { + "epoch": 3.36, + "learning_rate": 7.454145551972698e-05, + "loss": 0.0713, + "step": 11050 + }, + { + "epoch": 3.36, + "learning_rate": 7.451647104703748e-05, + "loss": 0.0706, + "step": 11051 + }, + { + "epoch": 3.36, + "learning_rate": 7.449148937833975e-05, + "loss": 0.0191, + "step": 11052 + }, + { + "epoch": 3.36, + "learning_rate": 7.44665105145617e-05, + "loss": 0.0334, + "step": 11053 + }, + { + "epoch": 3.36, + "learning_rate": 7.444153445663132e-05, + "loss": 0.0269, + "step": 11054 + }, + { + "epoch": 3.36, + "learning_rate": 7.441656120547634e-05, + "loss": 0.0682, + "step": 11055 + }, + { + "epoch": 3.36, + "learning_rate": 7.43915907620245e-05, + "loss": 0.0535, + "step": 11056 + }, + { + "epoch": 3.36, + "learning_rate": 7.436662312720335e-05, + "loss": 0.0368, + "step": 11057 + }, + { + "epoch": 3.36, + "learning_rate": 7.434165830194034e-05, + "loss": 0.0472, + "step": 11058 + }, + { + "epoch": 3.36, + "learning_rate": 7.431669628716281e-05, + "loss": 0.0681, + "step": 11059 + }, + { + "epoch": 3.36, + "learning_rate": 7.429173708379809e-05, + "loss": 0.0023, + "step": 11060 + }, + { + "epoch": 3.36, + "learning_rate": 7.426678069277338e-05, + "loss": 0.0706, + "step": 11061 + }, + { + "epoch": 3.36, + "learning_rate": 7.424182711501566e-05, + "loss": 0.0199, + "step": 11062 + }, + { + "epoch": 3.36, + "learning_rate": 7.421687635145193e-05, + "loss": 0.0392, + "step": 11063 + }, + { + "epoch": 3.36, + "learning_rate": 7.419192840300893e-05, + "loss": 0.0598, + "step": 11064 + }, + { + "epoch": 3.36, + "learning_rate": 7.416698327061355e-05, + "loss": 0.0557, + "step": 11065 + }, + { + "epoch": 3.36, + "learning_rate": 7.41420409551923e-05, + "loss": 0.0362, + "step": 11066 + }, + { + "epoch": 3.36, + "learning_rate": 7.411710145767185e-05, + "loss": 0.0455, + "step": 11067 + }, + { + "epoch": 3.36, + "learning_rate": 7.409216477897856e-05, + "loss": 0.0386, + "step": 11068 + }, + { + "epoch": 3.36, + "learning_rate": 7.406723092003867e-05, + "loss": 0.0365, + "step": 11069 + }, + { + "epoch": 3.36, + "learning_rate": 7.404229988177856e-05, + "loss": 0.031, + "step": 11070 + }, + { + "epoch": 3.36, + "learning_rate": 7.40173716651242e-05, + "loss": 0.0301, + "step": 11071 + }, + { + "epoch": 3.36, + "learning_rate": 7.399244627100177e-05, + "loss": 0.0141, + "step": 11072 + }, + { + "epoch": 3.36, + "learning_rate": 7.396752370033697e-05, + "loss": 0.032, + "step": 11073 + }, + { + "epoch": 3.36, + "learning_rate": 7.394260395405574e-05, + "loss": 0.0255, + "step": 11074 + }, + { + "epoch": 3.36, + "learning_rate": 7.39176870330837e-05, + "loss": 0.0231, + "step": 11075 + }, + { + "epoch": 3.36, + "learning_rate": 7.389277293834652e-05, + "loss": 0.0387, + "step": 11076 + }, + { + "epoch": 3.36, + "learning_rate": 7.386786167076965e-05, + "loss": 0.0067, + "step": 11077 + }, + { + "epoch": 3.36, + "learning_rate": 7.384295323127839e-05, + "loss": 0.0287, + "step": 11078 + }, + { + "epoch": 3.36, + "learning_rate": 7.381804762079816e-05, + "loss": 0.0427, + "step": 11079 + }, + { + "epoch": 3.36, + "learning_rate": 7.3793144840254e-05, + "loss": 0.0189, + "step": 11080 + }, + { + "epoch": 3.36, + "learning_rate": 7.376824489057111e-05, + "loss": 0.0357, + "step": 11081 + }, + { + "epoch": 3.36, + "learning_rate": 7.374334777267436e-05, + "loss": 0.0149, + "step": 11082 + }, + { + "epoch": 3.37, + "learning_rate": 7.371845348748854e-05, + "loss": 0.0706, + "step": 11083 + }, + { + "epoch": 3.37, + "learning_rate": 7.369356203593857e-05, + "loss": 0.0245, + "step": 11084 + }, + { + "epoch": 3.37, + "learning_rate": 7.366867341894893e-05, + "loss": 0.0273, + "step": 11085 + }, + { + "epoch": 3.37, + "learning_rate": 7.364378763744429e-05, + "loss": 0.0695, + "step": 11086 + }, + { + "epoch": 3.37, + "learning_rate": 7.361890469234903e-05, + "loss": 0.0418, + "step": 11087 + }, + { + "epoch": 3.37, + "learning_rate": 7.359402458458747e-05, + "loss": 0.0334, + "step": 11088 + }, + { + "epoch": 3.37, + "learning_rate": 7.35691473150838e-05, + "loss": 0.0215, + "step": 11089 + }, + { + "epoch": 3.37, + "learning_rate": 7.354427288476222e-05, + "loss": 0.0449, + "step": 11090 + }, + { + "epoch": 3.37, + "learning_rate": 7.351940129454664e-05, + "loss": 0.0354, + "step": 11091 + }, + { + "epoch": 3.37, + "learning_rate": 7.349453254536108e-05, + "loss": 0.0422, + "step": 11092 + }, + { + "epoch": 3.37, + "learning_rate": 7.346966663812929e-05, + "loss": 0.0444, + "step": 11093 + }, + { + "epoch": 3.37, + "learning_rate": 7.34448035737749e-05, + "loss": 0.0261, + "step": 11094 + }, + { + "epoch": 3.37, + "learning_rate": 7.341994335322161e-05, + "loss": 0.112, + "step": 11095 + }, + { + "epoch": 3.37, + "learning_rate": 7.33950859773928e-05, + "loss": 0.0301, + "step": 11096 + }, + { + "epoch": 3.37, + "learning_rate": 7.337023144721203e-05, + "loss": 0.0491, + "step": 11097 + }, + { + "epoch": 3.37, + "learning_rate": 7.334537976360233e-05, + "loss": 0.0634, + "step": 11098 + }, + { + "epoch": 3.37, + "learning_rate": 7.332053092748704e-05, + "loss": 0.0796, + "step": 11099 + }, + { + "epoch": 3.37, + "learning_rate": 7.32956849397891e-05, + "loss": 0.0681, + "step": 11100 + }, + { + "epoch": 3.37, + "learning_rate": 7.327084180143155e-05, + "loss": 0.0485, + "step": 11101 + }, + { + "epoch": 3.37, + "learning_rate": 7.324600151333732e-05, + "loss": 0.0312, + "step": 11102 + }, + { + "epoch": 3.37, + "learning_rate": 7.322116407642894e-05, + "loss": 0.0759, + "step": 11103 + }, + { + "epoch": 3.37, + "learning_rate": 7.319632949162921e-05, + "loss": 0.0352, + "step": 11104 + }, + { + "epoch": 3.37, + "learning_rate": 7.317149775986059e-05, + "loss": 0.051, + "step": 11105 + }, + { + "epoch": 3.37, + "learning_rate": 7.314666888204556e-05, + "loss": 0.0361, + "step": 11106 + }, + { + "epoch": 3.37, + "learning_rate": 7.312184285910636e-05, + "loss": 0.0565, + "step": 11107 + }, + { + "epoch": 3.37, + "learning_rate": 7.309701969196531e-05, + "loss": 0.0357, + "step": 11108 + }, + { + "epoch": 3.37, + "learning_rate": 7.307219938154446e-05, + "loss": 0.0652, + "step": 11109 + }, + { + "epoch": 3.37, + "learning_rate": 7.304738192876575e-05, + "loss": 0.0415, + "step": 11110 + }, + { + "epoch": 3.37, + "learning_rate": 7.302256733455121e-05, + "loss": 0.0375, + "step": 11111 + }, + { + "epoch": 3.37, + "learning_rate": 7.29977555998225e-05, + "loss": 0.0309, + "step": 11112 + }, + { + "epoch": 3.37, + "learning_rate": 7.297294672550149e-05, + "loss": 0.0569, + "step": 11113 + }, + { + "epoch": 3.37, + "learning_rate": 7.29481407125095e-05, + "loss": 0.0389, + "step": 11114 + }, + { + "epoch": 3.37, + "learning_rate": 7.292333756176818e-05, + "loss": 0.0427, + "step": 11115 + }, + { + "epoch": 3.38, + "learning_rate": 7.28985372741988e-05, + "loss": 0.034, + "step": 11116 + }, + { + "epoch": 3.38, + "learning_rate": 7.287373985072272e-05, + "loss": 0.0494, + "step": 11117 + }, + { + "epoch": 3.38, + "learning_rate": 7.284894529226102e-05, + "loss": 0.0347, + "step": 11118 + }, + { + "epoch": 3.38, + "learning_rate": 7.28241535997347e-05, + "loss": 0.0223, + "step": 11119 + }, + { + "epoch": 3.38, + "learning_rate": 7.279936477406483e-05, + "loss": 0.067, + "step": 11120 + }, + { + "epoch": 3.38, + "learning_rate": 7.277457881617211e-05, + "loss": 0.0592, + "step": 11121 + }, + { + "epoch": 3.38, + "learning_rate": 7.274979572697738e-05, + "loss": 0.0548, + "step": 11122 + }, + { + "epoch": 3.38, + "learning_rate": 7.272501550740121e-05, + "loss": 0.015, + "step": 11123 + }, + { + "epoch": 3.38, + "learning_rate": 7.270023815836405e-05, + "loss": 0.0519, + "step": 11124 + }, + { + "epoch": 3.38, + "learning_rate": 7.26754636807864e-05, + "loss": 0.0279, + "step": 11125 + }, + { + "epoch": 3.38, + "learning_rate": 7.265069207558849e-05, + "loss": 0.0256, + "step": 11126 + }, + { + "epoch": 3.38, + "learning_rate": 7.262592334369057e-05, + "loss": 0.0253, + "step": 11127 + }, + { + "epoch": 3.38, + "learning_rate": 7.260115748601273e-05, + "loss": 0.0598, + "step": 11128 + }, + { + "epoch": 3.38, + "learning_rate": 7.257639450347491e-05, + "loss": 0.0779, + "step": 11129 + }, + { + "epoch": 3.38, + "learning_rate": 7.255163439699693e-05, + "loss": 0.034, + "step": 11130 + }, + { + "epoch": 3.38, + "learning_rate": 7.252687716749867e-05, + "loss": 0.0602, + "step": 11131 + }, + { + "epoch": 3.38, + "learning_rate": 7.25021228158997e-05, + "loss": 0.0296, + "step": 11132 + }, + { + "epoch": 3.38, + "learning_rate": 7.247737134311966e-05, + "loss": 0.0231, + "step": 11133 + }, + { + "epoch": 3.38, + "learning_rate": 7.245262275007792e-05, + "loss": 0.0447, + "step": 11134 + }, + { + "epoch": 3.38, + "learning_rate": 7.242787703769379e-05, + "loss": 0.0299, + "step": 11135 + }, + { + "epoch": 3.38, + "learning_rate": 7.24031342068866e-05, + "loss": 0.0643, + "step": 11136 + }, + { + "epoch": 3.38, + "learning_rate": 7.237839425857539e-05, + "loss": 0.0262, + "step": 11137 + }, + { + "epoch": 3.38, + "learning_rate": 7.23536571936793e-05, + "loss": 0.056, + "step": 11138 + }, + { + "epoch": 3.38, + "learning_rate": 7.232892301311704e-05, + "loss": 0.0334, + "step": 11139 + }, + { + "epoch": 3.38, + "learning_rate": 7.23041917178076e-05, + "loss": 0.0495, + "step": 11140 + }, + { + "epoch": 3.38, + "learning_rate": 7.227946330866952e-05, + "loss": 0.0877, + "step": 11141 + }, + { + "epoch": 3.38, + "learning_rate": 7.225473778662147e-05, + "loss": 0.0515, + "step": 11142 + }, + { + "epoch": 3.38, + "learning_rate": 7.223001515258203e-05, + "loss": 0.0876, + "step": 11143 + }, + { + "epoch": 3.38, + "learning_rate": 7.220529540746935e-05, + "loss": 0.0225, + "step": 11144 + }, + { + "epoch": 3.38, + "learning_rate": 7.21805785522019e-05, + "loss": 0.0318, + "step": 11145 + }, + { + "epoch": 3.38, + "learning_rate": 7.215586458769768e-05, + "loss": 0.0553, + "step": 11146 + }, + { + "epoch": 3.38, + "learning_rate": 7.213115351487486e-05, + "loss": 0.0293, + "step": 11147 + }, + { + "epoch": 3.38, + "learning_rate": 7.210644533465136e-05, + "loss": 0.0618, + "step": 11148 + }, + { + "epoch": 3.39, + "learning_rate": 7.208174004794491e-05, + "loss": 0.021, + "step": 11149 + }, + { + "epoch": 3.39, + "learning_rate": 7.205703765567341e-05, + "loss": 0.0564, + "step": 11150 + }, + { + "epoch": 3.39, + "learning_rate": 7.203233815875431e-05, + "loss": 0.0237, + "step": 11151 + }, + { + "epoch": 3.39, + "learning_rate": 7.20076415581053e-05, + "loss": 0.0461, + "step": 11152 + }, + { + "epoch": 3.39, + "learning_rate": 7.198294785464367e-05, + "loss": 0.0511, + "step": 11153 + }, + { + "epoch": 3.39, + "learning_rate": 7.195825704928675e-05, + "loss": 0.0482, + "step": 11154 + }, + { + "epoch": 3.39, + "learning_rate": 7.193356914295167e-05, + "loss": 0.0439, + "step": 11155 + }, + { + "epoch": 3.39, + "learning_rate": 7.190888413655562e-05, + "loss": 0.0393, + "step": 11156 + }, + { + "epoch": 3.39, + "learning_rate": 7.188420203101548e-05, + "loss": 0.0616, + "step": 11157 + }, + { + "epoch": 3.39, + "learning_rate": 7.185952282724821e-05, + "loss": 0.038, + "step": 11158 + }, + { + "epoch": 3.39, + "learning_rate": 7.183484652617051e-05, + "loss": 0.0986, + "step": 11159 + }, + { + "epoch": 3.39, + "learning_rate": 7.1810173128699e-05, + "loss": 0.0352, + "step": 11160 + }, + { + "epoch": 3.39, + "learning_rate": 7.178550263575034e-05, + "loss": 0.011, + "step": 11161 + }, + { + "epoch": 3.39, + "learning_rate": 7.176083504824082e-05, + "loss": 0.0743, + "step": 11162 + }, + { + "epoch": 3.39, + "learning_rate": 7.173617036708697e-05, + "loss": 0.0196, + "step": 11163 + }, + { + "epoch": 3.39, + "learning_rate": 7.171150859320475e-05, + "loss": 0.0481, + "step": 11164 + }, + { + "epoch": 3.39, + "learning_rate": 7.168684972751042e-05, + "loss": 0.0502, + "step": 11165 + }, + { + "epoch": 3.39, + "learning_rate": 7.166219377092002e-05, + "loss": 0.0724, + "step": 11166 + }, + { + "epoch": 3.39, + "learning_rate": 7.163754072434936e-05, + "loss": 0.0763, + "step": 11167 + }, + { + "epoch": 3.39, + "learning_rate": 7.161289058871431e-05, + "loss": 0.0662, + "step": 11168 + }, + { + "epoch": 3.39, + "learning_rate": 7.15882433649305e-05, + "loss": 0.0388, + "step": 11169 + }, + { + "epoch": 3.39, + "learning_rate": 7.156359905391352e-05, + "loss": 0.0367, + "step": 11170 + }, + { + "epoch": 3.39, + "learning_rate": 7.153895765657874e-05, + "loss": 0.0377, + "step": 11171 + }, + { + "epoch": 3.39, + "learning_rate": 7.151431917384167e-05, + "loss": 0.0313, + "step": 11172 + }, + { + "epoch": 3.39, + "learning_rate": 7.148968360661746e-05, + "loss": 0.0283, + "step": 11173 + }, + { + "epoch": 3.39, + "learning_rate": 7.146505095582129e-05, + "loss": 0.0191, + "step": 11174 + }, + { + "epoch": 3.39, + "learning_rate": 7.144042122236819e-05, + "loss": 0.0457, + "step": 11175 + }, + { + "epoch": 3.39, + "learning_rate": 7.141579440717301e-05, + "loss": 0.0432, + "step": 11176 + }, + { + "epoch": 3.39, + "learning_rate": 7.139117051115069e-05, + "loss": 0.0547, + "step": 11177 + }, + { + "epoch": 3.39, + "learning_rate": 7.136654953521579e-05, + "loss": 0.0472, + "step": 11178 + }, + { + "epoch": 3.39, + "learning_rate": 7.13419314802831e-05, + "loss": 0.0422, + "step": 11179 + }, + { + "epoch": 3.39, + "learning_rate": 7.13173163472669e-05, + "loss": 0.0475, + "step": 11180 + }, + { + "epoch": 3.39, + "learning_rate": 7.129270413708168e-05, + "loss": 0.0403, + "step": 11181 + }, + { + "epoch": 3.4, + "learning_rate": 7.126809485064169e-05, + "loss": 0.0095, + "step": 11182 + }, + { + "epoch": 3.4, + "learning_rate": 7.124348848886105e-05, + "loss": 0.0453, + "step": 11183 + }, + { + "epoch": 3.4, + "learning_rate": 7.121888505265399e-05, + "loss": 0.0095, + "step": 11184 + }, + { + "epoch": 3.4, + "learning_rate": 7.119428454293423e-05, + "loss": 0.033, + "step": 11185 + }, + { + "epoch": 3.4, + "learning_rate": 7.116968696061573e-05, + "loss": 0.0551, + "step": 11186 + }, + { + "epoch": 3.4, + "learning_rate": 7.114509230661215e-05, + "loss": 0.0644, + "step": 11187 + }, + { + "epoch": 3.4, + "learning_rate": 7.112050058183722e-05, + "loss": 0.0283, + "step": 11188 + }, + { + "epoch": 3.4, + "learning_rate": 7.109591178720438e-05, + "loss": 0.0513, + "step": 11189 + }, + { + "epoch": 3.4, + "learning_rate": 7.107132592362695e-05, + "loss": 0.0561, + "step": 11190 + }, + { + "epoch": 3.4, + "learning_rate": 7.104674299201838e-05, + "loss": 0.0558, + "step": 11191 + }, + { + "epoch": 3.4, + "learning_rate": 7.102216299329173e-05, + "loss": 0.0682, + "step": 11192 + }, + { + "epoch": 3.4, + "learning_rate": 7.099758592836016e-05, + "loss": 0.0382, + "step": 11193 + }, + { + "epoch": 3.4, + "learning_rate": 7.097301179813663e-05, + "loss": 0.0458, + "step": 11194 + }, + { + "epoch": 3.4, + "learning_rate": 7.094844060353394e-05, + "loss": 0.048, + "step": 11195 + }, + { + "epoch": 3.4, + "learning_rate": 7.092387234546481e-05, + "loss": 0.0494, + "step": 11196 + }, + { + "epoch": 3.4, + "learning_rate": 7.0899307024842e-05, + "loss": 0.034, + "step": 11197 + }, + { + "epoch": 3.4, + "learning_rate": 7.087474464257792e-05, + "loss": 0.0292, + "step": 11198 + }, + { + "epoch": 3.4, + "learning_rate": 7.085018519958508e-05, + "loss": 0.0546, + "step": 11199 + }, + { + "epoch": 3.4, + "learning_rate": 7.082562869677578e-05, + "loss": 0.018, + "step": 11200 + }, + { + "epoch": 3.4, + "learning_rate": 7.080107513506214e-05, + "loss": 0.0583, + "step": 11201 + }, + { + "epoch": 3.4, + "learning_rate": 7.077652451535635e-05, + "loss": 0.0591, + "step": 11202 + }, + { + "epoch": 3.4, + "learning_rate": 7.07519768385703e-05, + "loss": 0.0198, + "step": 11203 + }, + { + "epoch": 3.4, + "learning_rate": 7.072743210561603e-05, + "loss": 0.0657, + "step": 11204 + }, + { + "epoch": 3.4, + "learning_rate": 7.070289031740507e-05, + "loss": 0.017, + "step": 11205 + }, + { + "epoch": 3.4, + "learning_rate": 7.067835147484923e-05, + "loss": 0.0622, + "step": 11206 + }, + { + "epoch": 3.4, + "learning_rate": 7.065381557886005e-05, + "loss": 0.0253, + "step": 11207 + }, + { + "epoch": 3.4, + "learning_rate": 7.062928263034891e-05, + "loss": 0.0749, + "step": 11208 + }, + { + "epoch": 3.4, + "learning_rate": 7.060475263022727e-05, + "loss": 0.0173, + "step": 11209 + }, + { + "epoch": 3.4, + "learning_rate": 7.058022557940613e-05, + "loss": 0.0226, + "step": 11210 + }, + { + "epoch": 3.4, + "learning_rate": 7.055570147879675e-05, + "loss": 0.0764, + "step": 11211 + }, + { + "epoch": 3.4, + "learning_rate": 7.053118032931006e-05, + "loss": 0.0353, + "step": 11212 + }, + { + "epoch": 3.4, + "learning_rate": 7.050666213185703e-05, + "loss": 0.0869, + "step": 11213 + }, + { + "epoch": 3.4, + "learning_rate": 7.048214688734839e-05, + "loss": 0.0319, + "step": 11214 + }, + { + "epoch": 3.41, + "learning_rate": 7.045763459669475e-05, + "loss": 0.0461, + "step": 11215 + }, + { + "epoch": 3.41, + "learning_rate": 7.043312526080678e-05, + "loss": 0.0431, + "step": 11216 + }, + { + "epoch": 3.41, + "learning_rate": 7.040861888059482e-05, + "loss": 0.0576, + "step": 11217 + }, + { + "epoch": 3.41, + "learning_rate": 7.038411545696935e-05, + "loss": 0.03, + "step": 11218 + }, + { + "epoch": 3.41, + "learning_rate": 7.03596149908405e-05, + "loss": 0.037, + "step": 11219 + }, + { + "epoch": 3.41, + "learning_rate": 7.03351174831184e-05, + "loss": 0.0607, + "step": 11220 + }, + { + "epoch": 3.41, + "learning_rate": 7.031062293471305e-05, + "loss": 0.0204, + "step": 11221 + }, + { + "epoch": 3.41, + "learning_rate": 7.028613134653438e-05, + "loss": 0.0566, + "step": 11222 + }, + { + "epoch": 3.41, + "learning_rate": 7.026164271949215e-05, + "loss": 0.048, + "step": 11223 + }, + { + "epoch": 3.41, + "learning_rate": 7.02371570544961e-05, + "loss": 0.0184, + "step": 11224 + }, + { + "epoch": 3.41, + "learning_rate": 7.021267435245578e-05, + "loss": 0.0391, + "step": 11225 + }, + { + "epoch": 3.41, + "learning_rate": 7.018819461428057e-05, + "loss": 0.1524, + "step": 11226 + }, + { + "epoch": 3.41, + "learning_rate": 7.016371784087993e-05, + "loss": 0.0282, + "step": 11227 + }, + { + "epoch": 3.41, + "learning_rate": 7.0139244033163e-05, + "loss": 0.0322, + "step": 11228 + }, + { + "epoch": 3.41, + "learning_rate": 7.011477319203904e-05, + "loss": 0.0181, + "step": 11229 + }, + { + "epoch": 3.41, + "learning_rate": 7.009030531841697e-05, + "loss": 0.0105, + "step": 11230 + }, + { + "epoch": 3.41, + "learning_rate": 7.006584041320568e-05, + "loss": 0.0246, + "step": 11231 + }, + { + "epoch": 3.41, + "learning_rate": 7.004137847731407e-05, + "loss": 0.064, + "step": 11232 + }, + { + "epoch": 3.41, + "learning_rate": 7.001691951165072e-05, + "loss": 0.0506, + "step": 11233 + }, + { + "epoch": 3.41, + "learning_rate": 6.99924635171243e-05, + "loss": 0.0477, + "step": 11234 + }, + { + "epoch": 3.41, + "learning_rate": 6.996801049464326e-05, + "loss": 0.0353, + "step": 11235 + }, + { + "epoch": 3.41, + "learning_rate": 6.994356044511594e-05, + "loss": 0.0786, + "step": 11236 + }, + { + "epoch": 3.41, + "learning_rate": 6.991911336945051e-05, + "loss": 0.0183, + "step": 11237 + }, + { + "epoch": 3.41, + "learning_rate": 6.989466926855525e-05, + "loss": 0.0485, + "step": 11238 + }, + { + "epoch": 3.41, + "learning_rate": 6.987022814333806e-05, + "loss": 0.0367, + "step": 11239 + }, + { + "epoch": 3.41, + "learning_rate": 6.984578999470699e-05, + "loss": 0.0158, + "step": 11240 + }, + { + "epoch": 3.41, + "learning_rate": 6.982135482356974e-05, + "loss": 0.0517, + "step": 11241 + }, + { + "epoch": 3.41, + "learning_rate": 6.9796922630834e-05, + "loss": 0.0437, + "step": 11242 + }, + { + "epoch": 3.41, + "learning_rate": 6.977249341740745e-05, + "loss": 0.053, + "step": 11243 + }, + { + "epoch": 3.41, + "learning_rate": 6.974806718419745e-05, + "loss": 0.0438, + "step": 11244 + }, + { + "epoch": 3.41, + "learning_rate": 6.972364393211151e-05, + "loss": 0.053, + "step": 11245 + }, + { + "epoch": 3.41, + "learning_rate": 6.96992236620567e-05, + "loss": 0.0366, + "step": 11246 + }, + { + "epoch": 3.41, + "learning_rate": 6.967480637494023e-05, + "loss": 0.0415, + "step": 11247 + }, + { + "epoch": 3.42, + "learning_rate": 6.965039207166924e-05, + "loss": 0.0265, + "step": 11248 + }, + { + "epoch": 3.42, + "learning_rate": 6.962598075315046e-05, + "loss": 0.0538, + "step": 11249 + }, + { + "epoch": 3.42, + "learning_rate": 6.960157242029095e-05, + "loss": 0.0813, + "step": 11250 + }, + { + "epoch": 3.42, + "learning_rate": 6.957716707399712e-05, + "loss": 0.062, + "step": 11251 + }, + { + "epoch": 3.42, + "learning_rate": 6.955276471517577e-05, + "loss": 0.0314, + "step": 11252 + }, + { + "epoch": 3.42, + "learning_rate": 6.952836534473323e-05, + "loss": 0.0123, + "step": 11253 + }, + { + "epoch": 3.42, + "learning_rate": 6.950396896357602e-05, + "loss": 0.0518, + "step": 11254 + }, + { + "epoch": 3.42, + "learning_rate": 6.947957557261027e-05, + "loss": 0.0398, + "step": 11255 + }, + { + "epoch": 3.42, + "learning_rate": 6.945518517274212e-05, + "loss": 0.0509, + "step": 11256 + }, + { + "epoch": 3.42, + "learning_rate": 6.943079776487772e-05, + "loss": 0.0433, + "step": 11257 + }, + { + "epoch": 3.42, + "learning_rate": 6.940641334992286e-05, + "loss": 0.085, + "step": 11258 + }, + { + "epoch": 3.42, + "learning_rate": 6.938203192878345e-05, + "loss": 0.0419, + "step": 11259 + }, + { + "epoch": 3.42, + "learning_rate": 6.935765350236513e-05, + "loss": 0.092, + "step": 11260 + }, + { + "epoch": 3.42, + "learning_rate": 6.933327807157352e-05, + "loss": 0.0274, + "step": 11261 + }, + { + "epoch": 3.42, + "learning_rate": 6.930890563731401e-05, + "loss": 0.0381, + "step": 11262 + }, + { + "epoch": 3.42, + "learning_rate": 6.92845362004921e-05, + "loss": 0.0319, + "step": 11263 + }, + { + "epoch": 3.42, + "learning_rate": 6.92601697620129e-05, + "loss": 0.0378, + "step": 11264 + }, + { + "epoch": 3.42, + "learning_rate": 6.923580632278169e-05, + "loss": 0.0648, + "step": 11265 + }, + { + "epoch": 3.42, + "learning_rate": 6.921144588370345e-05, + "loss": 0.0564, + "step": 11266 + }, + { + "epoch": 3.42, + "learning_rate": 6.918708844568302e-05, + "loss": 0.025, + "step": 11267 + }, + { + "epoch": 3.42, + "learning_rate": 6.916273400962531e-05, + "loss": 0.0302, + "step": 11268 + }, + { + "epoch": 3.42, + "learning_rate": 6.913838257643494e-05, + "loss": 0.0218, + "step": 11269 + }, + { + "epoch": 3.42, + "learning_rate": 6.911403414701658e-05, + "loss": 0.0518, + "step": 11270 + }, + { + "epoch": 3.42, + "learning_rate": 6.908968872227466e-05, + "loss": 0.0664, + "step": 11271 + }, + { + "epoch": 3.42, + "learning_rate": 6.906534630311348e-05, + "loss": 0.0392, + "step": 11272 + }, + { + "epoch": 3.42, + "learning_rate": 6.904100689043742e-05, + "loss": 0.0327, + "step": 11273 + }, + { + "epoch": 3.42, + "learning_rate": 6.901667048515046e-05, + "loss": 0.048, + "step": 11274 + }, + { + "epoch": 3.42, + "learning_rate": 6.899233708815685e-05, + "loss": 0.039, + "step": 11275 + }, + { + "epoch": 3.42, + "learning_rate": 6.896800670036023e-05, + "loss": 0.0365, + "step": 11276 + }, + { + "epoch": 3.42, + "learning_rate": 6.894367932266459e-05, + "loss": 0.0292, + "step": 11277 + }, + { + "epoch": 3.42, + "learning_rate": 6.891935495597352e-05, + "loss": 0.064, + "step": 11278 + }, + { + "epoch": 3.42, + "learning_rate": 6.88950336011907e-05, + "loss": 0.036, + "step": 11279 + }, + { + "epoch": 3.42, + "learning_rate": 6.887071525921954e-05, + "loss": 0.0411, + "step": 11280 + }, + { + "epoch": 3.43, + "learning_rate": 6.884639993096336e-05, + "loss": 0.0321, + "step": 11281 + }, + { + "epoch": 3.43, + "learning_rate": 6.882208761732548e-05, + "loss": 0.0483, + "step": 11282 + }, + { + "epoch": 3.43, + "learning_rate": 6.879777831920894e-05, + "loss": 0.0457, + "step": 11283 + }, + { + "epoch": 3.43, + "learning_rate": 6.877347203751688e-05, + "loss": 0.0496, + "step": 11284 + }, + { + "epoch": 3.43, + "learning_rate": 6.874916877315211e-05, + "loss": 0.029, + "step": 11285 + }, + { + "epoch": 3.43, + "learning_rate": 6.872486852701748e-05, + "loss": 0.0675, + "step": 11286 + }, + { + "epoch": 3.43, + "learning_rate": 6.870057130001559e-05, + "loss": 0.0459, + "step": 11287 + }, + { + "epoch": 3.43, + "learning_rate": 6.867627709304906e-05, + "loss": 0.0186, + "step": 11288 + }, + { + "epoch": 3.43, + "learning_rate": 6.86519859070204e-05, + "loss": 0.052, + "step": 11289 + }, + { + "epoch": 3.43, + "learning_rate": 6.862769774283194e-05, + "loss": 0.0465, + "step": 11290 + }, + { + "epoch": 3.43, + "learning_rate": 6.860341260138586e-05, + "loss": 0.072, + "step": 11291 + }, + { + "epoch": 3.43, + "learning_rate": 6.857913048358426e-05, + "loss": 0.0151, + "step": 11292 + }, + { + "epoch": 3.43, + "learning_rate": 6.855485139032927e-05, + "loss": 0.0314, + "step": 11293 + }, + { + "epoch": 3.43, + "learning_rate": 6.853057532252265e-05, + "loss": 0.029, + "step": 11294 + }, + { + "epoch": 3.43, + "learning_rate": 6.850630228106629e-05, + "loss": 0.0385, + "step": 11295 + }, + { + "epoch": 3.43, + "learning_rate": 6.848203226686184e-05, + "loss": 0.049, + "step": 11296 + }, + { + "epoch": 3.43, + "learning_rate": 6.845776528081078e-05, + "loss": 0.0487, + "step": 11297 + }, + { + "epoch": 3.43, + "learning_rate": 6.843350132381468e-05, + "loss": 0.0366, + "step": 11298 + }, + { + "epoch": 3.43, + "learning_rate": 6.840924039677475e-05, + "loss": 0.0236, + "step": 11299 + }, + { + "epoch": 3.43, + "learning_rate": 6.838498250059235e-05, + "loss": 0.0481, + "step": 11300 + }, + { + "epoch": 3.43, + "learning_rate": 6.83607276361685e-05, + "loss": 0.0251, + "step": 11301 + }, + { + "epoch": 3.43, + "learning_rate": 6.833647580440421e-05, + "loss": 0.0233, + "step": 11302 + }, + { + "epoch": 3.43, + "learning_rate": 6.831222700620032e-05, + "loss": 0.0343, + "step": 11303 + }, + { + "epoch": 3.43, + "learning_rate": 6.82879812424577e-05, + "loss": 0.0313, + "step": 11304 + }, + { + "epoch": 3.43, + "learning_rate": 6.82637385140769e-05, + "loss": 0.0144, + "step": 11305 + }, + { + "epoch": 3.43, + "learning_rate": 6.823949882195859e-05, + "loss": 0.0143, + "step": 11306 + }, + { + "epoch": 3.43, + "learning_rate": 6.821526216700314e-05, + "loss": 0.0531, + "step": 11307 + }, + { + "epoch": 3.43, + "learning_rate": 6.819102855011079e-05, + "loss": 0.051, + "step": 11308 + }, + { + "epoch": 3.43, + "learning_rate": 6.81667979721819e-05, + "loss": 0.0092, + "step": 11309 + }, + { + "epoch": 3.43, + "learning_rate": 6.814257043411645e-05, + "loss": 0.0847, + "step": 11310 + }, + { + "epoch": 3.43, + "learning_rate": 6.81183459368145e-05, + "loss": 0.0526, + "step": 11311 + }, + { + "epoch": 3.43, + "learning_rate": 6.809412448117588e-05, + "loss": 0.067, + "step": 11312 + }, + { + "epoch": 3.43, + "learning_rate": 6.80699060681003e-05, + "loss": 0.0328, + "step": 11313 + }, + { + "epoch": 3.44, + "learning_rate": 6.804569069848752e-05, + "loss": 0.0323, + "step": 11314 + }, + { + "epoch": 3.44, + "learning_rate": 6.802147837323693e-05, + "loss": 0.0257, + "step": 11315 + }, + { + "epoch": 3.44, + "learning_rate": 6.799726909324812e-05, + "loss": 0.0477, + "step": 11316 + }, + { + "epoch": 3.44, + "learning_rate": 6.797306285942019e-05, + "loss": 0.045, + "step": 11317 + }, + { + "epoch": 3.44, + "learning_rate": 6.79488596726525e-05, + "loss": 0.0392, + "step": 11318 + }, + { + "epoch": 3.44, + "learning_rate": 6.792465953384398e-05, + "loss": 0.0446, + "step": 11319 + }, + { + "epoch": 3.44, + "learning_rate": 6.790046244389375e-05, + "loss": 0.03, + "step": 11320 + }, + { + "epoch": 3.44, + "learning_rate": 6.787626840370059e-05, + "loss": 0.032, + "step": 11321 + }, + { + "epoch": 3.44, + "learning_rate": 6.785207741416317e-05, + "loss": 0.0775, + "step": 11322 + }, + { + "epoch": 3.44, + "learning_rate": 6.782788947618022e-05, + "loss": 0.0638, + "step": 11323 + }, + { + "epoch": 3.44, + "learning_rate": 6.780370459065016e-05, + "loss": 0.0588, + "step": 11324 + }, + { + "epoch": 3.44, + "learning_rate": 6.777952275847151e-05, + "loss": 0.0443, + "step": 11325 + }, + { + "epoch": 3.44, + "learning_rate": 6.775534398054246e-05, + "loss": 0.0366, + "step": 11326 + }, + { + "epoch": 3.44, + "learning_rate": 6.77311682577612e-05, + "loss": 0.0405, + "step": 11327 + }, + { + "epoch": 3.44, + "learning_rate": 6.770699559102574e-05, + "loss": 0.0727, + "step": 11328 + }, + { + "epoch": 3.44, + "learning_rate": 6.768282598123414e-05, + "loss": 0.0395, + "step": 11329 + }, + { + "epoch": 3.44, + "learning_rate": 6.765865942928408e-05, + "loss": 0.0559, + "step": 11330 + }, + { + "epoch": 3.44, + "learning_rate": 6.763449593607343e-05, + "loss": 0.0309, + "step": 11331 + }, + { + "epoch": 3.44, + "learning_rate": 6.761033550249972e-05, + "loss": 0.043, + "step": 11332 + }, + { + "epoch": 3.44, + "learning_rate": 6.758617812946039e-05, + "loss": 0.0267, + "step": 11333 + }, + { + "epoch": 3.44, + "learning_rate": 6.75620238178529e-05, + "loss": 0.0354, + "step": 11334 + }, + { + "epoch": 3.44, + "learning_rate": 6.753787256857444e-05, + "loss": 0.0445, + "step": 11335 + }, + { + "epoch": 3.44, + "learning_rate": 6.751372438252224e-05, + "loss": 0.0497, + "step": 11336 + }, + { + "epoch": 3.44, + "learning_rate": 6.748957926059329e-05, + "loss": 0.0312, + "step": 11337 + }, + { + "epoch": 3.44, + "learning_rate": 6.746543720368446e-05, + "loss": 0.051, + "step": 11338 + }, + { + "epoch": 3.44, + "learning_rate": 6.744129821269268e-05, + "loss": 0.0686, + "step": 11339 + }, + { + "epoch": 3.44, + "learning_rate": 6.741716228851449e-05, + "loss": 0.0304, + "step": 11340 + }, + { + "epoch": 3.44, + "learning_rate": 6.739302943204667e-05, + "loss": 0.0513, + "step": 11341 + }, + { + "epoch": 3.44, + "learning_rate": 6.736889964418545e-05, + "loss": 0.0258, + "step": 11342 + }, + { + "epoch": 3.44, + "learning_rate": 6.734477292582736e-05, + "loss": 0.0478, + "step": 11343 + }, + { + "epoch": 3.44, + "learning_rate": 6.732064927786852e-05, + "loss": 0.0478, + "step": 11344 + }, + { + "epoch": 3.44, + "learning_rate": 6.729652870120513e-05, + "loss": 0.0529, + "step": 11345 + }, + { + "epoch": 3.44, + "learning_rate": 6.727241119673319e-05, + "loss": 0.0622, + "step": 11346 + }, + { + "epoch": 3.45, + "learning_rate": 6.724829676534853e-05, + "loss": 0.0543, + "step": 11347 + }, + { + "epoch": 3.45, + "learning_rate": 6.722418540794702e-05, + "loss": 0.0405, + "step": 11348 + }, + { + "epoch": 3.45, + "learning_rate": 6.720007712542423e-05, + "loss": 0.0212, + "step": 11349 + }, + { + "epoch": 3.45, + "learning_rate": 6.717597191867584e-05, + "loss": 0.0475, + "step": 11350 + }, + { + "epoch": 3.45, + "learning_rate": 6.715186978859718e-05, + "loss": 0.0663, + "step": 11351 + }, + { + "epoch": 3.45, + "learning_rate": 6.712777073608358e-05, + "loss": 0.0265, + "step": 11352 + }, + { + "epoch": 3.45, + "learning_rate": 6.710367476203032e-05, + "loss": 0.0272, + "step": 11353 + }, + { + "epoch": 3.45, + "learning_rate": 6.707958186733239e-05, + "loss": 0.0449, + "step": 11354 + }, + { + "epoch": 3.45, + "learning_rate": 6.70554920528849e-05, + "loss": 0.0489, + "step": 11355 + }, + { + "epoch": 3.45, + "learning_rate": 6.703140531958259e-05, + "loss": 0.0468, + "step": 11356 + }, + { + "epoch": 3.45, + "learning_rate": 6.700732166832039e-05, + "loss": 0.0385, + "step": 11357 + }, + { + "epoch": 3.45, + "learning_rate": 6.698324109999269e-05, + "loss": 0.007, + "step": 11358 + }, + { + "epoch": 3.45, + "learning_rate": 6.69591636154942e-05, + "loss": 0.0265, + "step": 11359 + }, + { + "epoch": 3.45, + "learning_rate": 6.69350892157192e-05, + "loss": 0.0188, + "step": 11360 + }, + { + "epoch": 3.45, + "learning_rate": 6.69110179015621e-05, + "loss": 0.0773, + "step": 11361 + }, + { + "epoch": 3.45, + "learning_rate": 6.688694967391706e-05, + "loss": 0.0371, + "step": 11362 + }, + { + "epoch": 3.45, + "learning_rate": 6.686288453367802e-05, + "loss": 0.046, + "step": 11363 + }, + { + "epoch": 3.45, + "learning_rate": 6.68388224817391e-05, + "loss": 0.0583, + "step": 11364 + }, + { + "epoch": 3.45, + "learning_rate": 6.6814763518994e-05, + "loss": 0.0541, + "step": 11365 + }, + { + "epoch": 3.45, + "learning_rate": 6.679070764633655e-05, + "loss": 0.0645, + "step": 11366 + }, + { + "epoch": 3.45, + "learning_rate": 6.676665486466029e-05, + "loss": 0.0471, + "step": 11367 + }, + { + "epoch": 3.45, + "learning_rate": 6.674260517485874e-05, + "loss": 0.0356, + "step": 11368 + }, + { + "epoch": 3.45, + "learning_rate": 6.671855857782518e-05, + "loss": 0.0411, + "step": 11369 + }, + { + "epoch": 3.45, + "learning_rate": 6.669451507445303e-05, + "loss": 0.043, + "step": 11370 + }, + { + "epoch": 3.45, + "learning_rate": 6.66704746656353e-05, + "loss": 0.0498, + "step": 11371 + }, + { + "epoch": 3.45, + "learning_rate": 6.66464373522651e-05, + "loss": 0.0526, + "step": 11372 + }, + { + "epoch": 3.45, + "learning_rate": 6.662240313523534e-05, + "loss": 0.037, + "step": 11373 + }, + { + "epoch": 3.45, + "learning_rate": 6.659837201543875e-05, + "loss": 0.0376, + "step": 11374 + }, + { + "epoch": 3.45, + "learning_rate": 6.65743439937681e-05, + "loss": 0.0073, + "step": 11375 + }, + { + "epoch": 3.45, + "learning_rate": 6.655031907111589e-05, + "loss": 0.0343, + "step": 11376 + }, + { + "epoch": 3.45, + "learning_rate": 6.652629724837467e-05, + "loss": 0.0615, + "step": 11377 + }, + { + "epoch": 3.45, + "learning_rate": 6.65022785264367e-05, + "loss": 0.0574, + "step": 11378 + }, + { + "epoch": 3.45, + "learning_rate": 6.647826290619421e-05, + "loss": 0.0151, + "step": 11379 + }, + { + "epoch": 3.46, + "learning_rate": 6.645425038853935e-05, + "loss": 0.0228, + "step": 11380 + }, + { + "epoch": 3.46, + "learning_rate": 6.643024097436404e-05, + "loss": 0.0932, + "step": 11381 + }, + { + "epoch": 3.46, + "learning_rate": 6.640623466456033e-05, + "loss": 0.0496, + "step": 11382 + }, + { + "epoch": 3.46, + "learning_rate": 6.638223146001976e-05, + "loss": 0.0307, + "step": 11383 + }, + { + "epoch": 3.46, + "learning_rate": 6.635823136163413e-05, + "loss": 0.034, + "step": 11384 + }, + { + "epoch": 3.46, + "learning_rate": 6.633423437029488e-05, + "loss": 0.0373, + "step": 11385 + }, + { + "epoch": 3.46, + "learning_rate": 6.631024048689352e-05, + "loss": 0.0461, + "step": 11386 + }, + { + "epoch": 3.46, + "learning_rate": 6.628624971232132e-05, + "loss": 0.0239, + "step": 11387 + }, + { + "epoch": 3.46, + "learning_rate": 6.62622620474694e-05, + "loss": 0.07, + "step": 11388 + }, + { + "epoch": 3.46, + "learning_rate": 6.623827749322893e-05, + "loss": 0.0578, + "step": 11389 + }, + { + "epoch": 3.46, + "learning_rate": 6.621429605049076e-05, + "loss": 0.0622, + "step": 11390 + }, + { + "epoch": 3.46, + "learning_rate": 6.619031772014588e-05, + "loss": 0.0593, + "step": 11391 + }, + { + "epoch": 3.46, + "learning_rate": 6.61663425030849e-05, + "loss": 0.0123, + "step": 11392 + }, + { + "epoch": 3.46, + "learning_rate": 6.614237040019839e-05, + "loss": 0.0245, + "step": 11393 + }, + { + "epoch": 3.46, + "learning_rate": 6.6118401412377e-05, + "loss": 0.0576, + "step": 11394 + }, + { + "epoch": 3.46, + "learning_rate": 6.609443554051095e-05, + "loss": 0.0415, + "step": 11395 + }, + { + "epoch": 3.46, + "learning_rate": 6.60704727854906e-05, + "loss": 0.0398, + "step": 11396 + }, + { + "epoch": 3.46, + "learning_rate": 6.604651314820608e-05, + "loss": 0.0352, + "step": 11397 + }, + { + "epoch": 3.46, + "learning_rate": 6.60225566295474e-05, + "loss": 0.0673, + "step": 11398 + }, + { + "epoch": 3.46, + "learning_rate": 6.599860323040443e-05, + "loss": 0.0705, + "step": 11399 + }, + { + "epoch": 3.46, + "learning_rate": 6.597465295166709e-05, + "loss": 0.0432, + "step": 11400 + }, + { + "epoch": 3.46, + "learning_rate": 6.595070579422491e-05, + "loss": 0.0127, + "step": 11401 + }, + { + "epoch": 3.46, + "learning_rate": 6.592676175896761e-05, + "loss": 0.0575, + "step": 11402 + }, + { + "epoch": 3.46, + "learning_rate": 6.590282084678455e-05, + "loss": 0.064, + "step": 11403 + }, + { + "epoch": 3.46, + "learning_rate": 6.587888305856504e-05, + "loss": 0.0225, + "step": 11404 + }, + { + "epoch": 3.46, + "learning_rate": 6.585494839519839e-05, + "loss": 0.0677, + "step": 11405 + }, + { + "epoch": 3.46, + "learning_rate": 6.583101685757359e-05, + "loss": 0.0515, + "step": 11406 + }, + { + "epoch": 3.46, + "learning_rate": 6.580708844657983e-05, + "loss": 0.0565, + "step": 11407 + }, + { + "epoch": 3.46, + "learning_rate": 6.578316316310571e-05, + "loss": 0.0446, + "step": 11408 + }, + { + "epoch": 3.46, + "learning_rate": 6.575924100804016e-05, + "loss": 0.0141, + "step": 11409 + }, + { + "epoch": 3.46, + "learning_rate": 6.573532198227174e-05, + "loss": 0.0501, + "step": 11410 + }, + { + "epoch": 3.46, + "learning_rate": 6.571140608668905e-05, + "loss": 0.0472, + "step": 11411 + }, + { + "epoch": 3.47, + "learning_rate": 6.568749332218044e-05, + "loss": 0.0291, + "step": 11412 + }, + { + "epoch": 3.47, + "learning_rate": 6.566358368963417e-05, + "loss": 0.0627, + "step": 11413 + }, + { + "epoch": 3.47, + "learning_rate": 6.56396771899385e-05, + "loss": 0.0378, + "step": 11414 + }, + { + "epoch": 3.47, + "learning_rate": 6.561577382398139e-05, + "loss": 0.019, + "step": 11415 + }, + { + "epoch": 3.47, + "learning_rate": 6.559187359265087e-05, + "loss": 0.039, + "step": 11416 + }, + { + "epoch": 3.47, + "learning_rate": 6.556797649683472e-05, + "loss": 0.022, + "step": 11417 + }, + { + "epoch": 3.47, + "learning_rate": 6.554408253742059e-05, + "loss": 0.0644, + "step": 11418 + }, + { + "epoch": 3.47, + "learning_rate": 6.55201917152962e-05, + "loss": 0.0271, + "step": 11419 + }, + { + "epoch": 3.47, + "learning_rate": 6.54963040313489e-05, + "loss": 0.0145, + "step": 11420 + }, + { + "epoch": 3.47, + "learning_rate": 6.547241948646614e-05, + "loss": 0.0455, + "step": 11421 + }, + { + "epoch": 3.47, + "learning_rate": 6.544853808153506e-05, + "loss": 0.0356, + "step": 11422 + }, + { + "epoch": 3.47, + "learning_rate": 6.542465981744298e-05, + "loss": 0.0515, + "step": 11423 + }, + { + "epoch": 3.47, + "learning_rate": 6.540078469507665e-05, + "loss": 0.0236, + "step": 11424 + }, + { + "epoch": 3.47, + "learning_rate": 6.537691271532313e-05, + "loss": 0.0555, + "step": 11425 + }, + { + "epoch": 3.47, + "learning_rate": 6.53530438790691e-05, + "loss": 0.0422, + "step": 11426 + }, + { + "epoch": 3.47, + "learning_rate": 6.532917818720133e-05, + "loss": 0.0235, + "step": 11427 + }, + { + "epoch": 3.47, + "learning_rate": 6.530531564060628e-05, + "loss": 0.0318, + "step": 11428 + }, + { + "epoch": 3.47, + "learning_rate": 6.528145624017033e-05, + "loss": 0.0425, + "step": 11429 + }, + { + "epoch": 3.47, + "learning_rate": 6.525759998677992e-05, + "loss": 0.0115, + "step": 11430 + }, + { + "epoch": 3.47, + "learning_rate": 6.52337468813211e-05, + "loss": 0.0612, + "step": 11431 + }, + { + "epoch": 3.47, + "learning_rate": 6.520989692468005e-05, + "loss": 0.0269, + "step": 11432 + }, + { + "epoch": 3.47, + "learning_rate": 6.51860501177427e-05, + "loss": 0.0059, + "step": 11433 + }, + { + "epoch": 3.47, + "learning_rate": 6.51622064613949e-05, + "loss": 0.0348, + "step": 11434 + }, + { + "epoch": 3.47, + "learning_rate": 6.513836595652226e-05, + "loss": 0.0164, + "step": 11435 + }, + { + "epoch": 3.47, + "learning_rate": 6.511452860401046e-05, + "loss": 0.0331, + "step": 11436 + }, + { + "epoch": 3.47, + "learning_rate": 6.509069440474507e-05, + "loss": 0.0573, + "step": 11437 + }, + { + "epoch": 3.47, + "learning_rate": 6.506686335961139e-05, + "loss": 0.0464, + "step": 11438 + }, + { + "epoch": 3.47, + "learning_rate": 6.504303546949466e-05, + "loss": 0.0275, + "step": 11439 + }, + { + "epoch": 3.47, + "learning_rate": 6.501921073527997e-05, + "loss": 0.0116, + "step": 11440 + }, + { + "epoch": 3.47, + "learning_rate": 6.499538915785245e-05, + "loss": 0.0164, + "step": 11441 + }, + { + "epoch": 3.47, + "learning_rate": 6.49715707380969e-05, + "loss": 0.0914, + "step": 11442 + }, + { + "epoch": 3.47, + "learning_rate": 6.494775547689822e-05, + "loss": 0.0332, + "step": 11443 + }, + { + "epoch": 3.47, + "learning_rate": 6.4923943375141e-05, + "loss": 0.0862, + "step": 11444 + }, + { + "epoch": 3.48, + "learning_rate": 6.490013443370976e-05, + "loss": 0.0777, + "step": 11445 + }, + { + "epoch": 3.48, + "learning_rate": 6.4876328653489e-05, + "loss": 0.0502, + "step": 11446 + }, + { + "epoch": 3.48, + "learning_rate": 6.485252603536298e-05, + "loss": 0.038, + "step": 11447 + }, + { + "epoch": 3.48, + "learning_rate": 6.4828726580216e-05, + "loss": 0.0353, + "step": 11448 + }, + { + "epoch": 3.48, + "learning_rate": 6.4804930288932e-05, + "loss": 0.0381, + "step": 11449 + }, + { + "epoch": 3.48, + "learning_rate": 6.478113716239504e-05, + "loss": 0.0593, + "step": 11450 + }, + { + "epoch": 3.48, + "learning_rate": 6.475734720148886e-05, + "loss": 0.0261, + "step": 11451 + }, + { + "epoch": 3.48, + "learning_rate": 6.473356040709735e-05, + "loss": 0.0339, + "step": 11452 + }, + { + "epoch": 3.48, + "learning_rate": 6.470977678010399e-05, + "loss": 0.0519, + "step": 11453 + }, + { + "epoch": 3.48, + "learning_rate": 6.468599632139229e-05, + "loss": 0.0269, + "step": 11454 + }, + { + "epoch": 3.48, + "learning_rate": 6.466221903184569e-05, + "loss": 0.053, + "step": 11455 + }, + { + "epoch": 3.48, + "learning_rate": 6.463844491234733e-05, + "loss": 0.0599, + "step": 11456 + }, + { + "epoch": 3.48, + "learning_rate": 6.46146739637805e-05, + "loss": 0.0274, + "step": 11457 + }, + { + "epoch": 3.48, + "learning_rate": 6.459090618702813e-05, + "loss": 0.0447, + "step": 11458 + }, + { + "epoch": 3.48, + "learning_rate": 6.456714158297308e-05, + "loss": 0.0221, + "step": 11459 + }, + { + "epoch": 3.48, + "learning_rate": 6.454338015249825e-05, + "loss": 0.0366, + "step": 11460 + }, + { + "epoch": 3.48, + "learning_rate": 6.451962189648618e-05, + "loss": 0.0652, + "step": 11461 + }, + { + "epoch": 3.48, + "learning_rate": 6.449586681581956e-05, + "loss": 0.0387, + "step": 11462 + }, + { + "epoch": 3.48, + "learning_rate": 6.447211491138075e-05, + "loss": 0.0622, + "step": 11463 + }, + { + "epoch": 3.48, + "learning_rate": 6.444836618405204e-05, + "loss": 0.0335, + "step": 11464 + }, + { + "epoch": 3.48, + "learning_rate": 6.442462063471561e-05, + "loss": 0.0494, + "step": 11465 + }, + { + "epoch": 3.48, + "learning_rate": 6.440087826425363e-05, + "loss": 0.0574, + "step": 11466 + }, + { + "epoch": 3.48, + "learning_rate": 6.437713907354794e-05, + "loss": 0.0521, + "step": 11467 + }, + { + "epoch": 3.48, + "learning_rate": 6.435340306348051e-05, + "loss": 0.0404, + "step": 11468 + }, + { + "epoch": 3.48, + "learning_rate": 6.4329670234933e-05, + "loss": 0.0309, + "step": 11469 + }, + { + "epoch": 3.48, + "learning_rate": 6.430594058878697e-05, + "loss": 0.0382, + "step": 11470 + }, + { + "epoch": 3.48, + "learning_rate": 6.428221412592397e-05, + "loss": 0.047, + "step": 11471 + }, + { + "epoch": 3.48, + "learning_rate": 6.425849084722533e-05, + "loss": 0.0429, + "step": 11472 + }, + { + "epoch": 3.48, + "learning_rate": 6.423477075357242e-05, + "loss": 0.0513, + "step": 11473 + }, + { + "epoch": 3.48, + "learning_rate": 6.421105384584617e-05, + "loss": 0.0726, + "step": 11474 + }, + { + "epoch": 3.48, + "learning_rate": 6.418734012492773e-05, + "loss": 0.0355, + "step": 11475 + }, + { + "epoch": 3.48, + "learning_rate": 6.416362959169794e-05, + "loss": 0.0503, + "step": 11476 + }, + { + "epoch": 3.48, + "learning_rate": 6.413992224703757e-05, + "loss": 0.0377, + "step": 11477 + }, + { + "epoch": 3.49, + "learning_rate": 6.411621809182741e-05, + "loss": 0.0496, + "step": 11478 + }, + { + "epoch": 3.49, + "learning_rate": 6.409251712694781e-05, + "loss": 0.0604, + "step": 11479 + }, + { + "epoch": 3.49, + "learning_rate": 6.406881935327931e-05, + "loss": 0.0422, + "step": 11480 + }, + { + "epoch": 3.49, + "learning_rate": 6.404512477170214e-05, + "loss": 0.0439, + "step": 11481 + }, + { + "epoch": 3.49, + "learning_rate": 6.402143338309657e-05, + "loss": 0.0228, + "step": 11482 + }, + { + "epoch": 3.49, + "learning_rate": 6.399774518834257e-05, + "loss": 0.0466, + "step": 11483 + }, + { + "epoch": 3.49, + "learning_rate": 6.39740601883202e-05, + "loss": 0.0623, + "step": 11484 + }, + { + "epoch": 3.49, + "learning_rate": 6.39503783839092e-05, + "loss": 0.0704, + "step": 11485 + }, + { + "epoch": 3.49, + "learning_rate": 6.392669977598928e-05, + "loss": 0.0669, + "step": 11486 + }, + { + "epoch": 3.49, + "learning_rate": 6.390302436544009e-05, + "loss": 0.0368, + "step": 11487 + }, + { + "epoch": 3.49, + "learning_rate": 6.387935215314101e-05, + "loss": 0.0342, + "step": 11488 + }, + { + "epoch": 3.49, + "learning_rate": 6.385568313997158e-05, + "loss": 0.0529, + "step": 11489 + }, + { + "epoch": 3.49, + "learning_rate": 6.383201732681077e-05, + "loss": 0.0428, + "step": 11490 + }, + { + "epoch": 3.49, + "learning_rate": 6.38083547145379e-05, + "loss": 0.0706, + "step": 11491 + }, + { + "epoch": 3.49, + "learning_rate": 6.378469530403186e-05, + "loss": 0.0875, + "step": 11492 + }, + { + "epoch": 3.49, + "learning_rate": 6.376103909617159e-05, + "loss": 0.0013, + "step": 11493 + }, + { + "epoch": 3.49, + "learning_rate": 6.373738609183583e-05, + "loss": 0.0562, + "step": 11494 + }, + { + "epoch": 3.49, + "learning_rate": 6.371373629190317e-05, + "loss": 0.0159, + "step": 11495 + }, + { + "epoch": 3.49, + "learning_rate": 6.369008969725224e-05, + "loss": 0.0447, + "step": 11496 + }, + { + "epoch": 3.49, + "learning_rate": 6.36664463087613e-05, + "loss": 0.0212, + "step": 11497 + }, + { + "epoch": 3.49, + "learning_rate": 6.364280612730877e-05, + "loss": 0.0609, + "step": 11498 + }, + { + "epoch": 3.49, + "learning_rate": 6.361916915377275e-05, + "loss": 0.0349, + "step": 11499 + }, + { + "epoch": 3.49, + "learning_rate": 6.359553538903122e-05, + "loss": 0.0422, + "step": 11500 + }, + { + "epoch": 3.49, + "learning_rate": 6.357190483396225e-05, + "loss": 0.0681, + "step": 11501 + }, + { + "epoch": 3.49, + "learning_rate": 6.35482774894435e-05, + "loss": 0.0546, + "step": 11502 + }, + { + "epoch": 3.49, + "learning_rate": 6.352465335635277e-05, + "loss": 0.0345, + "step": 11503 + }, + { + "epoch": 3.49, + "learning_rate": 6.35010324355676e-05, + "loss": 0.0626, + "step": 11504 + }, + { + "epoch": 3.49, + "learning_rate": 6.347741472796542e-05, + "loss": 0.0464, + "step": 11505 + }, + { + "epoch": 3.49, + "learning_rate": 6.345380023442348e-05, + "loss": 0.0454, + "step": 11506 + }, + { + "epoch": 3.49, + "learning_rate": 6.343018895581913e-05, + "loss": 0.028, + "step": 11507 + }, + { + "epoch": 3.49, + "learning_rate": 6.340658089302934e-05, + "loss": 0.0601, + "step": 11508 + }, + { + "epoch": 3.49, + "learning_rate": 6.33829760469312e-05, + "loss": 0.0193, + "step": 11509 + }, + { + "epoch": 3.49, + "learning_rate": 6.33593744184015e-05, + "loss": 0.034, + "step": 11510 + }, + { + "epoch": 3.5, + "learning_rate": 6.33357760083169e-05, + "loss": 0.0474, + "step": 11511 + }, + { + "epoch": 3.5, + "learning_rate": 6.331218081755415e-05, + "loss": 0.0304, + "step": 11512 + }, + { + "epoch": 3.5, + "learning_rate": 6.328858884698961e-05, + "loss": 0.0247, + "step": 11513 + }, + { + "epoch": 3.5, + "learning_rate": 6.326500009749983e-05, + "loss": 0.0791, + "step": 11514 + }, + { + "epoch": 3.5, + "learning_rate": 6.324141456996083e-05, + "loss": 0.0326, + "step": 11515 + }, + { + "epoch": 3.5, + "learning_rate": 6.321783226524892e-05, + "loss": 0.0389, + "step": 11516 + }, + { + "epoch": 3.5, + "learning_rate": 6.319425318423999e-05, + "loss": 0.0666, + "step": 11517 + }, + { + "epoch": 3.5, + "learning_rate": 6.317067732781002e-05, + "loss": 0.0654, + "step": 11518 + }, + { + "epoch": 3.5, + "learning_rate": 6.314710469683487e-05, + "loss": 0.0333, + "step": 11519 + }, + { + "epoch": 3.5, + "learning_rate": 6.312353529218997e-05, + "loss": 0.0525, + "step": 11520 + }, + { + "epoch": 3.5, + "learning_rate": 6.309996911475101e-05, + "loss": 0.0847, + "step": 11521 + }, + { + "epoch": 3.5, + "learning_rate": 6.307640616539333e-05, + "loss": 0.0229, + "step": 11522 + }, + { + "epoch": 3.5, + "learning_rate": 6.305284644499231e-05, + "loss": 0.0591, + "step": 11523 + }, + { + "epoch": 3.5, + "learning_rate": 6.302928995442308e-05, + "loss": 0.0268, + "step": 11524 + }, + { + "epoch": 3.5, + "learning_rate": 6.300573669456062e-05, + "loss": 0.063, + "step": 11525 + }, + { + "epoch": 3.5, + "learning_rate": 6.298218666628e-05, + "loss": 0.0561, + "step": 11526 + }, + { + "epoch": 3.5, + "learning_rate": 6.29586398704559e-05, + "loss": 0.0581, + "step": 11527 + }, + { + "epoch": 3.5, + "learning_rate": 6.293509630796314e-05, + "loss": 0.0364, + "step": 11528 + }, + { + "epoch": 3.5, + "learning_rate": 6.291155597967625e-05, + "loss": 0.0702, + "step": 11529 + }, + { + "epoch": 3.5, + "learning_rate": 6.288801888646966e-05, + "loss": 0.0547, + "step": 11530 + }, + { + "epoch": 3.5, + "learning_rate": 6.286448502921766e-05, + "loss": 0.0636, + "step": 11531 + }, + { + "epoch": 3.5, + "learning_rate": 6.284095440879458e-05, + "loss": 0.0149, + "step": 11532 + }, + { + "epoch": 3.5, + "learning_rate": 6.281742702607439e-05, + "loss": 0.0265, + "step": 11533 + }, + { + "epoch": 3.5, + "learning_rate": 6.279390288193117e-05, + "loss": 0.0355, + "step": 11534 + }, + { + "epoch": 3.5, + "learning_rate": 6.277038197723875e-05, + "loss": 0.0557, + "step": 11535 + }, + { + "epoch": 3.5, + "learning_rate": 6.274686431287077e-05, + "loss": 0.0451, + "step": 11536 + }, + { + "epoch": 3.5, + "learning_rate": 6.272334988970098e-05, + "loss": 0.0338, + "step": 11537 + }, + { + "epoch": 3.5, + "learning_rate": 6.269983870860275e-05, + "loss": 0.0283, + "step": 11538 + }, + { + "epoch": 3.5, + "learning_rate": 6.267633077044961e-05, + "loss": 0.0249, + "step": 11539 + }, + { + "epoch": 3.5, + "learning_rate": 6.265282607611459e-05, + "loss": 0.0337, + "step": 11540 + }, + { + "epoch": 3.5, + "learning_rate": 6.262932462647094e-05, + "loss": 0.0443, + "step": 11541 + }, + { + "epoch": 3.5, + "learning_rate": 6.260582642239173e-05, + "loss": 0.0867, + "step": 11542 + }, + { + "epoch": 3.5, + "learning_rate": 6.258233146474972e-05, + "loss": 0.0443, + "step": 11543 + }, + { + "epoch": 3.51, + "learning_rate": 6.255883975441782e-05, + "loss": 0.0429, + "step": 11544 + }, + { + "epoch": 3.51, + "learning_rate": 6.253535129226859e-05, + "loss": 0.0546, + "step": 11545 + }, + { + "epoch": 3.51, + "learning_rate": 6.251186607917459e-05, + "loss": 0.0258, + "step": 11546 + }, + { + "epoch": 3.51, + "learning_rate": 6.248838411600813e-05, + "loss": 0.0283, + "step": 11547 + }, + { + "epoch": 3.51, + "learning_rate": 6.246490540364164e-05, + "loss": 0.0284, + "step": 11548 + }, + { + "epoch": 3.51, + "learning_rate": 6.244142994294716e-05, + "loss": 0.0402, + "step": 11549 + }, + { + "epoch": 3.51, + "learning_rate": 6.241795773479686e-05, + "loss": 0.0521, + "step": 11550 + }, + { + "epoch": 3.51, + "learning_rate": 6.239448878006258e-05, + "loss": 0.018, + "step": 11551 + }, + { + "epoch": 3.51, + "learning_rate": 6.237102307961612e-05, + "loss": 0.0489, + "step": 11552 + }, + { + "epoch": 3.51, + "learning_rate": 6.23475606343292e-05, + "loss": 0.0486, + "step": 11553 + }, + { + "epoch": 3.51, + "learning_rate": 6.232410144507332e-05, + "loss": 0.0617, + "step": 11554 + }, + { + "epoch": 3.51, + "learning_rate": 6.230064551272008e-05, + "loss": 0.0583, + "step": 11555 + }, + { + "epoch": 3.51, + "learning_rate": 6.227719283814057e-05, + "loss": 0.0302, + "step": 11556 + }, + { + "epoch": 3.51, + "learning_rate": 6.225374342220616e-05, + "loss": 0.0248, + "step": 11557 + }, + { + "epoch": 3.51, + "learning_rate": 6.223029726578781e-05, + "loss": 0.0496, + "step": 11558 + }, + { + "epoch": 3.51, + "learning_rate": 6.220685436975654e-05, + "loss": 0.0587, + "step": 11559 + }, + { + "epoch": 3.51, + "learning_rate": 6.218341473498328e-05, + "loss": 0.0512, + "step": 11560 + }, + { + "epoch": 3.51, + "learning_rate": 6.215997836233853e-05, + "loss": 0.0915, + "step": 11561 + }, + { + "epoch": 3.51, + "learning_rate": 6.213654525269304e-05, + "loss": 0.0534, + "step": 11562 + }, + { + "epoch": 3.51, + "learning_rate": 6.211311540691721e-05, + "loss": 0.0257, + "step": 11563 + }, + { + "epoch": 3.51, + "learning_rate": 6.208968882588146e-05, + "loss": 0.0724, + "step": 11564 + }, + { + "epoch": 3.51, + "learning_rate": 6.206626551045599e-05, + "loss": 0.0485, + "step": 11565 + }, + { + "epoch": 3.51, + "learning_rate": 6.20428454615108e-05, + "loss": 0.0533, + "step": 11566 + }, + { + "epoch": 3.51, + "learning_rate": 6.201942867991604e-05, + "loss": 0.0306, + "step": 11567 + }, + { + "epoch": 3.51, + "learning_rate": 6.199601516654145e-05, + "loss": 0.0317, + "step": 11568 + }, + { + "epoch": 3.51, + "learning_rate": 6.19726049222569e-05, + "loss": 0.0509, + "step": 11569 + }, + { + "epoch": 3.51, + "learning_rate": 6.194919794793192e-05, + "loss": 0.0312, + "step": 11570 + }, + { + "epoch": 3.51, + "learning_rate": 6.192579424443604e-05, + "loss": 0.0542, + "step": 11571 + }, + { + "epoch": 3.51, + "learning_rate": 6.190239381263856e-05, + "loss": 0.0306, + "step": 11572 + }, + { + "epoch": 3.51, + "learning_rate": 6.187899665340888e-05, + "loss": 0.0204, + "step": 11573 + }, + { + "epoch": 3.51, + "learning_rate": 6.1855602767616e-05, + "loss": 0.034, + "step": 11574 + }, + { + "epoch": 3.51, + "learning_rate": 6.183221215612904e-05, + "loss": 0.0188, + "step": 11575 + }, + { + "epoch": 3.51, + "learning_rate": 6.180882481981685e-05, + "loss": 0.0225, + "step": 11576 + }, + { + "epoch": 3.52, + "learning_rate": 6.178544075954814e-05, + "loss": 0.038, + "step": 11577 + }, + { + "epoch": 3.52, + "learning_rate": 6.176205997619169e-05, + "loss": 0.0521, + "step": 11578 + }, + { + "epoch": 3.52, + "learning_rate": 6.173868247061589e-05, + "loss": 0.052, + "step": 11579 + }, + { + "epoch": 3.52, + "learning_rate": 6.171530824368934e-05, + "loss": 0.0558, + "step": 11580 + }, + { + "epoch": 3.52, + "learning_rate": 6.169193729628006e-05, + "loss": 0.0332, + "step": 11581 + }, + { + "epoch": 3.52, + "learning_rate": 6.166856962925637e-05, + "loss": 0.0393, + "step": 11582 + }, + { + "epoch": 3.52, + "learning_rate": 6.164520524348632e-05, + "loss": 0.0538, + "step": 11583 + }, + { + "epoch": 3.52, + "learning_rate": 6.162184413983775e-05, + "loss": 0.042, + "step": 11584 + }, + { + "epoch": 3.52, + "learning_rate": 6.159848631917864e-05, + "loss": 0.0248, + "step": 11585 + }, + { + "epoch": 3.52, + "learning_rate": 6.157513178237639e-05, + "loss": 0.0461, + "step": 11586 + }, + { + "epoch": 3.52, + "learning_rate": 6.155178053029874e-05, + "loss": 0.0113, + "step": 11587 + }, + { + "epoch": 3.52, + "learning_rate": 6.152843256381304e-05, + "loss": 0.0301, + "step": 11588 + }, + { + "epoch": 3.52, + "learning_rate": 6.150508788378666e-05, + "loss": 0.0305, + "step": 11589 + }, + { + "epoch": 3.52, + "learning_rate": 6.148174649108679e-05, + "loss": 0.0519, + "step": 11590 + }, + { + "epoch": 3.52, + "learning_rate": 6.145840838658038e-05, + "loss": 0.046, + "step": 11591 + }, + { + "epoch": 3.52, + "learning_rate": 6.14350735711345e-05, + "loss": 0.0274, + "step": 11592 + }, + { + "epoch": 3.52, + "learning_rate": 6.141174204561588e-05, + "loss": 0.0779, + "step": 11593 + }, + { + "epoch": 3.52, + "learning_rate": 6.138841381089132e-05, + "loss": 0.0176, + "step": 11594 + }, + { + "epoch": 3.52, + "learning_rate": 6.136508886782733e-05, + "loss": 0.059, + "step": 11595 + }, + { + "epoch": 3.52, + "learning_rate": 6.134176721729037e-05, + "loss": 0.0239, + "step": 11596 + }, + { + "epoch": 3.52, + "learning_rate": 6.131844886014671e-05, + "loss": 0.0335, + "step": 11597 + }, + { + "epoch": 3.52, + "learning_rate": 6.129513379726267e-05, + "loss": 0.0216, + "step": 11598 + }, + { + "epoch": 3.52, + "learning_rate": 6.127182202950424e-05, + "loss": 0.0719, + "step": 11599 + }, + { + "epoch": 3.52, + "learning_rate": 6.124851355773748e-05, + "loss": 0.0556, + "step": 11600 + }, + { + "epoch": 3.52, + "learning_rate": 6.122520838282818e-05, + "loss": 0.0576, + "step": 11601 + }, + { + "epoch": 3.52, + "learning_rate": 6.120190650564201e-05, + "loss": 0.0588, + "step": 11602 + }, + { + "epoch": 3.52, + "learning_rate": 6.117860792704466e-05, + "loss": 0.0364, + "step": 11603 + }, + { + "epoch": 3.52, + "learning_rate": 6.115531264790151e-05, + "loss": 0.0323, + "step": 11604 + }, + { + "epoch": 3.52, + "learning_rate": 6.1132020669078e-05, + "loss": 0.0419, + "step": 11605 + }, + { + "epoch": 3.52, + "learning_rate": 6.110873199143934e-05, + "loss": 0.0301, + "step": 11606 + }, + { + "epoch": 3.52, + "learning_rate": 6.108544661585055e-05, + "loss": 0.0027, + "step": 11607 + }, + { + "epoch": 3.52, + "learning_rate": 6.106216454317672e-05, + "loss": 0.0567, + "step": 11608 + }, + { + "epoch": 3.52, + "learning_rate": 6.103888577428264e-05, + "loss": 0.0272, + "step": 11609 + }, + { + "epoch": 3.53, + "learning_rate": 6.101561031003311e-05, + "loss": 0.0725, + "step": 11610 + }, + { + "epoch": 3.53, + "learning_rate": 6.0992338151292735e-05, + "loss": 0.0411, + "step": 11611 + }, + { + "epoch": 3.53, + "learning_rate": 6.0969069298925964e-05, + "loss": 0.0112, + "step": 11612 + }, + { + "epoch": 3.53, + "learning_rate": 6.094580375379715e-05, + "loss": 0.0141, + "step": 11613 + }, + { + "epoch": 3.53, + "learning_rate": 6.092254151677062e-05, + "loss": 0.0453, + "step": 11614 + }, + { + "epoch": 3.53, + "learning_rate": 6.089928258871041e-05, + "loss": 0.0806, + "step": 11615 + }, + { + "epoch": 3.53, + "learning_rate": 6.087602697048061e-05, + "loss": 0.0245, + "step": 11616 + }, + { + "epoch": 3.53, + "learning_rate": 6.085277466294506e-05, + "loss": 0.0128, + "step": 11617 + }, + { + "epoch": 3.53, + "learning_rate": 6.082952566696746e-05, + "loss": 0.0751, + "step": 11618 + }, + { + "epoch": 3.53, + "learning_rate": 6.080627998341154e-05, + "loss": 0.0001, + "step": 11619 + }, + { + "epoch": 3.53, + "learning_rate": 6.07830376131407e-05, + "loss": 0.0215, + "step": 11620 + }, + { + "epoch": 3.53, + "learning_rate": 6.0759798557018504e-05, + "loss": 0.0462, + "step": 11621 + }, + { + "epoch": 3.53, + "learning_rate": 6.073656281590798e-05, + "loss": 0.0378, + "step": 11622 + }, + { + "epoch": 3.53, + "learning_rate": 6.0713330390672394e-05, + "loss": 0.0255, + "step": 11623 + }, + { + "epoch": 3.53, + "learning_rate": 6.06901012821748e-05, + "loss": 0.0316, + "step": 11624 + }, + { + "epoch": 3.53, + "learning_rate": 6.066687549127799e-05, + "loss": 0.0605, + "step": 11625 + }, + { + "epoch": 3.53, + "learning_rate": 6.064365301884489e-05, + "loss": 0.0134, + "step": 11626 + }, + { + "epoch": 3.53, + "learning_rate": 6.062043386573795e-05, + "loss": 0.0473, + "step": 11627 + }, + { + "epoch": 3.53, + "learning_rate": 6.059721803281984e-05, + "loss": 0.0265, + "step": 11628 + }, + { + "epoch": 3.53, + "learning_rate": 6.0574005520952856e-05, + "loss": 0.0169, + "step": 11629 + }, + { + "epoch": 3.53, + "learning_rate": 6.055079633099938e-05, + "loss": 0.0432, + "step": 11630 + }, + { + "epoch": 3.53, + "learning_rate": 6.0527590463821505e-05, + "loss": 0.0687, + "step": 11631 + }, + { + "epoch": 3.53, + "learning_rate": 6.050438792028122e-05, + "loss": 0.056, + "step": 11632 + }, + { + "epoch": 3.53, + "learning_rate": 6.048118870124053e-05, + "loss": 0.0803, + "step": 11633 + }, + { + "epoch": 3.53, + "learning_rate": 6.0457992807561125e-05, + "loss": 0.0213, + "step": 11634 + }, + { + "epoch": 3.53, + "learning_rate": 6.0434800240104756e-05, + "loss": 0.0503, + "step": 11635 + }, + { + "epoch": 3.53, + "learning_rate": 6.041161099973292e-05, + "loss": 0.0359, + "step": 11636 + }, + { + "epoch": 3.53, + "learning_rate": 6.0388425087306994e-05, + "loss": 0.0179, + "step": 11637 + }, + { + "epoch": 3.53, + "learning_rate": 6.0365242503688255e-05, + "loss": 0.0232, + "step": 11638 + }, + { + "epoch": 3.53, + "learning_rate": 6.034206324973795e-05, + "loss": 0.038, + "step": 11639 + }, + { + "epoch": 3.53, + "learning_rate": 6.031888732631704e-05, + "loss": 0.0495, + "step": 11640 + }, + { + "epoch": 3.53, + "learning_rate": 6.0295714734286524e-05, + "loss": 0.0344, + "step": 11641 + }, + { + "epoch": 3.53, + "learning_rate": 6.0272545474507145e-05, + "loss": 0.0344, + "step": 11642 + }, + { + "epoch": 3.54, + "learning_rate": 6.024937954783952e-05, + "loss": 0.0379, + "step": 11643 + }, + { + "epoch": 3.54, + "learning_rate": 6.0226216955144316e-05, + "loss": 0.0663, + "step": 11644 + }, + { + "epoch": 3.54, + "learning_rate": 6.0203057697281834e-05, + "loss": 0.037, + "step": 11645 + }, + { + "epoch": 3.54, + "learning_rate": 6.017990177511249e-05, + "loss": 0.0365, + "step": 11646 + }, + { + "epoch": 3.54, + "learning_rate": 6.0156749189496395e-05, + "loss": 0.032, + "step": 11647 + }, + { + "epoch": 3.54, + "learning_rate": 6.0133599941293566e-05, + "loss": 0.0277, + "step": 11648 + }, + { + "epoch": 3.54, + "learning_rate": 6.011045403136399e-05, + "loss": 0.0926, + "step": 11649 + }, + { + "epoch": 3.54, + "learning_rate": 6.008731146056742e-05, + "loss": 0.0411, + "step": 11650 + }, + { + "epoch": 3.54, + "learning_rate": 6.006417222976367e-05, + "loss": 0.0253, + "step": 11651 + }, + { + "epoch": 3.54, + "learning_rate": 6.004103633981206e-05, + "loss": 0.0346, + "step": 11652 + }, + { + "epoch": 3.54, + "learning_rate": 6.001790379157223e-05, + "loss": 0.0492, + "step": 11653 + }, + { + "epoch": 3.54, + "learning_rate": 5.999477458590333e-05, + "loss": 0.0109, + "step": 11654 + }, + { + "epoch": 3.54, + "learning_rate": 5.9971648723664663e-05, + "loss": 0.0749, + "step": 11655 + }, + { + "epoch": 3.54, + "learning_rate": 5.994852620571526e-05, + "loss": 0.0248, + "step": 11656 + }, + { + "epoch": 3.54, + "learning_rate": 5.9925407032913965e-05, + "loss": 0.0416, + "step": 11657 + }, + { + "epoch": 3.54, + "learning_rate": 5.990229120611972e-05, + "loss": 0.0473, + "step": 11658 + }, + { + "epoch": 3.54, + "learning_rate": 5.987917872619109e-05, + "loss": 0.0137, + "step": 11659 + }, + { + "epoch": 3.54, + "learning_rate": 5.9856069593986746e-05, + "loss": 0.0249, + "step": 11660 + }, + { + "epoch": 3.54, + "learning_rate": 5.983296381036507e-05, + "loss": 0.0578, + "step": 11661 + }, + { + "epoch": 3.54, + "learning_rate": 5.980986137618438e-05, + "loss": 0.0426, + "step": 11662 + }, + { + "epoch": 3.54, + "learning_rate": 5.978676229230281e-05, + "loss": 0.0376, + "step": 11663 + }, + { + "epoch": 3.54, + "learning_rate": 5.976366655957846e-05, + "loss": 0.0383, + "step": 11664 + }, + { + "epoch": 3.54, + "learning_rate": 5.974057417886934e-05, + "loss": 0.0127, + "step": 11665 + }, + { + "epoch": 3.54, + "learning_rate": 5.971748515103322e-05, + "loss": 0.0541, + "step": 11666 + }, + { + "epoch": 3.54, + "learning_rate": 5.969439947692776e-05, + "loss": 0.054, + "step": 11667 + }, + { + "epoch": 3.54, + "learning_rate": 5.9671317157410494e-05, + "loss": 0.0402, + "step": 11668 + }, + { + "epoch": 3.54, + "learning_rate": 5.9648238193338956e-05, + "loss": 0.0629, + "step": 11669 + }, + { + "epoch": 3.54, + "learning_rate": 5.9625162585570365e-05, + "loss": 0.0285, + "step": 11670 + }, + { + "epoch": 3.54, + "learning_rate": 5.9602090334962014e-05, + "loss": 0.0544, + "step": 11671 + }, + { + "epoch": 3.54, + "learning_rate": 5.9579021442370916e-05, + "loss": 0.0487, + "step": 11672 + }, + { + "epoch": 3.54, + "learning_rate": 5.9555955908653965e-05, + "loss": 0.0365, + "step": 11673 + }, + { + "epoch": 3.54, + "learning_rate": 5.9532893734668085e-05, + "loss": 0.0385, + "step": 11674 + }, + { + "epoch": 3.54, + "learning_rate": 5.950983492126985e-05, + "loss": 0.0486, + "step": 11675 + }, + { + "epoch": 3.55, + "learning_rate": 5.9486779469315935e-05, + "loss": 0.0023, + "step": 11676 + }, + { + "epoch": 3.55, + "learning_rate": 5.9463727379662744e-05, + "loss": 0.0473, + "step": 11677 + }, + { + "epoch": 3.55, + "learning_rate": 5.9440678653166575e-05, + "loss": 0.0287, + "step": 11678 + }, + { + "epoch": 3.55, + "learning_rate": 5.941763329068358e-05, + "loss": 0.0538, + "step": 11679 + }, + { + "epoch": 3.55, + "learning_rate": 5.939459129306995e-05, + "loss": 0.0516, + "step": 11680 + }, + { + "epoch": 3.55, + "learning_rate": 5.937155266118148e-05, + "loss": 0.0739, + "step": 11681 + }, + { + "epoch": 3.55, + "learning_rate": 5.934851739587412e-05, + "loss": 0.0405, + "step": 11682 + }, + { + "epoch": 3.55, + "learning_rate": 5.932548549800352e-05, + "loss": 0.0351, + "step": 11683 + }, + { + "epoch": 3.55, + "learning_rate": 5.930245696842517e-05, + "loss": 0.0468, + "step": 11684 + }, + { + "epoch": 3.55, + "learning_rate": 5.927943180799461e-05, + "loss": 0.0635, + "step": 11685 + }, + { + "epoch": 3.55, + "learning_rate": 5.92564100175671e-05, + "loss": 0.0375, + "step": 11686 + }, + { + "epoch": 3.55, + "learning_rate": 5.92333915979979e-05, + "loss": 0.0525, + "step": 11687 + }, + { + "epoch": 3.55, + "learning_rate": 5.9210376550142017e-05, + "loss": 0.0392, + "step": 11688 + }, + { + "epoch": 3.55, + "learning_rate": 5.9187364874854364e-05, + "loss": 0.0147, + "step": 11689 + }, + { + "epoch": 3.55, + "learning_rate": 5.916435657298985e-05, + "loss": 0.037, + "step": 11690 + }, + { + "epoch": 3.55, + "learning_rate": 5.9141351645403065e-05, + "loss": 0.0552, + "step": 11691 + }, + { + "epoch": 3.55, + "learning_rate": 5.911835009294873e-05, + "loss": 0.032, + "step": 11692 + }, + { + "epoch": 3.55, + "learning_rate": 5.9095351916481086e-05, + "loss": 0.0065, + "step": 11693 + }, + { + "epoch": 3.55, + "learning_rate": 5.907235711685459e-05, + "loss": 0.0407, + "step": 11694 + }, + { + "epoch": 3.55, + "learning_rate": 5.904936569492332e-05, + "loss": 0.026, + "step": 11695 + }, + { + "epoch": 3.55, + "learning_rate": 5.902637765154147e-05, + "loss": 0.0301, + "step": 11696 + }, + { + "epoch": 3.55, + "learning_rate": 5.9003392987562906e-05, + "loss": 0.0164, + "step": 11697 + }, + { + "epoch": 3.55, + "learning_rate": 5.898041170384138e-05, + "loss": 0.053, + "step": 11698 + }, + { + "epoch": 3.55, + "learning_rate": 5.8957433801230714e-05, + "loss": 0.0607, + "step": 11699 + }, + { + "epoch": 3.55, + "learning_rate": 5.893445928058436e-05, + "loss": 0.043, + "step": 11700 + }, + { + "epoch": 3.55, + "learning_rate": 5.8911488142755843e-05, + "loss": 0.0397, + "step": 11701 + }, + { + "epoch": 3.55, + "learning_rate": 5.888852038859842e-05, + "loss": 0.0308, + "step": 11702 + }, + { + "epoch": 3.55, + "learning_rate": 5.886555601896528e-05, + "loss": 0.0517, + "step": 11703 + }, + { + "epoch": 3.55, + "learning_rate": 5.884259503470943e-05, + "loss": 0.0424, + "step": 11704 + }, + { + "epoch": 3.55, + "learning_rate": 5.881963743668386e-05, + "loss": 0.038, + "step": 11705 + }, + { + "epoch": 3.55, + "learning_rate": 5.8796683225741435e-05, + "loss": 0.0176, + "step": 11706 + }, + { + "epoch": 3.55, + "learning_rate": 5.877373240273478e-05, + "loss": 0.0328, + "step": 11707 + }, + { + "epoch": 3.55, + "learning_rate": 5.875078496851644e-05, + "loss": 0.0385, + "step": 11708 + }, + { + "epoch": 3.56, + "learning_rate": 5.87278409239388e-05, + "loss": 0.0189, + "step": 11709 + }, + { + "epoch": 3.56, + "learning_rate": 5.870490026985426e-05, + "loss": 0.0506, + "step": 11710 + }, + { + "epoch": 3.56, + "learning_rate": 5.868196300711492e-05, + "loss": 0.025, + "step": 11711 + }, + { + "epoch": 3.56, + "learning_rate": 5.865902913657291e-05, + "loss": 0.0376, + "step": 11712 + }, + { + "epoch": 3.56, + "learning_rate": 5.863609865908011e-05, + "loss": 0.0385, + "step": 11713 + }, + { + "epoch": 3.56, + "learning_rate": 5.861317157548827e-05, + "loss": 0.0238, + "step": 11714 + }, + { + "epoch": 3.56, + "learning_rate": 5.859024788664916e-05, + "loss": 0.0195, + "step": 11715 + }, + { + "epoch": 3.56, + "learning_rate": 5.8567327593414245e-05, + "loss": 0.0392, + "step": 11716 + }, + { + "epoch": 3.56, + "learning_rate": 5.854441069663509e-05, + "loss": 0.0301, + "step": 11717 + }, + { + "epoch": 3.56, + "learning_rate": 5.852149719716278e-05, + "loss": 0.0285, + "step": 11718 + }, + { + "epoch": 3.56, + "learning_rate": 5.8498587095848636e-05, + "loss": 0.0451, + "step": 11719 + }, + { + "epoch": 3.56, + "learning_rate": 5.847568039354362e-05, + "loss": 0.0517, + "step": 11720 + }, + { + "epoch": 3.56, + "learning_rate": 5.845277709109872e-05, + "loss": 0.04, + "step": 11721 + }, + { + "epoch": 3.56, + "learning_rate": 5.842987718936471e-05, + "loss": 0.0316, + "step": 11722 + }, + { + "epoch": 3.56, + "learning_rate": 5.840698068919217e-05, + "loss": 0.0346, + "step": 11723 + }, + { + "epoch": 3.56, + "learning_rate": 5.838408759143174e-05, + "loss": 0.0375, + "step": 11724 + }, + { + "epoch": 3.56, + "learning_rate": 5.836119789693376e-05, + "loss": 0.0352, + "step": 11725 + }, + { + "epoch": 3.56, + "learning_rate": 5.83383116065486e-05, + "loss": 0.0531, + "step": 11726 + }, + { + "epoch": 3.56, + "learning_rate": 5.831542872112637e-05, + "loss": 0.0233, + "step": 11727 + }, + { + "epoch": 3.56, + "learning_rate": 5.829254924151705e-05, + "loss": 0.0376, + "step": 11728 + }, + { + "epoch": 3.56, + "learning_rate": 5.826967316857065e-05, + "loss": 0.0629, + "step": 11729 + }, + { + "epoch": 3.56, + "learning_rate": 5.8246800503136825e-05, + "loss": 0.0174, + "step": 11730 + }, + { + "epoch": 3.56, + "learning_rate": 5.8223931246065377e-05, + "loss": 0.0581, + "step": 11731 + }, + { + "epoch": 3.56, + "learning_rate": 5.820106539820567e-05, + "loss": 0.0612, + "step": 11732 + }, + { + "epoch": 3.56, + "learning_rate": 5.8178202960407315e-05, + "loss": 0.0346, + "step": 11733 + }, + { + "epoch": 3.56, + "learning_rate": 5.8155343933519334e-05, + "loss": 0.0639, + "step": 11734 + }, + { + "epoch": 3.56, + "learning_rate": 5.813248831839106e-05, + "loss": 0.049, + "step": 11735 + }, + { + "epoch": 3.56, + "learning_rate": 5.810963611587138e-05, + "loss": 0.0127, + "step": 11736 + }, + { + "epoch": 3.56, + "learning_rate": 5.80867873268093e-05, + "loss": 0.0052, + "step": 11737 + }, + { + "epoch": 3.56, + "learning_rate": 5.806394195205356e-05, + "loss": 0.0454, + "step": 11738 + }, + { + "epoch": 3.56, + "learning_rate": 5.804109999245271e-05, + "loss": 0.0312, + "step": 11739 + }, + { + "epoch": 3.56, + "learning_rate": 5.801826144885537e-05, + "loss": 0.0407, + "step": 11740 + }, + { + "epoch": 3.56, + "learning_rate": 5.799542632210985e-05, + "loss": 0.0557, + "step": 11741 + }, + { + "epoch": 3.57, + "learning_rate": 5.797259461306449e-05, + "loss": 0.0314, + "step": 11742 + }, + { + "epoch": 3.57, + "learning_rate": 5.794976632256735e-05, + "loss": 0.0803, + "step": 11743 + }, + { + "epoch": 3.57, + "learning_rate": 5.792694145146648e-05, + "loss": 0.043, + "step": 11744 + }, + { + "epoch": 3.57, + "learning_rate": 5.790412000060967e-05, + "loss": 0.0578, + "step": 11745 + }, + { + "epoch": 3.57, + "learning_rate": 5.7881301970844736e-05, + "loss": 0.0298, + "step": 11746 + }, + { + "epoch": 3.57, + "learning_rate": 5.785848736301936e-05, + "loss": 0.0409, + "step": 11747 + }, + { + "epoch": 3.57, + "learning_rate": 5.783567617798096e-05, + "loss": 0.0369, + "step": 11748 + }, + { + "epoch": 3.57, + "learning_rate": 5.781286841657693e-05, + "loss": 0.0359, + "step": 11749 + }, + { + "epoch": 3.57, + "learning_rate": 5.779006407965444e-05, + "loss": 0.0158, + "step": 11750 + }, + { + "epoch": 3.57, + "learning_rate": 5.776726316806073e-05, + "loss": 0.0041, + "step": 11751 + }, + { + "epoch": 3.57, + "learning_rate": 5.774446568264267e-05, + "loss": 0.0469, + "step": 11752 + }, + { + "epoch": 3.57, + "learning_rate": 5.772167162424722e-05, + "loss": 0.0476, + "step": 11753 + }, + { + "epoch": 3.57, + "learning_rate": 5.7698880993721066e-05, + "loss": 0.0726, + "step": 11754 + }, + { + "epoch": 3.57, + "learning_rate": 5.767609379191075e-05, + "loss": 0.0344, + "step": 11755 + }, + { + "epoch": 3.57, + "learning_rate": 5.765331001966289e-05, + "loss": 0.0111, + "step": 11756 + }, + { + "epoch": 3.57, + "learning_rate": 5.763052967782368e-05, + "loss": 0.0697, + "step": 11757 + }, + { + "epoch": 3.57, + "learning_rate": 5.760775276723953e-05, + "loss": 0.0462, + "step": 11758 + }, + { + "epoch": 3.57, + "learning_rate": 5.7584979288756325e-05, + "loss": 0.0586, + "step": 11759 + }, + { + "epoch": 3.57, + "learning_rate": 5.75622092432202e-05, + "loss": 0.0309, + "step": 11760 + }, + { + "epoch": 3.57, + "learning_rate": 5.753944263147687e-05, + "loss": 0.021, + "step": 11761 + }, + { + "epoch": 3.57, + "learning_rate": 5.751667945437216e-05, + "loss": 0.0334, + "step": 11762 + }, + { + "epoch": 3.57, + "learning_rate": 5.74939197127516e-05, + "loss": 0.0694, + "step": 11763 + }, + { + "epoch": 3.57, + "learning_rate": 5.7471163407460597e-05, + "loss": 0.0118, + "step": 11764 + }, + { + "epoch": 3.57, + "learning_rate": 5.744841053934459e-05, + "loss": 0.0239, + "step": 11765 + }, + { + "epoch": 3.57, + "learning_rate": 5.742566110924868e-05, + "loss": 0.0634, + "step": 11766 + }, + { + "epoch": 3.57, + "learning_rate": 5.740291511801803e-05, + "loss": 0.0346, + "step": 11767 + }, + { + "epoch": 3.57, + "learning_rate": 5.7380172566497546e-05, + "loss": 0.0548, + "step": 11768 + }, + { + "epoch": 3.57, + "learning_rate": 5.7357433455532006e-05, + "loss": 0.0922, + "step": 11769 + }, + { + "epoch": 3.57, + "learning_rate": 5.733469778596618e-05, + "loss": 0.0395, + "step": 11770 + }, + { + "epoch": 3.57, + "learning_rate": 5.731196555864454e-05, + "loss": 0.0852, + "step": 11771 + }, + { + "epoch": 3.57, + "learning_rate": 5.7289236774411634e-05, + "loss": 0.0354, + "step": 11772 + }, + { + "epoch": 3.57, + "learning_rate": 5.72665114341117e-05, + "loss": 0.0451, + "step": 11773 + }, + { + "epoch": 3.57, + "learning_rate": 5.724378953858893e-05, + "loss": 0.0524, + "step": 11774 + }, + { + "epoch": 3.58, + "learning_rate": 5.722107108868733e-05, + "loss": 0.031, + "step": 11775 + }, + { + "epoch": 3.58, + "learning_rate": 5.7198356085250916e-05, + "loss": 0.0498, + "step": 11776 + }, + { + "epoch": 3.58, + "learning_rate": 5.7175644529123385e-05, + "loss": 0.0362, + "step": 11777 + }, + { + "epoch": 3.58, + "learning_rate": 5.71529364211485e-05, + "loss": 0.0575, + "step": 11778 + }, + { + "epoch": 3.58, + "learning_rate": 5.7130231762169755e-05, + "loss": 0.0655, + "step": 11779 + }, + { + "epoch": 3.58, + "learning_rate": 5.7107530553030505e-05, + "loss": 0.0281, + "step": 11780 + }, + { + "epoch": 3.58, + "learning_rate": 5.708483279457415e-05, + "loss": 0.0395, + "step": 11781 + }, + { + "epoch": 3.58, + "learning_rate": 5.706213848764374e-05, + "loss": 0.0345, + "step": 11782 + }, + { + "epoch": 3.58, + "learning_rate": 5.703944763308244e-05, + "loss": 0.0386, + "step": 11783 + }, + { + "epoch": 3.58, + "learning_rate": 5.701676023173295e-05, + "loss": 0.0347, + "step": 11784 + }, + { + "epoch": 3.58, + "learning_rate": 5.699407628443821e-05, + "loss": 0.041, + "step": 11785 + }, + { + "epoch": 3.58, + "learning_rate": 5.697139579204073e-05, + "loss": 0.0786, + "step": 11786 + }, + { + "epoch": 3.58, + "learning_rate": 5.6948718755383096e-05, + "loss": 0.0287, + "step": 11787 + }, + { + "epoch": 3.58, + "learning_rate": 5.692604517530781e-05, + "loss": 0.0543, + "step": 11788 + }, + { + "epoch": 3.58, + "learning_rate": 5.6903375052656894e-05, + "loss": 0.0261, + "step": 11789 + }, + { + "epoch": 3.58, + "learning_rate": 5.6880708388272647e-05, + "loss": 0.0527, + "step": 11790 + }, + { + "epoch": 3.58, + "learning_rate": 5.685804518299696e-05, + "loss": 0.0634, + "step": 11791 + }, + { + "epoch": 3.58, + "learning_rate": 5.683538543767181e-05, + "loss": 0.05, + "step": 11792 + }, + { + "epoch": 3.58, + "learning_rate": 5.681272915313888e-05, + "loss": 0.0653, + "step": 11793 + }, + { + "epoch": 3.58, + "learning_rate": 5.6790076330239747e-05, + "loss": 0.0162, + "step": 11794 + }, + { + "epoch": 3.58, + "learning_rate": 5.676742696981599e-05, + "loss": 0.0506, + "step": 11795 + }, + { + "epoch": 3.58, + "learning_rate": 5.6744781072708875e-05, + "loss": 0.0558, + "step": 11796 + }, + { + "epoch": 3.58, + "learning_rate": 5.672213863975971e-05, + "loss": 0.0401, + "step": 11797 + }, + { + "epoch": 3.58, + "learning_rate": 5.669949967180954e-05, + "loss": 0.0069, + "step": 11798 + }, + { + "epoch": 3.58, + "learning_rate": 5.667686416969944e-05, + "loss": 0.0539, + "step": 11799 + }, + { + "epoch": 3.58, + "learning_rate": 5.665423213427005e-05, + "loss": 0.0501, + "step": 11800 + }, + { + "epoch": 3.58, + "learning_rate": 5.663160356636227e-05, + "loss": 0.0265, + "step": 11801 + }, + { + "epoch": 3.58, + "learning_rate": 5.660897846681656e-05, + "loss": 0.0472, + "step": 11802 + }, + { + "epoch": 3.58, + "learning_rate": 5.6586356836473486e-05, + "loss": 0.047, + "step": 11803 + }, + { + "epoch": 3.58, + "learning_rate": 5.656373867617332e-05, + "loss": 0.0629, + "step": 11804 + }, + { + "epoch": 3.58, + "learning_rate": 5.654112398675623e-05, + "loss": 0.0527, + "step": 11805 + }, + { + "epoch": 3.58, + "learning_rate": 5.651851276906235e-05, + "loss": 0.0497, + "step": 11806 + }, + { + "epoch": 3.58, + "learning_rate": 5.649590502393153e-05, + "loss": 0.0106, + "step": 11807 + }, + { + "epoch": 3.59, + "learning_rate": 5.647330075220371e-05, + "loss": 0.0702, + "step": 11808 + }, + { + "epoch": 3.59, + "learning_rate": 5.645069995471849e-05, + "loss": 0.0694, + "step": 11809 + }, + { + "epoch": 3.59, + "learning_rate": 5.642810263231539e-05, + "loss": 0.0301, + "step": 11810 + }, + { + "epoch": 3.59, + "learning_rate": 5.640550878583394e-05, + "loss": 0.0747, + "step": 11811 + }, + { + "epoch": 3.59, + "learning_rate": 5.6382918416113305e-05, + "loss": 0.0473, + "step": 11812 + }, + { + "epoch": 3.59, + "learning_rate": 5.636033152399279e-05, + "loss": 0.0272, + "step": 11813 + }, + { + "epoch": 3.59, + "learning_rate": 5.633774811031135e-05, + "loss": 0.0428, + "step": 11814 + }, + { + "epoch": 3.59, + "learning_rate": 5.631516817590792e-05, + "loss": 0.0559, + "step": 11815 + }, + { + "epoch": 3.59, + "learning_rate": 5.629259172162119e-05, + "loss": 0.0285, + "step": 11816 + }, + { + "epoch": 3.59, + "learning_rate": 5.6270018748289943e-05, + "loss": 0.0452, + "step": 11817 + }, + { + "epoch": 3.59, + "learning_rate": 5.624744925675259e-05, + "loss": 0.0435, + "step": 11818 + }, + { + "epoch": 3.59, + "learning_rate": 5.622488324784763e-05, + "loss": 0.0091, + "step": 11819 + }, + { + "epoch": 3.59, + "learning_rate": 5.6202320722413256e-05, + "loss": 0.0378, + "step": 11820 + }, + { + "epoch": 3.59, + "learning_rate": 5.617976168128755e-05, + "loss": 0.04, + "step": 11821 + }, + { + "epoch": 3.59, + "learning_rate": 5.615720612530864e-05, + "loss": 0.0162, + "step": 11822 + }, + { + "epoch": 3.59, + "learning_rate": 5.613465405531426e-05, + "loss": 0.023, + "step": 11823 + }, + { + "epoch": 3.59, + "learning_rate": 5.6112105472142346e-05, + "loss": 0.0256, + "step": 11824 + }, + { + "epoch": 3.59, + "learning_rate": 5.608956037663027e-05, + "loss": 0.0279, + "step": 11825 + }, + { + "epoch": 3.59, + "learning_rate": 5.606701876961571e-05, + "loss": 0.0338, + "step": 11826 + }, + { + "epoch": 3.59, + "learning_rate": 5.604448065193586e-05, + "loss": 0.0556, + "step": 11827 + }, + { + "epoch": 3.59, + "learning_rate": 5.6021946024428056e-05, + "loss": 0.0493, + "step": 11828 + }, + { + "epoch": 3.59, + "learning_rate": 5.5999414887929476e-05, + "loss": 0.0894, + "step": 11829 + }, + { + "epoch": 3.59, + "learning_rate": 5.597688724327686e-05, + "loss": 0.0295, + "step": 11830 + }, + { + "epoch": 3.59, + "learning_rate": 5.5954363091307224e-05, + "loss": 0.0108, + "step": 11831 + }, + { + "epoch": 3.59, + "learning_rate": 5.593184243285716e-05, + "loss": 0.0409, + "step": 11832 + }, + { + "epoch": 3.59, + "learning_rate": 5.5909325268763336e-05, + "loss": 0.0618, + "step": 11833 + }, + { + "epoch": 3.59, + "learning_rate": 5.588681159986217e-05, + "loss": 0.0386, + "step": 11834 + }, + { + "epoch": 3.59, + "learning_rate": 5.5864301426989914e-05, + "loss": 0.0173, + "step": 11835 + }, + { + "epoch": 3.59, + "learning_rate": 5.584179475098287e-05, + "loss": 0.0558, + "step": 11836 + }, + { + "epoch": 3.59, + "learning_rate": 5.5819291572676975e-05, + "loss": 0.0934, + "step": 11837 + }, + { + "epoch": 3.59, + "learning_rate": 5.579679189290827e-05, + "loss": 0.0286, + "step": 11838 + }, + { + "epoch": 3.59, + "learning_rate": 5.57742957125125e-05, + "loss": 0.063, + "step": 11839 + }, + { + "epoch": 3.59, + "learning_rate": 5.5751803032325324e-05, + "loss": 0.019, + "step": 11840 + }, + { + "epoch": 3.6, + "learning_rate": 5.572931385318224e-05, + "loss": 0.0576, + "step": 11841 + }, + { + "epoch": 3.6, + "learning_rate": 5.570682817591874e-05, + "loss": 0.0491, + "step": 11842 + }, + { + "epoch": 3.6, + "learning_rate": 5.568434600137001e-05, + "loss": 0.0452, + "step": 11843 + }, + { + "epoch": 3.6, + "learning_rate": 5.566186733037131e-05, + "loss": 0.0625, + "step": 11844 + }, + { + "epoch": 3.6, + "learning_rate": 5.563939216375759e-05, + "loss": 0.0379, + "step": 11845 + }, + { + "epoch": 3.6, + "learning_rate": 5.561692050236369e-05, + "loss": 0.0244, + "step": 11846 + }, + { + "epoch": 3.6, + "learning_rate": 5.5594452347024465e-05, + "loss": 0.0207, + "step": 11847 + }, + { + "epoch": 3.6, + "learning_rate": 5.557198769857444e-05, + "loss": 0.0375, + "step": 11848 + }, + { + "epoch": 3.6, + "learning_rate": 5.5549526557848274e-05, + "loss": 0.0221, + "step": 11849 + }, + { + "epoch": 3.6, + "learning_rate": 5.552706892568012e-05, + "loss": 0.0232, + "step": 11850 + }, + { + "epoch": 3.6, + "learning_rate": 5.550461480290431e-05, + "loss": 0.036, + "step": 11851 + }, + { + "epoch": 3.6, + "learning_rate": 5.5482164190355005e-05, + "loss": 0.0276, + "step": 11852 + }, + { + "epoch": 3.6, + "learning_rate": 5.545971708886608e-05, + "loss": 0.0423, + "step": 11853 + }, + { + "epoch": 3.6, + "learning_rate": 5.543727349927153e-05, + "loss": 0.0504, + "step": 11854 + }, + { + "epoch": 3.6, + "learning_rate": 5.541483342240487e-05, + "loss": 0.0321, + "step": 11855 + }, + { + "epoch": 3.6, + "learning_rate": 5.539239685909982e-05, + "loss": 0.0368, + "step": 11856 + }, + { + "epoch": 3.6, + "learning_rate": 5.536996381018976e-05, + "loss": 0.014, + "step": 11857 + }, + { + "epoch": 3.6, + "learning_rate": 5.5347534276508076e-05, + "loss": 0.0195, + "step": 11858 + }, + { + "epoch": 3.6, + "learning_rate": 5.532510825888789e-05, + "loss": 0.0444, + "step": 11859 + }, + { + "epoch": 3.6, + "learning_rate": 5.530268575816233e-05, + "loss": 0.0287, + "step": 11860 + }, + { + "epoch": 3.6, + "learning_rate": 5.528026677516433e-05, + "loss": 0.0443, + "step": 11861 + }, + { + "epoch": 3.6, + "learning_rate": 5.525785131072658e-05, + "loss": 0.0382, + "step": 11862 + }, + { + "epoch": 3.6, + "learning_rate": 5.523543936568188e-05, + "loss": 0.0521, + "step": 11863 + }, + { + "epoch": 3.6, + "learning_rate": 5.5213030940862666e-05, + "loss": 0.0511, + "step": 11864 + }, + { + "epoch": 3.6, + "learning_rate": 5.519062603710149e-05, + "loss": 0.0442, + "step": 11865 + }, + { + "epoch": 3.6, + "learning_rate": 5.516822465523044e-05, + "loss": 0.0626, + "step": 11866 + }, + { + "epoch": 3.6, + "learning_rate": 5.514582679608178e-05, + "loss": 0.0324, + "step": 11867 + }, + { + "epoch": 3.6, + "learning_rate": 5.512343246048746e-05, + "loss": 0.0488, + "step": 11868 + }, + { + "epoch": 3.6, + "learning_rate": 5.51010416492794e-05, + "loss": 0.037, + "step": 11869 + }, + { + "epoch": 3.6, + "learning_rate": 5.507865436328946e-05, + "loss": 0.0817, + "step": 11870 + }, + { + "epoch": 3.6, + "learning_rate": 5.5056270603349064e-05, + "loss": 0.0118, + "step": 11871 + }, + { + "epoch": 3.6, + "learning_rate": 5.503389037028983e-05, + "loss": 0.0286, + "step": 11872 + }, + { + "epoch": 3.6, + "learning_rate": 5.501151366494304e-05, + "loss": 0.0515, + "step": 11873 + }, + { + "epoch": 3.61, + "learning_rate": 5.498914048814e-05, + "loss": 0.0393, + "step": 11874 + }, + { + "epoch": 3.61, + "learning_rate": 5.4966770840711786e-05, + "loss": 0.0323, + "step": 11875 + }, + { + "epoch": 3.61, + "learning_rate": 5.49444047234893e-05, + "loss": 0.0558, + "step": 11876 + }, + { + "epoch": 3.61, + "learning_rate": 5.492204213730349e-05, + "loss": 0.0216, + "step": 11877 + }, + { + "epoch": 3.61, + "learning_rate": 5.489968308298493e-05, + "loss": 0.0416, + "step": 11878 + }, + { + "epoch": 3.61, + "learning_rate": 5.487732756136433e-05, + "loss": 0.0343, + "step": 11879 + }, + { + "epoch": 3.61, + "learning_rate": 5.4854975573272055e-05, + "loss": 0.0423, + "step": 11880 + }, + { + "epoch": 3.61, + "learning_rate": 5.483262711953845e-05, + "loss": 0.057, + "step": 11881 + }, + { + "epoch": 3.61, + "learning_rate": 5.48102822009936e-05, + "loss": 0.0336, + "step": 11882 + }, + { + "epoch": 3.61, + "learning_rate": 5.478794081846768e-05, + "loss": 0.0211, + "step": 11883 + }, + { + "epoch": 3.61, + "learning_rate": 5.476560297279051e-05, + "loss": 0.0298, + "step": 11884 + }, + { + "epoch": 3.61, + "learning_rate": 5.474326866479196e-05, + "loss": 0.0516, + "step": 11885 + }, + { + "epoch": 3.61, + "learning_rate": 5.4720937895301636e-05, + "loss": 0.0457, + "step": 11886 + }, + { + "epoch": 3.61, + "learning_rate": 5.469861066514902e-05, + "loss": 0.0147, + "step": 11887 + }, + { + "epoch": 3.61, + "learning_rate": 5.4676286975163606e-05, + "loss": 0.0303, + "step": 11888 + }, + { + "epoch": 3.61, + "learning_rate": 5.4653966826174526e-05, + "loss": 0.0386, + "step": 11889 + }, + { + "epoch": 3.61, + "learning_rate": 5.46316502190111e-05, + "loss": 0.0497, + "step": 11890 + }, + { + "epoch": 3.61, + "learning_rate": 5.460933715450209e-05, + "loss": 0.0528, + "step": 11891 + }, + { + "epoch": 3.61, + "learning_rate": 5.4587027633476485e-05, + "loss": 0.0675, + "step": 11892 + }, + { + "epoch": 3.61, + "learning_rate": 5.4564721656763056e-05, + "loss": 0.0457, + "step": 11893 + }, + { + "epoch": 3.61, + "learning_rate": 5.4542419225190296e-05, + "loss": 0.0456, + "step": 11894 + }, + { + "epoch": 3.61, + "learning_rate": 5.452012033958683e-05, + "loss": 0.036, + "step": 11895 + }, + { + "epoch": 3.61, + "learning_rate": 5.449782500078083e-05, + "loss": 0.0003, + "step": 11896 + }, + { + "epoch": 3.61, + "learning_rate": 5.4475533209600594e-05, + "loss": 0.0382, + "step": 11897 + }, + { + "epoch": 3.61, + "learning_rate": 5.445324496687413e-05, + "loss": 0.0397, + "step": 11898 + }, + { + "epoch": 3.61, + "learning_rate": 5.4430960273429484e-05, + "loss": 0.0562, + "step": 11899 + }, + { + "epoch": 3.61, + "learning_rate": 5.440867913009441e-05, + "loss": 0.0498, + "step": 11900 + }, + { + "epoch": 3.61, + "learning_rate": 5.4386401537696536e-05, + "loss": 0.0443, + "step": 11901 + }, + { + "epoch": 3.61, + "learning_rate": 5.436412749706352e-05, + "loss": 0.0704, + "step": 11902 + }, + { + "epoch": 3.61, + "learning_rate": 5.434185700902264e-05, + "loss": 0.0259, + "step": 11903 + }, + { + "epoch": 3.61, + "learning_rate": 5.431959007440134e-05, + "loss": 0.0425, + "step": 11904 + }, + { + "epoch": 3.61, + "learning_rate": 5.4297326694026675e-05, + "loss": 0.0479, + "step": 11905 + }, + { + "epoch": 3.61, + "learning_rate": 5.427506686872567e-05, + "loss": 0.0482, + "step": 11906 + }, + { + "epoch": 3.62, + "learning_rate": 5.425281059932517e-05, + "loss": 0.0676, + "step": 11907 + }, + { + "epoch": 3.62, + "learning_rate": 5.4230557886652044e-05, + "loss": 0.0302, + "step": 11908 + }, + { + "epoch": 3.62, + "learning_rate": 5.42083087315328e-05, + "loss": 0.0456, + "step": 11909 + }, + { + "epoch": 3.62, + "learning_rate": 5.4186063134794e-05, + "loss": 0.0315, + "step": 11910 + }, + { + "epoch": 3.62, + "learning_rate": 5.416382109726201e-05, + "loss": 0.0082, + "step": 11911 + }, + { + "epoch": 3.62, + "learning_rate": 5.4141582619762983e-05, + "loss": 0.0237, + "step": 11912 + }, + { + "epoch": 3.62, + "learning_rate": 5.41193477031231e-05, + "loss": 0.0426, + "step": 11913 + }, + { + "epoch": 3.62, + "learning_rate": 5.409711634816823e-05, + "loss": 0.0625, + "step": 11914 + }, + { + "epoch": 3.62, + "learning_rate": 5.407488855572431e-05, + "loss": 0.0316, + "step": 11915 + }, + { + "epoch": 3.62, + "learning_rate": 5.405266432661698e-05, + "loss": 0.0456, + "step": 11916 + }, + { + "epoch": 3.62, + "learning_rate": 5.4030443661671755e-05, + "loss": 0.0377, + "step": 11917 + }, + { + "epoch": 3.62, + "learning_rate": 5.400822656171417e-05, + "loss": 0.0565, + "step": 11918 + }, + { + "epoch": 3.62, + "learning_rate": 5.398601302756942e-05, + "loss": 0.0226, + "step": 11919 + }, + { + "epoch": 3.62, + "learning_rate": 5.396380306006278e-05, + "loss": 0.0488, + "step": 11920 + }, + { + "epoch": 3.62, + "learning_rate": 5.394159666001925e-05, + "loss": 0.0447, + "step": 11921 + }, + { + "epoch": 3.62, + "learning_rate": 5.39193938282637e-05, + "loss": 0.0243, + "step": 11922 + }, + { + "epoch": 3.62, + "learning_rate": 5.3897194565620846e-05, + "loss": 0.0458, + "step": 11923 + }, + { + "epoch": 3.62, + "learning_rate": 5.387499887291548e-05, + "loss": 0.019, + "step": 11924 + }, + { + "epoch": 3.62, + "learning_rate": 5.385280675097195e-05, + "loss": 0.0617, + "step": 11925 + }, + { + "epoch": 3.62, + "learning_rate": 5.383061820061476e-05, + "loss": 0.063, + "step": 11926 + }, + { + "epoch": 3.62, + "learning_rate": 5.380843322266809e-05, + "loss": 0.0409, + "step": 11927 + }, + { + "epoch": 3.62, + "learning_rate": 5.3786251817955976e-05, + "loss": 0.0706, + "step": 11928 + }, + { + "epoch": 3.62, + "learning_rate": 5.3764073987302534e-05, + "loss": 0.0432, + "step": 11929 + }, + { + "epoch": 3.62, + "learning_rate": 5.374189973153147e-05, + "loss": 0.0614, + "step": 11930 + }, + { + "epoch": 3.62, + "learning_rate": 5.3719729051466683e-05, + "loss": 0.0283, + "step": 11931 + }, + { + "epoch": 3.62, + "learning_rate": 5.36975619479315e-05, + "loss": 0.0495, + "step": 11932 + }, + { + "epoch": 3.62, + "learning_rate": 5.367539842174949e-05, + "loss": 0.0377, + "step": 11933 + }, + { + "epoch": 3.62, + "learning_rate": 5.3653238473744e-05, + "loss": 0.0913, + "step": 11934 + }, + { + "epoch": 3.62, + "learning_rate": 5.363108210473813e-05, + "loss": 0.0417, + "step": 11935 + }, + { + "epoch": 3.62, + "learning_rate": 5.360892931555505e-05, + "loss": 0.0174, + "step": 11936 + }, + { + "epoch": 3.62, + "learning_rate": 5.3586780107017495e-05, + "loss": 0.0532, + "step": 11937 + }, + { + "epoch": 3.62, + "learning_rate": 5.356463447994837e-05, + "loss": 0.0529, + "step": 11938 + }, + { + "epoch": 3.63, + "learning_rate": 5.354249243517023e-05, + "loss": 0.0428, + "step": 11939 + }, + { + "epoch": 3.63, + "learning_rate": 5.352035397350569e-05, + "loss": 0.0457, + "step": 11940 + }, + { + "epoch": 3.63, + "learning_rate": 5.349821909577707e-05, + "loss": 0.0272, + "step": 11941 + }, + { + "epoch": 3.63, + "learning_rate": 5.3476087802806564e-05, + "loss": 0.0591, + "step": 11942 + }, + { + "epoch": 3.63, + "learning_rate": 5.345396009541641e-05, + "loss": 0.0517, + "step": 11943 + }, + { + "epoch": 3.63, + "learning_rate": 5.343183597442846e-05, + "loss": 0.0704, + "step": 11944 + }, + { + "epoch": 3.63, + "learning_rate": 5.340971544066468e-05, + "loss": 0.039, + "step": 11945 + }, + { + "epoch": 3.63, + "learning_rate": 5.338759849494671e-05, + "loss": 0.0365, + "step": 11946 + }, + { + "epoch": 3.63, + "learning_rate": 5.336548513809616e-05, + "loss": 0.045, + "step": 11947 + }, + { + "epoch": 3.63, + "learning_rate": 5.334337537093439e-05, + "loss": 0.0385, + "step": 11948 + }, + { + "epoch": 3.63, + "learning_rate": 5.332126919428284e-05, + "loss": 0.0239, + "step": 11949 + }, + { + "epoch": 3.63, + "learning_rate": 5.3299166608962574e-05, + "loss": 0.0355, + "step": 11950 + }, + { + "epoch": 3.63, + "learning_rate": 5.327706761579475e-05, + "loss": 0.0454, + "step": 11951 + }, + { + "epoch": 3.63, + "learning_rate": 5.325497221560023e-05, + "loss": 0.0463, + "step": 11952 + }, + { + "epoch": 3.63, + "learning_rate": 5.3232880409199735e-05, + "loss": 0.0372, + "step": 11953 + }, + { + "epoch": 3.63, + "learning_rate": 5.321079219741401e-05, + "loss": 0.0475, + "step": 11954 + }, + { + "epoch": 3.63, + "learning_rate": 5.31887075810635e-05, + "loss": 0.0161, + "step": 11955 + }, + { + "epoch": 3.63, + "learning_rate": 5.3166626560968635e-05, + "loss": 0.0582, + "step": 11956 + }, + { + "epoch": 3.63, + "learning_rate": 5.314454913794963e-05, + "loss": 0.0424, + "step": 11957 + }, + { + "epoch": 3.63, + "learning_rate": 5.312247531282655e-05, + "loss": 0.0307, + "step": 11958 + }, + { + "epoch": 3.63, + "learning_rate": 5.310040508641947e-05, + "loss": 0.0548, + "step": 11959 + }, + { + "epoch": 3.63, + "learning_rate": 5.307833845954814e-05, + "loss": 0.0273, + "step": 11960 + }, + { + "epoch": 3.63, + "learning_rate": 5.305627543303242e-05, + "loss": 0.0075, + "step": 11961 + }, + { + "epoch": 3.63, + "learning_rate": 5.3034216007691674e-05, + "loss": 0.0758, + "step": 11962 + }, + { + "epoch": 3.63, + "learning_rate": 5.3012160184345504e-05, + "loss": 0.0431, + "step": 11963 + }, + { + "epoch": 3.63, + "learning_rate": 5.299010796381313e-05, + "loss": 0.0641, + "step": 11964 + }, + { + "epoch": 3.63, + "learning_rate": 5.2968059346913795e-05, + "loss": 0.0548, + "step": 11965 + }, + { + "epoch": 3.63, + "learning_rate": 5.294601433446653e-05, + "loss": 0.051, + "step": 11966 + }, + { + "epoch": 3.63, + "learning_rate": 5.2923972927290166e-05, + "loss": 0.0378, + "step": 11967 + }, + { + "epoch": 3.63, + "learning_rate": 5.290193512620359e-05, + "loss": 0.056, + "step": 11968 + }, + { + "epoch": 3.63, + "learning_rate": 5.2879900932025326e-05, + "loss": 0.0225, + "step": 11969 + }, + { + "epoch": 3.63, + "learning_rate": 5.285787034557399e-05, + "loss": 0.0398, + "step": 11970 + }, + { + "epoch": 3.63, + "learning_rate": 5.283584336766792e-05, + "loss": 0.0336, + "step": 11971 + }, + { + "epoch": 3.64, + "learning_rate": 5.281381999912533e-05, + "loss": 0.0535, + "step": 11972 + }, + { + "epoch": 3.64, + "learning_rate": 5.279180024076427e-05, + "loss": 0.0562, + "step": 11973 + }, + { + "epoch": 3.64, + "learning_rate": 5.276978409340278e-05, + "loss": 0.0521, + "step": 11974 + }, + { + "epoch": 3.64, + "learning_rate": 5.274777155785875e-05, + "loss": 0.0635, + "step": 11975 + }, + { + "epoch": 3.64, + "learning_rate": 5.2725762634949794e-05, + "loss": 0.0694, + "step": 11976 + }, + { + "epoch": 3.64, + "learning_rate": 5.270375732549351e-05, + "loss": 0.0596, + "step": 11977 + }, + { + "epoch": 3.64, + "learning_rate": 5.2681755630307273e-05, + "loss": 0.0206, + "step": 11978 + }, + { + "epoch": 3.64, + "learning_rate": 5.265975755020848e-05, + "loss": 0.0873, + "step": 11979 + }, + { + "epoch": 3.64, + "learning_rate": 5.26377630860142e-05, + "loss": 0.0192, + "step": 11980 + }, + { + "epoch": 3.64, + "learning_rate": 5.261577223854154e-05, + "loss": 0.0482, + "step": 11981 + }, + { + "epoch": 3.64, + "learning_rate": 5.259378500860739e-05, + "loss": 0.0051, + "step": 11982 + }, + { + "epoch": 3.64, + "learning_rate": 5.257180139702843e-05, + "loss": 0.0399, + "step": 11983 + }, + { + "epoch": 3.64, + "learning_rate": 5.2549821404621385e-05, + "loss": 0.0223, + "step": 11984 + }, + { + "epoch": 3.64, + "learning_rate": 5.2527845032202646e-05, + "loss": 0.0719, + "step": 11985 + }, + { + "epoch": 3.64, + "learning_rate": 5.25058722805887e-05, + "loss": 0.0264, + "step": 11986 + }, + { + "epoch": 3.64, + "learning_rate": 5.248390315059569e-05, + "loss": 0.0681, + "step": 11987 + }, + { + "epoch": 3.64, + "learning_rate": 5.246193764303971e-05, + "loss": 0.0425, + "step": 11988 + }, + { + "epoch": 3.64, + "learning_rate": 5.243997575873667e-05, + "loss": 0.0562, + "step": 11989 + }, + { + "epoch": 3.64, + "learning_rate": 5.24180174985025e-05, + "loss": 0.0694, + "step": 11990 + }, + { + "epoch": 3.64, + "learning_rate": 5.239606286315276e-05, + "loss": 0.0407, + "step": 11991 + }, + { + "epoch": 3.64, + "learning_rate": 5.237411185350311e-05, + "loss": 0.0439, + "step": 11992 + }, + { + "epoch": 3.64, + "learning_rate": 5.235216447036893e-05, + "loss": 0.0568, + "step": 11993 + }, + { + "epoch": 3.64, + "learning_rate": 5.2330220714565435e-05, + "loss": 0.0496, + "step": 11994 + }, + { + "epoch": 3.64, + "learning_rate": 5.2308280586907876e-05, + "loss": 0.0478, + "step": 11995 + }, + { + "epoch": 3.64, + "learning_rate": 5.228634408821118e-05, + "loss": 0.0446, + "step": 11996 + }, + { + "epoch": 3.64, + "learning_rate": 5.226441121929028e-05, + "loss": 0.0531, + "step": 11997 + }, + { + "epoch": 3.64, + "learning_rate": 5.224248198095991e-05, + "loss": 0.0776, + "step": 11998 + }, + { + "epoch": 3.64, + "learning_rate": 5.222055637403462e-05, + "loss": 0.0509, + "step": 11999 + }, + { + "epoch": 3.64, + "learning_rate": 5.2198634399328965e-05, + "loss": 0.0195, + "step": 12000 + }, + { + "epoch": 3.64, + "learning_rate": 5.21767160576572e-05, + "loss": 0.0451, + "step": 12001 + }, + { + "epoch": 3.64, + "learning_rate": 5.215480134983366e-05, + "loss": 0.0609, + "step": 12002 + }, + { + "epoch": 3.64, + "learning_rate": 5.213289027667223e-05, + "loss": 0.0356, + "step": 12003 + }, + { + "epoch": 3.64, + "learning_rate": 5.211098283898698e-05, + "loss": 0.0627, + "step": 12004 + }, + { + "epoch": 3.65, + "learning_rate": 5.2089079037591605e-05, + "loss": 0.0367, + "step": 12005 + }, + { + "epoch": 3.65, + "learning_rate": 5.2067178873299874e-05, + "loss": 0.0429, + "step": 12006 + }, + { + "epoch": 3.65, + "learning_rate": 5.2045282346925266e-05, + "loss": 0.0383, + "step": 12007 + }, + { + "epoch": 3.65, + "learning_rate": 5.202338945928112e-05, + "loss": 0.0262, + "step": 12008 + }, + { + "epoch": 3.65, + "learning_rate": 5.20015002111808e-05, + "loss": 0.0606, + "step": 12009 + }, + { + "epoch": 3.65, + "learning_rate": 5.19796146034373e-05, + "loss": 0.0134, + "step": 12010 + }, + { + "epoch": 3.65, + "learning_rate": 5.1957732636863724e-05, + "loss": 0.041, + "step": 12011 + }, + { + "epoch": 3.65, + "learning_rate": 5.19358543122729e-05, + "loss": 0.0376, + "step": 12012 + }, + { + "epoch": 3.65, + "learning_rate": 5.1913979630477496e-05, + "loss": 0.0417, + "step": 12013 + }, + { + "epoch": 3.65, + "learning_rate": 5.1892108592290064e-05, + "loss": 0.0094, + "step": 12014 + }, + { + "epoch": 3.65, + "learning_rate": 5.1870241198523146e-05, + "loss": 0.0347, + "step": 12015 + }, + { + "epoch": 3.65, + "learning_rate": 5.1848377449988964e-05, + "loss": 0.0531, + "step": 12016 + }, + { + "epoch": 3.65, + "learning_rate": 5.182651734749978e-05, + "loss": 0.009, + "step": 12017 + }, + { + "epoch": 3.65, + "learning_rate": 5.180466089186759e-05, + "loss": 0.0676, + "step": 12018 + }, + { + "epoch": 3.65, + "learning_rate": 5.178280808390424e-05, + "loss": 0.0521, + "step": 12019 + }, + { + "epoch": 3.65, + "learning_rate": 5.176095892442159e-05, + "loss": 0.0646, + "step": 12020 + }, + { + "epoch": 3.65, + "learning_rate": 5.173911341423119e-05, + "loss": 0.0413, + "step": 12021 + }, + { + "epoch": 3.65, + "learning_rate": 5.171727155414465e-05, + "loss": 0.0374, + "step": 12022 + }, + { + "epoch": 3.65, + "learning_rate": 5.169543334497323e-05, + "loss": 0.0329, + "step": 12023 + }, + { + "epoch": 3.65, + "learning_rate": 5.1673598787528145e-05, + "loss": 0.0411, + "step": 12024 + }, + { + "epoch": 3.65, + "learning_rate": 5.1651767882620585e-05, + "loss": 0.0161, + "step": 12025 + }, + { + "epoch": 3.65, + "learning_rate": 5.162994063106137e-05, + "loss": 0.0266, + "step": 12026 + }, + { + "epoch": 3.65, + "learning_rate": 5.160811703366151e-05, + "loss": 0.051, + "step": 12027 + }, + { + "epoch": 3.65, + "learning_rate": 5.158629709123147e-05, + "loss": 0.0271, + "step": 12028 + }, + { + "epoch": 3.65, + "learning_rate": 5.1564480804581935e-05, + "loss": 0.039, + "step": 12029 + }, + { + "epoch": 3.65, + "learning_rate": 5.1542668174523254e-05, + "loss": 0.0181, + "step": 12030 + }, + { + "epoch": 3.65, + "learning_rate": 5.152085920186576e-05, + "loss": 0.0338, + "step": 12031 + }, + { + "epoch": 3.65, + "learning_rate": 5.1499053887419554e-05, + "loss": 0.034, + "step": 12032 + }, + { + "epoch": 3.65, + "learning_rate": 5.147725223199461e-05, + "loss": 0.0555, + "step": 12033 + }, + { + "epoch": 3.65, + "learning_rate": 5.145545423640087e-05, + "loss": 0.0441, + "step": 12034 + }, + { + "epoch": 3.65, + "learning_rate": 5.1433659901447966e-05, + "loss": 0.0145, + "step": 12035 + }, + { + "epoch": 3.65, + "learning_rate": 5.141186922794561e-05, + "loss": 0.0243, + "step": 12036 + }, + { + "epoch": 3.65, + "learning_rate": 5.139008221670321e-05, + "loss": 0.0419, + "step": 12037 + }, + { + "epoch": 3.66, + "learning_rate": 5.136829886853003e-05, + "loss": 0.0275, + "step": 12038 + }, + { + "epoch": 3.66, + "learning_rate": 5.1346519184235364e-05, + "loss": 0.0108, + "step": 12039 + }, + { + "epoch": 3.66, + "learning_rate": 5.132474316462817e-05, + "loss": 0.028, + "step": 12040 + }, + { + "epoch": 3.66, + "learning_rate": 5.130297081051745e-05, + "loss": 0.0089, + "step": 12041 + }, + { + "epoch": 3.66, + "learning_rate": 5.128120212271193e-05, + "loss": 0.0578, + "step": 12042 + }, + { + "epoch": 3.66, + "learning_rate": 5.125943710202027e-05, + "loss": 0.0262, + "step": 12043 + }, + { + "epoch": 3.66, + "learning_rate": 5.12376757492509e-05, + "loss": 0.0531, + "step": 12044 + }, + { + "epoch": 3.66, + "learning_rate": 5.1215918065212336e-05, + "loss": 0.0476, + "step": 12045 + }, + { + "epoch": 3.66, + "learning_rate": 5.119416405071266e-05, + "loss": 0.0089, + "step": 12046 + }, + { + "epoch": 3.66, + "learning_rate": 5.1172413706560096e-05, + "loss": 0.0082, + "step": 12047 + }, + { + "epoch": 3.66, + "learning_rate": 5.115066703356256e-05, + "loss": 0.0278, + "step": 12048 + }, + { + "epoch": 3.66, + "learning_rate": 5.1128924032527824e-05, + "loss": 0.0389, + "step": 12049 + }, + { + "epoch": 3.66, + "learning_rate": 5.1107184704263674e-05, + "loss": 0.0231, + "step": 12050 + }, + { + "epoch": 3.66, + "learning_rate": 5.1085449049577555e-05, + "loss": 0.048, + "step": 12051 + }, + { + "epoch": 3.66, + "learning_rate": 5.106371706927699e-05, + "loss": 0.0549, + "step": 12052 + }, + { + "epoch": 3.66, + "learning_rate": 5.10419887641692e-05, + "loss": 0.0349, + "step": 12053 + }, + { + "epoch": 3.66, + "learning_rate": 5.1020264135061346e-05, + "loss": 0.0151, + "step": 12054 + }, + { + "epoch": 3.66, + "learning_rate": 5.099854318276038e-05, + "loss": 0.0429, + "step": 12055 + }, + { + "epoch": 3.66, + "learning_rate": 5.097682590807325e-05, + "loss": 0.0582, + "step": 12056 + }, + { + "epoch": 3.66, + "learning_rate": 5.095511231180662e-05, + "loss": 0.0578, + "step": 12057 + }, + { + "epoch": 3.66, + "learning_rate": 5.093340239476716e-05, + "loss": 0.0086, + "step": 12058 + }, + { + "epoch": 3.66, + "learning_rate": 5.0911696157761294e-05, + "loss": 0.0453, + "step": 12059 + }, + { + "epoch": 3.66, + "learning_rate": 5.088999360159528e-05, + "loss": 0.0689, + "step": 12060 + }, + { + "epoch": 3.66, + "learning_rate": 5.0868294727075436e-05, + "loss": 0.0312, + "step": 12061 + }, + { + "epoch": 3.66, + "learning_rate": 5.084659953500767e-05, + "loss": 0.0491, + "step": 12062 + }, + { + "epoch": 3.66, + "learning_rate": 5.0824908026198035e-05, + "loss": 0.037, + "step": 12063 + }, + { + "epoch": 3.66, + "learning_rate": 5.080322020145224e-05, + "loss": 0.0564, + "step": 12064 + }, + { + "epoch": 3.66, + "learning_rate": 5.078153606157585e-05, + "loss": 0.0001, + "step": 12065 + }, + { + "epoch": 3.66, + "learning_rate": 5.0759855607374514e-05, + "loss": 0.0261, + "step": 12066 + }, + { + "epoch": 3.66, + "learning_rate": 5.073817883965346e-05, + "loss": 0.0462, + "step": 12067 + }, + { + "epoch": 3.66, + "learning_rate": 5.071650575921808e-05, + "loss": 0.0429, + "step": 12068 + }, + { + "epoch": 3.66, + "learning_rate": 5.069483636687327e-05, + "loss": 0.0227, + "step": 12069 + }, + { + "epoch": 3.66, + "learning_rate": 5.067317066342412e-05, + "loss": 0.029, + "step": 12070 + }, + { + "epoch": 3.67, + "learning_rate": 5.0651508649675365e-05, + "loss": 0.0667, + "step": 12071 + }, + { + "epoch": 3.67, + "learning_rate": 5.062985032643178e-05, + "loss": 0.0437, + "step": 12072 + }, + { + "epoch": 3.67, + "learning_rate": 5.0608195694497845e-05, + "loss": 0.0361, + "step": 12073 + }, + { + "epoch": 3.67, + "learning_rate": 5.0586544754677924e-05, + "loss": 0.0398, + "step": 12074 + }, + { + "epoch": 3.67, + "learning_rate": 5.05648975077764e-05, + "loss": 0.025, + "step": 12075 + }, + { + "epoch": 3.67, + "learning_rate": 5.054325395459728e-05, + "loss": 0.0588, + "step": 12076 + }, + { + "epoch": 3.67, + "learning_rate": 5.052161409594468e-05, + "loss": 0.0284, + "step": 12077 + }, + { + "epoch": 3.67, + "learning_rate": 5.0499977932622386e-05, + "loss": 0.0061, + "step": 12078 + }, + { + "epoch": 3.67, + "learning_rate": 5.047834546543409e-05, + "loss": 0.0252, + "step": 12079 + }, + { + "epoch": 3.67, + "learning_rate": 5.045671669518347e-05, + "loss": 0.0209, + "step": 12080 + }, + { + "epoch": 3.67, + "learning_rate": 5.0435091622673865e-05, + "loss": 0.0249, + "step": 12081 + }, + { + "epoch": 3.67, + "learning_rate": 5.041347024870868e-05, + "loss": 0.0495, + "step": 12082 + }, + { + "epoch": 3.67, + "learning_rate": 5.0391852574091044e-05, + "loss": 0.0407, + "step": 12083 + }, + { + "epoch": 3.67, + "learning_rate": 5.0370238599623986e-05, + "loss": 0.0735, + "step": 12084 + }, + { + "epoch": 3.67, + "learning_rate": 5.034862832611034e-05, + "loss": 0.0264, + "step": 12085 + }, + { + "epoch": 3.67, + "learning_rate": 5.032702175435298e-05, + "loss": 0.034, + "step": 12086 + }, + { + "epoch": 3.67, + "learning_rate": 5.030541888515443e-05, + "loss": 0.0171, + "step": 12087 + }, + { + "epoch": 3.67, + "learning_rate": 5.028381971931728e-05, + "loss": 0.0571, + "step": 12088 + }, + { + "epoch": 3.67, + "learning_rate": 5.026222425764379e-05, + "loss": 0.0428, + "step": 12089 + }, + { + "epoch": 3.67, + "learning_rate": 5.024063250093615e-05, + "loss": 0.0685, + "step": 12090 + }, + { + "epoch": 3.67, + "learning_rate": 5.021904444999652e-05, + "loss": 0.0287, + "step": 12091 + }, + { + "epoch": 3.67, + "learning_rate": 5.019746010562672e-05, + "loss": 0.0556, + "step": 12092 + }, + { + "epoch": 3.67, + "learning_rate": 5.017587946862874e-05, + "loss": 0.06, + "step": 12093 + }, + { + "epoch": 3.67, + "learning_rate": 5.015430253980397e-05, + "loss": 0.0779, + "step": 12094 + }, + { + "epoch": 3.67, + "learning_rate": 5.013272931995415e-05, + "loss": 0.0197, + "step": 12095 + }, + { + "epoch": 3.67, + "learning_rate": 5.011115980988049e-05, + "loss": 0.0522, + "step": 12096 + }, + { + "epoch": 3.67, + "learning_rate": 5.00895940103844e-05, + "loss": 0.0299, + "step": 12097 + }, + { + "epoch": 3.67, + "learning_rate": 5.0068031922266895e-05, + "loss": 0.0542, + "step": 12098 + }, + { + "epoch": 3.67, + "learning_rate": 5.004647354632892e-05, + "loss": 0.0245, + "step": 12099 + }, + { + "epoch": 3.67, + "learning_rate": 5.002491888337137e-05, + "loss": 0.0468, + "step": 12100 + }, + { + "epoch": 3.67, + "learning_rate": 5.000336793419488e-05, + "loss": 0.0473, + "step": 12101 + }, + { + "epoch": 3.67, + "learning_rate": 4.998182069960007e-05, + "loss": 0.033, + "step": 12102 + }, + { + "epoch": 3.67, + "learning_rate": 4.9960277180387315e-05, + "loss": 0.0456, + "step": 12103 + }, + { + "epoch": 3.68, + "learning_rate": 4.993873737735687e-05, + "loss": 0.0556, + "step": 12104 + }, + { + "epoch": 3.68, + "learning_rate": 4.991720129130894e-05, + "loss": 0.0625, + "step": 12105 + }, + { + "epoch": 3.68, + "learning_rate": 4.989566892304344e-05, + "loss": 0.065, + "step": 12106 + }, + { + "epoch": 3.68, + "learning_rate": 4.9874140273360334e-05, + "loss": 0.0501, + "step": 12107 + }, + { + "epoch": 3.68, + "learning_rate": 4.985261534305925e-05, + "loss": 0.0525, + "step": 12108 + }, + { + "epoch": 3.68, + "learning_rate": 4.983109413293994e-05, + "loss": 0.0434, + "step": 12109 + }, + { + "epoch": 3.68, + "learning_rate": 4.980957664380164e-05, + "loss": 0.0252, + "step": 12110 + }, + { + "epoch": 3.68, + "learning_rate": 4.9788062876443796e-05, + "loss": 0.0557, + "step": 12111 + }, + { + "epoch": 3.68, + "learning_rate": 4.9766552831665514e-05, + "loss": 0.022, + "step": 12112 + }, + { + "epoch": 3.68, + "learning_rate": 4.97450465102659e-05, + "loss": 0.0248, + "step": 12113 + }, + { + "epoch": 3.68, + "learning_rate": 4.972354391304381e-05, + "loss": 0.0504, + "step": 12114 + }, + { + "epoch": 3.68, + "learning_rate": 4.9702045040797975e-05, + "loss": 0.0372, + "step": 12115 + }, + { + "epoch": 3.68, + "learning_rate": 4.968054989432707e-05, + "loss": 0.0463, + "step": 12116 + }, + { + "epoch": 3.68, + "learning_rate": 4.9659058474429515e-05, + "loss": 0.0378, + "step": 12117 + }, + { + "epoch": 3.68, + "learning_rate": 4.963757078190375e-05, + "loss": 0.0234, + "step": 12118 + }, + { + "epoch": 3.68, + "learning_rate": 4.96160868175479e-05, + "loss": 0.0389, + "step": 12119 + }, + { + "epoch": 3.68, + "learning_rate": 4.959460658216006e-05, + "loss": 0.015, + "step": 12120 + }, + { + "epoch": 3.68, + "learning_rate": 4.957313007653809e-05, + "loss": 0.0459, + "step": 12121 + }, + { + "epoch": 3.68, + "learning_rate": 4.9551657301479854e-05, + "loss": 0.0458, + "step": 12122 + }, + { + "epoch": 3.68, + "learning_rate": 4.9530188257783035e-05, + "loss": 0.0503, + "step": 12123 + }, + { + "epoch": 3.68, + "learning_rate": 4.950872294624509e-05, + "loss": 0.0204, + "step": 12124 + }, + { + "epoch": 3.68, + "learning_rate": 4.94872613676634e-05, + "loss": 0.0444, + "step": 12125 + }, + { + "epoch": 3.68, + "learning_rate": 4.9465803522835144e-05, + "loss": 0.0451, + "step": 12126 + }, + { + "epoch": 3.68, + "learning_rate": 4.9444349412557545e-05, + "loss": 0.0283, + "step": 12127 + }, + { + "epoch": 3.68, + "learning_rate": 4.942289903762742e-05, + "loss": 0.0216, + "step": 12128 + }, + { + "epoch": 3.68, + "learning_rate": 4.940145239884171e-05, + "loss": 0.0495, + "step": 12129 + }, + { + "epoch": 3.68, + "learning_rate": 4.938000949699704e-05, + "loss": 0.0137, + "step": 12130 + }, + { + "epoch": 3.68, + "learning_rate": 4.935857033288989e-05, + "loss": 0.052, + "step": 12131 + }, + { + "epoch": 3.68, + "learning_rate": 4.933713490731679e-05, + "loss": 0.0035, + "step": 12132 + }, + { + "epoch": 3.68, + "learning_rate": 4.931570322107388e-05, + "loss": 0.0761, + "step": 12133 + }, + { + "epoch": 3.68, + "learning_rate": 4.929427527495745e-05, + "loss": 0.0395, + "step": 12134 + }, + { + "epoch": 3.68, + "learning_rate": 4.927285106976326e-05, + "loss": 0.0491, + "step": 12135 + }, + { + "epoch": 3.68, + "learning_rate": 4.9251430606287335e-05, + "loss": 0.0928, + "step": 12136 + }, + { + "epoch": 3.69, + "learning_rate": 4.923001388532528e-05, + "loss": 0.0354, + "step": 12137 + }, + { + "epoch": 3.69, + "learning_rate": 4.920860090767273e-05, + "loss": 0.0213, + "step": 12138 + }, + { + "epoch": 3.69, + "learning_rate": 4.9187191674125094e-05, + "loss": 0.0747, + "step": 12139 + }, + { + "epoch": 3.69, + "learning_rate": 4.9165786185477614e-05, + "loss": 0.0433, + "step": 12140 + }, + { + "epoch": 3.69, + "learning_rate": 4.914438444252554e-05, + "loss": 0.0286, + "step": 12141 + }, + { + "epoch": 3.69, + "learning_rate": 4.9122986446063764e-05, + "loss": 0.0389, + "step": 12142 + }, + { + "epoch": 3.69, + "learning_rate": 4.910159219688727e-05, + "loss": 0.0183, + "step": 12143 + }, + { + "epoch": 3.69, + "learning_rate": 4.9080201695790757e-05, + "loss": 0.046, + "step": 12144 + }, + { + "epoch": 3.69, + "learning_rate": 4.9058814943568754e-05, + "loss": 0.0372, + "step": 12145 + }, + { + "epoch": 3.69, + "learning_rate": 4.903743194101581e-05, + "loss": 0.0364, + "step": 12146 + }, + { + "epoch": 3.69, + "learning_rate": 4.901605268892615e-05, + "loss": 0.024, + "step": 12147 + }, + { + "epoch": 3.69, + "learning_rate": 4.899467718809406e-05, + "loss": 0.0457, + "step": 12148 + }, + { + "epoch": 3.69, + "learning_rate": 4.897330543931349e-05, + "loss": 0.0095, + "step": 12149 + }, + { + "epoch": 3.69, + "learning_rate": 4.8951937443378396e-05, + "loss": 0.0777, + "step": 12150 + }, + { + "epoch": 3.69, + "learning_rate": 4.893057320108243e-05, + "loss": 0.0297, + "step": 12151 + }, + { + "epoch": 3.69, + "learning_rate": 4.8909212713219355e-05, + "loss": 0.0572, + "step": 12152 + }, + { + "epoch": 3.69, + "learning_rate": 4.888785598058252e-05, + "loss": 0.0351, + "step": 12153 + }, + { + "epoch": 3.69, + "learning_rate": 4.886650300396538e-05, + "loss": 0.045, + "step": 12154 + }, + { + "epoch": 3.69, + "learning_rate": 4.884515378416108e-05, + "loss": 0.0389, + "step": 12155 + }, + { + "epoch": 3.69, + "learning_rate": 4.882380832196265e-05, + "loss": 0.0665, + "step": 12156 + }, + { + "epoch": 3.69, + "learning_rate": 4.880246661816307e-05, + "loss": 0.0381, + "step": 12157 + }, + { + "epoch": 3.69, + "learning_rate": 4.878112867355505e-05, + "loss": 0.0521, + "step": 12158 + }, + { + "epoch": 3.69, + "learning_rate": 4.875979448893139e-05, + "loss": 0.0543, + "step": 12159 + }, + { + "epoch": 3.69, + "learning_rate": 4.873846406508437e-05, + "loss": 0.0254, + "step": 12160 + }, + { + "epoch": 3.69, + "learning_rate": 4.871713740280652e-05, + "loss": 0.0122, + "step": 12161 + }, + { + "epoch": 3.69, + "learning_rate": 4.869581450288996e-05, + "loss": 0.0383, + "step": 12162 + }, + { + "epoch": 3.69, + "learning_rate": 4.867449536612681e-05, + "loss": 0.0383, + "step": 12163 + }, + { + "epoch": 3.69, + "learning_rate": 4.8653179993309126e-05, + "loss": 0.0145, + "step": 12164 + }, + { + "epoch": 3.69, + "learning_rate": 4.86318683852285e-05, + "loss": 0.0129, + "step": 12165 + }, + { + "epoch": 3.69, + "learning_rate": 4.8610560542676766e-05, + "loss": 0.0581, + "step": 12166 + }, + { + "epoch": 3.69, + "learning_rate": 4.858925646644533e-05, + "loss": 0.0194, + "step": 12167 + }, + { + "epoch": 3.69, + "learning_rate": 4.856795615732569e-05, + "loss": 0.076, + "step": 12168 + }, + { + "epoch": 3.69, + "learning_rate": 4.854665961610902e-05, + "loss": 0.0414, + "step": 12169 + }, + { + "epoch": 3.7, + "learning_rate": 4.8525366843586385e-05, + "loss": 0.0118, + "step": 12170 + }, + { + "epoch": 3.7, + "learning_rate": 4.850407784054885e-05, + "loss": 0.048, + "step": 12171 + }, + { + "epoch": 3.7, + "learning_rate": 4.848279260778715e-05, + "loss": 0.041, + "step": 12172 + }, + { + "epoch": 3.7, + "learning_rate": 4.846151114609205e-05, + "loss": 0.0114, + "step": 12173 + }, + { + "epoch": 3.7, + "learning_rate": 4.8440233456254e-05, + "loss": 0.0265, + "step": 12174 + }, + { + "epoch": 3.7, + "learning_rate": 4.8418959539063565e-05, + "loss": 0.0489, + "step": 12175 + }, + { + "epoch": 3.7, + "learning_rate": 4.8397689395310794e-05, + "loss": 0.0243, + "step": 12176 + }, + { + "epoch": 3.7, + "learning_rate": 4.8376423025785956e-05, + "loss": 0.0437, + "step": 12177 + }, + { + "epoch": 3.7, + "learning_rate": 4.835516043127896e-05, + "loss": 0.0625, + "step": 12178 + }, + { + "epoch": 3.7, + "learning_rate": 4.833390161257972e-05, + "loss": 0.0075, + "step": 12179 + }, + { + "epoch": 3.7, + "learning_rate": 4.831264657047791e-05, + "loss": 0.0671, + "step": 12180 + }, + { + "epoch": 3.7, + "learning_rate": 4.8291395305763034e-05, + "loss": 0.043, + "step": 12181 + }, + { + "epoch": 3.7, + "learning_rate": 4.8270147819224604e-05, + "loss": 0.0638, + "step": 12182 + }, + { + "epoch": 3.7, + "learning_rate": 4.8248904111651827e-05, + "loss": 0.061, + "step": 12183 + }, + { + "epoch": 3.7, + "learning_rate": 4.8227664183833926e-05, + "loss": 0.0533, + "step": 12184 + }, + { + "epoch": 3.7, + "learning_rate": 4.820642803655985e-05, + "loss": 0.0539, + "step": 12185 + }, + { + "epoch": 3.7, + "learning_rate": 4.818519567061841e-05, + "loss": 0.0299, + "step": 12186 + }, + { + "epoch": 3.7, + "learning_rate": 4.816396708679845e-05, + "loss": 0.0835, + "step": 12187 + }, + { + "epoch": 3.7, + "learning_rate": 4.814274228588841e-05, + "loss": 0.0431, + "step": 12188 + }, + { + "epoch": 3.7, + "learning_rate": 4.812152126867685e-05, + "loss": 0.0555, + "step": 12189 + }, + { + "epoch": 3.7, + "learning_rate": 4.8100304035952044e-05, + "loss": 0.0852, + "step": 12190 + }, + { + "epoch": 3.7, + "learning_rate": 4.8079090588502105e-05, + "loss": 0.0635, + "step": 12191 + }, + { + "epoch": 3.7, + "learning_rate": 4.805788092711503e-05, + "loss": 0.0454, + "step": 12192 + }, + { + "epoch": 3.7, + "learning_rate": 4.8036675052578776e-05, + "loss": 0.0464, + "step": 12193 + }, + { + "epoch": 3.7, + "learning_rate": 4.801547296568099e-05, + "loss": 0.0345, + "step": 12194 + }, + { + "epoch": 3.7, + "learning_rate": 4.7994274667209375e-05, + "loss": 0.0444, + "step": 12195 + }, + { + "epoch": 3.7, + "learning_rate": 4.7973080157951324e-05, + "loss": 0.009, + "step": 12196 + }, + { + "epoch": 3.7, + "learning_rate": 4.79518894386941e-05, + "loss": 0.0499, + "step": 12197 + }, + { + "epoch": 3.7, + "learning_rate": 4.793070251022497e-05, + "loss": 0.0409, + "step": 12198 + }, + { + "epoch": 3.7, + "learning_rate": 4.79095193733309e-05, + "loss": 0.0812, + "step": 12199 + }, + { + "epoch": 3.7, + "learning_rate": 4.7888340028798886e-05, + "loss": 0.03, + "step": 12200 + }, + { + "epoch": 3.7, + "learning_rate": 4.78671644774155e-05, + "loss": 0.0508, + "step": 12201 + }, + { + "epoch": 3.7, + "learning_rate": 4.7845992719967516e-05, + "loss": 0.0161, + "step": 12202 + }, + { + "epoch": 3.71, + "learning_rate": 4.7824824757241284e-05, + "loss": 0.0443, + "step": 12203 + }, + { + "epoch": 3.71, + "learning_rate": 4.7803660590023176e-05, + "loss": 0.0351, + "step": 12204 + }, + { + "epoch": 3.71, + "learning_rate": 4.7782500219099504e-05, + "loss": 0.0236, + "step": 12205 + }, + { + "epoch": 3.71, + "learning_rate": 4.776134364525609e-05, + "loss": 0.032, + "step": 12206 + }, + { + "epoch": 3.71, + "learning_rate": 4.7740190869279e-05, + "loss": 0.0492, + "step": 12207 + }, + { + "epoch": 3.71, + "learning_rate": 4.7719041891953904e-05, + "loss": 0.0424, + "step": 12208 + }, + { + "epoch": 3.71, + "learning_rate": 4.76978967140665e-05, + "loss": 0.0324, + "step": 12209 + }, + { + "epoch": 3.71, + "learning_rate": 4.767675533640224e-05, + "loss": 0.0512, + "step": 12210 + }, + { + "epoch": 3.71, + "learning_rate": 4.76556177597464e-05, + "loss": 0.0193, + "step": 12211 + }, + { + "epoch": 3.71, + "learning_rate": 4.763448398488429e-05, + "loss": 0.0203, + "step": 12212 + }, + { + "epoch": 3.71, + "learning_rate": 4.7613354012600884e-05, + "loss": 0.0638, + "step": 12213 + }, + { + "epoch": 3.71, + "learning_rate": 4.759222784368117e-05, + "loss": 0.0181, + "step": 12214 + }, + { + "epoch": 3.71, + "learning_rate": 4.757110547890986e-05, + "loss": 0.0419, + "step": 12215 + }, + { + "epoch": 3.71, + "learning_rate": 4.754998691907165e-05, + "loss": 0.0527, + "step": 12216 + }, + { + "epoch": 3.71, + "learning_rate": 4.752887216495092e-05, + "loss": 0.0219, + "step": 12217 + }, + { + "epoch": 3.71, + "learning_rate": 4.750776121733216e-05, + "loss": 0.0393, + "step": 12218 + }, + { + "epoch": 3.71, + "learning_rate": 4.748665407699945e-05, + "loss": 0.0256, + "step": 12219 + }, + { + "epoch": 3.71, + "learning_rate": 4.746555074473698e-05, + "loss": 0.0242, + "step": 12220 + }, + { + "epoch": 3.71, + "learning_rate": 4.7444451221328614e-05, + "loss": 0.0534, + "step": 12221 + }, + { + "epoch": 3.71, + "learning_rate": 4.7423355507558094e-05, + "loss": 0.0368, + "step": 12222 + }, + { + "epoch": 3.71, + "learning_rate": 4.740226360420918e-05, + "loss": 0.0812, + "step": 12223 + }, + { + "epoch": 3.71, + "learning_rate": 4.738117551206524e-05, + "loss": 0.0648, + "step": 12224 + }, + { + "epoch": 3.71, + "learning_rate": 4.7360091231909804e-05, + "loss": 0.0568, + "step": 12225 + }, + { + "epoch": 3.71, + "learning_rate": 4.733901076452589e-05, + "loss": 0.0641, + "step": 12226 + }, + { + "epoch": 3.71, + "learning_rate": 4.7317934110696685e-05, + "loss": 0.0549, + "step": 12227 + }, + { + "epoch": 3.71, + "learning_rate": 4.729686127120516e-05, + "loss": 0.0473, + "step": 12228 + }, + { + "epoch": 3.71, + "learning_rate": 4.7275792246834024e-05, + "loss": 0.0321, + "step": 12229 + }, + { + "epoch": 3.71, + "learning_rate": 4.725472703836604e-05, + "loss": 0.0493, + "step": 12230 + }, + { + "epoch": 3.71, + "learning_rate": 4.723366564658358e-05, + "loss": 0.024, + "step": 12231 + }, + { + "epoch": 3.71, + "learning_rate": 4.7212608072269106e-05, + "loss": 0.0274, + "step": 12232 + }, + { + "epoch": 3.71, + "learning_rate": 4.719155431620479e-05, + "loss": 0.0421, + "step": 12233 + }, + { + "epoch": 3.71, + "learning_rate": 4.717050437917279e-05, + "loss": 0.0134, + "step": 12234 + }, + { + "epoch": 3.71, + "learning_rate": 4.714945826195496e-05, + "loss": 0.0659, + "step": 12235 + }, + { + "epoch": 3.72, + "learning_rate": 4.71284159653332e-05, + "loss": 0.0462, + "step": 12236 + }, + { + "epoch": 3.72, + "learning_rate": 4.710737749008912e-05, + "loss": 0.0401, + "step": 12237 + }, + { + "epoch": 3.72, + "learning_rate": 4.7086342837004184e-05, + "loss": 0.039, + "step": 12238 + }, + { + "epoch": 3.72, + "learning_rate": 4.706531200685986e-05, + "loss": 0.048, + "step": 12239 + }, + { + "epoch": 3.72, + "learning_rate": 4.7044285000437305e-05, + "loss": 0.0099, + "step": 12240 + }, + { + "epoch": 3.72, + "learning_rate": 4.702326181851773e-05, + "loss": 0.0604, + "step": 12241 + }, + { + "epoch": 3.72, + "learning_rate": 4.700224246188192e-05, + "loss": 0.0419, + "step": 12242 + }, + { + "epoch": 3.72, + "learning_rate": 4.69812269313108e-05, + "loss": 0.0374, + "step": 12243 + }, + { + "epoch": 3.72, + "learning_rate": 4.696021522758497e-05, + "loss": 0.0257, + "step": 12244 + }, + { + "epoch": 3.72, + "learning_rate": 4.6939207351484967e-05, + "loss": 0.0507, + "step": 12245 + }, + { + "epoch": 3.72, + "learning_rate": 4.691820330379128e-05, + "loss": 0.0362, + "step": 12246 + }, + { + "epoch": 3.72, + "learning_rate": 4.689720308528398e-05, + "loss": 0.0377, + "step": 12247 + }, + { + "epoch": 3.72, + "learning_rate": 4.6876206696743275e-05, + "loss": 0.029, + "step": 12248 + }, + { + "epoch": 3.72, + "learning_rate": 4.685521413894902e-05, + "loss": 0.0291, + "step": 12249 + }, + { + "epoch": 3.72, + "learning_rate": 4.683422541268116e-05, + "loss": 0.0508, + "step": 12250 + }, + { + "epoch": 3.72, + "learning_rate": 4.681324051871928e-05, + "loss": 0.0186, + "step": 12251 + }, + { + "epoch": 3.72, + "learning_rate": 4.679225945784288e-05, + "loss": 0.0398, + "step": 12252 + }, + { + "epoch": 3.72, + "learning_rate": 4.6771282230831435e-05, + "loss": 0.0415, + "step": 12253 + }, + { + "epoch": 3.72, + "learning_rate": 4.67503088384641e-05, + "loss": 0.0613, + "step": 12254 + }, + { + "epoch": 3.72, + "learning_rate": 4.672933928152005e-05, + "loss": 0.0558, + "step": 12255 + }, + { + "epoch": 3.72, + "learning_rate": 4.670837356077821e-05, + "loss": 0.0961, + "step": 12256 + }, + { + "epoch": 3.72, + "learning_rate": 4.668741167701741e-05, + "loss": 0.0408, + "step": 12257 + }, + { + "epoch": 3.72, + "learning_rate": 4.6666453631016224e-05, + "loss": 0.0734, + "step": 12258 + }, + { + "epoch": 3.72, + "learning_rate": 4.664549942355333e-05, + "loss": 0.0321, + "step": 12259 + }, + { + "epoch": 3.72, + "learning_rate": 4.662454905540701e-05, + "loss": 0.0315, + "step": 12260 + }, + { + "epoch": 3.72, + "learning_rate": 4.660360252735557e-05, + "loss": 0.0703, + "step": 12261 + }, + { + "epoch": 3.72, + "learning_rate": 4.658265984017711e-05, + "loss": 0.0306, + "step": 12262 + }, + { + "epoch": 3.72, + "learning_rate": 4.6561720994649506e-05, + "loss": 0.0258, + "step": 12263 + }, + { + "epoch": 3.72, + "learning_rate": 4.654078599155068e-05, + "loss": 0.0786, + "step": 12264 + }, + { + "epoch": 3.72, + "learning_rate": 4.6519854831658224e-05, + "loss": 0.0412, + "step": 12265 + }, + { + "epoch": 3.72, + "learning_rate": 4.6498927515749813e-05, + "loss": 0.0453, + "step": 12266 + }, + { + "epoch": 3.72, + "learning_rate": 4.6478004044602616e-05, + "loss": 0.0611, + "step": 12267 + }, + { + "epoch": 3.72, + "learning_rate": 4.6457084418994e-05, + "loss": 0.0207, + "step": 12268 + }, + { + "epoch": 3.73, + "learning_rate": 4.643616863970112e-05, + "loss": 0.0307, + "step": 12269 + }, + { + "epoch": 3.73, + "learning_rate": 4.641525670750082e-05, + "loss": 0.0246, + "step": 12270 + }, + { + "epoch": 3.73, + "learning_rate": 4.6394348623170076e-05, + "loss": 0.026, + "step": 12271 + }, + { + "epoch": 3.73, + "learning_rate": 4.6373444387485345e-05, + "loss": 0.0229, + "step": 12272 + }, + { + "epoch": 3.73, + "learning_rate": 4.635254400122333e-05, + "loss": 0.0329, + "step": 12273 + }, + { + "epoch": 3.73, + "learning_rate": 4.633164746516032e-05, + "loss": 0.0222, + "step": 12274 + }, + { + "epoch": 3.73, + "learning_rate": 4.631075478007265e-05, + "loss": 0.0526, + "step": 12275 + }, + { + "epoch": 3.73, + "learning_rate": 4.628986594673637e-05, + "loss": 0.0312, + "step": 12276 + }, + { + "epoch": 3.73, + "learning_rate": 4.626898096592739e-05, + "loss": 0.0166, + "step": 12277 + }, + { + "epoch": 3.73, + "learning_rate": 4.624809983842163e-05, + "loss": 0.0393, + "step": 12278 + }, + { + "epoch": 3.73, + "learning_rate": 4.622722256499466e-05, + "loss": 0.0281, + "step": 12279 + }, + { + "epoch": 3.73, + "learning_rate": 4.620634914642212e-05, + "loss": 0.0125, + "step": 12280 + }, + { + "epoch": 3.73, + "learning_rate": 4.618547958347932e-05, + "loss": 0.0396, + "step": 12281 + }, + { + "epoch": 3.73, + "learning_rate": 4.616461387694153e-05, + "loss": 0.0268, + "step": 12282 + }, + { + "epoch": 3.73, + "learning_rate": 4.61437520275838e-05, + "loss": 0.0345, + "step": 12283 + }, + { + "epoch": 3.73, + "learning_rate": 4.6122894036181164e-05, + "loss": 0.0318, + "step": 12284 + }, + { + "epoch": 3.73, + "learning_rate": 4.610203990350834e-05, + "loss": 0.0422, + "step": 12285 + }, + { + "epoch": 3.73, + "learning_rate": 4.608118963034012e-05, + "loss": 0.0684, + "step": 12286 + }, + { + "epoch": 3.73, + "learning_rate": 4.606034321745096e-05, + "loss": 0.0623, + "step": 12287 + }, + { + "epoch": 3.73, + "learning_rate": 4.60395006656152e-05, + "loss": 0.0479, + "step": 12288 + }, + { + "epoch": 3.73, + "learning_rate": 4.6018661975607175e-05, + "loss": 0.0355, + "step": 12289 + }, + { + "epoch": 3.73, + "learning_rate": 4.599782714820089e-05, + "loss": 0.0241, + "step": 12290 + }, + { + "epoch": 3.73, + "learning_rate": 4.597699618417039e-05, + "loss": 0.0755, + "step": 12291 + }, + { + "epoch": 3.73, + "learning_rate": 4.595616908428944e-05, + "loss": 0.054, + "step": 12292 + }, + { + "epoch": 3.73, + "learning_rate": 4.5935345849331656e-05, + "loss": 0.0361, + "step": 12293 + }, + { + "epoch": 3.73, + "learning_rate": 4.5914526480070634e-05, + "loss": 0.0279, + "step": 12294 + }, + { + "epoch": 3.73, + "learning_rate": 4.5893710977279704e-05, + "loss": 0.0307, + "step": 12295 + }, + { + "epoch": 3.73, + "learning_rate": 4.587289934173215e-05, + "loss": 0.0186, + "step": 12296 + }, + { + "epoch": 3.73, + "learning_rate": 4.585209157420104e-05, + "loss": 0.0286, + "step": 12297 + }, + { + "epoch": 3.73, + "learning_rate": 4.5831287675459336e-05, + "loss": 0.0852, + "step": 12298 + }, + { + "epoch": 3.73, + "learning_rate": 4.581048764627975e-05, + "loss": 0.0428, + "step": 12299 + }, + { + "epoch": 3.73, + "learning_rate": 4.578969148743507e-05, + "loss": 0.0749, + "step": 12300 + }, + { + "epoch": 3.73, + "learning_rate": 4.576889919969772e-05, + "loss": 0.0497, + "step": 12301 + }, + { + "epoch": 3.74, + "learning_rate": 4.574811078384014e-05, + "loss": 0.0459, + "step": 12302 + }, + { + "epoch": 3.74, + "learning_rate": 4.572732624063454e-05, + "loss": 0.0544, + "step": 12303 + }, + { + "epoch": 3.74, + "learning_rate": 4.5706545570852945e-05, + "loss": 0.0169, + "step": 12304 + }, + { + "epoch": 3.74, + "learning_rate": 4.568576877526739e-05, + "loss": 0.0513, + "step": 12305 + }, + { + "epoch": 3.74, + "learning_rate": 4.5664995854649586e-05, + "loss": 0.0309, + "step": 12306 + }, + { + "epoch": 3.74, + "learning_rate": 4.564422680977131e-05, + "loss": 0.0611, + "step": 12307 + }, + { + "epoch": 3.74, + "learning_rate": 4.562346164140389e-05, + "loss": 0.0066, + "step": 12308 + }, + { + "epoch": 3.74, + "learning_rate": 4.560270035031879e-05, + "loss": 0.0691, + "step": 12309 + }, + { + "epoch": 3.74, + "learning_rate": 4.558194293728728e-05, + "loss": 0.0238, + "step": 12310 + }, + { + "epoch": 3.74, + "learning_rate": 4.556118940308035e-05, + "loss": 0.0473, + "step": 12311 + }, + { + "epoch": 3.74, + "learning_rate": 4.554043974846905e-05, + "loss": 0.0512, + "step": 12312 + }, + { + "epoch": 3.74, + "learning_rate": 4.551969397422399e-05, + "loss": 0.0595, + "step": 12313 + }, + { + "epoch": 3.74, + "learning_rate": 4.549895208111596e-05, + "loss": 0.042, + "step": 12314 + }, + { + "epoch": 3.74, + "learning_rate": 4.547821406991538e-05, + "loss": 0.0169, + "step": 12315 + }, + { + "epoch": 3.74, + "learning_rate": 4.5457479941392674e-05, + "loss": 0.0551, + "step": 12316 + }, + { + "epoch": 3.74, + "learning_rate": 4.5436749696318036e-05, + "loss": 0.0331, + "step": 12317 + }, + { + "epoch": 3.74, + "learning_rate": 4.541602333546146e-05, + "loss": 0.0219, + "step": 12318 + }, + { + "epoch": 3.74, + "learning_rate": 4.5395300859592984e-05, + "loss": 0.0347, + "step": 12319 + }, + { + "epoch": 3.74, + "learning_rate": 4.537458226948227e-05, + "loss": 0.0647, + "step": 12320 + }, + { + "epoch": 3.74, + "learning_rate": 4.5353867565899056e-05, + "loss": 0.0414, + "step": 12321 + }, + { + "epoch": 3.74, + "learning_rate": 4.533315674961281e-05, + "loss": 0.0376, + "step": 12322 + }, + { + "epoch": 3.74, + "learning_rate": 4.531244982139283e-05, + "loss": 0.0264, + "step": 12323 + }, + { + "epoch": 3.74, + "learning_rate": 4.5291746782008326e-05, + "loss": 0.0373, + "step": 12324 + }, + { + "epoch": 3.74, + "learning_rate": 4.527104763222841e-05, + "loss": 0.0573, + "step": 12325 + }, + { + "epoch": 3.74, + "learning_rate": 4.5250352372821916e-05, + "loss": 0.079, + "step": 12326 + }, + { + "epoch": 3.74, + "learning_rate": 4.522966100455769e-05, + "loss": 0.0753, + "step": 12327 + }, + { + "epoch": 3.74, + "learning_rate": 4.520897352820433e-05, + "loss": 0.0544, + "step": 12328 + }, + { + "epoch": 3.74, + "learning_rate": 4.5188289944530253e-05, + "loss": 0.0328, + "step": 12329 + }, + { + "epoch": 3.74, + "learning_rate": 4.516761025430391e-05, + "loss": 0.058, + "step": 12330 + }, + { + "epoch": 3.74, + "learning_rate": 4.514693445829337e-05, + "loss": 0.0421, + "step": 12331 + }, + { + "epoch": 3.74, + "learning_rate": 4.5126262557266775e-05, + "loss": 0.0427, + "step": 12332 + }, + { + "epoch": 3.74, + "learning_rate": 4.5105594551992e-05, + "loss": 0.029, + "step": 12333 + }, + { + "epoch": 3.74, + "learning_rate": 4.508493044323673e-05, + "loss": 0.0108, + "step": 12334 + }, + { + "epoch": 3.75, + "learning_rate": 4.5064270231768685e-05, + "loss": 0.022, + "step": 12335 + }, + { + "epoch": 3.75, + "learning_rate": 4.5043613918355235e-05, + "loss": 0.0518, + "step": 12336 + }, + { + "epoch": 3.75, + "learning_rate": 4.502296150376384e-05, + "loss": 0.0425, + "step": 12337 + }, + { + "epoch": 3.75, + "learning_rate": 4.500231298876149e-05, + "loss": 0.0897, + "step": 12338 + }, + { + "epoch": 3.75, + "learning_rate": 4.4981668374115346e-05, + "loss": 0.0719, + "step": 12339 + }, + { + "epoch": 3.75, + "learning_rate": 4.496102766059221e-05, + "loss": 0.0518, + "step": 12340 + }, + { + "epoch": 3.75, + "learning_rate": 4.494039084895894e-05, + "loss": 0.0258, + "step": 12341 + }, + { + "epoch": 3.75, + "learning_rate": 4.4919757939982046e-05, + "loss": 0.0524, + "step": 12342 + }, + { + "epoch": 3.75, + "learning_rate": 4.489912893442796e-05, + "loss": 0.0312, + "step": 12343 + }, + { + "epoch": 3.75, + "learning_rate": 4.487850383306307e-05, + "loss": 0.0221, + "step": 12344 + }, + { + "epoch": 3.75, + "learning_rate": 4.4857882636653464e-05, + "loss": 0.0548, + "step": 12345 + }, + { + "epoch": 3.75, + "learning_rate": 4.4837265345965216e-05, + "loss": 0.0606, + "step": 12346 + }, + { + "epoch": 3.75, + "learning_rate": 4.481665196176419e-05, + "loss": 0.0603, + "step": 12347 + }, + { + "epoch": 3.75, + "learning_rate": 4.479604248481608e-05, + "loss": 0.054, + "step": 12348 + }, + { + "epoch": 3.75, + "learning_rate": 4.477543691588645e-05, + "loss": 0.0551, + "step": 12349 + }, + { + "epoch": 3.75, + "learning_rate": 4.475483525574078e-05, + "loss": 0.0315, + "step": 12350 + }, + { + "epoch": 3.75, + "learning_rate": 4.473423750514438e-05, + "loss": 0.0715, + "step": 12351 + }, + { + "epoch": 3.75, + "learning_rate": 4.471364366486237e-05, + "loss": 0.0453, + "step": 12352 + }, + { + "epoch": 3.75, + "learning_rate": 4.469305373565975e-05, + "loss": 0.0177, + "step": 12353 + }, + { + "epoch": 3.75, + "learning_rate": 4.4672467718301327e-05, + "loss": 0.0419, + "step": 12354 + }, + { + "epoch": 3.75, + "learning_rate": 4.465188561355191e-05, + "loss": 0.0566, + "step": 12355 + }, + { + "epoch": 3.75, + "learning_rate": 4.463130742217595e-05, + "loss": 0.0194, + "step": 12356 + }, + { + "epoch": 3.75, + "learning_rate": 4.461073314493799e-05, + "loss": 0.0175, + "step": 12357 + }, + { + "epoch": 3.75, + "learning_rate": 4.459016278260224e-05, + "loss": 0.0474, + "step": 12358 + }, + { + "epoch": 3.75, + "learning_rate": 4.4569596335932776e-05, + "loss": 0.0355, + "step": 12359 + }, + { + "epoch": 3.75, + "learning_rate": 4.454903380569369e-05, + "loss": 0.0739, + "step": 12360 + }, + { + "epoch": 3.75, + "learning_rate": 4.4528475192648724e-05, + "loss": 0.0335, + "step": 12361 + }, + { + "epoch": 3.75, + "learning_rate": 4.4507920497561643e-05, + "loss": 0.0355, + "step": 12362 + }, + { + "epoch": 3.75, + "learning_rate": 4.448736972119598e-05, + "loss": 0.046, + "step": 12363 + }, + { + "epoch": 3.75, + "learning_rate": 4.44668228643151e-05, + "loss": 0.0289, + "step": 12364 + }, + { + "epoch": 3.75, + "learning_rate": 4.444627992768225e-05, + "loss": 0.0267, + "step": 12365 + }, + { + "epoch": 3.75, + "learning_rate": 4.44257409120606e-05, + "loss": 0.0695, + "step": 12366 + }, + { + "epoch": 3.75, + "learning_rate": 4.4405205818213044e-05, + "loss": 0.0346, + "step": 12367 + }, + { + "epoch": 3.76, + "learning_rate": 4.438467464690247e-05, + "loss": 0.0379, + "step": 12368 + }, + { + "epoch": 3.76, + "learning_rate": 4.4364147398891525e-05, + "loss": 0.0445, + "step": 12369 + }, + { + "epoch": 3.76, + "learning_rate": 4.434362407494268e-05, + "loss": 0.0561, + "step": 12370 + }, + { + "epoch": 3.76, + "learning_rate": 4.432310467581843e-05, + "loss": 0.046, + "step": 12371 + }, + { + "epoch": 3.76, + "learning_rate": 4.430258920228089e-05, + "loss": 0.0323, + "step": 12372 + }, + { + "epoch": 3.76, + "learning_rate": 4.428207765509225e-05, + "loss": 0.0417, + "step": 12373 + }, + { + "epoch": 3.76, + "learning_rate": 4.426157003501441e-05, + "loss": 0.0246, + "step": 12374 + }, + { + "epoch": 3.76, + "learning_rate": 4.424106634280912e-05, + "loss": 0.0535, + "step": 12375 + }, + { + "epoch": 3.76, + "learning_rate": 4.422056657923813e-05, + "loss": 0.019, + "step": 12376 + }, + { + "epoch": 3.76, + "learning_rate": 4.420007074506285e-05, + "loss": 0.0439, + "step": 12377 + }, + { + "epoch": 3.76, + "learning_rate": 4.417957884104476e-05, + "loss": 0.0189, + "step": 12378 + }, + { + "epoch": 3.76, + "learning_rate": 4.415909086794494e-05, + "loss": 0.065, + "step": 12379 + }, + { + "epoch": 3.76, + "learning_rate": 4.4138606826524546e-05, + "loss": 0.021, + "step": 12380 + }, + { + "epoch": 3.76, + "learning_rate": 4.411812671754443e-05, + "loss": 0.0509, + "step": 12381 + }, + { + "epoch": 3.76, + "learning_rate": 4.409765054176546e-05, + "loss": 0.0441, + "step": 12382 + }, + { + "epoch": 3.76, + "learning_rate": 4.407717829994821e-05, + "loss": 0.0741, + "step": 12383 + }, + { + "epoch": 3.76, + "learning_rate": 4.4056709992853124e-05, + "loss": 0.0548, + "step": 12384 + }, + { + "epoch": 3.76, + "learning_rate": 4.403624562124063e-05, + "loss": 0.0343, + "step": 12385 + }, + { + "epoch": 3.76, + "learning_rate": 4.4015785185870835e-05, + "loss": 0.0429, + "step": 12386 + }, + { + "epoch": 3.76, + "learning_rate": 4.399532868750386e-05, + "loss": 0.0278, + "step": 12387 + }, + { + "epoch": 3.76, + "learning_rate": 4.3974876126899575e-05, + "loss": 0.0535, + "step": 12388 + }, + { + "epoch": 3.76, + "learning_rate": 4.3954427504817726e-05, + "loss": 0.0362, + "step": 12389 + }, + { + "epoch": 3.76, + "learning_rate": 4.3933982822017876e-05, + "loss": 0.0325, + "step": 12390 + }, + { + "epoch": 3.76, + "learning_rate": 4.391354207925952e-05, + "loss": 0.0181, + "step": 12391 + }, + { + "epoch": 3.76, + "learning_rate": 4.3893105277302034e-05, + "loss": 0.0162, + "step": 12392 + }, + { + "epoch": 3.76, + "learning_rate": 4.387267241690453e-05, + "loss": 0.0409, + "step": 12393 + }, + { + "epoch": 3.76, + "learning_rate": 4.385224349882603e-05, + "loss": 0.0369, + "step": 12394 + }, + { + "epoch": 3.76, + "learning_rate": 4.383181852382536e-05, + "loss": 0.0576, + "step": 12395 + }, + { + "epoch": 3.76, + "learning_rate": 4.381139749266134e-05, + "loss": 0.0644, + "step": 12396 + }, + { + "epoch": 3.76, + "learning_rate": 4.3790980406092464e-05, + "loss": 0.0247, + "step": 12397 + }, + { + "epoch": 3.76, + "learning_rate": 4.377056726487726e-05, + "loss": 0.0644, + "step": 12398 + }, + { + "epoch": 3.76, + "learning_rate": 4.375015806977396e-05, + "loss": 0.0405, + "step": 12399 + }, + { + "epoch": 3.76, + "learning_rate": 4.3729752821540676e-05, + "loss": 0.0909, + "step": 12400 + }, + { + "epoch": 3.77, + "learning_rate": 4.370935152093548e-05, + "loss": 0.0597, + "step": 12401 + }, + { + "epoch": 3.77, + "learning_rate": 4.368895416871614e-05, + "loss": 0.0252, + "step": 12402 + }, + { + "epoch": 3.77, + "learning_rate": 4.3668560765640475e-05, + "loss": 0.0684, + "step": 12403 + }, + { + "epoch": 3.77, + "learning_rate": 4.364817131246589e-05, + "loss": 0.0542, + "step": 12404 + }, + { + "epoch": 3.77, + "learning_rate": 4.3627785809949894e-05, + "loss": 0.0676, + "step": 12405 + }, + { + "epoch": 3.77, + "learning_rate": 4.36074042588497e-05, + "loss": 0.0606, + "step": 12406 + }, + { + "epoch": 3.77, + "learning_rate": 4.3587026659922474e-05, + "loss": 0.026, + "step": 12407 + }, + { + "epoch": 3.77, + "learning_rate": 4.356665301392515e-05, + "loss": 0.0275, + "step": 12408 + }, + { + "epoch": 3.77, + "learning_rate": 4.354628332161451e-05, + "loss": 0.0226, + "step": 12409 + }, + { + "epoch": 3.77, + "learning_rate": 4.3525917583747314e-05, + "loss": 0.0246, + "step": 12410 + }, + { + "epoch": 3.77, + "learning_rate": 4.350555580108001e-05, + "loss": 0.043, + "step": 12411 + }, + { + "epoch": 3.77, + "learning_rate": 4.348519797436907e-05, + "loss": 0.0517, + "step": 12412 + }, + { + "epoch": 3.77, + "learning_rate": 4.346484410437065e-05, + "loss": 0.0674, + "step": 12413 + }, + { + "epoch": 3.77, + "learning_rate": 4.344449419184084e-05, + "loss": 0.0339, + "step": 12414 + }, + { + "epoch": 3.77, + "learning_rate": 4.342414823753562e-05, + "loss": 0.046, + "step": 12415 + }, + { + "epoch": 3.77, + "learning_rate": 4.340380624221073e-05, + "loss": 0.0311, + "step": 12416 + }, + { + "epoch": 3.77, + "learning_rate": 4.338346820662189e-05, + "loss": 0.0462, + "step": 12417 + }, + { + "epoch": 3.77, + "learning_rate": 4.336313413152456e-05, + "loss": 0.063, + "step": 12418 + }, + { + "epoch": 3.77, + "learning_rate": 4.334280401767407e-05, + "loss": 0.0458, + "step": 12419 + }, + { + "epoch": 3.77, + "learning_rate": 4.3322477865825614e-05, + "loss": 0.0465, + "step": 12420 + }, + { + "epoch": 3.77, + "learning_rate": 4.330215567673432e-05, + "loss": 0.0221, + "step": 12421 + }, + { + "epoch": 3.77, + "learning_rate": 4.328183745115501e-05, + "loss": 0.028, + "step": 12422 + }, + { + "epoch": 3.77, + "learning_rate": 4.3261523189842515e-05, + "loss": 0.0687, + "step": 12423 + }, + { + "epoch": 3.77, + "learning_rate": 4.324121289355144e-05, + "loss": 0.0354, + "step": 12424 + }, + { + "epoch": 3.77, + "learning_rate": 4.3220906563036205e-05, + "loss": 0.0456, + "step": 12425 + }, + { + "epoch": 3.77, + "learning_rate": 4.320060419905119e-05, + "loss": 0.0554, + "step": 12426 + }, + { + "epoch": 3.77, + "learning_rate": 4.3180305802350513e-05, + "loss": 0.0574, + "step": 12427 + }, + { + "epoch": 3.77, + "learning_rate": 4.3160011373688267e-05, + "loss": 0.0505, + "step": 12428 + }, + { + "epoch": 3.77, + "learning_rate": 4.3139720913818294e-05, + "loss": 0.04, + "step": 12429 + }, + { + "epoch": 3.77, + "learning_rate": 4.3119434423494314e-05, + "loss": 0.0318, + "step": 12430 + }, + { + "epoch": 3.77, + "learning_rate": 4.3099151903469894e-05, + "loss": 0.0669, + "step": 12431 + }, + { + "epoch": 3.77, + "learning_rate": 4.307887335449847e-05, + "loss": 0.0549, + "step": 12432 + }, + { + "epoch": 3.78, + "learning_rate": 4.305859877733342e-05, + "loss": 0.0367, + "step": 12433 + }, + { + "epoch": 3.78, + "learning_rate": 4.3038328172727834e-05, + "loss": 0.0501, + "step": 12434 + }, + { + "epoch": 3.78, + "learning_rate": 4.3018061541434675e-05, + "loss": 0.0489, + "step": 12435 + }, + { + "epoch": 3.78, + "learning_rate": 4.2997798884206775e-05, + "loss": 0.0321, + "step": 12436 + }, + { + "epoch": 3.78, + "learning_rate": 4.29775402017969e-05, + "loss": 0.0012, + "step": 12437 + }, + { + "epoch": 3.78, + "learning_rate": 4.295728549495752e-05, + "loss": 0.0323, + "step": 12438 + }, + { + "epoch": 3.78, + "learning_rate": 4.2937034764441135e-05, + "loss": 0.018, + "step": 12439 + }, + { + "epoch": 3.78, + "learning_rate": 4.291678801099994e-05, + "loss": 0.0106, + "step": 12440 + }, + { + "epoch": 3.78, + "learning_rate": 4.289654523538601e-05, + "loss": 0.0493, + "step": 12441 + }, + { + "epoch": 3.78, + "learning_rate": 4.2876306438351406e-05, + "loss": 0.0479, + "step": 12442 + }, + { + "epoch": 3.78, + "learning_rate": 4.2856071620647826e-05, + "loss": 0.0545, + "step": 12443 + }, + { + "epoch": 3.78, + "learning_rate": 4.2835840783027076e-05, + "loss": 0.0667, + "step": 12444 + }, + { + "epoch": 3.78, + "learning_rate": 4.281561392624049e-05, + "loss": 0.0715, + "step": 12445 + }, + { + "epoch": 3.78, + "learning_rate": 4.279539105103958e-05, + "loss": 0.0493, + "step": 12446 + }, + { + "epoch": 3.78, + "learning_rate": 4.277517215817548e-05, + "loss": 0.045, + "step": 12447 + }, + { + "epoch": 3.78, + "learning_rate": 4.275495724839934e-05, + "loss": 0.0523, + "step": 12448 + }, + { + "epoch": 3.78, + "learning_rate": 4.273474632246204e-05, + "loss": 0.0411, + "step": 12449 + }, + { + "epoch": 3.78, + "learning_rate": 4.271453938111432e-05, + "loss": 0.0222, + "step": 12450 + }, + { + "epoch": 3.78, + "learning_rate": 4.269433642510689e-05, + "loss": 0.0425, + "step": 12451 + }, + { + "epoch": 3.78, + "learning_rate": 4.267413745519015e-05, + "loss": 0.0158, + "step": 12452 + }, + { + "epoch": 3.78, + "learning_rate": 4.265394247211452e-05, + "loss": 0.0338, + "step": 12453 + }, + { + "epoch": 3.78, + "learning_rate": 4.263375147663014e-05, + "loss": 0.0451, + "step": 12454 + }, + { + "epoch": 3.78, + "learning_rate": 4.261356446948699e-05, + "loss": 0.0693, + "step": 12455 + }, + { + "epoch": 3.78, + "learning_rate": 4.259338145143506e-05, + "loss": 0.0852, + "step": 12456 + }, + { + "epoch": 3.78, + "learning_rate": 4.257320242322401e-05, + "loss": 0.0722, + "step": 12457 + }, + { + "epoch": 3.78, + "learning_rate": 4.2553027385603494e-05, + "loss": 0.0528, + "step": 12458 + }, + { + "epoch": 3.78, + "learning_rate": 4.2532856339322924e-05, + "loss": 0.0417, + "step": 12459 + }, + { + "epoch": 3.78, + "learning_rate": 4.25126892851316e-05, + "loss": 0.0262, + "step": 12460 + }, + { + "epoch": 3.78, + "learning_rate": 4.249252622377863e-05, + "loss": 0.0702, + "step": 12461 + }, + { + "epoch": 3.78, + "learning_rate": 4.2472367156013074e-05, + "loss": 0.0419, + "step": 12462 + }, + { + "epoch": 3.78, + "learning_rate": 4.2452212082583726e-05, + "loss": 0.0385, + "step": 12463 + }, + { + "epoch": 3.78, + "learning_rate": 4.243206100423934e-05, + "loss": 0.0294, + "step": 12464 + }, + { + "epoch": 3.78, + "learning_rate": 4.2411913921728475e-05, + "loss": 0.0331, + "step": 12465 + }, + { + "epoch": 3.79, + "learning_rate": 4.2391770835799455e-05, + "loss": 0.0295, + "step": 12466 + }, + { + "epoch": 3.79, + "learning_rate": 4.237163174720063e-05, + "loss": 0.029, + "step": 12467 + }, + { + "epoch": 3.79, + "learning_rate": 4.235149665668002e-05, + "loss": 0.0073, + "step": 12468 + }, + { + "epoch": 3.79, + "learning_rate": 4.233136556498574e-05, + "loss": 0.0601, + "step": 12469 + }, + { + "epoch": 3.79, + "learning_rate": 4.231123847286538e-05, + "loss": 0.0292, + "step": 12470 + }, + { + "epoch": 3.79, + "learning_rate": 4.2291115381066774e-05, + "loss": 0.069, + "step": 12471 + }, + { + "epoch": 3.79, + "learning_rate": 4.227099629033733e-05, + "loss": 0.045, + "step": 12472 + }, + { + "epoch": 3.79, + "learning_rate": 4.225088120142447e-05, + "loss": 0.0584, + "step": 12473 + }, + { + "epoch": 3.79, + "learning_rate": 4.2230770115075484e-05, + "loss": 0.0587, + "step": 12474 + }, + { + "epoch": 3.79, + "learning_rate": 4.221066303203727e-05, + "loss": 0.018, + "step": 12475 + }, + { + "epoch": 3.79, + "learning_rate": 4.219055995305687e-05, + "loss": 0.0086, + "step": 12476 + }, + { + "epoch": 3.79, + "learning_rate": 4.217046087888099e-05, + "loss": 0.0696, + "step": 12477 + }, + { + "epoch": 3.79, + "learning_rate": 4.215036581025632e-05, + "loss": 0.0675, + "step": 12478 + }, + { + "epoch": 3.79, + "learning_rate": 4.2130274747929274e-05, + "loss": 0.0249, + "step": 12479 + }, + { + "epoch": 3.79, + "learning_rate": 4.211018769264618e-05, + "loss": 0.0415, + "step": 12480 + }, + { + "epoch": 3.79, + "learning_rate": 4.2090104645153266e-05, + "loss": 0.0408, + "step": 12481 + }, + { + "epoch": 3.79, + "learning_rate": 4.207002560619647e-05, + "loss": 0.0212, + "step": 12482 + }, + { + "epoch": 3.79, + "learning_rate": 4.204995057652177e-05, + "loss": 0.0074, + "step": 12483 + }, + { + "epoch": 3.79, + "learning_rate": 4.202987955687484e-05, + "loss": 0.033, + "step": 12484 + }, + { + "epoch": 3.79, + "learning_rate": 4.200981254800126e-05, + "loss": 0.0342, + "step": 12485 + }, + { + "epoch": 3.79, + "learning_rate": 4.198974955064643e-05, + "loss": 0.0161, + "step": 12486 + }, + { + "epoch": 3.79, + "learning_rate": 4.196969056555572e-05, + "loss": 0.0378, + "step": 12487 + }, + { + "epoch": 3.79, + "learning_rate": 4.1949635593474154e-05, + "loss": 0.0401, + "step": 12488 + }, + { + "epoch": 3.79, + "learning_rate": 4.192958463514681e-05, + "loss": 0.0104, + "step": 12489 + }, + { + "epoch": 3.79, + "learning_rate": 4.190953769131849e-05, + "loss": 0.0269, + "step": 12490 + }, + { + "epoch": 3.79, + "learning_rate": 4.188949476273382e-05, + "loss": 0.0308, + "step": 12491 + }, + { + "epoch": 3.79, + "learning_rate": 4.1869455850137445e-05, + "loss": 0.0605, + "step": 12492 + }, + { + "epoch": 3.79, + "learning_rate": 4.184942095427365e-05, + "loss": 0.0471, + "step": 12493 + }, + { + "epoch": 3.79, + "learning_rate": 4.1829390075886755e-05, + "loss": 0.043, + "step": 12494 + }, + { + "epoch": 3.79, + "learning_rate": 4.180936321572081e-05, + "loss": 0.0423, + "step": 12495 + }, + { + "epoch": 3.79, + "learning_rate": 4.17893403745197e-05, + "loss": 0.0186, + "step": 12496 + }, + { + "epoch": 3.79, + "learning_rate": 4.176932155302734e-05, + "loss": 0.0148, + "step": 12497 + }, + { + "epoch": 3.79, + "learning_rate": 4.174930675198725e-05, + "loss": 0.0378, + "step": 12498 + }, + { + "epoch": 3.8, + "learning_rate": 4.1729295972143e-05, + "loss": 0.0253, + "step": 12499 + }, + { + "epoch": 3.8, + "learning_rate": 4.1709289214237916e-05, + "loss": 0.0454, + "step": 12500 + }, + { + "epoch": 3.8, + "learning_rate": 4.168928647901517e-05, + "loss": 0.0788, + "step": 12501 + }, + { + "epoch": 3.8, + "learning_rate": 4.166928776721779e-05, + "loss": 0.0159, + "step": 12502 + }, + { + "epoch": 3.8, + "learning_rate": 4.1649293079588715e-05, + "loss": 0.028, + "step": 12503 + }, + { + "epoch": 3.8, + "learning_rate": 4.1629302416870616e-05, + "loss": 0.0052, + "step": 12504 + }, + { + "epoch": 3.8, + "learning_rate": 4.160931577980619e-05, + "loss": 0.0507, + "step": 12505 + }, + { + "epoch": 3.8, + "learning_rate": 4.158933316913783e-05, + "loss": 0.0161, + "step": 12506 + }, + { + "epoch": 3.8, + "learning_rate": 4.15693545856078e-05, + "loss": 0.0149, + "step": 12507 + }, + { + "epoch": 3.8, + "learning_rate": 4.154938002995831e-05, + "loss": 0.0407, + "step": 12508 + }, + { + "epoch": 3.8, + "learning_rate": 4.1529409502931275e-05, + "loss": 0.0531, + "step": 12509 + }, + { + "epoch": 3.8, + "learning_rate": 4.1509443005268684e-05, + "loss": 0.0413, + "step": 12510 + }, + { + "epoch": 3.8, + "learning_rate": 4.1489480537712055e-05, + "loss": 0.0568, + "step": 12511 + }, + { + "epoch": 3.8, + "learning_rate": 4.146952210100305e-05, + "loss": 0.0329, + "step": 12512 + }, + { + "epoch": 3.8, + "learning_rate": 4.144956769588301e-05, + "loss": 0.0556, + "step": 12513 + }, + { + "epoch": 3.8, + "learning_rate": 4.14296173230932e-05, + "loss": 0.0654, + "step": 12514 + }, + { + "epoch": 3.8, + "learning_rate": 4.14096709833748e-05, + "loss": 0.044, + "step": 12515 + }, + { + "epoch": 3.8, + "learning_rate": 4.1389728677468624e-05, + "loss": 0.0354, + "step": 12516 + }, + { + "epoch": 3.8, + "learning_rate": 4.1369790406115555e-05, + "loss": 0.021, + "step": 12517 + }, + { + "epoch": 3.8, + "learning_rate": 4.1349856170056184e-05, + "loss": 0.0542, + "step": 12518 + }, + { + "epoch": 3.8, + "learning_rate": 4.132992597003107e-05, + "loss": 0.0288, + "step": 12519 + }, + { + "epoch": 3.8, + "learning_rate": 4.130999980678055e-05, + "loss": 0.0281, + "step": 12520 + }, + { + "epoch": 3.8, + "learning_rate": 4.1290077681044756e-05, + "loss": 0.049, + "step": 12521 + }, + { + "epoch": 3.8, + "learning_rate": 4.127015959356383e-05, + "loss": 0.0419, + "step": 12522 + }, + { + "epoch": 3.8, + "learning_rate": 4.1250245545077584e-05, + "loss": 0.0387, + "step": 12523 + }, + { + "epoch": 3.8, + "learning_rate": 4.123033553632586e-05, + "loss": 0.0386, + "step": 12524 + }, + { + "epoch": 3.8, + "learning_rate": 4.1210429568048206e-05, + "loss": 0.0662, + "step": 12525 + }, + { + "epoch": 3.8, + "learning_rate": 4.1190527640984063e-05, + "loss": 0.038, + "step": 12526 + }, + { + "epoch": 3.8, + "learning_rate": 4.1170629755872694e-05, + "loss": 0.0187, + "step": 12527 + }, + { + "epoch": 3.8, + "learning_rate": 4.1150735913453335e-05, + "loss": 0.0303, + "step": 12528 + }, + { + "epoch": 3.8, + "learning_rate": 4.1130846114464904e-05, + "loss": 0.0826, + "step": 12529 + }, + { + "epoch": 3.8, + "learning_rate": 4.1110960359646324e-05, + "loss": 0.0095, + "step": 12530 + }, + { + "epoch": 3.8, + "learning_rate": 4.109107864973625e-05, + "loss": 0.012, + "step": 12531 + }, + { + "epoch": 3.81, + "learning_rate": 4.107120098547317e-05, + "loss": 0.0518, + "step": 12532 + }, + { + "epoch": 3.81, + "learning_rate": 4.105132736759561e-05, + "loss": 0.0455, + "step": 12533 + }, + { + "epoch": 3.81, + "learning_rate": 4.10314577968417e-05, + "loss": 0.0004, + "step": 12534 + }, + { + "epoch": 3.81, + "learning_rate": 4.101159227394967e-05, + "loss": 0.0411, + "step": 12535 + }, + { + "epoch": 3.81, + "learning_rate": 4.099173079965729e-05, + "loss": 0.0276, + "step": 12536 + }, + { + "epoch": 3.81, + "learning_rate": 4.0971873374702445e-05, + "loss": 0.0818, + "step": 12537 + }, + { + "epoch": 3.81, + "learning_rate": 4.0952019999822846e-05, + "loss": 0.0108, + "step": 12538 + }, + { + "epoch": 3.81, + "learning_rate": 4.093217067575586e-05, + "loss": 0.0441, + "step": 12539 + }, + { + "epoch": 3.81, + "learning_rate": 4.0912325403238986e-05, + "loss": 0.0261, + "step": 12540 + }, + { + "epoch": 3.81, + "learning_rate": 4.089248418300924e-05, + "loss": 0.0693, + "step": 12541 + }, + { + "epoch": 3.81, + "learning_rate": 4.0872647015803786e-05, + "loss": 0.0471, + "step": 12542 + }, + { + "epoch": 3.81, + "learning_rate": 4.085281390235945e-05, + "loss": 0.0135, + "step": 12543 + }, + { + "epoch": 3.81, + "learning_rate": 4.083298484341305e-05, + "loss": 0.0554, + "step": 12544 + }, + { + "epoch": 3.81, + "learning_rate": 4.0813159839701144e-05, + "loss": 0.0608, + "step": 12545 + }, + { + "epoch": 3.81, + "learning_rate": 4.0793338891960095e-05, + "loss": 0.0198, + "step": 12546 + }, + { + "epoch": 3.81, + "learning_rate": 4.077352200092631e-05, + "loss": 0.0372, + "step": 12547 + }, + { + "epoch": 3.81, + "learning_rate": 4.0753709167335845e-05, + "loss": 0.0527, + "step": 12548 + }, + { + "epoch": 3.81, + "learning_rate": 4.073390039192476e-05, + "loss": 0.0516, + "step": 12549 + }, + { + "epoch": 3.81, + "learning_rate": 4.071409567542881e-05, + "loss": 0.0349, + "step": 12550 + }, + { + "epoch": 3.81, + "learning_rate": 4.0694295018583815e-05, + "loss": 0.053, + "step": 12551 + }, + { + "epoch": 3.81, + "learning_rate": 4.0674498422125144e-05, + "loss": 0.0337, + "step": 12552 + }, + { + "epoch": 3.81, + "learning_rate": 4.06547058867883e-05, + "loss": 0.0404, + "step": 12553 + }, + { + "epoch": 3.81, + "learning_rate": 4.063491741330843e-05, + "loss": 0.0544, + "step": 12554 + }, + { + "epoch": 3.81, + "learning_rate": 4.061513300242068e-05, + "loss": 0.0323, + "step": 12555 + }, + { + "epoch": 3.81, + "learning_rate": 4.059535265486005e-05, + "loss": 0.0635, + "step": 12556 + }, + { + "epoch": 3.81, + "learning_rate": 4.0575576371361144e-05, + "loss": 0.0159, + "step": 12557 + }, + { + "epoch": 3.81, + "learning_rate": 4.0555804152658756e-05, + "loss": 0.0134, + "step": 12558 + }, + { + "epoch": 3.81, + "learning_rate": 4.0536035999487245e-05, + "loss": 0.0371, + "step": 12559 + }, + { + "epoch": 3.81, + "learning_rate": 4.051627191258106e-05, + "loss": 0.0285, + "step": 12560 + }, + { + "epoch": 3.81, + "learning_rate": 4.0496511892674285e-05, + "loss": 0.063, + "step": 12561 + }, + { + "epoch": 3.81, + "learning_rate": 4.0476755940500955e-05, + "loss": 0.03, + "step": 12562 + }, + { + "epoch": 3.81, + "learning_rate": 4.0457004056795e-05, + "loss": 0.0126, + "step": 12563 + }, + { + "epoch": 3.81, + "learning_rate": 4.0437256242290074e-05, + "loss": 0.0479, + "step": 12564 + }, + { + "epoch": 3.82, + "learning_rate": 4.0417512497719836e-05, + "loss": 0.0405, + "step": 12565 + }, + { + "epoch": 3.82, + "learning_rate": 4.039777282381765e-05, + "loss": 0.0267, + "step": 12566 + }, + { + "epoch": 3.82, + "learning_rate": 4.0378037221316793e-05, + "loss": 0.0288, + "step": 12567 + }, + { + "epoch": 3.82, + "learning_rate": 4.035830569095035e-05, + "loss": 0.032, + "step": 12568 + }, + { + "epoch": 3.82, + "learning_rate": 4.033857823345139e-05, + "loss": 0.033, + "step": 12569 + }, + { + "epoch": 3.82, + "learning_rate": 4.031885484955262e-05, + "loss": 0.0493, + "step": 12570 + }, + { + "epoch": 3.82, + "learning_rate": 4.029913553998678e-05, + "loss": 0.0345, + "step": 12571 + }, + { + "epoch": 3.82, + "learning_rate": 4.0279420305486384e-05, + "loss": 0.0275, + "step": 12572 + }, + { + "epoch": 3.82, + "learning_rate": 4.0259709146783716e-05, + "loss": 0.0197, + "step": 12573 + }, + { + "epoch": 3.82, + "learning_rate": 4.024000206461108e-05, + "loss": 0.0513, + "step": 12574 + }, + { + "epoch": 3.82, + "learning_rate": 4.022029905970045e-05, + "loss": 0.0337, + "step": 12575 + }, + { + "epoch": 3.82, + "learning_rate": 4.020060013278388e-05, + "loss": 0.0229, + "step": 12576 + }, + { + "epoch": 3.82, + "learning_rate": 4.0180905284592954e-05, + "loss": 0.0239, + "step": 12577 + }, + { + "epoch": 3.82, + "learning_rate": 4.016121451585934e-05, + "loss": 0.0521, + "step": 12578 + }, + { + "epoch": 3.82, + "learning_rate": 4.0141527827314565e-05, + "loss": 0.0219, + "step": 12579 + }, + { + "epoch": 3.82, + "learning_rate": 4.012184521968982e-05, + "loss": 0.0599, + "step": 12580 + }, + { + "epoch": 3.82, + "learning_rate": 4.01021666937164e-05, + "loss": 0.0597, + "step": 12581 + }, + { + "epoch": 3.82, + "learning_rate": 4.008249225012512e-05, + "loss": 0.0315, + "step": 12582 + }, + { + "epoch": 3.82, + "learning_rate": 4.006282188964698e-05, + "loss": 0.0395, + "step": 12583 + }, + { + "epoch": 3.82, + "learning_rate": 4.0043155613012564e-05, + "loss": 0.0529, + "step": 12584 + }, + { + "epoch": 3.82, + "learning_rate": 4.002349342095252e-05, + "loss": 0.0339, + "step": 12585 + }, + { + "epoch": 3.82, + "learning_rate": 4.000383531419719e-05, + "loss": 0.0327, + "step": 12586 + }, + { + "epoch": 3.82, + "learning_rate": 3.998418129347678e-05, + "loss": 0.035, + "step": 12587 + }, + { + "epoch": 3.82, + "learning_rate": 3.996453135952145e-05, + "loss": 0.0417, + "step": 12588 + }, + { + "epoch": 3.82, + "learning_rate": 3.9944885513061066e-05, + "loss": 0.0388, + "step": 12589 + }, + { + "epoch": 3.82, + "learning_rate": 3.992524375482549e-05, + "loss": 0.0334, + "step": 12590 + }, + { + "epoch": 3.82, + "learning_rate": 3.990560608554432e-05, + "loss": 0.0399, + "step": 12591 + }, + { + "epoch": 3.82, + "learning_rate": 3.988597250594704e-05, + "loss": 0.0216, + "step": 12592 + }, + { + "epoch": 3.82, + "learning_rate": 3.986634301676292e-05, + "loss": 0.0302, + "step": 12593 + }, + { + "epoch": 3.82, + "learning_rate": 3.984671761872124e-05, + "loss": 0.0243, + "step": 12594 + }, + { + "epoch": 3.82, + "learning_rate": 3.9827096312550935e-05, + "loss": 0.0179, + "step": 12595 + }, + { + "epoch": 3.82, + "learning_rate": 3.980747909898096e-05, + "loss": 0.0616, + "step": 12596 + }, + { + "epoch": 3.82, + "learning_rate": 3.978786597874001e-05, + "loss": 0.0128, + "step": 12597 + }, + { + "epoch": 3.83, + "learning_rate": 3.97682569525566e-05, + "loss": 0.0345, + "step": 12598 + }, + { + "epoch": 3.83, + "learning_rate": 3.974865202115923e-05, + "loss": 0.053, + "step": 12599 + }, + { + "epoch": 3.83, + "learning_rate": 3.9729051185276095e-05, + "loss": 0.0451, + "step": 12600 + }, + { + "epoch": 3.83, + "learning_rate": 3.970945444563539e-05, + "loss": 0.0325, + "step": 12601 + }, + { + "epoch": 3.83, + "learning_rate": 3.968986180296501e-05, + "loss": 0.0229, + "step": 12602 + }, + { + "epoch": 3.83, + "learning_rate": 3.967027325799274e-05, + "loss": 0.0634, + "step": 12603 + }, + { + "epoch": 3.83, + "learning_rate": 3.9650688811446325e-05, + "loss": 0.054, + "step": 12604 + }, + { + "epoch": 3.83, + "learning_rate": 3.963110846405319e-05, + "loss": 0.0574, + "step": 12605 + }, + { + "epoch": 3.83, + "learning_rate": 3.961153221654081e-05, + "loss": 0.0456, + "step": 12606 + }, + { + "epoch": 3.83, + "learning_rate": 3.95919600696362e-05, + "loss": 0.0666, + "step": 12607 + }, + { + "epoch": 3.83, + "learning_rate": 3.957239202406654e-05, + "loss": 0.0519, + "step": 12608 + }, + { + "epoch": 3.83, + "learning_rate": 3.9552828080558656e-05, + "loss": 0.0511, + "step": 12609 + }, + { + "epoch": 3.83, + "learning_rate": 3.953326823983937e-05, + "loss": 0.0428, + "step": 12610 + }, + { + "epoch": 3.83, + "learning_rate": 3.9513712502635186e-05, + "loss": 0.0129, + "step": 12611 + }, + { + "epoch": 3.83, + "learning_rate": 3.949416086967262e-05, + "loss": 0.0257, + "step": 12612 + }, + { + "epoch": 3.83, + "learning_rate": 3.947461334167794e-05, + "loss": 0.0495, + "step": 12613 + }, + { + "epoch": 3.83, + "learning_rate": 3.94550699193772e-05, + "loss": 0.0094, + "step": 12614 + }, + { + "epoch": 3.83, + "learning_rate": 3.943553060349649e-05, + "loss": 0.0608, + "step": 12615 + }, + { + "epoch": 3.83, + "learning_rate": 3.941599539476155e-05, + "loss": 0.0455, + "step": 12616 + }, + { + "epoch": 3.83, + "learning_rate": 3.9396464293898176e-05, + "loss": 0.0091, + "step": 12617 + }, + { + "epoch": 3.83, + "learning_rate": 3.937693730163174e-05, + "loss": 0.0117, + "step": 12618 + }, + { + "epoch": 3.83, + "learning_rate": 3.935741441868767e-05, + "loss": 0.0749, + "step": 12619 + }, + { + "epoch": 3.83, + "learning_rate": 3.933789564579124e-05, + "loss": 0.0564, + "step": 12620 + }, + { + "epoch": 3.83, + "learning_rate": 3.931838098366743e-05, + "loss": 0.0241, + "step": 12621 + }, + { + "epoch": 3.83, + "learning_rate": 3.9298870433041293e-05, + "loss": 0.0416, + "step": 12622 + }, + { + "epoch": 3.83, + "learning_rate": 3.927936399463739e-05, + "loss": 0.0264, + "step": 12623 + }, + { + "epoch": 3.83, + "learning_rate": 3.9259861669180484e-05, + "loss": 0.0626, + "step": 12624 + }, + { + "epoch": 3.83, + "learning_rate": 3.924036345739492e-05, + "loss": 0.0062, + "step": 12625 + }, + { + "epoch": 3.83, + "learning_rate": 3.922086936000511e-05, + "loss": 0.0356, + "step": 12626 + }, + { + "epoch": 3.83, + "learning_rate": 3.9201379377735145e-05, + "loss": 0.0687, + "step": 12627 + }, + { + "epoch": 3.83, + "learning_rate": 3.918189351130896e-05, + "loss": 0.0571, + "step": 12628 + }, + { + "epoch": 3.83, + "learning_rate": 3.916241176145051e-05, + "loss": 0.0321, + "step": 12629 + }, + { + "epoch": 3.83, + "learning_rate": 3.914293412888341e-05, + "loss": 0.0234, + "step": 12630 + }, + { + "epoch": 3.84, + "learning_rate": 3.912346061433125e-05, + "loss": 0.0601, + "step": 12631 + }, + { + "epoch": 3.84, + "learning_rate": 3.910399121851739e-05, + "loss": 0.0157, + "step": 12632 + }, + { + "epoch": 3.84, + "learning_rate": 3.908452594216505e-05, + "loss": 0.0506, + "step": 12633 + }, + { + "epoch": 3.84, + "learning_rate": 3.906506478599727e-05, + "loss": 0.0419, + "step": 12634 + }, + { + "epoch": 3.84, + "learning_rate": 3.904560775073708e-05, + "loss": 0.0676, + "step": 12635 + }, + { + "epoch": 3.84, + "learning_rate": 3.902615483710712e-05, + "loss": 0.0484, + "step": 12636 + }, + { + "epoch": 3.84, + "learning_rate": 3.900670604583014e-05, + "loss": 0.0207, + "step": 12637 + }, + { + "epoch": 3.84, + "learning_rate": 3.898726137762855e-05, + "loss": 0.0425, + "step": 12638 + }, + { + "epoch": 3.84, + "learning_rate": 3.896782083322461e-05, + "loss": 0.0533, + "step": 12639 + }, + { + "epoch": 3.84, + "learning_rate": 3.894838441334057e-05, + "loss": 0.0117, + "step": 12640 + }, + { + "epoch": 3.84, + "learning_rate": 3.892895211869834e-05, + "loss": 0.0299, + "step": 12641 + }, + { + "epoch": 3.84, + "learning_rate": 3.890952395001987e-05, + "loss": 0.0161, + "step": 12642 + }, + { + "epoch": 3.84, + "learning_rate": 3.889009990802683e-05, + "loss": 0.0478, + "step": 12643 + }, + { + "epoch": 3.84, + "learning_rate": 3.88706799934407e-05, + "loss": 0.0456, + "step": 12644 + }, + { + "epoch": 3.84, + "learning_rate": 3.885126420698296e-05, + "loss": 0.0246, + "step": 12645 + }, + { + "epoch": 3.84, + "learning_rate": 3.883185254937479e-05, + "loss": 0.0237, + "step": 12646 + }, + { + "epoch": 3.84, + "learning_rate": 3.881244502133736e-05, + "loss": 0.0439, + "step": 12647 + }, + { + "epoch": 3.84, + "learning_rate": 3.879304162359148e-05, + "loss": 0.0434, + "step": 12648 + }, + { + "epoch": 3.84, + "learning_rate": 3.8773642356858016e-05, + "loss": 0.0279, + "step": 12649 + }, + { + "epoch": 3.84, + "learning_rate": 3.875424722185754e-05, + "loss": 0.0176, + "step": 12650 + }, + { + "epoch": 3.84, + "learning_rate": 3.8734856219310586e-05, + "loss": 0.0243, + "step": 12651 + }, + { + "epoch": 3.84, + "learning_rate": 3.871546934993746e-05, + "loss": 0.0524, + "step": 12652 + }, + { + "epoch": 3.84, + "learning_rate": 3.8696086614458244e-05, + "loss": 0.0191, + "step": 12653 + }, + { + "epoch": 3.84, + "learning_rate": 3.867670801359308e-05, + "loss": 0.047, + "step": 12654 + }, + { + "epoch": 3.84, + "learning_rate": 3.86573335480617e-05, + "loss": 0.0386, + "step": 12655 + }, + { + "epoch": 3.84, + "learning_rate": 3.863796321858391e-05, + "loss": 0.0538, + "step": 12656 + }, + { + "epoch": 3.84, + "learning_rate": 3.861859702587923e-05, + "loss": 0.0107, + "step": 12657 + }, + { + "epoch": 3.84, + "learning_rate": 3.859923497066704e-05, + "loss": 0.0599, + "step": 12658 + }, + { + "epoch": 3.84, + "learning_rate": 3.857987705366656e-05, + "loss": 0.0309, + "step": 12659 + }, + { + "epoch": 3.84, + "learning_rate": 3.85605232755969e-05, + "loss": 0.049, + "step": 12660 + }, + { + "epoch": 3.84, + "learning_rate": 3.854117363717704e-05, + "loss": 0.0157, + "step": 12661 + }, + { + "epoch": 3.84, + "learning_rate": 3.852182813912575e-05, + "loss": 0.0358, + "step": 12662 + }, + { + "epoch": 3.84, + "learning_rate": 3.850248678216163e-05, + "loss": 0.0678, + "step": 12663 + }, + { + "epoch": 3.85, + "learning_rate": 3.848314956700312e-05, + "loss": 0.0468, + "step": 12664 + }, + { + "epoch": 3.85, + "learning_rate": 3.8463816494368624e-05, + "loss": 0.0577, + "step": 12665 + }, + { + "epoch": 3.85, + "learning_rate": 3.8444487564976226e-05, + "loss": 0.069, + "step": 12666 + }, + { + "epoch": 3.85, + "learning_rate": 3.842516277954402e-05, + "loss": 0.0322, + "step": 12667 + }, + { + "epoch": 3.85, + "learning_rate": 3.840584213878983e-05, + "loss": 0.0305, + "step": 12668 + }, + { + "epoch": 3.85, + "learning_rate": 3.8386525643431324e-05, + "loss": 0.058, + "step": 12669 + }, + { + "epoch": 3.85, + "learning_rate": 3.836721329418611e-05, + "loss": 0.0557, + "step": 12670 + }, + { + "epoch": 3.85, + "learning_rate": 3.8347905091771534e-05, + "loss": 0.0077, + "step": 12671 + }, + { + "epoch": 3.85, + "learning_rate": 3.832860103690494e-05, + "loss": 0.0317, + "step": 12672 + }, + { + "epoch": 3.85, + "learning_rate": 3.830930113030326e-05, + "loss": 0.0461, + "step": 12673 + }, + { + "epoch": 3.85, + "learning_rate": 3.8290005372683554e-05, + "loss": 0.0588, + "step": 12674 + }, + { + "epoch": 3.85, + "learning_rate": 3.827071376476254e-05, + "loss": 0.0681, + "step": 12675 + }, + { + "epoch": 3.85, + "learning_rate": 3.825142630725688e-05, + "loss": 0.0479, + "step": 12676 + }, + { + "epoch": 3.85, + "learning_rate": 3.8232143000883015e-05, + "loss": 0.0574, + "step": 12677 + }, + { + "epoch": 3.85, + "learning_rate": 3.821286384635731e-05, + "loss": 0.0491, + "step": 12678 + }, + { + "epoch": 3.85, + "learning_rate": 3.8193588844395904e-05, + "loss": 0.0552, + "step": 12679 + }, + { + "epoch": 3.85, + "learning_rate": 3.817431799571476e-05, + "loss": 0.0246, + "step": 12680 + }, + { + "epoch": 3.85, + "learning_rate": 3.815505130102981e-05, + "loss": 0.0209, + "step": 12681 + }, + { + "epoch": 3.85, + "learning_rate": 3.813578876105669e-05, + "loss": 0.0322, + "step": 12682 + }, + { + "epoch": 3.85, + "learning_rate": 3.8116530376511026e-05, + "loss": 0.0286, + "step": 12683 + }, + { + "epoch": 3.85, + "learning_rate": 3.8097276148108156e-05, + "loss": 0.0455, + "step": 12684 + }, + { + "epoch": 3.85, + "learning_rate": 3.8078026076563276e-05, + "loss": 0.0213, + "step": 12685 + }, + { + "epoch": 3.85, + "learning_rate": 3.8058780162591567e-05, + "loss": 0.0582, + "step": 12686 + }, + { + "epoch": 3.85, + "learning_rate": 3.803953840690787e-05, + "loss": 0.0409, + "step": 12687 + }, + { + "epoch": 3.85, + "learning_rate": 3.802030081022708e-05, + "loss": 0.0272, + "step": 12688 + }, + { + "epoch": 3.85, + "learning_rate": 3.800106737326365e-05, + "loss": 0.0457, + "step": 12689 + }, + { + "epoch": 3.85, + "learning_rate": 3.798183809673218e-05, + "loss": 0.0218, + "step": 12690 + }, + { + "epoch": 3.85, + "learning_rate": 3.796261298134689e-05, + "loss": 0.0242, + "step": 12691 + }, + { + "epoch": 3.85, + "learning_rate": 3.794339202782204e-05, + "loss": 0.061, + "step": 12692 + }, + { + "epoch": 3.85, + "learning_rate": 3.7924175236871554e-05, + "loss": 0.0287, + "step": 12693 + }, + { + "epoch": 3.85, + "learning_rate": 3.790496260920926e-05, + "loss": 0.0323, + "step": 12694 + }, + { + "epoch": 3.85, + "learning_rate": 3.788575414554893e-05, + "loss": 0.0552, + "step": 12695 + }, + { + "epoch": 3.85, + "learning_rate": 3.786654984660401e-05, + "loss": 0.043, + "step": 12696 + }, + { + "epoch": 3.86, + "learning_rate": 3.7847349713087995e-05, + "loss": 0.0168, + "step": 12697 + }, + { + "epoch": 3.86, + "learning_rate": 3.782815374571404e-05, + "loss": 0.0449, + "step": 12698 + }, + { + "epoch": 3.86, + "learning_rate": 3.7808961945195247e-05, + "loss": 0.0315, + "step": 12699 + }, + { + "epoch": 3.86, + "learning_rate": 3.778977431224446e-05, + "loss": 0.0447, + "step": 12700 + }, + { + "epoch": 3.86, + "learning_rate": 3.777059084757455e-05, + "loss": 0.0505, + "step": 12701 + }, + { + "epoch": 3.86, + "learning_rate": 3.7751411551898055e-05, + "loss": 0.0192, + "step": 12702 + }, + { + "epoch": 3.86, + "learning_rate": 3.773223642592747e-05, + "loss": 0.0135, + "step": 12703 + }, + { + "epoch": 3.86, + "learning_rate": 3.7713065470375096e-05, + "loss": 0.0307, + "step": 12704 + }, + { + "epoch": 3.86, + "learning_rate": 3.769389868595302e-05, + "loss": 0.0608, + "step": 12705 + }, + { + "epoch": 3.86, + "learning_rate": 3.7674736073373306e-05, + "loss": 0.0185, + "step": 12706 + }, + { + "epoch": 3.86, + "learning_rate": 3.765557763334771e-05, + "loss": 0.0752, + "step": 12707 + }, + { + "epoch": 3.86, + "learning_rate": 3.763642336658803e-05, + "loss": 0.0413, + "step": 12708 + }, + { + "epoch": 3.86, + "learning_rate": 3.761727327380568e-05, + "loss": 0.047, + "step": 12709 + }, + { + "epoch": 3.86, + "learning_rate": 3.759812735571205e-05, + "loss": 0.0298, + "step": 12710 + }, + { + "epoch": 3.86, + "learning_rate": 3.757898561301841e-05, + "loss": 0.0389, + "step": 12711 + }, + { + "epoch": 3.86, + "learning_rate": 3.755984804643574e-05, + "loss": 0.0454, + "step": 12712 + }, + { + "epoch": 3.86, + "learning_rate": 3.754071465667506e-05, + "loss": 0.0518, + "step": 12713 + }, + { + "epoch": 3.86, + "learning_rate": 3.752158544444697e-05, + "loss": 0.0148, + "step": 12714 + }, + { + "epoch": 3.86, + "learning_rate": 3.75024604104622e-05, + "loss": 0.0552, + "step": 12715 + }, + { + "epoch": 3.86, + "learning_rate": 3.7483339555431055e-05, + "loss": 0.039, + "step": 12716 + }, + { + "epoch": 3.86, + "learning_rate": 3.7464222880063966e-05, + "loss": 0.0452, + "step": 12717 + }, + { + "epoch": 3.86, + "learning_rate": 3.744511038507097e-05, + "loss": 0.0541, + "step": 12718 + }, + { + "epoch": 3.86, + "learning_rate": 3.742600207116202e-05, + "loss": 0.061, + "step": 12719 + }, + { + "epoch": 3.86, + "learning_rate": 3.740689793904701e-05, + "loss": 0.0471, + "step": 12720 + }, + { + "epoch": 3.86, + "learning_rate": 3.738779798943554e-05, + "loss": 0.05, + "step": 12721 + }, + { + "epoch": 3.86, + "learning_rate": 3.7368702223037164e-05, + "loss": 0.0545, + "step": 12722 + }, + { + "epoch": 3.86, + "learning_rate": 3.7349610640561226e-05, + "loss": 0.0088, + "step": 12723 + }, + { + "epoch": 3.86, + "learning_rate": 3.733052324271687e-05, + "loss": 0.0266, + "step": 12724 + }, + { + "epoch": 3.86, + "learning_rate": 3.731144003021321e-05, + "loss": 0.0236, + "step": 12725 + }, + { + "epoch": 3.86, + "learning_rate": 3.729236100375903e-05, + "loss": 0.0316, + "step": 12726 + }, + { + "epoch": 3.86, + "learning_rate": 3.7273286164063203e-05, + "loss": 0.037, + "step": 12727 + }, + { + "epoch": 3.86, + "learning_rate": 3.7254215511834215e-05, + "loss": 0.0451, + "step": 12728 + }, + { + "epoch": 3.86, + "learning_rate": 3.723514904778049e-05, + "loss": 0.0319, + "step": 12729 + }, + { + "epoch": 3.87, + "learning_rate": 3.721608677261027e-05, + "loss": 0.0122, + "step": 12730 + }, + { + "epoch": 3.87, + "learning_rate": 3.719702868703173e-05, + "loss": 0.0381, + "step": 12731 + }, + { + "epoch": 3.87, + "learning_rate": 3.717797479175273e-05, + "loss": 0.037, + "step": 12732 + }, + { + "epoch": 3.87, + "learning_rate": 3.715892508748116e-05, + "loss": 0.0312, + "step": 12733 + }, + { + "epoch": 3.87, + "learning_rate": 3.7139879574924625e-05, + "loss": 0.0388, + "step": 12734 + }, + { + "epoch": 3.87, + "learning_rate": 3.712083825479057e-05, + "loss": 0.064, + "step": 12735 + }, + { + "epoch": 3.87, + "learning_rate": 3.710180112778639e-05, + "loss": 0.0435, + "step": 12736 + }, + { + "epoch": 3.87, + "learning_rate": 3.708276819461919e-05, + "loss": 0.0613, + "step": 12737 + }, + { + "epoch": 3.87, + "learning_rate": 3.706373945599605e-05, + "loss": 0.0295, + "step": 12738 + }, + { + "epoch": 3.87, + "learning_rate": 3.7044714912623824e-05, + "loss": 0.0453, + "step": 12739 + }, + { + "epoch": 3.87, + "learning_rate": 3.70256945652092e-05, + "loss": 0.0357, + "step": 12740 + }, + { + "epoch": 3.87, + "learning_rate": 3.700667841445868e-05, + "loss": 0.0573, + "step": 12741 + }, + { + "epoch": 3.87, + "learning_rate": 3.698766646107876e-05, + "loss": 0.0634, + "step": 12742 + }, + { + "epoch": 3.87, + "learning_rate": 3.696865870577557e-05, + "loss": 0.0314, + "step": 12743 + }, + { + "epoch": 3.87, + "learning_rate": 3.6949655149255296e-05, + "loss": 0.0565, + "step": 12744 + }, + { + "epoch": 3.87, + "learning_rate": 3.69306557922238e-05, + "loss": 0.0831, + "step": 12745 + }, + { + "epoch": 3.87, + "learning_rate": 3.691166063538685e-05, + "loss": 0.0576, + "step": 12746 + }, + { + "epoch": 3.87, + "learning_rate": 3.68926696794501e-05, + "loss": 0.0275, + "step": 12747 + }, + { + "epoch": 3.87, + "learning_rate": 3.687368292511896e-05, + "loss": 0.0505, + "step": 12748 + }, + { + "epoch": 3.87, + "learning_rate": 3.6854700373098794e-05, + "loss": 0.0637, + "step": 12749 + }, + { + "epoch": 3.87, + "learning_rate": 3.683572202409472e-05, + "loss": 0.011, + "step": 12750 + }, + { + "epoch": 3.87, + "learning_rate": 3.6816747878811677e-05, + "loss": 0.0211, + "step": 12751 + }, + { + "epoch": 3.87, + "learning_rate": 3.6797777937954594e-05, + "loss": 0.0204, + "step": 12752 + }, + { + "epoch": 3.87, + "learning_rate": 3.677881220222804e-05, + "loss": 0.0604, + "step": 12753 + }, + { + "epoch": 3.87, + "learning_rate": 3.6759850672336674e-05, + "loss": 0.0143, + "step": 12754 + }, + { + "epoch": 3.87, + "learning_rate": 3.6740893348984717e-05, + "loss": 0.0463, + "step": 12755 + }, + { + "epoch": 3.87, + "learning_rate": 3.672194023287646e-05, + "loss": 0.0571, + "step": 12756 + }, + { + "epoch": 3.87, + "learning_rate": 3.6702991324715914e-05, + "loss": 0.0537, + "step": 12757 + }, + { + "epoch": 3.87, + "learning_rate": 3.668404662520704e-05, + "loss": 0.0247, + "step": 12758 + }, + { + "epoch": 3.87, + "learning_rate": 3.6665106135053534e-05, + "loss": 0.0217, + "step": 12759 + }, + { + "epoch": 3.87, + "learning_rate": 3.664616985495894e-05, + "loss": 0.0222, + "step": 12760 + }, + { + "epoch": 3.87, + "learning_rate": 3.662723778562678e-05, + "loss": 0.0589, + "step": 12761 + }, + { + "epoch": 3.87, + "learning_rate": 3.660830992776022e-05, + "loss": 0.0187, + "step": 12762 + }, + { + "epoch": 3.88, + "learning_rate": 3.658938628206248e-05, + "loss": 0.0337, + "step": 12763 + }, + { + "epoch": 3.88, + "learning_rate": 3.657046684923647e-05, + "loss": 0.0489, + "step": 12764 + }, + { + "epoch": 3.88, + "learning_rate": 3.655155162998493e-05, + "loss": 0.071, + "step": 12765 + }, + { + "epoch": 3.88, + "learning_rate": 3.653264062501061e-05, + "loss": 0.0742, + "step": 12766 + }, + { + "epoch": 3.88, + "learning_rate": 3.6513733835015914e-05, + "loss": 0.0412, + "step": 12767 + }, + { + "epoch": 3.88, + "learning_rate": 3.6494831260703236e-05, + "loss": 0.0247, + "step": 12768 + }, + { + "epoch": 3.88, + "learning_rate": 3.647593290277474e-05, + "loss": 0.0767, + "step": 12769 + }, + { + "epoch": 3.88, + "learning_rate": 3.6457038761932425e-05, + "loss": 0.0615, + "step": 12770 + }, + { + "epoch": 3.88, + "learning_rate": 3.6438148838878125e-05, + "loss": 0.0749, + "step": 12771 + }, + { + "epoch": 3.88, + "learning_rate": 3.641926313431361e-05, + "loss": 0.0537, + "step": 12772 + }, + { + "epoch": 3.88, + "learning_rate": 3.640038164894035e-05, + "loss": 0.0451, + "step": 12773 + }, + { + "epoch": 3.88, + "learning_rate": 3.638150438345984e-05, + "loss": 0.0137, + "step": 12774 + }, + { + "epoch": 3.88, + "learning_rate": 3.636263133857325e-05, + "loss": 0.018, + "step": 12775 + }, + { + "epoch": 3.88, + "learning_rate": 3.634376251498163e-05, + "loss": 0.0517, + "step": 12776 + }, + { + "epoch": 3.88, + "learning_rate": 3.632489791338598e-05, + "loss": 0.0181, + "step": 12777 + }, + { + "epoch": 3.88, + "learning_rate": 3.630603753448697e-05, + "loss": 0.0438, + "step": 12778 + }, + { + "epoch": 3.88, + "learning_rate": 3.628718137898536e-05, + "loss": 0.0629, + "step": 12779 + }, + { + "epoch": 3.88, + "learning_rate": 3.6268329447581404e-05, + "loss": 0.0607, + "step": 12780 + }, + { + "epoch": 3.88, + "learning_rate": 3.624948174097554e-05, + "loss": 0.0361, + "step": 12781 + }, + { + "epoch": 3.88, + "learning_rate": 3.623063825986783e-05, + "loss": 0.026, + "step": 12782 + }, + { + "epoch": 3.88, + "learning_rate": 3.621179900495831e-05, + "loss": 0.0444, + "step": 12783 + }, + { + "epoch": 3.88, + "learning_rate": 3.619296397694677e-05, + "loss": 0.0244, + "step": 12784 + }, + { + "epoch": 3.88, + "learning_rate": 3.6174133176532854e-05, + "loss": 0.0375, + "step": 12785 + }, + { + "epoch": 3.88, + "learning_rate": 3.6155306604416134e-05, + "loss": 0.0178, + "step": 12786 + }, + { + "epoch": 3.88, + "learning_rate": 3.61364842612959e-05, + "loss": 0.0335, + "step": 12787 + }, + { + "epoch": 3.88, + "learning_rate": 3.6117666147871395e-05, + "loss": 0.0558, + "step": 12788 + }, + { + "epoch": 3.88, + "learning_rate": 3.609885226484164e-05, + "loss": 0.0524, + "step": 12789 + }, + { + "epoch": 3.88, + "learning_rate": 3.608004261290547e-05, + "loss": 0.0514, + "step": 12790 + }, + { + "epoch": 3.88, + "learning_rate": 3.6061237192761694e-05, + "loss": 0.0466, + "step": 12791 + }, + { + "epoch": 3.88, + "learning_rate": 3.6042436005108806e-05, + "loss": 0.0286, + "step": 12792 + }, + { + "epoch": 3.88, + "learning_rate": 3.6023639050645264e-05, + "loss": 0.0597, + "step": 12793 + }, + { + "epoch": 3.88, + "learning_rate": 3.6004846330069295e-05, + "loss": 0.0186, + "step": 12794 + }, + { + "epoch": 3.88, + "learning_rate": 3.598605784407901e-05, + "loss": 0.0481, + "step": 12795 + }, + { + "epoch": 3.89, + "learning_rate": 3.5967273593372285e-05, + "loss": 0.0604, + "step": 12796 + }, + { + "epoch": 3.89, + "learning_rate": 3.594849357864699e-05, + "loss": 0.0257, + "step": 12797 + }, + { + "epoch": 3.89, + "learning_rate": 3.592971780060067e-05, + "loss": 0.011, + "step": 12798 + }, + { + "epoch": 3.89, + "learning_rate": 3.5910946259930865e-05, + "loss": 0.0285, + "step": 12799 + }, + { + "epoch": 3.89, + "learning_rate": 3.589217895733484e-05, + "loss": 0.0234, + "step": 12800 + }, + { + "epoch": 3.89, + "learning_rate": 3.587341589350971e-05, + "loss": 0.0097, + "step": 12801 + }, + { + "epoch": 3.89, + "learning_rate": 3.5854657069152546e-05, + "loss": 0.054, + "step": 12802 + }, + { + "epoch": 3.89, + "learning_rate": 3.5835902484960094e-05, + "loss": 0.022, + "step": 12803 + }, + { + "epoch": 3.89, + "learning_rate": 3.581715214162914e-05, + "loss": 0.0207, + "step": 12804 + }, + { + "epoch": 3.89, + "learning_rate": 3.579840603985613e-05, + "loss": 0.0182, + "step": 12805 + }, + { + "epoch": 3.89, + "learning_rate": 3.5779664180337455e-05, + "loss": 0.0468, + "step": 12806 + }, + { + "epoch": 3.89, + "learning_rate": 3.576092656376925e-05, + "loss": 0.0405, + "step": 12807 + }, + { + "epoch": 3.89, + "learning_rate": 3.574219319084762e-05, + "loss": 0.0324, + "step": 12808 + }, + { + "epoch": 3.89, + "learning_rate": 3.572346406226851e-05, + "loss": 0.0405, + "step": 12809 + }, + { + "epoch": 3.89, + "learning_rate": 3.57047391787276e-05, + "loss": 0.0727, + "step": 12810 + }, + { + "epoch": 3.89, + "learning_rate": 3.568601854092045e-05, + "loss": 0.0341, + "step": 12811 + }, + { + "epoch": 3.89, + "learning_rate": 3.566730214954245e-05, + "loss": 0.0362, + "step": 12812 + }, + { + "epoch": 3.89, + "learning_rate": 3.5648590005288944e-05, + "loss": 0.0473, + "step": 12813 + }, + { + "epoch": 3.89, + "learning_rate": 3.562988210885494e-05, + "loss": 0.0455, + "step": 12814 + }, + { + "epoch": 3.89, + "learning_rate": 3.561117846093546e-05, + "loss": 0.0819, + "step": 12815 + }, + { + "epoch": 3.89, + "learning_rate": 3.5592479062225274e-05, + "loss": 0.036, + "step": 12816 + }, + { + "epoch": 3.89, + "learning_rate": 3.557378391341894e-05, + "loss": 0.0293, + "step": 12817 + }, + { + "epoch": 3.89, + "learning_rate": 3.5555093015211026e-05, + "loss": 0.0502, + "step": 12818 + }, + { + "epoch": 3.89, + "learning_rate": 3.5536406368295764e-05, + "loss": 0.0121, + "step": 12819 + }, + { + "epoch": 3.89, + "learning_rate": 3.551772397336742e-05, + "loss": 0.0502, + "step": 12820 + }, + { + "epoch": 3.89, + "learning_rate": 3.5499045831119824e-05, + "loss": 0.0285, + "step": 12821 + }, + { + "epoch": 3.89, + "learning_rate": 3.548037194224697e-05, + "loss": 0.0491, + "step": 12822 + }, + { + "epoch": 3.89, + "learning_rate": 3.546170230744241e-05, + "loss": 0.0442, + "step": 12823 + }, + { + "epoch": 3.89, + "learning_rate": 3.5443036927399783e-05, + "loss": 0.036, + "step": 12824 + }, + { + "epoch": 3.89, + "learning_rate": 3.542437580281239e-05, + "loss": 0.0472, + "step": 12825 + }, + { + "epoch": 3.89, + "learning_rate": 3.5405718934373396e-05, + "loss": 0.0502, + "step": 12826 + }, + { + "epoch": 3.89, + "learning_rate": 3.5387066322775964e-05, + "loss": 0.0563, + "step": 12827 + }, + { + "epoch": 3.89, + "learning_rate": 3.536841796871286e-05, + "loss": 0.0891, + "step": 12828 + }, + { + "epoch": 3.9, + "learning_rate": 3.534977387287691e-05, + "loss": 0.0287, + "step": 12829 + }, + { + "epoch": 3.9, + "learning_rate": 3.533113403596066e-05, + "loss": 0.0406, + "step": 12830 + }, + { + "epoch": 3.9, + "learning_rate": 3.531249845865649e-05, + "loss": 0.0515, + "step": 12831 + }, + { + "epoch": 3.9, + "learning_rate": 3.529386714165671e-05, + "loss": 0.0385, + "step": 12832 + }, + { + "epoch": 3.9, + "learning_rate": 3.5275240085653334e-05, + "loss": 0.0001, + "step": 12833 + }, + { + "epoch": 3.9, + "learning_rate": 3.525661729133843e-05, + "loss": 0.0425, + "step": 12834 + }, + { + "epoch": 3.9, + "learning_rate": 3.523799875940371e-05, + "loss": 0.0298, + "step": 12835 + }, + { + "epoch": 3.9, + "learning_rate": 3.521938449054078e-05, + "loss": 0.0438, + "step": 12836 + }, + { + "epoch": 3.9, + "learning_rate": 3.52007744854411e-05, + "loss": 0.0306, + "step": 12837 + }, + { + "epoch": 3.9, + "learning_rate": 3.518216874479603e-05, + "loss": 0.0061, + "step": 12838 + }, + { + "epoch": 3.9, + "learning_rate": 3.516356726929666e-05, + "loss": 0.024, + "step": 12839 + }, + { + "epoch": 3.9, + "learning_rate": 3.5144970059634055e-05, + "loss": 0.0506, + "step": 12840 + }, + { + "epoch": 3.9, + "learning_rate": 3.5126377116498994e-05, + "loss": 0.0295, + "step": 12841 + }, + { + "epoch": 3.9, + "learning_rate": 3.510778844058213e-05, + "loss": 0.0589, + "step": 12842 + }, + { + "epoch": 3.9, + "learning_rate": 3.508920403257404e-05, + "loss": 0.08, + "step": 12843 + }, + { + "epoch": 3.9, + "learning_rate": 3.5070623893164996e-05, + "loss": 0.04, + "step": 12844 + }, + { + "epoch": 3.9, + "learning_rate": 3.5052048023045334e-05, + "loss": 0.0544, + "step": 12845 + }, + { + "epoch": 3.9, + "learning_rate": 3.503347642290493e-05, + "loss": 0.0357, + "step": 12846 + }, + { + "epoch": 3.9, + "learning_rate": 3.501490909343378e-05, + "loss": 0.0569, + "step": 12847 + }, + { + "epoch": 3.9, + "learning_rate": 3.4996346035321526e-05, + "loss": 0.0361, + "step": 12848 + }, + { + "epoch": 3.9, + "learning_rate": 3.4977787249257775e-05, + "loss": 0.0514, + "step": 12849 + }, + { + "epoch": 3.9, + "learning_rate": 3.4959232735932016e-05, + "loss": 0.0347, + "step": 12850 + }, + { + "epoch": 3.9, + "learning_rate": 3.49406824960333e-05, + "loss": 0.0695, + "step": 12851 + }, + { + "epoch": 3.9, + "learning_rate": 3.4922136530250886e-05, + "loss": 0.0188, + "step": 12852 + }, + { + "epoch": 3.9, + "learning_rate": 3.4903594839273576e-05, + "loss": 0.0291, + "step": 12853 + }, + { + "epoch": 3.9, + "learning_rate": 3.488505742379026e-05, + "loss": 0.0958, + "step": 12854 + }, + { + "epoch": 3.9, + "learning_rate": 3.486652428448947e-05, + "loss": 0.0233, + "step": 12855 + }, + { + "epoch": 3.9, + "learning_rate": 3.484799542205965e-05, + "loss": 0.0667, + "step": 12856 + }, + { + "epoch": 3.9, + "learning_rate": 3.482947083718916e-05, + "loss": 0.018, + "step": 12857 + }, + { + "epoch": 3.9, + "learning_rate": 3.4810950530566035e-05, + "loss": 0.0633, + "step": 12858 + }, + { + "epoch": 3.9, + "learning_rate": 3.479243450287837e-05, + "loss": 0.0288, + "step": 12859 + }, + { + "epoch": 3.9, + "learning_rate": 3.477392275481391e-05, + "loss": 0.0372, + "step": 12860 + }, + { + "epoch": 3.9, + "learning_rate": 3.475541528706032e-05, + "loss": 0.0164, + "step": 12861 + }, + { + "epoch": 3.91, + "learning_rate": 3.4736912100305075e-05, + "loss": 0.0539, + "step": 12862 + }, + { + "epoch": 3.91, + "learning_rate": 3.471841319523557e-05, + "loss": 0.0409, + "step": 12863 + }, + { + "epoch": 3.91, + "learning_rate": 3.4699918572538896e-05, + "loss": 0.039, + "step": 12864 + }, + { + "epoch": 3.91, + "learning_rate": 3.4681428232902205e-05, + "loss": 0.0321, + "step": 12865 + }, + { + "epoch": 3.91, + "learning_rate": 3.4662942177012266e-05, + "loss": 0.0048, + "step": 12866 + }, + { + "epoch": 3.91, + "learning_rate": 3.464446040555577e-05, + "loss": 0.0352, + "step": 12867 + }, + { + "epoch": 3.91, + "learning_rate": 3.462598291921933e-05, + "loss": 0.0237, + "step": 12868 + }, + { + "epoch": 3.91, + "learning_rate": 3.460750971868925e-05, + "loss": 0.032, + "step": 12869 + }, + { + "epoch": 3.91, + "learning_rate": 3.4589040804651836e-05, + "loss": 0.0257, + "step": 12870 + }, + { + "epoch": 3.91, + "learning_rate": 3.457057617779313e-05, + "loss": 0.0105, + "step": 12871 + }, + { + "epoch": 3.91, + "learning_rate": 3.455211583879897e-05, + "loss": 0.0484, + "step": 12872 + }, + { + "epoch": 3.91, + "learning_rate": 3.453365978835521e-05, + "loss": 0.023, + "step": 12873 + }, + { + "epoch": 3.91, + "learning_rate": 3.451520802714733e-05, + "loss": 0.0315, + "step": 12874 + }, + { + "epoch": 3.91, + "learning_rate": 3.4496760555860874e-05, + "loss": 0.0707, + "step": 12875 + }, + { + "epoch": 3.91, + "learning_rate": 3.447831737518104e-05, + "loss": 0.0316, + "step": 12876 + }, + { + "epoch": 3.91, + "learning_rate": 3.445987848579295e-05, + "loss": 0.008, + "step": 12877 + }, + { + "epoch": 3.91, + "learning_rate": 3.444144388838153e-05, + "loss": 0.0881, + "step": 12878 + }, + { + "epoch": 3.91, + "learning_rate": 3.442301358363163e-05, + "loss": 0.0053, + "step": 12879 + }, + { + "epoch": 3.91, + "learning_rate": 3.44045875722278e-05, + "loss": 0.0359, + "step": 12880 + }, + { + "epoch": 3.91, + "learning_rate": 3.4386165854854616e-05, + "loss": 0.0407, + "step": 12881 + }, + { + "epoch": 3.91, + "learning_rate": 3.436774843219632e-05, + "loss": 0.0326, + "step": 12882 + }, + { + "epoch": 3.91, + "learning_rate": 3.4349335304937054e-05, + "loss": 0.0598, + "step": 12883 + }, + { + "epoch": 3.91, + "learning_rate": 3.4330926473760874e-05, + "loss": 0.025, + "step": 12884 + }, + { + "epoch": 3.91, + "learning_rate": 3.4312521939351534e-05, + "loss": 0.0388, + "step": 12885 + }, + { + "epoch": 3.91, + "learning_rate": 3.429412170239284e-05, + "loss": 0.0092, + "step": 12886 + }, + { + "epoch": 3.91, + "learning_rate": 3.4275725763568146e-05, + "loss": 0.035, + "step": 12887 + }, + { + "epoch": 3.91, + "learning_rate": 3.425733412356092e-05, + "loss": 0.036, + "step": 12888 + }, + { + "epoch": 3.91, + "learning_rate": 3.4238946783054266e-05, + "loss": 0.0131, + "step": 12889 + }, + { + "epoch": 3.91, + "learning_rate": 3.422056374273127e-05, + "loss": 0.0274, + "step": 12890 + }, + { + "epoch": 3.91, + "learning_rate": 3.420218500327491e-05, + "loss": 0.056, + "step": 12891 + }, + { + "epoch": 3.91, + "learning_rate": 3.418381056536771e-05, + "loss": 0.008, + "step": 12892 + }, + { + "epoch": 3.91, + "learning_rate": 3.4165440429692366e-05, + "loss": 0.0493, + "step": 12893 + }, + { + "epoch": 3.91, + "learning_rate": 3.414707459693118e-05, + "loss": 0.0685, + "step": 12894 + }, + { + "epoch": 3.92, + "learning_rate": 3.412871306776647e-05, + "loss": 0.0174, + "step": 12895 + }, + { + "epoch": 3.92, + "learning_rate": 3.41103558428803e-05, + "loss": 0.0532, + "step": 12896 + }, + { + "epoch": 3.92, + "learning_rate": 3.409200292295451e-05, + "loss": 0.0195, + "step": 12897 + }, + { + "epoch": 3.92, + "learning_rate": 3.4073654308670976e-05, + "loss": 0.0392, + "step": 12898 + }, + { + "epoch": 3.92, + "learning_rate": 3.405531000071116e-05, + "loss": 0.0604, + "step": 12899 + }, + { + "epoch": 3.92, + "learning_rate": 3.4036969999756635e-05, + "loss": 0.0484, + "step": 12900 + }, + { + "epoch": 3.92, + "learning_rate": 3.401863430648862e-05, + "loss": 0.0335, + "step": 12901 + }, + { + "epoch": 3.92, + "learning_rate": 3.400030292158821e-05, + "loss": 0.052, + "step": 12902 + }, + { + "epoch": 3.92, + "learning_rate": 3.3981975845736356e-05, + "loss": 0.0438, + "step": 12903 + }, + { + "epoch": 3.92, + "learning_rate": 3.3963653079613914e-05, + "loss": 0.038, + "step": 12904 + }, + { + "epoch": 3.92, + "learning_rate": 3.3945334623901445e-05, + "loss": 0.0519, + "step": 12905 + }, + { + "epoch": 3.92, + "learning_rate": 3.392702047927951e-05, + "loss": 0.0516, + "step": 12906 + }, + { + "epoch": 3.92, + "learning_rate": 3.390871064642837e-05, + "loss": 0.0152, + "step": 12907 + }, + { + "epoch": 3.92, + "learning_rate": 3.389040512602817e-05, + "loss": 0.0526, + "step": 12908 + }, + { + "epoch": 3.92, + "learning_rate": 3.387210391875896e-05, + "loss": 0.0496, + "step": 12909 + }, + { + "epoch": 3.92, + "learning_rate": 3.3853807025300525e-05, + "loss": 0.0462, + "step": 12910 + }, + { + "epoch": 3.92, + "learning_rate": 3.383551444633262e-05, + "loss": 0.0645, + "step": 12911 + }, + { + "epoch": 3.92, + "learning_rate": 3.3817226182534634e-05, + "loss": 0.0269, + "step": 12912 + }, + { + "epoch": 3.92, + "learning_rate": 3.3798942234586e-05, + "loss": 0.0536, + "step": 12913 + }, + { + "epoch": 3.92, + "learning_rate": 3.3780662603165944e-05, + "loss": 0.048, + "step": 12914 + }, + { + "epoch": 3.92, + "learning_rate": 3.3762387288953416e-05, + "loss": 0.0525, + "step": 12915 + }, + { + "epoch": 3.92, + "learning_rate": 3.374411629262742e-05, + "loss": 0.0382, + "step": 12916 + }, + { + "epoch": 3.92, + "learning_rate": 3.372584961486649e-05, + "loss": 0.056, + "step": 12917 + }, + { + "epoch": 3.92, + "learning_rate": 3.370758725634933e-05, + "loss": 0.0355, + "step": 12918 + }, + { + "epoch": 3.92, + "learning_rate": 3.3689329217754236e-05, + "loss": 0.0771, + "step": 12919 + }, + { + "epoch": 3.92, + "learning_rate": 3.367107549975952e-05, + "loss": 0.0359, + "step": 12920 + }, + { + "epoch": 3.92, + "learning_rate": 3.365282610304322e-05, + "loss": 0.0267, + "step": 12921 + }, + { + "epoch": 3.92, + "learning_rate": 3.363458102828322e-05, + "loss": 0.0193, + "step": 12922 + }, + { + "epoch": 3.92, + "learning_rate": 3.361634027615732e-05, + "loss": 0.0296, + "step": 12923 + }, + { + "epoch": 3.92, + "learning_rate": 3.359810384734306e-05, + "loss": 0.0326, + "step": 12924 + }, + { + "epoch": 3.92, + "learning_rate": 3.357987174251791e-05, + "loss": 0.0458, + "step": 12925 + }, + { + "epoch": 3.92, + "learning_rate": 3.356164396235911e-05, + "loss": 0.0462, + "step": 12926 + }, + { + "epoch": 3.93, + "learning_rate": 3.354342050754385e-05, + "loss": 0.0525, + "step": 12927 + }, + { + "epoch": 3.93, + "learning_rate": 3.3525201378748944e-05, + "loss": 0.0231, + "step": 12928 + }, + { + "epoch": 3.93, + "learning_rate": 3.350698657665129e-05, + "loss": 0.053, + "step": 12929 + }, + { + "epoch": 3.93, + "learning_rate": 3.3488776101927416e-05, + "loss": 0.0539, + "step": 12930 + }, + { + "epoch": 3.93, + "learning_rate": 3.3470569955253833e-05, + "loss": 0.0483, + "step": 12931 + }, + { + "epoch": 3.93, + "learning_rate": 3.345236813730696e-05, + "loss": 0.0171, + "step": 12932 + }, + { + "epoch": 3.93, + "learning_rate": 3.343417064876276e-05, + "loss": 0.0456, + "step": 12933 + }, + { + "epoch": 3.93, + "learning_rate": 3.341597749029732e-05, + "loss": 0.0439, + "step": 12934 + }, + { + "epoch": 3.93, + "learning_rate": 3.33977886625864e-05, + "loss": 0.0586, + "step": 12935 + }, + { + "epoch": 3.93, + "learning_rate": 3.337960416630574e-05, + "loss": 0.0224, + "step": 12936 + }, + { + "epoch": 3.93, + "learning_rate": 3.33614240021308e-05, + "loss": 0.0202, + "step": 12937 + }, + { + "epoch": 3.93, + "learning_rate": 3.334324817073687e-05, + "loss": 0.0507, + "step": 12938 + }, + { + "epoch": 3.93, + "learning_rate": 3.332507667279923e-05, + "loss": 0.0482, + "step": 12939 + }, + { + "epoch": 3.93, + "learning_rate": 3.33069095089928e-05, + "loss": 0.0286, + "step": 12940 + }, + { + "epoch": 3.93, + "learning_rate": 3.328874667999252e-05, + "loss": 0.0306, + "step": 12941 + }, + { + "epoch": 3.93, + "learning_rate": 3.3270588186473057e-05, + "loss": 0.0218, + "step": 12942 + }, + { + "epoch": 3.93, + "learning_rate": 3.325243402910893e-05, + "loss": 0.0428, + "step": 12943 + }, + { + "epoch": 3.93, + "learning_rate": 3.323428420857447e-05, + "loss": 0.0468, + "step": 12944 + }, + { + "epoch": 3.93, + "learning_rate": 3.3216138725543993e-05, + "loss": 0.0441, + "step": 12945 + }, + { + "epoch": 3.93, + "learning_rate": 3.3197997580691455e-05, + "loss": 0.0592, + "step": 12946 + }, + { + "epoch": 3.93, + "learning_rate": 3.3179860774690826e-05, + "loss": 0.0536, + "step": 12947 + }, + { + "epoch": 3.93, + "learning_rate": 3.316172830821578e-05, + "loss": 0.0335, + "step": 12948 + }, + { + "epoch": 3.93, + "learning_rate": 3.314360018193988e-05, + "loss": 0.0289, + "step": 12949 + }, + { + "epoch": 3.93, + "learning_rate": 3.31254763965366e-05, + "loss": 0.0279, + "step": 12950 + }, + { + "epoch": 3.93, + "learning_rate": 3.310735695267909e-05, + "loss": 0.0454, + "step": 12951 + }, + { + "epoch": 3.93, + "learning_rate": 3.308924185104057e-05, + "loss": 0.0622, + "step": 12952 + }, + { + "epoch": 3.93, + "learning_rate": 3.3071131092293795e-05, + "loss": 0.0545, + "step": 12953 + }, + { + "epoch": 3.93, + "learning_rate": 3.3053024677111595e-05, + "loss": 0.0286, + "step": 12954 + }, + { + "epoch": 3.93, + "learning_rate": 3.3034922606166645e-05, + "loss": 0.0285, + "step": 12955 + }, + { + "epoch": 3.93, + "learning_rate": 3.3016824880131266e-05, + "loss": 0.0247, + "step": 12956 + }, + { + "epoch": 3.93, + "learning_rate": 3.299873149967789e-05, + "loss": 0.0384, + "step": 12957 + }, + { + "epoch": 3.93, + "learning_rate": 3.298064246547845e-05, + "loss": 0.0098, + "step": 12958 + }, + { + "epoch": 3.93, + "learning_rate": 3.296255777820502e-05, + "loss": 0.0115, + "step": 12959 + }, + { + "epoch": 3.94, + "learning_rate": 3.294447743852932e-05, + "loss": 0.0331, + "step": 12960 + }, + { + "epoch": 3.94, + "learning_rate": 3.292640144712305e-05, + "loss": 0.0387, + "step": 12961 + }, + { + "epoch": 3.94, + "learning_rate": 3.2908329804657675e-05, + "loss": 0.0392, + "step": 12962 + }, + { + "epoch": 3.94, + "learning_rate": 3.2890262511804414e-05, + "loss": 0.0196, + "step": 12963 + }, + { + "epoch": 3.94, + "learning_rate": 3.2872199569234544e-05, + "loss": 0.0437, + "step": 12964 + }, + { + "epoch": 3.94, + "learning_rate": 3.285414097761893e-05, + "loss": 0.0447, + "step": 12965 + }, + { + "epoch": 3.94, + "learning_rate": 3.2836086737628495e-05, + "loss": 0.0518, + "step": 12966 + }, + { + "epoch": 3.94, + "learning_rate": 3.2818036849933854e-05, + "loss": 0.038, + "step": 12967 + }, + { + "epoch": 3.94, + "learning_rate": 3.2799991315205517e-05, + "loss": 0.0441, + "step": 12968 + }, + { + "epoch": 3.94, + "learning_rate": 3.278195013411378e-05, + "loss": 0.0302, + "step": 12969 + }, + { + "epoch": 3.94, + "learning_rate": 3.2763913307328885e-05, + "loss": 0.0103, + "step": 12970 + }, + { + "epoch": 3.94, + "learning_rate": 3.274588083552078e-05, + "loss": 0.0106, + "step": 12971 + }, + { + "epoch": 3.94, + "learning_rate": 3.272785271935941e-05, + "loss": 0.091, + "step": 12972 + }, + { + "epoch": 3.94, + "learning_rate": 3.27098289595144e-05, + "loss": 0.0671, + "step": 12973 + }, + { + "epoch": 3.94, + "learning_rate": 3.269180955665526e-05, + "loss": 0.053, + "step": 12974 + }, + { + "epoch": 3.94, + "learning_rate": 3.267379451145143e-05, + "loss": 0.0155, + "step": 12975 + }, + { + "epoch": 3.94, + "learning_rate": 3.265578382457203e-05, + "loss": 0.0188, + "step": 12976 + }, + { + "epoch": 3.94, + "learning_rate": 3.2637777496686214e-05, + "loss": 0.0492, + "step": 12977 + }, + { + "epoch": 3.94, + "learning_rate": 3.261977552846278e-05, + "loss": 0.0498, + "step": 12978 + }, + { + "epoch": 3.94, + "learning_rate": 3.260177792057044e-05, + "loss": 0.0428, + "step": 12979 + }, + { + "epoch": 3.94, + "learning_rate": 3.258378467367782e-05, + "loss": 0.0502, + "step": 12980 + }, + { + "epoch": 3.94, + "learning_rate": 3.256579578845324e-05, + "loss": 0.0406, + "step": 12981 + }, + { + "epoch": 3.94, + "learning_rate": 3.2547811265565046e-05, + "loss": 0.079, + "step": 12982 + }, + { + "epoch": 3.94, + "learning_rate": 3.252983110568116e-05, + "loss": 0.0773, + "step": 12983 + }, + { + "epoch": 3.94, + "learning_rate": 3.251185530946962e-05, + "loss": 0.0096, + "step": 12984 + }, + { + "epoch": 3.94, + "learning_rate": 3.249388387759808e-05, + "loss": 0.0328, + "step": 12985 + }, + { + "epoch": 3.94, + "learning_rate": 3.247591681073421e-05, + "loss": 0.0236, + "step": 12986 + }, + { + "epoch": 3.94, + "learning_rate": 3.2457954109545356e-05, + "loss": 0.0462, + "step": 12987 + }, + { + "epoch": 3.94, + "learning_rate": 3.243999577469886e-05, + "loss": 0.0293, + "step": 12988 + }, + { + "epoch": 3.94, + "learning_rate": 3.2422041806861774e-05, + "loss": 0.0444, + "step": 12989 + }, + { + "epoch": 3.94, + "learning_rate": 3.240409220670099e-05, + "loss": 0.0375, + "step": 12990 + }, + { + "epoch": 3.94, + "learning_rate": 3.23861469748834e-05, + "loss": 0.0424, + "step": 12991 + }, + { + "epoch": 3.94, + "learning_rate": 3.236820611207549e-05, + "loss": 0.03, + "step": 12992 + }, + { + "epoch": 3.95, + "learning_rate": 3.235026961894385e-05, + "loss": 0.0462, + "step": 12993 + }, + { + "epoch": 3.95, + "learning_rate": 3.233233749615462e-05, + "loss": 0.0449, + "step": 12994 + }, + { + "epoch": 3.95, + "learning_rate": 3.231440974437398e-05, + "loss": 0.0315, + "step": 12995 + }, + { + "epoch": 3.95, + "learning_rate": 3.2296486364267955e-05, + "loss": 0.089, + "step": 12996 + }, + { + "epoch": 3.95, + "learning_rate": 3.227856735650225e-05, + "loss": 0.0303, + "step": 12997 + }, + { + "epoch": 3.95, + "learning_rate": 3.226065272174266e-05, + "loss": 0.0681, + "step": 12998 + }, + { + "epoch": 3.95, + "learning_rate": 3.224274246065445e-05, + "loss": 0.0323, + "step": 12999 + }, + { + "epoch": 3.95, + "learning_rate": 3.222483657390309e-05, + "loss": 0.091, + "step": 13000 + }, + { + "epoch": 3.95, + "learning_rate": 3.2206935062153625e-05, + "loss": 0.0423, + "step": 13001 + }, + { + "epoch": 3.95, + "learning_rate": 3.2189037926071144e-05, + "loss": 0.0477, + "step": 13002 + }, + { + "epoch": 3.95, + "learning_rate": 3.217114516632042e-05, + "loss": 0.0227, + "step": 13003 + }, + { + "epoch": 3.95, + "learning_rate": 3.21532567835661e-05, + "loss": 0.0512, + "step": 13004 + }, + { + "epoch": 3.95, + "learning_rate": 3.2135372778472726e-05, + "loss": 0.0135, + "step": 13005 + }, + { + "epoch": 3.95, + "learning_rate": 3.211749315170459e-05, + "loss": 0.0154, + "step": 13006 + }, + { + "epoch": 3.95, + "learning_rate": 3.209961790392595e-05, + "loss": 0.057, + "step": 13007 + }, + { + "epoch": 3.95, + "learning_rate": 3.208174703580074e-05, + "loss": 0.0131, + "step": 13008 + }, + { + "epoch": 3.95, + "learning_rate": 3.206388054799284e-05, + "loss": 0.0534, + "step": 13009 + }, + { + "epoch": 3.95, + "learning_rate": 3.2046018441165896e-05, + "loss": 0.0302, + "step": 13010 + }, + { + "epoch": 3.95, + "learning_rate": 3.202816071598351e-05, + "loss": 0.0881, + "step": 13011 + }, + { + "epoch": 3.95, + "learning_rate": 3.201030737310897e-05, + "loss": 0.0541, + "step": 13012 + }, + { + "epoch": 3.95, + "learning_rate": 3.199245841320555e-05, + "loss": 0.0217, + "step": 13013 + }, + { + "epoch": 3.95, + "learning_rate": 3.197461383693623e-05, + "loss": 0.0603, + "step": 13014 + }, + { + "epoch": 3.95, + "learning_rate": 3.195677364496387e-05, + "loss": 0.0292, + "step": 13015 + }, + { + "epoch": 3.95, + "learning_rate": 3.1938937837951244e-05, + "loss": 0.0259, + "step": 13016 + }, + { + "epoch": 3.95, + "learning_rate": 3.1921106416560834e-05, + "loss": 0.0481, + "step": 13017 + }, + { + "epoch": 3.95, + "learning_rate": 3.190327938145511e-05, + "loss": 0.0535, + "step": 13018 + }, + { + "epoch": 3.95, + "learning_rate": 3.1885456733296214e-05, + "loss": 0.0471, + "step": 13019 + }, + { + "epoch": 3.95, + "learning_rate": 3.18676384727462e-05, + "loss": 0.0249, + "step": 13020 + }, + { + "epoch": 3.95, + "learning_rate": 3.184982460046704e-05, + "loss": 0.0246, + "step": 13021 + }, + { + "epoch": 3.95, + "learning_rate": 3.183201511712039e-05, + "loss": 0.0403, + "step": 13022 + }, + { + "epoch": 3.95, + "learning_rate": 3.181421002336793e-05, + "loss": 0.0526, + "step": 13023 + }, + { + "epoch": 3.95, + "learning_rate": 3.179640931987091e-05, + "loss": 0.0324, + "step": 13024 + }, + { + "epoch": 3.95, + "learning_rate": 3.1778613007290694e-05, + "loss": 0.0477, + "step": 13025 + }, + { + "epoch": 3.96, + "learning_rate": 3.1760821086288276e-05, + "loss": 0.0425, + "step": 13026 + }, + { + "epoch": 3.96, + "learning_rate": 3.1743033557524676e-05, + "loss": 0.0835, + "step": 13027 + }, + { + "epoch": 3.96, + "learning_rate": 3.172525042166057e-05, + "loss": 0.0763, + "step": 13028 + }, + { + "epoch": 3.96, + "learning_rate": 3.1707471679356556e-05, + "loss": 0.0182, + "step": 13029 + }, + { + "epoch": 3.96, + "learning_rate": 3.168969733127311e-05, + "loss": 0.0233, + "step": 13030 + }, + { + "epoch": 3.96, + "learning_rate": 3.167192737807043e-05, + "loss": 0.0599, + "step": 13031 + }, + { + "epoch": 3.96, + "learning_rate": 3.165416182040869e-05, + "loss": 0.0318, + "step": 13032 + }, + { + "epoch": 3.96, + "learning_rate": 3.1636400658947805e-05, + "loss": 0.0345, + "step": 13033 + }, + { + "epoch": 3.96, + "learning_rate": 3.161864389434752e-05, + "loss": 0.0433, + "step": 13034 + }, + { + "epoch": 3.96, + "learning_rate": 3.1600891527267425e-05, + "loss": 0.0551, + "step": 13035 + }, + { + "epoch": 3.96, + "learning_rate": 3.1583143558367016e-05, + "loss": 0.0297, + "step": 13036 + }, + { + "epoch": 3.96, + "learning_rate": 3.156539998830559e-05, + "loss": 0.0739, + "step": 13037 + }, + { + "epoch": 3.96, + "learning_rate": 3.154766081774228e-05, + "loss": 0.0436, + "step": 13038 + }, + { + "epoch": 3.96, + "learning_rate": 3.1529926047336e-05, + "loss": 0.0513, + "step": 13039 + }, + { + "epoch": 3.96, + "learning_rate": 3.15121956777455e-05, + "loss": 0.0378, + "step": 13040 + }, + { + "epoch": 3.96, + "learning_rate": 3.149446970962953e-05, + "loss": 0.0325, + "step": 13041 + }, + { + "epoch": 3.96, + "learning_rate": 3.1476748143646435e-05, + "loss": 0.0233, + "step": 13042 + }, + { + "epoch": 3.96, + "learning_rate": 3.145903098045462e-05, + "loss": 0.0267, + "step": 13043 + }, + { + "epoch": 3.96, + "learning_rate": 3.1441318220712204e-05, + "loss": 0.0717, + "step": 13044 + }, + { + "epoch": 3.96, + "learning_rate": 3.142360986507709e-05, + "loss": 0.0505, + "step": 13045 + }, + { + "epoch": 3.96, + "learning_rate": 3.1405905914207204e-05, + "loss": 0.0473, + "step": 13046 + }, + { + "epoch": 3.96, + "learning_rate": 3.1388206368760085e-05, + "loss": 0.0223, + "step": 13047 + }, + { + "epoch": 3.96, + "learning_rate": 3.137051122939336e-05, + "loss": 0.0436, + "step": 13048 + }, + { + "epoch": 3.96, + "learning_rate": 3.135282049676419e-05, + "loss": 0.0693, + "step": 13049 + }, + { + "epoch": 3.96, + "learning_rate": 3.133513417152983e-05, + "loss": 0.0737, + "step": 13050 + }, + { + "epoch": 3.96, + "learning_rate": 3.131745225434723e-05, + "loss": 0.0274, + "step": 13051 + }, + { + "epoch": 3.96, + "learning_rate": 3.1299774745873276e-05, + "loss": 0.0602, + "step": 13052 + }, + { + "epoch": 3.96, + "learning_rate": 3.128210164676457e-05, + "loss": 0.0316, + "step": 13053 + }, + { + "epoch": 3.96, + "learning_rate": 3.126443295767769e-05, + "loss": 0.017, + "step": 13054 + }, + { + "epoch": 3.96, + "learning_rate": 3.124676867926893e-05, + "loss": 0.0004, + "step": 13055 + }, + { + "epoch": 3.96, + "learning_rate": 3.122910881219444e-05, + "loss": 0.0177, + "step": 13056 + }, + { + "epoch": 3.96, + "learning_rate": 3.12114533571103e-05, + "loss": 0.0335, + "step": 13057 + }, + { + "epoch": 3.96, + "learning_rate": 3.11938023146723e-05, + "loss": 0.0239, + "step": 13058 + }, + { + "epoch": 3.97, + "learning_rate": 3.117615568553617e-05, + "loss": 0.0527, + "step": 13059 + }, + { + "epoch": 3.97, + "learning_rate": 3.1158513470357415e-05, + "loss": 0.0102, + "step": 13060 + }, + { + "epoch": 3.97, + "learning_rate": 3.114087566979134e-05, + "loss": 0.0196, + "step": 13061 + }, + { + "epoch": 3.97, + "learning_rate": 3.112324228449322e-05, + "loss": 0.0592, + "step": 13062 + }, + { + "epoch": 3.97, + "learning_rate": 3.110561331511801e-05, + "loss": 0.0243, + "step": 13063 + }, + { + "epoch": 3.97, + "learning_rate": 3.1087988762320686e-05, + "loss": 0.0512, + "step": 13064 + }, + { + "epoch": 3.97, + "learning_rate": 3.107036862675581e-05, + "loss": 0.0267, + "step": 13065 + }, + { + "epoch": 3.97, + "learning_rate": 3.1052752909078006e-05, + "loss": 0.0278, + "step": 13066 + }, + { + "epoch": 3.97, + "learning_rate": 3.1035141609941584e-05, + "loss": 0.0631, + "step": 13067 + }, + { + "epoch": 3.97, + "learning_rate": 3.101753473000082e-05, + "loss": 0.0243, + "step": 13068 + }, + { + "epoch": 3.97, + "learning_rate": 3.099993226990974e-05, + "loss": 0.0402, + "step": 13069 + }, + { + "epoch": 3.97, + "learning_rate": 3.0982334230322166e-05, + "loss": 0.0258, + "step": 13070 + }, + { + "epoch": 3.97, + "learning_rate": 3.0964740611891906e-05, + "loss": 0.041, + "step": 13071 + }, + { + "epoch": 3.97, + "learning_rate": 3.094715141527242e-05, + "loss": 0.0493, + "step": 13072 + }, + { + "epoch": 3.97, + "learning_rate": 3.0929566641117174e-05, + "loss": 0.0803, + "step": 13073 + }, + { + "epoch": 3.97, + "learning_rate": 3.091198629007937e-05, + "loss": 0.056, + "step": 13074 + }, + { + "epoch": 3.97, + "learning_rate": 3.089441036281205e-05, + "loss": 0.0212, + "step": 13075 + }, + { + "epoch": 3.97, + "learning_rate": 3.0876838859968055e-05, + "loss": 0.0287, + "step": 13076 + }, + { + "epoch": 3.97, + "learning_rate": 3.085927178220018e-05, + "loss": 0.0231, + "step": 13077 + }, + { + "epoch": 3.97, + "learning_rate": 3.084170913016103e-05, + "loss": 0.0345, + "step": 13078 + }, + { + "epoch": 3.97, + "learning_rate": 3.082415090450294e-05, + "loss": 0.047, + "step": 13079 + }, + { + "epoch": 3.97, + "learning_rate": 3.0806597105878186e-05, + "loss": 0.0203, + "step": 13080 + }, + { + "epoch": 3.97, + "learning_rate": 3.078904773493875e-05, + "loss": 0.0347, + "step": 13081 + }, + { + "epoch": 3.97, + "learning_rate": 3.077150279233666e-05, + "loss": 0.0707, + "step": 13082 + }, + { + "epoch": 3.97, + "learning_rate": 3.0753962278723564e-05, + "loss": 0.0278, + "step": 13083 + }, + { + "epoch": 3.97, + "learning_rate": 3.073642619475114e-05, + "loss": 0.0525, + "step": 13084 + }, + { + "epoch": 3.97, + "learning_rate": 3.0718894541070734e-05, + "loss": 0.0783, + "step": 13085 + }, + { + "epoch": 3.97, + "learning_rate": 3.0701367318333556e-05, + "loss": 0.0401, + "step": 13086 + }, + { + "epoch": 3.97, + "learning_rate": 3.06838445271908e-05, + "loss": 0.0362, + "step": 13087 + }, + { + "epoch": 3.97, + "learning_rate": 3.066632616829328e-05, + "loss": 0.0525, + "step": 13088 + }, + { + "epoch": 3.97, + "learning_rate": 3.064881224229188e-05, + "loss": 0.064, + "step": 13089 + }, + { + "epoch": 3.97, + "learning_rate": 3.0631302749837026e-05, + "loss": 0.0212, + "step": 13090 + }, + { + "epoch": 3.97, + "learning_rate": 3.061379769157927e-05, + "loss": 0.0321, + "step": 13091 + }, + { + "epoch": 3.98, + "learning_rate": 3.0596297068168794e-05, + "loss": 0.0488, + "step": 13092 + }, + { + "epoch": 3.98, + "learning_rate": 3.0578800880255774e-05, + "loss": 0.0362, + "step": 13093 + }, + { + "epoch": 3.98, + "learning_rate": 3.056130912849011e-05, + "loss": 0.0307, + "step": 13094 + }, + { + "epoch": 3.98, + "learning_rate": 3.05438218135215e-05, + "loss": 0.0423, + "step": 13095 + }, + { + "epoch": 3.98, + "learning_rate": 3.0526338935999674e-05, + "loss": 0.0634, + "step": 13096 + }, + { + "epoch": 3.98, + "learning_rate": 3.050886049657395e-05, + "loss": 0.0372, + "step": 13097 + }, + { + "epoch": 3.98, + "learning_rate": 3.0491386495893683e-05, + "loss": 0.0313, + "step": 13098 + }, + { + "epoch": 3.98, + "learning_rate": 3.047391693460797e-05, + "loss": 0.0441, + "step": 13099 + }, + { + "epoch": 3.98, + "learning_rate": 3.0456451813365683e-05, + "loss": 0.0321, + "step": 13100 + }, + { + "epoch": 3.98, + "learning_rate": 3.0438991132815698e-05, + "loss": 0.0557, + "step": 13101 + }, + { + "epoch": 3.98, + "learning_rate": 3.0421534893606542e-05, + "loss": 0.0436, + "step": 13102 + }, + { + "epoch": 3.98, + "learning_rate": 3.0404083096386745e-05, + "loss": 0.0557, + "step": 13103 + }, + { + "epoch": 3.98, + "learning_rate": 3.0386635741804537e-05, + "loss": 0.0162, + "step": 13104 + }, + { + "epoch": 3.98, + "learning_rate": 3.0369192830508072e-05, + "loss": 0.1158, + "step": 13105 + }, + { + "epoch": 3.98, + "learning_rate": 3.0351754363145224e-05, + "loss": 0.0309, + "step": 13106 + }, + { + "epoch": 3.98, + "learning_rate": 3.0334320340363866e-05, + "loss": 0.05, + "step": 13107 + }, + { + "epoch": 3.98, + "learning_rate": 3.0316890762811574e-05, + "loss": 0.0568, + "step": 13108 + }, + { + "epoch": 3.98, + "learning_rate": 3.029946563113585e-05, + "loss": 0.019, + "step": 13109 + }, + { + "epoch": 3.98, + "learning_rate": 3.028204494598396e-05, + "loss": 0.0334, + "step": 13110 + }, + { + "epoch": 3.98, + "learning_rate": 3.026462870800298e-05, + "loss": 0.055, + "step": 13111 + }, + { + "epoch": 3.98, + "learning_rate": 3.0247216917839966e-05, + "loss": 0.0149, + "step": 13112 + }, + { + "epoch": 3.98, + "learning_rate": 3.022980957614165e-05, + "loss": 0.054, + "step": 13113 + }, + { + "epoch": 3.98, + "learning_rate": 3.0212406683554696e-05, + "loss": 0.0214, + "step": 13114 + }, + { + "epoch": 3.98, + "learning_rate": 3.0195008240725588e-05, + "loss": 0.05, + "step": 13115 + }, + { + "epoch": 3.98, + "learning_rate": 3.0177614248300575e-05, + "loss": 0.072, + "step": 13116 + }, + { + "epoch": 3.98, + "learning_rate": 3.016022470692579e-05, + "loss": 0.0424, + "step": 13117 + }, + { + "epoch": 3.98, + "learning_rate": 3.0142839617247232e-05, + "loss": 0.0526, + "step": 13118 + }, + { + "epoch": 3.98, + "learning_rate": 3.012545897991074e-05, + "loss": 0.0472, + "step": 13119 + }, + { + "epoch": 3.98, + "learning_rate": 3.010808279556192e-05, + "loss": 0.0472, + "step": 13120 + }, + { + "epoch": 3.98, + "learning_rate": 3.0090711064846233e-05, + "loss": 0.0246, + "step": 13121 + }, + { + "epoch": 3.98, + "learning_rate": 3.0073343788408977e-05, + "loss": 0.0473, + "step": 13122 + }, + { + "epoch": 3.98, + "learning_rate": 3.005598096689535e-05, + "loss": 0.0416, + "step": 13123 + }, + { + "epoch": 3.98, + "learning_rate": 3.0038622600950253e-05, + "loss": 0.0251, + "step": 13124 + }, + { + "epoch": 3.99, + "learning_rate": 3.002126869121857e-05, + "loss": 0.0112, + "step": 13125 + }, + { + "epoch": 3.99, + "learning_rate": 3.0003919238344935e-05, + "loss": 0.0322, + "step": 13126 + }, + { + "epoch": 3.99, + "learning_rate": 2.9986574242973787e-05, + "loss": 0.0453, + "step": 13127 + }, + { + "epoch": 3.99, + "learning_rate": 2.996923370574949e-05, + "loss": 0.0146, + "step": 13128 + }, + { + "epoch": 3.99, + "learning_rate": 2.9951897627316147e-05, + "loss": 0.0584, + "step": 13129 + }, + { + "epoch": 3.99, + "learning_rate": 2.993456600831784e-05, + "loss": 0.0121, + "step": 13130 + }, + { + "epoch": 3.99, + "learning_rate": 2.991723884939824e-05, + "loss": 0.032, + "step": 13131 + }, + { + "epoch": 3.99, + "learning_rate": 2.9899916151201116e-05, + "loss": 0.0446, + "step": 13132 + }, + { + "epoch": 3.99, + "learning_rate": 2.9882597914369867e-05, + "loss": 0.026, + "step": 13133 + }, + { + "epoch": 3.99, + "learning_rate": 2.9865284139547912e-05, + "loss": 0.0317, + "step": 13134 + }, + { + "epoch": 3.99, + "learning_rate": 2.9847974827378356e-05, + "loss": 0.0441, + "step": 13135 + }, + { + "epoch": 3.99, + "learning_rate": 2.9830669978504167e-05, + "loss": 0.0345, + "step": 13136 + }, + { + "epoch": 3.99, + "learning_rate": 2.9813369593568214e-05, + "loss": 0.0429, + "step": 13137 + }, + { + "epoch": 3.99, + "learning_rate": 2.9796073673213117e-05, + "loss": 0.0262, + "step": 13138 + }, + { + "epoch": 3.99, + "learning_rate": 2.977878221808141e-05, + "loss": 0.0599, + "step": 13139 + }, + { + "epoch": 3.99, + "learning_rate": 2.9761495228815413e-05, + "loss": 0.0213, + "step": 13140 + }, + { + "epoch": 3.99, + "learning_rate": 2.974421270605723e-05, + "loss": 0.0437, + "step": 13141 + }, + { + "epoch": 3.99, + "learning_rate": 2.972693465044893e-05, + "loss": 0.042, + "step": 13142 + }, + { + "epoch": 3.99, + "learning_rate": 2.9709661062632267e-05, + "loss": 0.0238, + "step": 13143 + }, + { + "epoch": 3.99, + "learning_rate": 2.9692391943248994e-05, + "loss": 0.057, + "step": 13144 + }, + { + "epoch": 3.99, + "learning_rate": 2.9675127292940548e-05, + "loss": 0.0389, + "step": 13145 + }, + { + "epoch": 3.99, + "learning_rate": 2.9657867112348298e-05, + "loss": 0.0765, + "step": 13146 + }, + { + "epoch": 3.99, + "learning_rate": 2.964061140211332e-05, + "loss": 0.0528, + "step": 13147 + }, + { + "epoch": 3.99, + "learning_rate": 2.9623360162876723e-05, + "loss": 0.0664, + "step": 13148 + }, + { + "epoch": 3.99, + "learning_rate": 2.9606113395279275e-05, + "loss": 0.0189, + "step": 13149 + }, + { + "epoch": 3.99, + "learning_rate": 2.9588871099961687e-05, + "loss": 0.035, + "step": 13150 + }, + { + "epoch": 3.99, + "learning_rate": 2.9571633277564433e-05, + "loss": 0.0618, + "step": 13151 + }, + { + "epoch": 3.99, + "learning_rate": 2.95543999287278e-05, + "loss": 0.0439, + "step": 13152 + }, + { + "epoch": 3.99, + "learning_rate": 2.9537171054092062e-05, + "loss": 0.0554, + "step": 13153 + }, + { + "epoch": 3.99, + "learning_rate": 2.9519946654297123e-05, + "loss": 0.0346, + "step": 13154 + }, + { + "epoch": 3.99, + "learning_rate": 2.9502726729982934e-05, + "loss": 0.0471, + "step": 13155 + }, + { + "epoch": 3.99, + "learning_rate": 2.948551128178902e-05, + "loss": 0.0447, + "step": 13156 + }, + { + "epoch": 3.99, + "learning_rate": 2.9468300310354986e-05, + "loss": 0.0283, + "step": 13157 + }, + { + "epoch": 4.0, + "learning_rate": 2.9451093816320103e-05, + "loss": 0.0331, + "step": 13158 + }, + { + "epoch": 4.0, + "learning_rate": 2.9433891800323595e-05, + "loss": 0.0218, + "step": 13159 + }, + { + "epoch": 4.0, + "learning_rate": 2.94166942630045e-05, + "loss": 0.0432, + "step": 13160 + }, + { + "epoch": 4.0, + "learning_rate": 2.939950120500155e-05, + "loss": 0.0435, + "step": 13161 + }, + { + "epoch": 4.0, + "learning_rate": 2.9382312626953497e-05, + "loss": 0.0475, + "step": 13162 + }, + { + "epoch": 4.0, + "learning_rate": 2.936512852949879e-05, + "loss": 0.0643, + "step": 13163 + }, + { + "epoch": 4.0, + "learning_rate": 2.9347948913275832e-05, + "loss": 0.0321, + "step": 13164 + }, + { + "epoch": 4.0, + "learning_rate": 2.9330773778922767e-05, + "loss": 0.0506, + "step": 13165 + }, + { + "epoch": 4.0, + "learning_rate": 2.931360312707755e-05, + "loss": 0.0572, + "step": 13166 + }, + { + "epoch": 4.0, + "learning_rate": 2.9296436958378123e-05, + "loss": 0.0591, + "step": 13167 + }, + { + "epoch": 4.0, + "learning_rate": 2.9279275273462054e-05, + "loss": 0.0473, + "step": 13168 + }, + { + "epoch": 4.0, + "learning_rate": 2.926211807296694e-05, + "loss": 0.0403, + "step": 13169 + }, + { + "epoch": 4.0, + "learning_rate": 2.9244965357530066e-05, + "loss": 0.0219, + "step": 13170 + }, + { + "epoch": 4.0, + "learning_rate": 2.9227817127788623e-05, + "loss": 0.0456, + "step": 13171 + }, + { + "epoch": 4.0, + "learning_rate": 2.9210673384379585e-05, + "loss": 0.0261, + "step": 13172 + }, + { + "epoch": 4.0, + "learning_rate": 2.9193534127939845e-05, + "loss": 0.0479, + "step": 13173 + }, + { + "epoch": 4.0, + "learning_rate": 2.9176399359106023e-05, + "loss": 0.0529, + "step": 13174 + }, + { + "epoch": 4.0, + "learning_rate": 2.915926907851468e-05, + "loss": 0.0449, + "step": 13175 + }, + { + "epoch": 4.0, + "learning_rate": 2.9142143286802138e-05, + "loss": 0.0239, + "step": 13176 + }, + { + "epoch": 4.0, + "learning_rate": 2.912502198460451e-05, + "loss": 0.0228, + "step": 13177 + }, + { + "epoch": 4.0, + "learning_rate": 2.9107905172557894e-05, + "loss": 0.0277, + "step": 13178 + }, + { + "epoch": 4.0, + "learning_rate": 2.9090792851298057e-05, + "loss": 0.0351, + "step": 13179 + }, + { + "epoch": 4.0, + "learning_rate": 2.9073685021460737e-05, + "loss": 0.0103, + "step": 13180 + }, + { + "epoch": 4.0, + "learning_rate": 2.9056581683681407e-05, + "loss": 0.0371, + "step": 13181 + }, + { + "epoch": 4.0, + "learning_rate": 2.9039482838595364e-05, + "loss": 0.035, + "step": 13182 + }, + { + "epoch": 4.0, + "learning_rate": 2.9022388486837844e-05, + "loss": 0.0459, + "step": 13183 + }, + { + "epoch": 4.0, + "learning_rate": 2.900529862904381e-05, + "loss": 0.0485, + "step": 13184 + }, + { + "epoch": 4.0, + "learning_rate": 2.8988213265848155e-05, + "loss": 0.038, + "step": 13185 + }, + { + "epoch": 4.0, + "learning_rate": 2.89711323978855e-05, + "loss": 0.0214, + "step": 13186 + }, + { + "epoch": 4.0, + "learning_rate": 2.8954056025790372e-05, + "loss": 0.0628, + "step": 13187 + }, + { + "epoch": 4.0, + "learning_rate": 2.893698415019705e-05, + "loss": 0.032, + "step": 13188 + }, + { + "epoch": 4.0, + "learning_rate": 2.8919916771739793e-05, + "loss": 0.0329, + "step": 13189 + }, + { + "epoch": 4.0, + "learning_rate": 2.890285389105254e-05, + "loss": 0.0227, + "step": 13190 + }, + { + "epoch": 4.01, + "learning_rate": 2.888579550876917e-05, + "loss": 0.0228, + "step": 13191 + }, + { + "epoch": 4.01, + "learning_rate": 2.8868741625523344e-05, + "loss": 0.0381, + "step": 13192 + }, + { + "epoch": 4.01, + "learning_rate": 2.8851692241948522e-05, + "loss": 0.0329, + "step": 13193 + }, + { + "epoch": 4.01, + "learning_rate": 2.8834647358678092e-05, + "loss": 0.0118, + "step": 13194 + }, + { + "epoch": 4.01, + "learning_rate": 2.8817606976345154e-05, + "loss": 0.0244, + "step": 13195 + }, + { + "epoch": 4.01, + "learning_rate": 2.8800571095582847e-05, + "loss": 0.0429, + "step": 13196 + }, + { + "epoch": 4.01, + "learning_rate": 2.878353971702382e-05, + "loss": 0.0467, + "step": 13197 + }, + { + "epoch": 4.01, + "learning_rate": 2.8766512841300875e-05, + "loss": 0.0239, + "step": 13198 + }, + { + "epoch": 4.01, + "learning_rate": 2.8749490469046415e-05, + "loss": 0.0333, + "step": 13199 + }, + { + "epoch": 4.01, + "learning_rate": 2.873247260089283e-05, + "loss": 0.0717, + "step": 13200 + }, + { + "epoch": 4.01, + "learning_rate": 2.871545923747235e-05, + "loss": 0.0174, + "step": 13201 + }, + { + "epoch": 4.01, + "learning_rate": 2.8698450379416814e-05, + "loss": 0.0093, + "step": 13202 + }, + { + "epoch": 4.01, + "learning_rate": 2.8681446027358174e-05, + "loss": 0.0173, + "step": 13203 + }, + { + "epoch": 4.01, + "learning_rate": 2.8664446181928002e-05, + "loss": 0.037, + "step": 13204 + }, + { + "epoch": 4.01, + "learning_rate": 2.8647450843757897e-05, + "loss": 0.0025, + "step": 13205 + }, + { + "epoch": 4.01, + "learning_rate": 2.8630460013479117e-05, + "loss": 0.0422, + "step": 13206 + }, + { + "epoch": 4.01, + "learning_rate": 2.8613473691722794e-05, + "loss": 0.0393, + "step": 13207 + }, + { + "epoch": 4.01, + "learning_rate": 2.8596491879120004e-05, + "loss": 0.0182, + "step": 13208 + }, + { + "epoch": 4.01, + "learning_rate": 2.8579514576301493e-05, + "loss": 0.015, + "step": 13209 + }, + { + "epoch": 4.01, + "learning_rate": 2.856254178389799e-05, + "loss": 0.0397, + "step": 13210 + }, + { + "epoch": 4.01, + "learning_rate": 2.854557350253994e-05, + "loss": 0.0282, + "step": 13211 + }, + { + "epoch": 4.01, + "learning_rate": 2.8528609732857678e-05, + "loss": 0.0259, + "step": 13212 + }, + { + "epoch": 4.01, + "learning_rate": 2.8511650475481325e-05, + "loss": 0.0234, + "step": 13213 + }, + { + "epoch": 4.01, + "learning_rate": 2.8494695731040913e-05, + "loss": 0.0528, + "step": 13214 + }, + { + "epoch": 4.01, + "learning_rate": 2.8477745500166206e-05, + "loss": 0.0175, + "step": 13215 + }, + { + "epoch": 4.01, + "learning_rate": 2.8460799783486947e-05, + "loss": 0.0249, + "step": 13216 + }, + { + "epoch": 4.01, + "learning_rate": 2.8443858581632557e-05, + "loss": 0.0342, + "step": 13217 + }, + { + "epoch": 4.01, + "learning_rate": 2.8426921895232325e-05, + "loss": 0.0246, + "step": 13218 + }, + { + "epoch": 4.01, + "learning_rate": 2.8409989724915466e-05, + "loss": 0.0116, + "step": 13219 + }, + { + "epoch": 4.01, + "learning_rate": 2.8393062071310894e-05, + "loss": 0.0185, + "step": 13220 + }, + { + "epoch": 4.01, + "learning_rate": 2.8376138935047526e-05, + "loss": 0.0338, + "step": 13221 + }, + { + "epoch": 4.01, + "learning_rate": 2.8359220316753872e-05, + "loss": 0.0232, + "step": 13222 + }, + { + "epoch": 4.01, + "learning_rate": 2.834230621705848e-05, + "loss": 0.0238, + "step": 13223 + }, + { + "epoch": 4.02, + "learning_rate": 2.8325396636589682e-05, + "loss": 0.0169, + "step": 13224 + }, + { + "epoch": 4.02, + "learning_rate": 2.8308491575975557e-05, + "loss": 0.0206, + "step": 13225 + }, + { + "epoch": 4.02, + "learning_rate": 2.8291591035844185e-05, + "loss": 0.0465, + "step": 13226 + }, + { + "epoch": 4.02, + "learning_rate": 2.8274695016823218e-05, + "loss": 0.0231, + "step": 13227 + }, + { + "epoch": 4.02, + "learning_rate": 2.825780351954042e-05, + "loss": 0.0322, + "step": 13228 + }, + { + "epoch": 4.02, + "learning_rate": 2.824091654462318e-05, + "loss": 0.0204, + "step": 13229 + }, + { + "epoch": 4.02, + "learning_rate": 2.822403409269886e-05, + "loss": 0.025, + "step": 13230 + }, + { + "epoch": 4.02, + "learning_rate": 2.8207156164394583e-05, + "loss": 0.0281, + "step": 13231 + }, + { + "epoch": 4.02, + "learning_rate": 2.8190282760337252e-05, + "loss": 0.008, + "step": 13232 + }, + { + "epoch": 4.02, + "learning_rate": 2.817341388115376e-05, + "loss": 0.0294, + "step": 13233 + }, + { + "epoch": 4.02, + "learning_rate": 2.8156549527470647e-05, + "loss": 0.0196, + "step": 13234 + }, + { + "epoch": 4.02, + "learning_rate": 2.8139689699914452e-05, + "loss": 0.0435, + "step": 13235 + }, + { + "epoch": 4.02, + "learning_rate": 2.8122834399111433e-05, + "loss": 0.0051, + "step": 13236 + }, + { + "epoch": 4.02, + "learning_rate": 2.8105983625687715e-05, + "loss": 0.0257, + "step": 13237 + }, + { + "epoch": 4.02, + "learning_rate": 2.8089137380269204e-05, + "loss": 0.0134, + "step": 13238 + }, + { + "epoch": 4.02, + "learning_rate": 2.8072295663481797e-05, + "loss": 0.0566, + "step": 13239 + }, + { + "epoch": 4.02, + "learning_rate": 2.805545847595101e-05, + "loss": 0.035, + "step": 13240 + }, + { + "epoch": 4.02, + "learning_rate": 2.803862581830234e-05, + "loss": 0.0187, + "step": 13241 + }, + { + "epoch": 4.02, + "learning_rate": 2.802179769116114e-05, + "loss": 0.0212, + "step": 13242 + }, + { + "epoch": 4.02, + "learning_rate": 2.8004974095152394e-05, + "loss": 0.0179, + "step": 13243 + }, + { + "epoch": 4.02, + "learning_rate": 2.798815503090115e-05, + "loss": 0.0264, + "step": 13244 + }, + { + "epoch": 4.02, + "learning_rate": 2.7971340499032107e-05, + "loss": 0.025, + "step": 13245 + }, + { + "epoch": 4.02, + "learning_rate": 2.7954530500169954e-05, + "loss": 0.0209, + "step": 13246 + }, + { + "epoch": 4.02, + "learning_rate": 2.79377250349391e-05, + "loss": 0.0342, + "step": 13247 + }, + { + "epoch": 4.02, + "learning_rate": 2.792092410396379e-05, + "loss": 0.0378, + "step": 13248 + }, + { + "epoch": 4.02, + "learning_rate": 2.7904127707868178e-05, + "loss": 0.0236, + "step": 13249 + }, + { + "epoch": 4.02, + "learning_rate": 2.7887335847276143e-05, + "loss": 0.0335, + "step": 13250 + }, + { + "epoch": 4.02, + "learning_rate": 2.7870548522811532e-05, + "loss": 0.0225, + "step": 13251 + }, + { + "epoch": 4.02, + "learning_rate": 2.7853765735097894e-05, + "loss": 0.0332, + "step": 13252 + }, + { + "epoch": 4.02, + "learning_rate": 2.7836987484758683e-05, + "loss": 0.0243, + "step": 13253 + }, + { + "epoch": 4.02, + "learning_rate": 2.78202137724171e-05, + "loss": 0.0405, + "step": 13254 + }, + { + "epoch": 4.02, + "learning_rate": 2.780344459869631e-05, + "loss": 0.0089, + "step": 13255 + }, + { + "epoch": 4.02, + "learning_rate": 2.778667996421919e-05, + "loss": 0.0341, + "step": 13256 + }, + { + "epoch": 4.03, + "learning_rate": 2.7769919869608566e-05, + "loss": 0.0159, + "step": 13257 + }, + { + "epoch": 4.03, + "learning_rate": 2.7753164315486974e-05, + "loss": 0.0299, + "step": 13258 + }, + { + "epoch": 4.03, + "learning_rate": 2.7736413302476807e-05, + "loss": 0.0199, + "step": 13259 + }, + { + "epoch": 4.03, + "learning_rate": 2.7719666831200392e-05, + "loss": 0.0255, + "step": 13260 + }, + { + "epoch": 4.03, + "learning_rate": 2.770292490227972e-05, + "loss": 0.0079, + "step": 13261 + }, + { + "epoch": 4.03, + "learning_rate": 2.7686187516336845e-05, + "loss": 0.0195, + "step": 13262 + }, + { + "epoch": 4.03, + "learning_rate": 2.766945467399333e-05, + "loss": 0.023, + "step": 13263 + }, + { + "epoch": 4.03, + "learning_rate": 2.7652726375870865e-05, + "loss": 0.0118, + "step": 13264 + }, + { + "epoch": 4.03, + "learning_rate": 2.7636002622590853e-05, + "loss": 0.0353, + "step": 13265 + }, + { + "epoch": 4.03, + "learning_rate": 2.761928341477448e-05, + "loss": 0.0238, + "step": 13266 + }, + { + "epoch": 4.03, + "learning_rate": 2.760256875304293e-05, + "loss": 0.0171, + "step": 13267 + }, + { + "epoch": 4.03, + "learning_rate": 2.7585858638016944e-05, + "loss": 0.0275, + "step": 13268 + }, + { + "epoch": 4.03, + "learning_rate": 2.7569153070317374e-05, + "loss": 0.0208, + "step": 13269 + }, + { + "epoch": 4.03, + "learning_rate": 2.7552452050564705e-05, + "loss": 0.0229, + "step": 13270 + }, + { + "epoch": 4.03, + "learning_rate": 2.7535755579379394e-05, + "loss": 0.0169, + "step": 13271 + }, + { + "epoch": 4.03, + "learning_rate": 2.7519063657381658e-05, + "loss": 0.0359, + "step": 13272 + }, + { + "epoch": 4.03, + "learning_rate": 2.7502376285191484e-05, + "loss": 0.0155, + "step": 13273 + }, + { + "epoch": 4.03, + "learning_rate": 2.748569346342885e-05, + "loss": 0.0259, + "step": 13274 + }, + { + "epoch": 4.03, + "learning_rate": 2.74690151927134e-05, + "loss": 0.0101, + "step": 13275 + }, + { + "epoch": 4.03, + "learning_rate": 2.7452341473664748e-05, + "loss": 0.0342, + "step": 13276 + }, + { + "epoch": 4.03, + "learning_rate": 2.743567230690224e-05, + "loss": 0.0192, + "step": 13277 + }, + { + "epoch": 4.03, + "learning_rate": 2.7419007693045104e-05, + "loss": 0.0243, + "step": 13278 + }, + { + "epoch": 4.03, + "learning_rate": 2.7402347632712323e-05, + "loss": 0.0285, + "step": 13279 + }, + { + "epoch": 4.03, + "learning_rate": 2.738569212652284e-05, + "loss": 0.0128, + "step": 13280 + }, + { + "epoch": 4.03, + "learning_rate": 2.7369041175095307e-05, + "loss": 0.0407, + "step": 13281 + }, + { + "epoch": 4.03, + "learning_rate": 2.7352394779048314e-05, + "loss": 0.0434, + "step": 13282 + }, + { + "epoch": 4.03, + "learning_rate": 2.7335752939000182e-05, + "loss": 0.0084, + "step": 13283 + }, + { + "epoch": 4.03, + "learning_rate": 2.73191156555691e-05, + "loss": 0.0398, + "step": 13284 + }, + { + "epoch": 4.03, + "learning_rate": 2.7302482929373144e-05, + "loss": 0.0332, + "step": 13285 + }, + { + "epoch": 4.03, + "learning_rate": 2.7285854761030097e-05, + "loss": 0.0396, + "step": 13286 + }, + { + "epoch": 4.03, + "learning_rate": 2.7269231151157733e-05, + "loss": 0.0168, + "step": 13287 + }, + { + "epoch": 4.03, + "learning_rate": 2.725261210037351e-05, + "loss": 0.0314, + "step": 13288 + }, + { + "epoch": 4.03, + "learning_rate": 2.723599760929477e-05, + "loss": 0.0053, + "step": 13289 + }, + { + "epoch": 4.04, + "learning_rate": 2.721938767853876e-05, + "loss": 0.0272, + "step": 13290 + }, + { + "epoch": 4.04, + "learning_rate": 2.72027823087224e-05, + "loss": 0.0207, + "step": 13291 + }, + { + "epoch": 4.04, + "learning_rate": 2.718618150046265e-05, + "loss": 0.0422, + "step": 13292 + }, + { + "epoch": 4.04, + "learning_rate": 2.7169585254376043e-05, + "loss": 0.0149, + "step": 13293 + }, + { + "epoch": 4.04, + "learning_rate": 2.7152993571079178e-05, + "loss": 0.0275, + "step": 13294 + }, + { + "epoch": 4.04, + "learning_rate": 2.7136406451188318e-05, + "loss": 0.0222, + "step": 13295 + }, + { + "epoch": 4.04, + "learning_rate": 2.7119823895319704e-05, + "loss": 0.0122, + "step": 13296 + }, + { + "epoch": 4.04, + "learning_rate": 2.7103245904089288e-05, + "loss": 0.0401, + "step": 13297 + }, + { + "epoch": 4.04, + "learning_rate": 2.7086672478112863e-05, + "loss": 0.011, + "step": 13298 + }, + { + "epoch": 4.04, + "learning_rate": 2.7070103618006157e-05, + "loss": 0.0281, + "step": 13299 + }, + { + "epoch": 4.04, + "learning_rate": 2.7053539324384553e-05, + "loss": 0.0191, + "step": 13300 + }, + { + "epoch": 4.04, + "learning_rate": 2.7036979597863482e-05, + "loss": 0.0304, + "step": 13301 + }, + { + "epoch": 4.04, + "learning_rate": 2.7020424439057985e-05, + "loss": 0.0258, + "step": 13302 + }, + { + "epoch": 4.04, + "learning_rate": 2.7003873848583167e-05, + "loss": 0.0255, + "step": 13303 + }, + { + "epoch": 4.04, + "learning_rate": 2.698732782705367e-05, + "loss": 0.03, + "step": 13304 + }, + { + "epoch": 4.04, + "learning_rate": 2.6970786375084225e-05, + "loss": 0.0222, + "step": 13305 + }, + { + "epoch": 4.04, + "learning_rate": 2.6954249493289316e-05, + "loss": 0.0183, + "step": 13306 + }, + { + "epoch": 4.04, + "learning_rate": 2.693771718228317e-05, + "loss": 0.0312, + "step": 13307 + }, + { + "epoch": 4.04, + "learning_rate": 2.692118944268003e-05, + "loss": 0.0138, + "step": 13308 + }, + { + "epoch": 4.04, + "learning_rate": 2.6904666275093706e-05, + "loss": 0.01, + "step": 13309 + }, + { + "epoch": 4.04, + "learning_rate": 2.6888147680138088e-05, + "loss": 0.0047, + "step": 13310 + }, + { + "epoch": 4.04, + "learning_rate": 2.6871633658426722e-05, + "loss": 0.0105, + "step": 13311 + }, + { + "epoch": 4.04, + "learning_rate": 2.685512421057315e-05, + "loss": 0.0204, + "step": 13312 + }, + { + "epoch": 4.04, + "learning_rate": 2.6838619337190575e-05, + "loss": 0.0486, + "step": 13313 + }, + { + "epoch": 4.04, + "learning_rate": 2.682211903889208e-05, + "loss": 0.0335, + "step": 13314 + }, + { + "epoch": 4.04, + "learning_rate": 2.6805623316290708e-05, + "loss": 0.0275, + "step": 13315 + }, + { + "epoch": 4.04, + "learning_rate": 2.6789132169999118e-05, + "loss": 0.0422, + "step": 13316 + }, + { + "epoch": 4.04, + "learning_rate": 2.6772645600629984e-05, + "loss": 0.0176, + "step": 13317 + }, + { + "epoch": 4.04, + "learning_rate": 2.6756163608795717e-05, + "loss": 0.0276, + "step": 13318 + }, + { + "epoch": 4.04, + "learning_rate": 2.6739686195108546e-05, + "loss": 0.0374, + "step": 13319 + }, + { + "epoch": 4.04, + "learning_rate": 2.6723213360180545e-05, + "loss": 0.0162, + "step": 13320 + }, + { + "epoch": 4.04, + "learning_rate": 2.670674510462371e-05, + "loss": 0.0261, + "step": 13321 + }, + { + "epoch": 4.04, + "learning_rate": 2.6690281429049688e-05, + "loss": 0.017, + "step": 13322 + }, + { + "epoch": 4.05, + "learning_rate": 2.667382233407015e-05, + "loss": 0.0036, + "step": 13323 + }, + { + "epoch": 4.05, + "learning_rate": 2.665736782029646e-05, + "loss": 0.031, + "step": 13324 + }, + { + "epoch": 4.05, + "learning_rate": 2.664091788833983e-05, + "loss": 0.0311, + "step": 13325 + }, + { + "epoch": 4.05, + "learning_rate": 2.6624472538811385e-05, + "loss": 0.0196, + "step": 13326 + }, + { + "epoch": 4.05, + "learning_rate": 2.6608031772321942e-05, + "loss": 0.0153, + "step": 13327 + }, + { + "epoch": 4.05, + "learning_rate": 2.659159558948232e-05, + "loss": 0.019, + "step": 13328 + }, + { + "epoch": 4.05, + "learning_rate": 2.6575163990903026e-05, + "loss": 0.0233, + "step": 13329 + }, + { + "epoch": 4.05, + "learning_rate": 2.6558736977194412e-05, + "loss": 0.0424, + "step": 13330 + }, + { + "epoch": 4.05, + "learning_rate": 2.6542314548966776e-05, + "loss": 0.0264, + "step": 13331 + }, + { + "epoch": 4.05, + "learning_rate": 2.652589670683008e-05, + "loss": 0.0493, + "step": 13332 + }, + { + "epoch": 4.05, + "learning_rate": 2.6509483451394304e-05, + "loss": 0.0356, + "step": 13333 + }, + { + "epoch": 4.05, + "learning_rate": 2.649307478326901e-05, + "loss": 0.0108, + "step": 13334 + }, + { + "epoch": 4.05, + "learning_rate": 2.647667070306384e-05, + "loss": 0.0115, + "step": 13335 + }, + { + "epoch": 4.05, + "learning_rate": 2.6460271211388096e-05, + "loss": 0.0091, + "step": 13336 + }, + { + "epoch": 4.05, + "learning_rate": 2.6443876308851035e-05, + "loss": 0.0183, + "step": 13337 + }, + { + "epoch": 4.05, + "learning_rate": 2.642748599606164e-05, + "loss": 0.02, + "step": 13338 + }, + { + "epoch": 4.05, + "learning_rate": 2.6411100273628744e-05, + "loss": 0.0324, + "step": 13339 + }, + { + "epoch": 4.05, + "learning_rate": 2.639471914216107e-05, + "loss": 0.0336, + "step": 13340 + }, + { + "epoch": 4.05, + "learning_rate": 2.6378342602267083e-05, + "loss": 0.0351, + "step": 13341 + }, + { + "epoch": 4.05, + "learning_rate": 2.636197065455518e-05, + "loss": 0.0312, + "step": 13342 + }, + { + "epoch": 4.05, + "learning_rate": 2.634560329963351e-05, + "loss": 0.0146, + "step": 13343 + }, + { + "epoch": 4.05, + "learning_rate": 2.6329240538110068e-05, + "loss": 0.037, + "step": 13344 + }, + { + "epoch": 4.05, + "learning_rate": 2.6312882370592636e-05, + "loss": 0.0121, + "step": 13345 + }, + { + "epoch": 4.05, + "learning_rate": 2.629652879768891e-05, + "loss": 0.0252, + "step": 13346 + }, + { + "epoch": 4.05, + "learning_rate": 2.628017982000642e-05, + "loss": 0.0265, + "step": 13347 + }, + { + "epoch": 4.05, + "learning_rate": 2.6263835438152447e-05, + "loss": 0.0104, + "step": 13348 + }, + { + "epoch": 4.05, + "learning_rate": 2.6247495652734125e-05, + "loss": 0.0335, + "step": 13349 + }, + { + "epoch": 4.05, + "learning_rate": 2.6231160464358415e-05, + "loss": 0.0248, + "step": 13350 + }, + { + "epoch": 4.05, + "learning_rate": 2.6214829873632182e-05, + "loss": 0.0566, + "step": 13351 + }, + { + "epoch": 4.05, + "learning_rate": 2.6198503881161976e-05, + "loss": 0.0156, + "step": 13352 + }, + { + "epoch": 4.05, + "learning_rate": 2.6182182487554342e-05, + "loss": 0.0371, + "step": 13353 + }, + { + "epoch": 4.05, + "learning_rate": 2.616586569341555e-05, + "loss": 0.0171, + "step": 13354 + }, + { + "epoch": 4.05, + "learning_rate": 2.614955349935166e-05, + "loss": 0.0461, + "step": 13355 + }, + { + "epoch": 4.06, + "learning_rate": 2.6133245905968692e-05, + "loss": 0.021, + "step": 13356 + }, + { + "epoch": 4.06, + "learning_rate": 2.611694291387239e-05, + "loss": 0.0181, + "step": 13357 + }, + { + "epoch": 4.06, + "learning_rate": 2.610064452366844e-05, + "loss": 0.0425, + "step": 13358 + }, + { + "epoch": 4.06, + "learning_rate": 2.608435073596214e-05, + "loss": 0.0307, + "step": 13359 + }, + { + "epoch": 4.06, + "learning_rate": 2.6068061551358853e-05, + "loss": 0.0281, + "step": 13360 + }, + { + "epoch": 4.06, + "learning_rate": 2.6051776970463633e-05, + "loss": 0.0145, + "step": 13361 + }, + { + "epoch": 4.06, + "learning_rate": 2.603549699388146e-05, + "loss": 0.0213, + "step": 13362 + }, + { + "epoch": 4.06, + "learning_rate": 2.6019221622217e-05, + "loss": 0.0163, + "step": 13363 + }, + { + "epoch": 4.06, + "learning_rate": 2.6002950856074937e-05, + "loss": 0.0065, + "step": 13364 + }, + { + "epoch": 4.06, + "learning_rate": 2.5986684696059624e-05, + "loss": 0.0275, + "step": 13365 + }, + { + "epoch": 4.06, + "learning_rate": 2.597042314277527e-05, + "loss": 0.0295, + "step": 13366 + }, + { + "epoch": 4.06, + "learning_rate": 2.5954166196826032e-05, + "loss": 0.0237, + "step": 13367 + }, + { + "epoch": 4.06, + "learning_rate": 2.5937913858815708e-05, + "loss": 0.0386, + "step": 13368 + }, + { + "epoch": 4.06, + "learning_rate": 2.592166612934811e-05, + "loss": 0.0156, + "step": 13369 + }, + { + "epoch": 4.06, + "learning_rate": 2.5905423009026783e-05, + "loss": 0.0229, + "step": 13370 + }, + { + "epoch": 4.06, + "learning_rate": 2.588918449845502e-05, + "loss": 0.023, + "step": 13371 + }, + { + "epoch": 4.06, + "learning_rate": 2.5872950598236153e-05, + "loss": 0.0334, + "step": 13372 + }, + { + "epoch": 4.06, + "learning_rate": 2.585672130897314e-05, + "loss": 0.0074, + "step": 13373 + }, + { + "epoch": 4.06, + "learning_rate": 2.5840496631268943e-05, + "loss": 0.0121, + "step": 13374 + }, + { + "epoch": 4.06, + "learning_rate": 2.5824276565726133e-05, + "loss": 0.0258, + "step": 13375 + }, + { + "epoch": 4.06, + "learning_rate": 2.5808061112947342e-05, + "loss": 0.0162, + "step": 13376 + }, + { + "epoch": 4.06, + "learning_rate": 2.579185027353486e-05, + "loss": 0.0233, + "step": 13377 + }, + { + "epoch": 4.06, + "learning_rate": 2.577564404809093e-05, + "loss": 0.0499, + "step": 13378 + }, + { + "epoch": 4.06, + "learning_rate": 2.5759442437217548e-05, + "loss": 0.0516, + "step": 13379 + }, + { + "epoch": 4.06, + "learning_rate": 2.5743245441516507e-05, + "loss": 0.02, + "step": 13380 + }, + { + "epoch": 4.06, + "learning_rate": 2.572705306158955e-05, + "loss": 0.0186, + "step": 13381 + }, + { + "epoch": 4.06, + "learning_rate": 2.571086529803812e-05, + "loss": 0.0209, + "step": 13382 + }, + { + "epoch": 4.06, + "learning_rate": 2.5694682151463593e-05, + "loss": 0.0266, + "step": 13383 + }, + { + "epoch": 4.06, + "learning_rate": 2.56785036224671e-05, + "loss": 0.0511, + "step": 13384 + }, + { + "epoch": 4.06, + "learning_rate": 2.5662329711649648e-05, + "loss": 0.0299, + "step": 13385 + }, + { + "epoch": 4.06, + "learning_rate": 2.5646160419611973e-05, + "loss": 0.0249, + "step": 13386 + }, + { + "epoch": 4.06, + "learning_rate": 2.5629995746954822e-05, + "loss": 0.0171, + "step": 13387 + }, + { + "epoch": 4.06, + "learning_rate": 2.5613835694278573e-05, + "loss": 0.0219, + "step": 13388 + }, + { + "epoch": 4.07, + "learning_rate": 2.5597680262183605e-05, + "loss": 0.0336, + "step": 13389 + }, + { + "epoch": 4.07, + "learning_rate": 2.558152945127001e-05, + "loss": 0.0253, + "step": 13390 + }, + { + "epoch": 4.07, + "learning_rate": 2.5565383262137708e-05, + "loss": 0.0121, + "step": 13391 + }, + { + "epoch": 4.07, + "learning_rate": 2.5549241695386546e-05, + "loss": 0.0264, + "step": 13392 + }, + { + "epoch": 4.07, + "learning_rate": 2.5533104751616084e-05, + "loss": 0.0224, + "step": 13393 + }, + { + "epoch": 4.07, + "learning_rate": 2.551697243142581e-05, + "loss": 0.0397, + "step": 13394 + }, + { + "epoch": 4.07, + "learning_rate": 2.5500844735414972e-05, + "loss": 0.0252, + "step": 13395 + }, + { + "epoch": 4.07, + "learning_rate": 2.548472166418261e-05, + "loss": 0.0044, + "step": 13396 + }, + { + "epoch": 4.07, + "learning_rate": 2.546860321832775e-05, + "loss": 0.0383, + "step": 13397 + }, + { + "epoch": 4.07, + "learning_rate": 2.5452489398449056e-05, + "loss": 0.0388, + "step": 13398 + }, + { + "epoch": 4.07, + "learning_rate": 2.5436380205145235e-05, + "loss": 0.0147, + "step": 13399 + }, + { + "epoch": 4.07, + "learning_rate": 2.5420275639014526e-05, + "loss": 0.0324, + "step": 13400 + }, + { + "epoch": 4.07, + "learning_rate": 2.540417570065528e-05, + "loss": 0.0347, + "step": 13401 + }, + { + "epoch": 4.07, + "learning_rate": 2.5388080390665494e-05, + "loss": 0.0249, + "step": 13402 + }, + { + "epoch": 4.07, + "learning_rate": 2.5371989709643146e-05, + "loss": 0.0184, + "step": 13403 + }, + { + "epoch": 4.07, + "learning_rate": 2.53559036581859e-05, + "loss": 0.0391, + "step": 13404 + }, + { + "epoch": 4.07, + "learning_rate": 2.5339822236891288e-05, + "loss": 0.0084, + "step": 13405 + }, + { + "epoch": 4.07, + "learning_rate": 2.532374544635677e-05, + "loss": 0.027, + "step": 13406 + }, + { + "epoch": 4.07, + "learning_rate": 2.530767328717943e-05, + "loss": 0.0226, + "step": 13407 + }, + { + "epoch": 4.07, + "learning_rate": 2.529160575995643e-05, + "loss": 0.0278, + "step": 13408 + }, + { + "epoch": 4.07, + "learning_rate": 2.527554286528457e-05, + "loss": 0.0179, + "step": 13409 + }, + { + "epoch": 4.07, + "learning_rate": 2.5259484603760494e-05, + "loss": 0.0333, + "step": 13410 + }, + { + "epoch": 4.07, + "learning_rate": 2.5243430975980806e-05, + "loss": 0.0507, + "step": 13411 + }, + { + "epoch": 4.07, + "learning_rate": 2.522738198254178e-05, + "loss": 0.026, + "step": 13412 + }, + { + "epoch": 4.07, + "learning_rate": 2.521133762403965e-05, + "loss": 0.0214, + "step": 13413 + }, + { + "epoch": 4.07, + "learning_rate": 2.5195297901070398e-05, + "loss": 0.0285, + "step": 13414 + }, + { + "epoch": 4.07, + "learning_rate": 2.5179262814229845e-05, + "loss": 0.05, + "step": 13415 + }, + { + "epoch": 4.07, + "learning_rate": 2.5163232364113593e-05, + "loss": 0.0368, + "step": 13416 + }, + { + "epoch": 4.07, + "learning_rate": 2.5147206551317223e-05, + "loss": 0.0308, + "step": 13417 + }, + { + "epoch": 4.07, + "learning_rate": 2.513118537643597e-05, + "loss": 0.014, + "step": 13418 + }, + { + "epoch": 4.07, + "learning_rate": 2.5115168840065026e-05, + "loss": 0.0083, + "step": 13419 + }, + { + "epoch": 4.07, + "learning_rate": 2.5099156942799348e-05, + "loss": 0.0265, + "step": 13420 + }, + { + "epoch": 4.07, + "learning_rate": 2.5083149685233677e-05, + "loss": 0.0298, + "step": 13421 + }, + { + "epoch": 4.08, + "learning_rate": 2.506714706796274e-05, + "loss": 0.0242, + "step": 13422 + }, + { + "epoch": 4.08, + "learning_rate": 2.505114909158086e-05, + "loss": 0.0346, + "step": 13423 + }, + { + "epoch": 4.08, + "learning_rate": 2.5035155756682474e-05, + "loss": 0.0189, + "step": 13424 + }, + { + "epoch": 4.08, + "learning_rate": 2.5019167063861513e-05, + "loss": 0.045, + "step": 13425 + }, + { + "epoch": 4.08, + "learning_rate": 2.5003183013712032e-05, + "loss": 0.0396, + "step": 13426 + }, + { + "epoch": 4.08, + "learning_rate": 2.498720360682772e-05, + "loss": 0.0522, + "step": 13427 + }, + { + "epoch": 4.08, + "learning_rate": 2.4971228843802227e-05, + "loss": 0.0261, + "step": 13428 + }, + { + "epoch": 4.08, + "learning_rate": 2.4955258725228905e-05, + "loss": 0.0367, + "step": 13429 + }, + { + "epoch": 4.08, + "learning_rate": 2.4939293251701066e-05, + "loss": 0.0206, + "step": 13430 + }, + { + "epoch": 4.08, + "learning_rate": 2.492333242381175e-05, + "loss": 0.0034, + "step": 13431 + }, + { + "epoch": 4.08, + "learning_rate": 2.49073762421538e-05, + "loss": 0.0276, + "step": 13432 + }, + { + "epoch": 4.08, + "learning_rate": 2.489142470732004e-05, + "loss": 0.0332, + "step": 13433 + }, + { + "epoch": 4.08, + "learning_rate": 2.4875477819902932e-05, + "loss": 0.016, + "step": 13434 + }, + { + "epoch": 4.08, + "learning_rate": 2.485953558049494e-05, + "loss": 0.0318, + "step": 13435 + }, + { + "epoch": 4.08, + "learning_rate": 2.484359798968823e-05, + "loss": 0.0202, + "step": 13436 + }, + { + "epoch": 4.08, + "learning_rate": 2.4827665048074802e-05, + "loss": 0.0315, + "step": 13437 + }, + { + "epoch": 4.08, + "learning_rate": 2.481173675624658e-05, + "loss": 0.0109, + "step": 13438 + }, + { + "epoch": 4.08, + "learning_rate": 2.4795813114795193e-05, + "loss": 0.026, + "step": 13439 + }, + { + "epoch": 4.08, + "learning_rate": 2.4779894124312266e-05, + "loss": 0.0112, + "step": 13440 + }, + { + "epoch": 4.08, + "learning_rate": 2.4763979785388994e-05, + "loss": 0.0294, + "step": 13441 + }, + { + "epoch": 4.08, + "learning_rate": 2.474807009861665e-05, + "loss": 0.0301, + "step": 13442 + }, + { + "epoch": 4.08, + "learning_rate": 2.4732165064586185e-05, + "loss": 0.0164, + "step": 13443 + }, + { + "epoch": 4.08, + "learning_rate": 2.471626468388847e-05, + "loss": 0.0383, + "step": 13444 + }, + { + "epoch": 4.08, + "learning_rate": 2.470036895711412e-05, + "loss": 0.0124, + "step": 13445 + }, + { + "epoch": 4.08, + "learning_rate": 2.468447788485361e-05, + "loss": 0.0376, + "step": 13446 + }, + { + "epoch": 4.08, + "learning_rate": 2.4668591467697284e-05, + "loss": 0.0244, + "step": 13447 + }, + { + "epoch": 4.08, + "learning_rate": 2.4652709706235224e-05, + "loss": 0.0221, + "step": 13448 + }, + { + "epoch": 4.08, + "learning_rate": 2.4636832601057437e-05, + "loss": 0.0479, + "step": 13449 + }, + { + "epoch": 4.08, + "learning_rate": 2.4620960152753723e-05, + "loss": 0.0155, + "step": 13450 + }, + { + "epoch": 4.08, + "learning_rate": 2.460509236191362e-05, + "loss": 0.0169, + "step": 13451 + }, + { + "epoch": 4.08, + "learning_rate": 2.4589229229126645e-05, + "loss": 0.019, + "step": 13452 + }, + { + "epoch": 4.08, + "learning_rate": 2.4573370754982006e-05, + "loss": 0.0431, + "step": 13453 + }, + { + "epoch": 4.09, + "learning_rate": 2.455751694006889e-05, + "loss": 0.0311, + "step": 13454 + }, + { + "epoch": 4.09, + "learning_rate": 2.4541667784976148e-05, + "loss": 0.0327, + "step": 13455 + }, + { + "epoch": 4.09, + "learning_rate": 2.4525823290292534e-05, + "loss": 0.0098, + "step": 13456 + }, + { + "epoch": 4.09, + "learning_rate": 2.4509983456606606e-05, + "loss": 0.0105, + "step": 13457 + }, + { + "epoch": 4.09, + "learning_rate": 2.449414828450685e-05, + "loss": 0.02, + "step": 13458 + }, + { + "epoch": 4.09, + "learning_rate": 2.4478317774581387e-05, + "loss": 0.0157, + "step": 13459 + }, + { + "epoch": 4.09, + "learning_rate": 2.4462491927418367e-05, + "loss": 0.056, + "step": 13460 + }, + { + "epoch": 4.09, + "learning_rate": 2.4446670743605636e-05, + "loss": 0.0245, + "step": 13461 + }, + { + "epoch": 4.09, + "learning_rate": 2.4430854223730872e-05, + "loss": 0.0061, + "step": 13462 + }, + { + "epoch": 4.09, + "learning_rate": 2.4415042368381677e-05, + "loss": 0.0258, + "step": 13463 + }, + { + "epoch": 4.09, + "learning_rate": 2.439923517814536e-05, + "loss": 0.0148, + "step": 13464 + }, + { + "epoch": 4.09, + "learning_rate": 2.43834326536092e-05, + "loss": 0.0245, + "step": 13465 + }, + { + "epoch": 4.09, + "learning_rate": 2.4367634795360068e-05, + "loss": 0.0245, + "step": 13466 + }, + { + "epoch": 4.09, + "learning_rate": 2.435184160398494e-05, + "loss": 0.0352, + "step": 13467 + }, + { + "epoch": 4.09, + "learning_rate": 2.4336053080070394e-05, + "loss": 0.019, + "step": 13468 + }, + { + "epoch": 4.09, + "learning_rate": 2.4320269224202997e-05, + "loss": 0.0477, + "step": 13469 + }, + { + "epoch": 4.09, + "learning_rate": 2.4304490036969065e-05, + "loss": 0.0108, + "step": 13470 + }, + { + "epoch": 4.09, + "learning_rate": 2.4288715518954675e-05, + "loss": 0.0159, + "step": 13471 + }, + { + "epoch": 4.09, + "learning_rate": 2.4272945670745898e-05, + "loss": 0.0172, + "step": 13472 + }, + { + "epoch": 4.09, + "learning_rate": 2.4257180492928473e-05, + "loss": 0.0066, + "step": 13473 + }, + { + "epoch": 4.09, + "learning_rate": 2.4241419986088073e-05, + "loss": 0.0199, + "step": 13474 + }, + { + "epoch": 4.09, + "learning_rate": 2.422566415081016e-05, + "loss": 0.0251, + "step": 13475 + }, + { + "epoch": 4.09, + "learning_rate": 2.4209912987679932e-05, + "loss": 0.013, + "step": 13476 + }, + { + "epoch": 4.09, + "learning_rate": 2.4194166497282618e-05, + "loss": 0.0455, + "step": 13477 + }, + { + "epoch": 4.09, + "learning_rate": 2.417842468020304e-05, + "loss": 0.0148, + "step": 13478 + }, + { + "epoch": 4.09, + "learning_rate": 2.416268753702606e-05, + "loss": 0.044, + "step": 13479 + }, + { + "epoch": 4.09, + "learning_rate": 2.4146955068336227e-05, + "loss": 0.0159, + "step": 13480 + }, + { + "epoch": 4.09, + "learning_rate": 2.4131227274717956e-05, + "loss": 0.0176, + "step": 13481 + }, + { + "epoch": 4.09, + "learning_rate": 2.411550415675545e-05, + "loss": 0.0315, + "step": 13482 + }, + { + "epoch": 4.09, + "learning_rate": 2.409978571503284e-05, + "loss": 0.0211, + "step": 13483 + }, + { + "epoch": 4.09, + "learning_rate": 2.4084071950133953e-05, + "loss": 0.0177, + "step": 13484 + }, + { + "epoch": 4.09, + "learning_rate": 2.4068362862642583e-05, + "loss": 0.0225, + "step": 13485 + }, + { + "epoch": 4.09, + "learning_rate": 2.4052658453142236e-05, + "loss": 0.0165, + "step": 13486 + }, + { + "epoch": 4.1, + "learning_rate": 2.403695872221627e-05, + "loss": 0.0499, + "step": 13487 + }, + { + "epoch": 4.1, + "learning_rate": 2.4021263670447932e-05, + "loss": 0.0295, + "step": 13488 + }, + { + "epoch": 4.1, + "learning_rate": 2.400557329842019e-05, + "loss": 0.0235, + "step": 13489 + }, + { + "epoch": 4.1, + "learning_rate": 2.3989887606715958e-05, + "loss": 0.0175, + "step": 13490 + }, + { + "epoch": 4.1, + "learning_rate": 2.397420659591787e-05, + "loss": 0.0339, + "step": 13491 + }, + { + "epoch": 4.1, + "learning_rate": 2.395853026660845e-05, + "loss": 0.0187, + "step": 13492 + }, + { + "epoch": 4.1, + "learning_rate": 2.3942858619369975e-05, + "loss": 0.0456, + "step": 13493 + }, + { + "epoch": 4.1, + "learning_rate": 2.3927191654784633e-05, + "loss": 0.0319, + "step": 13494 + }, + { + "epoch": 4.1, + "learning_rate": 2.3911529373434474e-05, + "loss": 0.0138, + "step": 13495 + }, + { + "epoch": 4.1, + "learning_rate": 2.389587177590123e-05, + "loss": 0.0185, + "step": 13496 + }, + { + "epoch": 4.1, + "learning_rate": 2.388021886276656e-05, + "loss": 0.0267, + "step": 13497 + }, + { + "epoch": 4.1, + "learning_rate": 2.3864570634611873e-05, + "loss": 0.0112, + "step": 13498 + }, + { + "epoch": 4.1, + "learning_rate": 2.3848927092018544e-05, + "loss": 0.0106, + "step": 13499 + }, + { + "epoch": 4.1, + "learning_rate": 2.3833288235567606e-05, + "loss": 0.0269, + "step": 13500 + }, + { + "epoch": 4.1, + "learning_rate": 2.3817654065840042e-05, + "loss": 0.0404, + "step": 13501 + }, + { + "epoch": 4.1, + "learning_rate": 2.3802024583416625e-05, + "loss": 0.0279, + "step": 13502 + }, + { + "epoch": 4.1, + "learning_rate": 2.3786399788877885e-05, + "loss": 0.0145, + "step": 13503 + }, + { + "epoch": 4.1, + "learning_rate": 2.3770779682804296e-05, + "loss": 0.0284, + "step": 13504 + }, + { + "epoch": 4.1, + "learning_rate": 2.375516426577606e-05, + "loss": 0.0132, + "step": 13505 + }, + { + "epoch": 4.1, + "learning_rate": 2.373955353837331e-05, + "loss": 0.0215, + "step": 13506 + }, + { + "epoch": 4.1, + "learning_rate": 2.372394750117584e-05, + "loss": 0.0242, + "step": 13507 + }, + { + "epoch": 4.1, + "learning_rate": 2.370834615476344e-05, + "loss": 0.0188, + "step": 13508 + }, + { + "epoch": 4.1, + "learning_rate": 2.3692749499715607e-05, + "loss": 0.0228, + "step": 13509 + }, + { + "epoch": 4.1, + "learning_rate": 2.3677157536611774e-05, + "loss": 0.0428, + "step": 13510 + }, + { + "epoch": 4.1, + "learning_rate": 2.3661570266031092e-05, + "loss": 0.0228, + "step": 13511 + }, + { + "epoch": 4.1, + "learning_rate": 2.364598768855257e-05, + "loss": 0.0088, + "step": 13512 + }, + { + "epoch": 4.1, + "learning_rate": 2.363040980475509e-05, + "loss": 0.0308, + "step": 13513 + }, + { + "epoch": 4.1, + "learning_rate": 2.361483661521729e-05, + "loss": 0.0552, + "step": 13514 + }, + { + "epoch": 4.1, + "learning_rate": 2.359926812051771e-05, + "loss": 0.021, + "step": 13515 + }, + { + "epoch": 4.1, + "learning_rate": 2.358370432123467e-05, + "loss": 0.0245, + "step": 13516 + }, + { + "epoch": 4.1, + "learning_rate": 2.3568145217946255e-05, + "loss": 0.025, + "step": 13517 + }, + { + "epoch": 4.1, + "learning_rate": 2.355259081123052e-05, + "loss": 0.0277, + "step": 13518 + }, + { + "epoch": 4.1, + "learning_rate": 2.3537041101665205e-05, + "loss": 0.0206, + "step": 13519 + }, + { + "epoch": 4.11, + "learning_rate": 2.3521496089827997e-05, + "loss": 0.0202, + "step": 13520 + }, + { + "epoch": 4.11, + "learning_rate": 2.3505955776296316e-05, + "loss": 0.0122, + "step": 13521 + }, + { + "epoch": 4.11, + "learning_rate": 2.349042016164743e-05, + "loss": 0.0326, + "step": 13522 + }, + { + "epoch": 4.11, + "learning_rate": 2.3474889246458406e-05, + "loss": 0.0216, + "step": 13523 + }, + { + "epoch": 4.11, + "learning_rate": 2.3459363031306273e-05, + "loss": 0.0123, + "step": 13524 + }, + { + "epoch": 4.11, + "learning_rate": 2.344384151676767e-05, + "loss": 0.0196, + "step": 13525 + }, + { + "epoch": 4.11, + "learning_rate": 2.342832470341927e-05, + "loss": 0.0301, + "step": 13526 + }, + { + "epoch": 4.11, + "learning_rate": 2.3412812591837448e-05, + "loss": 0.0327, + "step": 13527 + }, + { + "epoch": 4.11, + "learning_rate": 2.3397305182598393e-05, + "loss": 0.041, + "step": 13528 + }, + { + "epoch": 4.11, + "learning_rate": 2.3381802476278223e-05, + "loss": 0.0207, + "step": 13529 + }, + { + "epoch": 4.11, + "learning_rate": 2.3366304473452747e-05, + "loss": 0.0273, + "step": 13530 + }, + { + "epoch": 4.11, + "learning_rate": 2.335081117469777e-05, + "loss": 0.0317, + "step": 13531 + }, + { + "epoch": 4.11, + "learning_rate": 2.33353225805887e-05, + "loss": 0.0222, + "step": 13532 + }, + { + "epoch": 4.11, + "learning_rate": 2.3319838691700993e-05, + "loss": 0.0269, + "step": 13533 + }, + { + "epoch": 4.11, + "learning_rate": 2.3304359508609754e-05, + "loss": 0.0342, + "step": 13534 + }, + { + "epoch": 4.11, + "learning_rate": 2.3288885031890027e-05, + "loss": 0.029, + "step": 13535 + }, + { + "epoch": 4.11, + "learning_rate": 2.3273415262116728e-05, + "loss": 0.0413, + "step": 13536 + }, + { + "epoch": 4.11, + "learning_rate": 2.325795019986434e-05, + "loss": 0.0142, + "step": 13537 + }, + { + "epoch": 4.11, + "learning_rate": 2.324248984570748e-05, + "loss": 0.0142, + "step": 13538 + }, + { + "epoch": 4.11, + "learning_rate": 2.3227034200220374e-05, + "loss": 0.0114, + "step": 13539 + }, + { + "epoch": 4.11, + "learning_rate": 2.321158326397721e-05, + "loss": 0.0099, + "step": 13540 + }, + { + "epoch": 4.11, + "learning_rate": 2.3196137037551936e-05, + "loss": 0.0157, + "step": 13541 + }, + { + "epoch": 4.11, + "learning_rate": 2.318069552151829e-05, + "loss": 0.0196, + "step": 13542 + }, + { + "epoch": 4.11, + "learning_rate": 2.3165258716449942e-05, + "loss": 0.0418, + "step": 13543 + }, + { + "epoch": 4.11, + "learning_rate": 2.3149826622920255e-05, + "loss": 0.0105, + "step": 13544 + }, + { + "epoch": 4.11, + "learning_rate": 2.3134399241502565e-05, + "loss": 0.0603, + "step": 13545 + }, + { + "epoch": 4.11, + "learning_rate": 2.3118976572769904e-05, + "loss": 0.0184, + "step": 13546 + }, + { + "epoch": 4.11, + "learning_rate": 2.310355861729519e-05, + "loss": 0.0244, + "step": 13547 + }, + { + "epoch": 4.11, + "learning_rate": 2.3088145375651123e-05, + "loss": 0.0278, + "step": 13548 + }, + { + "epoch": 4.11, + "learning_rate": 2.3072736848410323e-05, + "loss": 0.0173, + "step": 13549 + }, + { + "epoch": 4.11, + "learning_rate": 2.30573330361451e-05, + "loss": 0.0253, + "step": 13550 + }, + { + "epoch": 4.11, + "learning_rate": 2.3041933939427725e-05, + "loss": 0.0127, + "step": 13551 + }, + { + "epoch": 4.11, + "learning_rate": 2.3026539558830215e-05, + "loss": 0.0382, + "step": 13552 + }, + { + "epoch": 4.12, + "learning_rate": 2.3011149894924374e-05, + "loss": 0.0193, + "step": 13553 + }, + { + "epoch": 4.12, + "learning_rate": 2.2995764948281962e-05, + "loss": 0.024, + "step": 13554 + }, + { + "epoch": 4.12, + "learning_rate": 2.298038471947441e-05, + "loss": 0.0094, + "step": 13555 + }, + { + "epoch": 4.12, + "learning_rate": 2.296500920907312e-05, + "loss": 0.0119, + "step": 13556 + }, + { + "epoch": 4.12, + "learning_rate": 2.2949638417649207e-05, + "loss": 0.0209, + "step": 13557 + }, + { + "epoch": 4.12, + "learning_rate": 2.2934272345773613e-05, + "loss": 0.0036, + "step": 13558 + }, + { + "epoch": 4.12, + "learning_rate": 2.2918910994017232e-05, + "loss": 0.0174, + "step": 13559 + }, + { + "epoch": 4.12, + "learning_rate": 2.2903554362950605e-05, + "loss": 0.018, + "step": 13560 + }, + { + "epoch": 4.12, + "learning_rate": 2.288820245314426e-05, + "loss": 0.0284, + "step": 13561 + }, + { + "epoch": 4.12, + "learning_rate": 2.2872855265168444e-05, + "loss": 0.0221, + "step": 13562 + }, + { + "epoch": 4.12, + "learning_rate": 2.2857512799593263e-05, + "loss": 0.0171, + "step": 13563 + }, + { + "epoch": 4.12, + "learning_rate": 2.2842175056988598e-05, + "loss": 0.0301, + "step": 13564 + }, + { + "epoch": 4.12, + "learning_rate": 2.2826842037924275e-05, + "loss": 0.0199, + "step": 13565 + }, + { + "epoch": 4.12, + "learning_rate": 2.2811513742969805e-05, + "loss": 0.0312, + "step": 13566 + }, + { + "epoch": 4.12, + "learning_rate": 2.279619017269465e-05, + "loss": 0.0266, + "step": 13567 + }, + { + "epoch": 4.12, + "learning_rate": 2.2780871327668022e-05, + "loss": 0.0078, + "step": 13568 + }, + { + "epoch": 4.12, + "learning_rate": 2.276555720845891e-05, + "loss": 0.0253, + "step": 13569 + }, + { + "epoch": 4.12, + "learning_rate": 2.2750247815636285e-05, + "loss": 0.0175, + "step": 13570 + }, + { + "epoch": 4.12, + "learning_rate": 2.2734943149768753e-05, + "loss": 0.0301, + "step": 13571 + }, + { + "epoch": 4.12, + "learning_rate": 2.2719643211424942e-05, + "loss": 0.0073, + "step": 13572 + }, + { + "epoch": 4.12, + "learning_rate": 2.2704348001173083e-05, + "loss": 0.02, + "step": 13573 + }, + { + "epoch": 4.12, + "learning_rate": 2.268905751958144e-05, + "loss": 0.015, + "step": 13574 + }, + { + "epoch": 4.12, + "learning_rate": 2.2673771767217934e-05, + "loss": 0.0116, + "step": 13575 + }, + { + "epoch": 4.12, + "learning_rate": 2.265849074465042e-05, + "loss": 0.0339, + "step": 13576 + }, + { + "epoch": 4.12, + "learning_rate": 2.264321445244662e-05, + "loss": 0.0488, + "step": 13577 + }, + { + "epoch": 4.12, + "learning_rate": 2.2627942891173873e-05, + "loss": 0.0106, + "step": 13578 + }, + { + "epoch": 4.12, + "learning_rate": 2.2612676061399564e-05, + "loss": 0.0186, + "step": 13579 + }, + { + "epoch": 4.12, + "learning_rate": 2.2597413963690747e-05, + "loss": 0.0256, + "step": 13580 + }, + { + "epoch": 4.12, + "learning_rate": 2.2582156598614415e-05, + "loss": 0.0328, + "step": 13581 + }, + { + "epoch": 4.12, + "learning_rate": 2.256690396673733e-05, + "loss": 0.0458, + "step": 13582 + }, + { + "epoch": 4.12, + "learning_rate": 2.255165606862603e-05, + "loss": 0.0174, + "step": 13583 + }, + { + "epoch": 4.12, + "learning_rate": 2.2536412904847e-05, + "loss": 0.0144, + "step": 13584 + }, + { + "epoch": 4.12, + "learning_rate": 2.2521174475966413e-05, + "loss": 0.0143, + "step": 13585 + }, + { + "epoch": 4.13, + "learning_rate": 2.2505940782550396e-05, + "loss": 0.0313, + "step": 13586 + }, + { + "epoch": 4.13, + "learning_rate": 2.2490711825164814e-05, + "loss": 0.0215, + "step": 13587 + }, + { + "epoch": 4.13, + "learning_rate": 2.2475487604375382e-05, + "loss": 0.029, + "step": 13588 + }, + { + "epoch": 4.13, + "learning_rate": 2.246026812074756e-05, + "loss": 0.0269, + "step": 13589 + }, + { + "epoch": 4.13, + "learning_rate": 2.2445053374846815e-05, + "loss": 0.0171, + "step": 13590 + }, + { + "epoch": 4.13, + "learning_rate": 2.2429843367238264e-05, + "loss": 0.021, + "step": 13591 + }, + { + "epoch": 4.13, + "learning_rate": 2.2414638098486955e-05, + "loss": 0.0115, + "step": 13592 + }, + { + "epoch": 4.13, + "learning_rate": 2.2399437569157713e-05, + "loss": 0.0265, + "step": 13593 + }, + { + "epoch": 4.13, + "learning_rate": 2.2384241779815127e-05, + "loss": 0.0478, + "step": 13594 + }, + { + "epoch": 4.13, + "learning_rate": 2.2369050731023774e-05, + "loss": 0.0376, + "step": 13595 + }, + { + "epoch": 4.13, + "learning_rate": 2.235386442334787e-05, + "loss": 0.0082, + "step": 13596 + }, + { + "epoch": 4.13, + "learning_rate": 2.233868285735167e-05, + "loss": 0.0368, + "step": 13597 + }, + { + "epoch": 4.13, + "learning_rate": 2.2323506033598965e-05, + "loss": 0.0472, + "step": 13598 + }, + { + "epoch": 4.13, + "learning_rate": 2.230833395265359e-05, + "loss": 0.0397, + "step": 13599 + }, + { + "epoch": 4.13, + "learning_rate": 2.2293166615079194e-05, + "loss": 0.0535, + "step": 13600 + }, + { + "epoch": 4.13, + "learning_rate": 2.227800402143913e-05, + "loss": 0.0347, + "step": 13601 + }, + { + "epoch": 4.13, + "learning_rate": 2.226284617229674e-05, + "loss": 0.0061, + "step": 13602 + }, + { + "epoch": 4.13, + "learning_rate": 2.2247693068214963e-05, + "loss": 0.0186, + "step": 13603 + }, + { + "epoch": 4.13, + "learning_rate": 2.2232544709756796e-05, + "loss": 0.0117, + "step": 13604 + }, + { + "epoch": 4.13, + "learning_rate": 2.221740109748487e-05, + "loss": 0.026, + "step": 13605 + }, + { + "epoch": 4.13, + "learning_rate": 2.2202262231961824e-05, + "loss": 0.0216, + "step": 13606 + }, + { + "epoch": 4.13, + "learning_rate": 2.2187128113749958e-05, + "loss": 0.0216, + "step": 13607 + }, + { + "epoch": 4.13, + "learning_rate": 2.217199874341145e-05, + "loss": 0.0135, + "step": 13608 + }, + { + "epoch": 4.13, + "learning_rate": 2.2156874121508374e-05, + "loss": 0.02, + "step": 13609 + }, + { + "epoch": 4.13, + "learning_rate": 2.214175424860249e-05, + "loss": 0.0297, + "step": 13610 + }, + { + "epoch": 4.13, + "learning_rate": 2.2126639125255518e-05, + "loss": 0.0322, + "step": 13611 + }, + { + "epoch": 4.13, + "learning_rate": 2.211152875202893e-05, + "loss": 0.0199, + "step": 13612 + }, + { + "epoch": 4.13, + "learning_rate": 2.209642312948402e-05, + "loss": 0.0402, + "step": 13613 + }, + { + "epoch": 4.13, + "learning_rate": 2.2081322258181878e-05, + "loss": 0.0322, + "step": 13614 + }, + { + "epoch": 4.13, + "learning_rate": 2.206622613868352e-05, + "loss": 0.0046, + "step": 13615 + }, + { + "epoch": 4.13, + "learning_rate": 2.2051134771549677e-05, + "loss": 0.0333, + "step": 13616 + }, + { + "epoch": 4.13, + "learning_rate": 2.2036048157340973e-05, + "loss": 0.0475, + "step": 13617 + }, + { + "epoch": 4.13, + "learning_rate": 2.2020966296617892e-05, + "loss": 0.0466, + "step": 13618 + }, + { + "epoch": 4.14, + "learning_rate": 2.2005889189940534e-05, + "loss": 0.0154, + "step": 13619 + }, + { + "epoch": 4.14, + "learning_rate": 2.19908168378691e-05, + "loss": 0.048, + "step": 13620 + }, + { + "epoch": 4.14, + "learning_rate": 2.1975749240963416e-05, + "loss": 0.0253, + "step": 13621 + }, + { + "epoch": 4.14, + "learning_rate": 2.196068639978324e-05, + "loss": 0.0205, + "step": 13622 + }, + { + "epoch": 4.14, + "learning_rate": 2.194562831488808e-05, + "loss": 0.0331, + "step": 13623 + }, + { + "epoch": 4.14, + "learning_rate": 2.1930574986837306e-05, + "loss": 0.0255, + "step": 13624 + }, + { + "epoch": 4.14, + "learning_rate": 2.191552641619012e-05, + "loss": 0.0415, + "step": 13625 + }, + { + "epoch": 4.14, + "learning_rate": 2.190048260350551e-05, + "loss": 0.0369, + "step": 13626 + }, + { + "epoch": 4.14, + "learning_rate": 2.188544354934234e-05, + "loss": 0.0301, + "step": 13627 + }, + { + "epoch": 4.14, + "learning_rate": 2.187040925425925e-05, + "loss": 0.028, + "step": 13628 + }, + { + "epoch": 4.14, + "learning_rate": 2.185537971881473e-05, + "loss": 0.0187, + "step": 13629 + }, + { + "epoch": 4.14, + "learning_rate": 2.1840354943567028e-05, + "loss": 0.0216, + "step": 13630 + }, + { + "epoch": 4.14, + "learning_rate": 2.1825334929074352e-05, + "loss": 0.0173, + "step": 13631 + }, + { + "epoch": 4.14, + "learning_rate": 2.1810319675894584e-05, + "loss": 0.0226, + "step": 13632 + }, + { + "epoch": 4.14, + "learning_rate": 2.1795309184585565e-05, + "loss": 0.021, + "step": 13633 + }, + { + "epoch": 4.14, + "learning_rate": 2.1780303455704844e-05, + "loss": 0.0106, + "step": 13634 + }, + { + "epoch": 4.14, + "learning_rate": 2.1765302489809818e-05, + "loss": 0.0333, + "step": 13635 + }, + { + "epoch": 4.14, + "learning_rate": 2.1750306287457782e-05, + "loss": 0.0332, + "step": 13636 + }, + { + "epoch": 4.14, + "learning_rate": 2.173531484920576e-05, + "loss": 0.0341, + "step": 13637 + }, + { + "epoch": 4.14, + "learning_rate": 2.1720328175610722e-05, + "loss": 0.0244, + "step": 13638 + }, + { + "epoch": 4.14, + "learning_rate": 2.1705346267229245e-05, + "loss": 0.0092, + "step": 13639 + }, + { + "epoch": 4.14, + "learning_rate": 2.169036912461793e-05, + "loss": 0.0216, + "step": 13640 + }, + { + "epoch": 4.14, + "learning_rate": 2.167539674833318e-05, + "loss": 0.0144, + "step": 13641 + }, + { + "epoch": 4.14, + "learning_rate": 2.1660429138931085e-05, + "loss": 0.0204, + "step": 13642 + }, + { + "epoch": 4.14, + "learning_rate": 2.1645466296967768e-05, + "loss": 0.0151, + "step": 13643 + }, + { + "epoch": 4.14, + "learning_rate": 2.1630508222998917e-05, + "loss": 0.0254, + "step": 13644 + }, + { + "epoch": 4.14, + "learning_rate": 2.1615554917580286e-05, + "loss": 0.0407, + "step": 13645 + }, + { + "epoch": 4.14, + "learning_rate": 2.1600606381267267e-05, + "loss": 0.0216, + "step": 13646 + }, + { + "epoch": 4.14, + "learning_rate": 2.158566261461523e-05, + "loss": 0.0256, + "step": 13647 + }, + { + "epoch": 4.14, + "learning_rate": 2.1570723618179247e-05, + "loss": 0.0432, + "step": 13648 + }, + { + "epoch": 4.14, + "learning_rate": 2.155578939251423e-05, + "loss": 0.0153, + "step": 13649 + }, + { + "epoch": 4.14, + "learning_rate": 2.154085993817501e-05, + "loss": 0.0163, + "step": 13650 + }, + { + "epoch": 4.14, + "learning_rate": 2.1525935255716115e-05, + "loss": 0.025, + "step": 13651 + }, + { + "epoch": 4.15, + "learning_rate": 2.1511015345692e-05, + "loss": 0.0135, + "step": 13652 + }, + { + "epoch": 4.15, + "learning_rate": 2.1496100208656892e-05, + "loss": 0.0188, + "step": 13653 + }, + { + "epoch": 4.15, + "learning_rate": 2.148118984516481e-05, + "loss": 0.0298, + "step": 13654 + }, + { + "epoch": 4.15, + "learning_rate": 2.146628425576963e-05, + "loss": 0.0518, + "step": 13655 + }, + { + "epoch": 4.15, + "learning_rate": 2.1451383441025088e-05, + "loss": 0.018, + "step": 13656 + }, + { + "epoch": 4.15, + "learning_rate": 2.1436487401484647e-05, + "loss": 0.0136, + "step": 13657 + }, + { + "epoch": 4.15, + "learning_rate": 2.1421596137701747e-05, + "loss": 0.0059, + "step": 13658 + }, + { + "epoch": 4.15, + "learning_rate": 2.1406709650229474e-05, + "loss": 0.041, + "step": 13659 + }, + { + "epoch": 4.15, + "learning_rate": 2.139182793962081e-05, + "loss": 0.0442, + "step": 13660 + }, + { + "epoch": 4.15, + "learning_rate": 2.137695100642864e-05, + "loss": 0.0226, + "step": 13661 + }, + { + "epoch": 4.15, + "learning_rate": 2.1362078851205523e-05, + "loss": 0.0373, + "step": 13662 + }, + { + "epoch": 4.15, + "learning_rate": 2.1347211474503982e-05, + "loss": 0.0186, + "step": 13663 + }, + { + "epoch": 4.15, + "learning_rate": 2.1332348876876266e-05, + "loss": 0.0169, + "step": 13664 + }, + { + "epoch": 4.15, + "learning_rate": 2.1317491058874427e-05, + "loss": 0.0215, + "step": 13665 + }, + { + "epoch": 4.15, + "learning_rate": 2.130263802105049e-05, + "loss": 0.027, + "step": 13666 + }, + { + "epoch": 4.15, + "learning_rate": 2.1287789763956098e-05, + "loss": 0.026, + "step": 13667 + }, + { + "epoch": 4.15, + "learning_rate": 2.1272946288142943e-05, + "loss": 0.0443, + "step": 13668 + }, + { + "epoch": 4.15, + "learning_rate": 2.125810759416227e-05, + "loss": 0.039, + "step": 13669 + }, + { + "epoch": 4.15, + "learning_rate": 2.124327368256542e-05, + "loss": 0.0437, + "step": 13670 + }, + { + "epoch": 4.15, + "learning_rate": 2.1228444553903334e-05, + "loss": 0.0098, + "step": 13671 + }, + { + "epoch": 4.15, + "learning_rate": 2.121362020872694e-05, + "loss": 0.0217, + "step": 13672 + }, + { + "epoch": 4.15, + "learning_rate": 2.1198800647586894e-05, + "loss": 0.0265, + "step": 13673 + }, + { + "epoch": 4.15, + "learning_rate": 2.1183985871033675e-05, + "loss": 0.0292, + "step": 13674 + }, + { + "epoch": 4.15, + "learning_rate": 2.116917587961766e-05, + "loss": 0.0225, + "step": 13675 + }, + { + "epoch": 4.15, + "learning_rate": 2.1154370673888926e-05, + "loss": 0.0387, + "step": 13676 + }, + { + "epoch": 4.15, + "learning_rate": 2.113957025439753e-05, + "loss": 0.0269, + "step": 13677 + }, + { + "epoch": 4.15, + "learning_rate": 2.1124774621693168e-05, + "loss": 0.0335, + "step": 13678 + }, + { + "epoch": 4.15, + "learning_rate": 2.1109983776325584e-05, + "loss": 0.0119, + "step": 13679 + }, + { + "epoch": 4.15, + "learning_rate": 2.109519771884407e-05, + "loss": 0.0252, + "step": 13680 + }, + { + "epoch": 4.15, + "learning_rate": 2.1080416449797954e-05, + "loss": 0.0197, + "step": 13681 + }, + { + "epoch": 4.15, + "learning_rate": 2.1065639969736346e-05, + "loss": 0.0157, + "step": 13682 + }, + { + "epoch": 4.15, + "learning_rate": 2.1050868279208078e-05, + "loss": 0.0212, + "step": 13683 + }, + { + "epoch": 4.15, + "learning_rate": 2.103610137876199e-05, + "loss": 0.0485, + "step": 13684 + }, + { + "epoch": 4.16, + "learning_rate": 2.1021339268946474e-05, + "loss": 0.0129, + "step": 13685 + }, + { + "epoch": 4.16, + "learning_rate": 2.1006581950310015e-05, + "loss": 0.0352, + "step": 13686 + }, + { + "epoch": 4.16, + "learning_rate": 2.099182942340072e-05, + "loss": 0.0264, + "step": 13687 + }, + { + "epoch": 4.16, + "learning_rate": 2.0977081688766702e-05, + "loss": 0.0207, + "step": 13688 + }, + { + "epoch": 4.16, + "learning_rate": 2.096233874695574e-05, + "loss": 0.0115, + "step": 13689 + }, + { + "epoch": 4.16, + "learning_rate": 2.094760059851544e-05, + "loss": 0.0155, + "step": 13690 + }, + { + "epoch": 4.16, + "learning_rate": 2.093286724399339e-05, + "loss": 0.0485, + "step": 13691 + }, + { + "epoch": 4.16, + "learning_rate": 2.0918138683936797e-05, + "loss": 0.0326, + "step": 13692 + }, + { + "epoch": 4.16, + "learning_rate": 2.0903414918892843e-05, + "loss": 0.028, + "step": 13693 + }, + { + "epoch": 4.16, + "learning_rate": 2.0888695949408468e-05, + "loss": 0.0069, + "step": 13694 + }, + { + "epoch": 4.16, + "learning_rate": 2.0873981776030422e-05, + "loss": 0.0371, + "step": 13695 + }, + { + "epoch": 4.16, + "learning_rate": 2.0859272399305255e-05, + "loss": 0.0336, + "step": 13696 + }, + { + "epoch": 4.16, + "learning_rate": 2.084456781977944e-05, + "loss": 0.0597, + "step": 13697 + }, + { + "epoch": 4.16, + "learning_rate": 2.0829868037999176e-05, + "loss": 0.0452, + "step": 13698 + }, + { + "epoch": 4.16, + "learning_rate": 2.081517305451054e-05, + "loss": 0.0191, + "step": 13699 + }, + { + "epoch": 4.16, + "learning_rate": 2.0800482869859404e-05, + "loss": 0.0316, + "step": 13700 + }, + { + "epoch": 4.16, + "learning_rate": 2.078579748459142e-05, + "loss": 0.0386, + "step": 13701 + }, + { + "epoch": 4.16, + "learning_rate": 2.077111689925216e-05, + "loss": 0.0296, + "step": 13702 + }, + { + "epoch": 4.16, + "learning_rate": 2.0756441114386936e-05, + "loss": 0.0315, + "step": 13703 + }, + { + "epoch": 4.16, + "learning_rate": 2.074177013054094e-05, + "loss": 0.0272, + "step": 13704 + }, + { + "epoch": 4.16, + "learning_rate": 2.0727103948259143e-05, + "loss": 0.0488, + "step": 13705 + }, + { + "epoch": 4.16, + "learning_rate": 2.071244256808631e-05, + "loss": 0.0355, + "step": 13706 + }, + { + "epoch": 4.16, + "learning_rate": 2.0697785990567145e-05, + "loss": 0.0389, + "step": 13707 + }, + { + "epoch": 4.16, + "learning_rate": 2.0683134216246005e-05, + "loss": 0.0093, + "step": 13708 + }, + { + "epoch": 4.16, + "learning_rate": 2.0668487245667288e-05, + "loss": 0.0216, + "step": 13709 + }, + { + "epoch": 4.16, + "learning_rate": 2.0653845079374938e-05, + "loss": 0.017, + "step": 13710 + }, + { + "epoch": 4.16, + "learning_rate": 2.063920771791297e-05, + "loss": 0.0094, + "step": 13711 + }, + { + "epoch": 4.16, + "learning_rate": 2.0624575161825052e-05, + "loss": 0.035, + "step": 13712 + }, + { + "epoch": 4.16, + "learning_rate": 2.0609947411654813e-05, + "loss": 0.032, + "step": 13713 + }, + { + "epoch": 4.16, + "learning_rate": 2.0595324467945606e-05, + "loss": 0.0185, + "step": 13714 + }, + { + "epoch": 4.16, + "learning_rate": 2.0580706331240576e-05, + "loss": 0.0547, + "step": 13715 + }, + { + "epoch": 4.16, + "learning_rate": 2.0566093002082824e-05, + "loss": 0.0383, + "step": 13716 + }, + { + "epoch": 4.16, + "learning_rate": 2.0551484481015114e-05, + "loss": 0.038, + "step": 13717 + }, + { + "epoch": 4.17, + "learning_rate": 2.0536880768580195e-05, + "loss": 0.0572, + "step": 13718 + }, + { + "epoch": 4.17, + "learning_rate": 2.05222818653205e-05, + "loss": 0.0264, + "step": 13719 + }, + { + "epoch": 4.17, + "learning_rate": 2.0507687771778347e-05, + "loss": 0.0366, + "step": 13720 + }, + { + "epoch": 4.17, + "learning_rate": 2.0493098488495817e-05, + "loss": 0.049, + "step": 13721 + }, + { + "epoch": 4.17, + "learning_rate": 2.0478514016014912e-05, + "loss": 0.0669, + "step": 13722 + }, + { + "epoch": 4.17, + "learning_rate": 2.0463934354877415e-05, + "loss": 0.0254, + "step": 13723 + }, + { + "epoch": 4.17, + "learning_rate": 2.0449359505624906e-05, + "loss": 0.029, + "step": 13724 + }, + { + "epoch": 4.17, + "learning_rate": 2.043478946879879e-05, + "loss": 0.0099, + "step": 13725 + }, + { + "epoch": 4.17, + "learning_rate": 2.0420224244940265e-05, + "loss": 0.0249, + "step": 13726 + }, + { + "epoch": 4.17, + "learning_rate": 2.0405663834590446e-05, + "loss": 0.0289, + "step": 13727 + }, + { + "epoch": 4.17, + "learning_rate": 2.0391108238290155e-05, + "loss": 0.0063, + "step": 13728 + }, + { + "epoch": 4.17, + "learning_rate": 2.037655745658014e-05, + "loss": 0.0061, + "step": 13729 + }, + { + "epoch": 4.17, + "learning_rate": 2.036201149000092e-05, + "loss": 0.0164, + "step": 13730 + }, + { + "epoch": 4.17, + "learning_rate": 2.034747033909276e-05, + "loss": 0.0219, + "step": 13731 + }, + { + "epoch": 4.17, + "learning_rate": 2.033293400439591e-05, + "loss": 0.0477, + "step": 13732 + }, + { + "epoch": 4.17, + "learning_rate": 2.0318402486450297e-05, + "loss": 0.0392, + "step": 13733 + }, + { + "epoch": 4.17, + "learning_rate": 2.0303875785795798e-05, + "loss": 0.0301, + "step": 13734 + }, + { + "epoch": 4.17, + "learning_rate": 2.0289353902971915e-05, + "loss": 0.0108, + "step": 13735 + }, + { + "epoch": 4.17, + "learning_rate": 2.0274836838518206e-05, + "loss": 0.0366, + "step": 13736 + }, + { + "epoch": 4.17, + "learning_rate": 2.0260324592973847e-05, + "loss": 0.0206, + "step": 13737 + }, + { + "epoch": 4.17, + "learning_rate": 2.0245817166877998e-05, + "loss": 0.0055, + "step": 13738 + }, + { + "epoch": 4.17, + "learning_rate": 2.0231314560769525e-05, + "loss": 0.0129, + "step": 13739 + }, + { + "epoch": 4.17, + "learning_rate": 2.0216816775187196e-05, + "loss": 0.0185, + "step": 13740 + }, + { + "epoch": 4.17, + "learning_rate": 2.0202323810669546e-05, + "loss": 0.0164, + "step": 13741 + }, + { + "epoch": 4.17, + "learning_rate": 2.01878356677549e-05, + "loss": 0.0192, + "step": 13742 + }, + { + "epoch": 4.17, + "learning_rate": 2.017335234698152e-05, + "loss": 0.0242, + "step": 13743 + }, + { + "epoch": 4.17, + "learning_rate": 2.0158873848887347e-05, + "loss": 0.0433, + "step": 13744 + }, + { + "epoch": 4.17, + "learning_rate": 2.01444001740103e-05, + "loss": 0.0331, + "step": 13745 + }, + { + "epoch": 4.17, + "learning_rate": 2.0129931322887984e-05, + "loss": 0.058, + "step": 13746 + }, + { + "epoch": 4.17, + "learning_rate": 2.011546729605783e-05, + "loss": 0.0132, + "step": 13747 + }, + { + "epoch": 4.17, + "learning_rate": 2.010100809405723e-05, + "loss": 0.0548, + "step": 13748 + }, + { + "epoch": 4.17, + "learning_rate": 2.008655371742322e-05, + "loss": 0.0352, + "step": 13749 + }, + { + "epoch": 4.17, + "learning_rate": 2.0072104166692836e-05, + "loss": 0.026, + "step": 13750 + }, + { + "epoch": 4.18, + "learning_rate": 2.0057659442402717e-05, + "loss": 0.0237, + "step": 13751 + }, + { + "epoch": 4.18, + "learning_rate": 2.0043219545089517e-05, + "loss": 0.0087, + "step": 13752 + }, + { + "epoch": 4.18, + "learning_rate": 2.0028784475289587e-05, + "loss": 0.0274, + "step": 13753 + }, + { + "epoch": 4.18, + "learning_rate": 2.0014354233539205e-05, + "loss": 0.0124, + "step": 13754 + }, + { + "epoch": 4.18, + "learning_rate": 1.9999928820374382e-05, + "loss": 0.0253, + "step": 13755 + }, + { + "epoch": 4.18, + "learning_rate": 1.9985508236330966e-05, + "loss": 0.0107, + "step": 13756 + }, + { + "epoch": 4.18, + "learning_rate": 1.997109248194467e-05, + "loss": 0.0208, + "step": 13757 + }, + { + "epoch": 4.18, + "learning_rate": 1.9956681557750958e-05, + "loss": 0.0183, + "step": 13758 + }, + { + "epoch": 4.18, + "learning_rate": 1.9942275464285195e-05, + "loss": 0.0372, + "step": 13759 + }, + { + "epoch": 4.18, + "learning_rate": 1.9927874202082524e-05, + "loss": 0.0431, + "step": 13760 + }, + { + "epoch": 4.18, + "learning_rate": 1.9913477771677876e-05, + "loss": 0.0192, + "step": 13761 + }, + { + "epoch": 4.18, + "learning_rate": 1.9899086173606032e-05, + "loss": 0.0144, + "step": 13762 + }, + { + "epoch": 4.18, + "learning_rate": 1.9884699408401628e-05, + "loss": 0.0292, + "step": 13763 + }, + { + "epoch": 4.18, + "learning_rate": 1.9870317476599102e-05, + "loss": 0.0152, + "step": 13764 + }, + { + "epoch": 4.18, + "learning_rate": 1.9855940378732676e-05, + "loss": 0.0833, + "step": 13765 + }, + { + "epoch": 4.18, + "learning_rate": 1.9841568115336427e-05, + "loss": 0.0106, + "step": 13766 + }, + { + "epoch": 4.18, + "learning_rate": 1.9827200686944207e-05, + "loss": 0.0261, + "step": 13767 + }, + { + "epoch": 4.18, + "learning_rate": 1.981283809408979e-05, + "loss": 0.0177, + "step": 13768 + }, + { + "epoch": 4.18, + "learning_rate": 1.9798480337306614e-05, + "loss": 0.0197, + "step": 13769 + }, + { + "epoch": 4.18, + "learning_rate": 1.9784127417128127e-05, + "loss": 0.019, + "step": 13770 + }, + { + "epoch": 4.18, + "learning_rate": 1.976977933408746e-05, + "loss": 0.0341, + "step": 13771 + }, + { + "epoch": 4.18, + "learning_rate": 1.9755436088717542e-05, + "loss": 0.0362, + "step": 13772 + }, + { + "epoch": 4.18, + "learning_rate": 1.9741097681551276e-05, + "loss": 0.034, + "step": 13773 + }, + { + "epoch": 4.18, + "learning_rate": 1.9726764113121223e-05, + "loss": 0.0152, + "step": 13774 + }, + { + "epoch": 4.18, + "learning_rate": 1.9712435383959923e-05, + "loss": 0.0285, + "step": 13775 + }, + { + "epoch": 4.18, + "learning_rate": 1.96981114945995e-05, + "loss": 0.053, + "step": 13776 + }, + { + "epoch": 4.18, + "learning_rate": 1.9683792445572176e-05, + "loss": 0.0577, + "step": 13777 + }, + { + "epoch": 4.18, + "learning_rate": 1.966947823740978e-05, + "loss": 0.0285, + "step": 13778 + }, + { + "epoch": 4.18, + "learning_rate": 1.9655168870644116e-05, + "loss": 0.0295, + "step": 13779 + }, + { + "epoch": 4.18, + "learning_rate": 1.9640864345806695e-05, + "loss": 0.0181, + "step": 13780 + }, + { + "epoch": 4.18, + "learning_rate": 1.962656466342884e-05, + "loss": 0.0291, + "step": 13781 + }, + { + "epoch": 4.18, + "learning_rate": 1.9612269824041844e-05, + "loss": 0.008, + "step": 13782 + }, + { + "epoch": 4.18, + "learning_rate": 1.9597979828176612e-05, + "loss": 0.0324, + "step": 13783 + }, + { + "epoch": 4.19, + "learning_rate": 1.9583694676364076e-05, + "loss": 0.0248, + "step": 13784 + }, + { + "epoch": 4.19, + "learning_rate": 1.956941436913484e-05, + "loss": 0.0121, + "step": 13785 + }, + { + "epoch": 4.19, + "learning_rate": 1.955513890701933e-05, + "loss": 0.0328, + "step": 13786 + }, + { + "epoch": 4.19, + "learning_rate": 1.9540868290547924e-05, + "loss": 0.0154, + "step": 13787 + }, + { + "epoch": 4.19, + "learning_rate": 1.952660252025066e-05, + "loss": 0.0219, + "step": 13788 + }, + { + "epoch": 4.19, + "learning_rate": 1.9512341596657538e-05, + "loss": 0.0322, + "step": 13789 + }, + { + "epoch": 4.19, + "learning_rate": 1.9498085520298263e-05, + "loss": 0.0148, + "step": 13790 + }, + { + "epoch": 4.19, + "learning_rate": 1.9483834291702427e-05, + "loss": 0.0296, + "step": 13791 + }, + { + "epoch": 4.19, + "learning_rate": 1.9469587911399376e-05, + "loss": 0.0335, + "step": 13792 + }, + { + "epoch": 4.19, + "learning_rate": 1.9455346379918386e-05, + "loss": 0.0301, + "step": 13793 + }, + { + "epoch": 4.19, + "learning_rate": 1.9441109697788433e-05, + "loss": 0.0258, + "step": 13794 + }, + { + "epoch": 4.19, + "learning_rate": 1.9426877865538427e-05, + "loss": 0.038, + "step": 13795 + }, + { + "epoch": 4.19, + "learning_rate": 1.941265088369701e-05, + "loss": 0.0415, + "step": 13796 + }, + { + "epoch": 4.19, + "learning_rate": 1.9398428752792618e-05, + "loss": 0.0367, + "step": 13797 + }, + { + "epoch": 4.19, + "learning_rate": 1.9384211473353666e-05, + "loss": 0.0147, + "step": 13798 + }, + { + "epoch": 4.19, + "learning_rate": 1.9369999045908176e-05, + "loss": 0.0243, + "step": 13799 + }, + { + "epoch": 4.19, + "learning_rate": 1.9355791470984235e-05, + "loss": 0.0228, + "step": 13800 + }, + { + "epoch": 4.19, + "learning_rate": 1.9341588749109455e-05, + "loss": 0.0232, + "step": 13801 + }, + { + "epoch": 4.19, + "learning_rate": 1.9327390880811534e-05, + "loss": 0.0532, + "step": 13802 + }, + { + "epoch": 4.19, + "learning_rate": 1.9313197866617815e-05, + "loss": 0.0184, + "step": 13803 + }, + { + "epoch": 4.19, + "learning_rate": 1.9299009707055573e-05, + "loss": 0.0157, + "step": 13804 + }, + { + "epoch": 4.19, + "learning_rate": 1.928482640265186e-05, + "loss": 0.0269, + "step": 13805 + }, + { + "epoch": 4.19, + "learning_rate": 1.9270647953933533e-05, + "loss": 0.0214, + "step": 13806 + }, + { + "epoch": 4.19, + "learning_rate": 1.925647436142727e-05, + "loss": 0.0361, + "step": 13807 + }, + { + "epoch": 4.19, + "learning_rate": 1.9242305625659556e-05, + "loss": 0.0512, + "step": 13808 + }, + { + "epoch": 4.19, + "learning_rate": 1.922814174715676e-05, + "loss": 0.0243, + "step": 13809 + }, + { + "epoch": 4.19, + "learning_rate": 1.9213982726445004e-05, + "loss": 0.0201, + "step": 13810 + }, + { + "epoch": 4.19, + "learning_rate": 1.919982856405028e-05, + "loss": 0.0173, + "step": 13811 + }, + { + "epoch": 4.19, + "learning_rate": 1.918567926049835e-05, + "loss": 0.0305, + "step": 13812 + }, + { + "epoch": 4.19, + "learning_rate": 1.917153481631481e-05, + "loss": 0.0223, + "step": 13813 + }, + { + "epoch": 4.19, + "learning_rate": 1.9157395232025123e-05, + "loss": 0.0329, + "step": 13814 + }, + { + "epoch": 4.19, + "learning_rate": 1.914326050815449e-05, + "loss": 0.0244, + "step": 13815 + }, + { + "epoch": 4.19, + "learning_rate": 1.9129130645228054e-05, + "loss": 0.0019, + "step": 13816 + }, + { + "epoch": 4.2, + "learning_rate": 1.911500564377056e-05, + "loss": 0.0413, + "step": 13817 + }, + { + "epoch": 4.2, + "learning_rate": 1.910088550430684e-05, + "loss": 0.0216, + "step": 13818 + }, + { + "epoch": 4.2, + "learning_rate": 1.9086770227361325e-05, + "loss": 0.0233, + "step": 13819 + }, + { + "epoch": 4.2, + "learning_rate": 1.907265981345843e-05, + "loss": 0.0397, + "step": 13820 + }, + { + "epoch": 4.2, + "learning_rate": 1.9058554263122282e-05, + "loss": 0.007, + "step": 13821 + }, + { + "epoch": 4.2, + "learning_rate": 1.904445357687682e-05, + "loss": 0.0232, + "step": 13822 + }, + { + "epoch": 4.2, + "learning_rate": 1.903035775524593e-05, + "loss": 0.0415, + "step": 13823 + }, + { + "epoch": 4.2, + "learning_rate": 1.9016266798753133e-05, + "loss": 0.0244, + "step": 13824 + }, + { + "epoch": 4.2, + "learning_rate": 1.900218070792196e-05, + "loss": 0.0292, + "step": 13825 + }, + { + "epoch": 4.2, + "learning_rate": 1.8988099483275627e-05, + "loss": 0.0486, + "step": 13826 + }, + { + "epoch": 4.2, + "learning_rate": 1.8974023125337177e-05, + "loss": 0.0449, + "step": 13827 + }, + { + "epoch": 4.2, + "learning_rate": 1.8959951634629584e-05, + "loss": 0.0235, + "step": 13828 + }, + { + "epoch": 4.2, + "learning_rate": 1.8945885011675455e-05, + "loss": 0.0348, + "step": 13829 + }, + { + "epoch": 4.2, + "learning_rate": 1.893182325699744e-05, + "loss": 0.0318, + "step": 13830 + }, + { + "epoch": 4.2, + "learning_rate": 1.8917766371117827e-05, + "loss": 0.0259, + "step": 13831 + }, + { + "epoch": 4.2, + "learning_rate": 1.8903714354558808e-05, + "loss": 0.0381, + "step": 13832 + }, + { + "epoch": 4.2, + "learning_rate": 1.8889667207842318e-05, + "loss": 0.01, + "step": 13833 + }, + { + "epoch": 4.2, + "learning_rate": 1.887562493149024e-05, + "loss": 0.0231, + "step": 13834 + }, + { + "epoch": 4.2, + "learning_rate": 1.8861587526024157e-05, + "loss": 0.0139, + "step": 13835 + }, + { + "epoch": 4.2, + "learning_rate": 1.884755499196557e-05, + "loss": 0.017, + "step": 13836 + }, + { + "epoch": 4.2, + "learning_rate": 1.8833527329835706e-05, + "loss": 0.0338, + "step": 13837 + }, + { + "epoch": 4.2, + "learning_rate": 1.8819504540155637e-05, + "loss": 0.0111, + "step": 13838 + }, + { + "epoch": 4.2, + "learning_rate": 1.8805486623446315e-05, + "loss": 0.0146, + "step": 13839 + }, + { + "epoch": 4.2, + "learning_rate": 1.8791473580228415e-05, + "loss": 0.0317, + "step": 13840 + }, + { + "epoch": 4.2, + "learning_rate": 1.8777465411022563e-05, + "loss": 0.0456, + "step": 13841 + }, + { + "epoch": 4.2, + "learning_rate": 1.8763462116349005e-05, + "loss": 0.0187, + "step": 13842 + }, + { + "epoch": 4.2, + "learning_rate": 1.8749463696728024e-05, + "loss": 0.0304, + "step": 13843 + }, + { + "epoch": 4.2, + "learning_rate": 1.873547015267952e-05, + "loss": 0.0265, + "step": 13844 + }, + { + "epoch": 4.2, + "learning_rate": 1.8721481484723384e-05, + "loss": 0.0196, + "step": 13845 + }, + { + "epoch": 4.2, + "learning_rate": 1.870749769337931e-05, + "loss": 0.0219, + "step": 13846 + }, + { + "epoch": 4.2, + "learning_rate": 1.8693518779166617e-05, + "loss": 0.0308, + "step": 13847 + }, + { + "epoch": 4.2, + "learning_rate": 1.8679544742604674e-05, + "loss": 0.0131, + "step": 13848 + }, + { + "epoch": 4.2, + "learning_rate": 1.8665575584212516e-05, + "loss": 0.0313, + "step": 13849 + }, + { + "epoch": 4.21, + "learning_rate": 1.865161130450913e-05, + "loss": 0.0102, + "step": 13850 + }, + { + "epoch": 4.21, + "learning_rate": 1.8637651904013195e-05, + "loss": 0.005, + "step": 13851 + }, + { + "epoch": 4.21, + "learning_rate": 1.8623697383243253e-05, + "loss": 0.0422, + "step": 13852 + }, + { + "epoch": 4.21, + "learning_rate": 1.8609747742717734e-05, + "loss": 0.0368, + "step": 13853 + }, + { + "epoch": 4.21, + "learning_rate": 1.8595802982954743e-05, + "loss": 0.0172, + "step": 13854 + }, + { + "epoch": 4.21, + "learning_rate": 1.8581863104472384e-05, + "loss": 0.0365, + "step": 13855 + }, + { + "epoch": 4.21, + "learning_rate": 1.8567928107788404e-05, + "loss": 0.0302, + "step": 13856 + }, + { + "epoch": 4.21, + "learning_rate": 1.8553997993420495e-05, + "loss": 0.0289, + "step": 13857 + }, + { + "epoch": 4.21, + "learning_rate": 1.8540072761886054e-05, + "loss": 0.0327, + "step": 13858 + }, + { + "epoch": 4.21, + "learning_rate": 1.852615241370244e-05, + "loss": 0.0456, + "step": 13859 + }, + { + "epoch": 4.21, + "learning_rate": 1.8512236949386685e-05, + "loss": 0.034, + "step": 13860 + }, + { + "epoch": 4.21, + "learning_rate": 1.8498326369455775e-05, + "loss": 0.0369, + "step": 13861 + }, + { + "epoch": 4.21, + "learning_rate": 1.8484420674426413e-05, + "loss": 0.0383, + "step": 13862 + }, + { + "epoch": 4.21, + "learning_rate": 1.847051986481514e-05, + "loss": 0.0309, + "step": 13863 + }, + { + "epoch": 4.21, + "learning_rate": 1.845662394113836e-05, + "loss": 0.0219, + "step": 13864 + }, + { + "epoch": 4.21, + "learning_rate": 1.844273290391224e-05, + "loss": 0.0327, + "step": 13865 + }, + { + "epoch": 4.21, + "learning_rate": 1.842884675365283e-05, + "loss": 0.0071, + "step": 13866 + }, + { + "epoch": 4.21, + "learning_rate": 1.841496549087593e-05, + "loss": 0.0135, + "step": 13867 + }, + { + "epoch": 4.21, + "learning_rate": 1.840108911609716e-05, + "loss": 0.0188, + "step": 13868 + }, + { + "epoch": 4.21, + "learning_rate": 1.838721762983204e-05, + "loss": 0.004, + "step": 13869 + }, + { + "epoch": 4.21, + "learning_rate": 1.837335103259582e-05, + "loss": 0.0245, + "step": 13870 + }, + { + "epoch": 4.21, + "learning_rate": 1.835948932490363e-05, + "loss": 0.0099, + "step": 13871 + }, + { + "epoch": 4.21, + "learning_rate": 1.8345632507270404e-05, + "loss": 0.0145, + "step": 13872 + }, + { + "epoch": 4.21, + "learning_rate": 1.8331780580210835e-05, + "loss": 0.0357, + "step": 13873 + }, + { + "epoch": 4.21, + "learning_rate": 1.831793354423949e-05, + "loss": 0.0274, + "step": 13874 + }, + { + "epoch": 4.21, + "learning_rate": 1.8304091399870778e-05, + "loss": 0.0149, + "step": 13875 + }, + { + "epoch": 4.21, + "learning_rate": 1.829025414761885e-05, + "loss": 0.0352, + "step": 13876 + }, + { + "epoch": 4.21, + "learning_rate": 1.827642178799777e-05, + "loss": 0.0201, + "step": 13877 + }, + { + "epoch": 4.21, + "learning_rate": 1.8262594321521344e-05, + "loss": 0.0225, + "step": 13878 + }, + { + "epoch": 4.21, + "learning_rate": 1.82487717487032e-05, + "loss": 0.029, + "step": 13879 + }, + { + "epoch": 4.21, + "learning_rate": 1.8234954070056857e-05, + "loss": 0.0188, + "step": 13880 + }, + { + "epoch": 4.21, + "learning_rate": 1.8221141286095535e-05, + "loss": 0.0239, + "step": 13881 + }, + { + "epoch": 4.21, + "learning_rate": 1.8207333397332447e-05, + "loss": 0.0186, + "step": 13882 + }, + { + "epoch": 4.22, + "learning_rate": 1.8193530404280376e-05, + "loss": 0.0291, + "step": 13883 + }, + { + "epoch": 4.22, + "learning_rate": 1.8179732307452166e-05, + "loss": 0.0302, + "step": 13884 + }, + { + "epoch": 4.22, + "learning_rate": 1.8165939107360306e-05, + "loss": 0.0214, + "step": 13885 + }, + { + "epoch": 4.22, + "learning_rate": 1.81521508045172e-05, + "loss": 0.0153, + "step": 13886 + }, + { + "epoch": 4.22, + "learning_rate": 1.813836739943513e-05, + "loss": 0.016, + "step": 13887 + }, + { + "epoch": 4.22, + "learning_rate": 1.812458889262596e-05, + "loss": 0.0283, + "step": 13888 + }, + { + "epoch": 4.22, + "learning_rate": 1.811081528460163e-05, + "loss": 0.0259, + "step": 13889 + }, + { + "epoch": 4.22, + "learning_rate": 1.809704657587371e-05, + "loss": 0.0426, + "step": 13890 + }, + { + "epoch": 4.22, + "learning_rate": 1.8083282766953745e-05, + "loss": 0.0152, + "step": 13891 + }, + { + "epoch": 4.22, + "learning_rate": 1.806952385835298e-05, + "loss": 0.0028, + "step": 13892 + }, + { + "epoch": 4.22, + "learning_rate": 1.8055769850582475e-05, + "loss": 0.0436, + "step": 13893 + }, + { + "epoch": 4.22, + "learning_rate": 1.8042020744153253e-05, + "loss": 0.0245, + "step": 13894 + }, + { + "epoch": 4.22, + "learning_rate": 1.8028276539575955e-05, + "loss": 0.0293, + "step": 13895 + }, + { + "epoch": 4.22, + "learning_rate": 1.8014537237361203e-05, + "loss": 0.025, + "step": 13896 + }, + { + "epoch": 4.22, + "learning_rate": 1.8000802838019362e-05, + "loss": 0.0313, + "step": 13897 + }, + { + "epoch": 4.22, + "learning_rate": 1.7987073342060604e-05, + "loss": 0.0132, + "step": 13898 + }, + { + "epoch": 4.22, + "learning_rate": 1.7973348749994902e-05, + "loss": 0.0467, + "step": 13899 + }, + { + "epoch": 4.22, + "learning_rate": 1.7959629062332188e-05, + "loss": 0.0411, + "step": 13900 + }, + { + "epoch": 4.22, + "learning_rate": 1.7945914279581997e-05, + "loss": 0.0308, + "step": 13901 + }, + { + "epoch": 4.22, + "learning_rate": 1.7932204402253892e-05, + "loss": 0.0338, + "step": 13902 + }, + { + "epoch": 4.22, + "learning_rate": 1.7918499430857097e-05, + "loss": 0.0222, + "step": 13903 + }, + { + "epoch": 4.22, + "learning_rate": 1.7904799365900692e-05, + "loss": 0.0227, + "step": 13904 + }, + { + "epoch": 4.22, + "learning_rate": 1.789110420789365e-05, + "loss": 0.0292, + "step": 13905 + }, + { + "epoch": 4.22, + "learning_rate": 1.7877413957344662e-05, + "loss": 0.005, + "step": 13906 + }, + { + "epoch": 4.22, + "learning_rate": 1.7863728614762357e-05, + "loss": 0.0216, + "step": 13907 + }, + { + "epoch": 4.22, + "learning_rate": 1.785004818065498e-05, + "loss": 0.0201, + "step": 13908 + }, + { + "epoch": 4.22, + "learning_rate": 1.783637265553079e-05, + "loss": 0.0169, + "step": 13909 + }, + { + "epoch": 4.22, + "learning_rate": 1.782270203989783e-05, + "loss": 0.0076, + "step": 13910 + }, + { + "epoch": 4.22, + "learning_rate": 1.7809036334263843e-05, + "loss": 0.0509, + "step": 13911 + }, + { + "epoch": 4.22, + "learning_rate": 1.779537553913656e-05, + "loss": 0.0273, + "step": 13912 + }, + { + "epoch": 4.22, + "learning_rate": 1.7781719655023336e-05, + "loss": 0.0289, + "step": 13913 + }, + { + "epoch": 4.22, + "learning_rate": 1.7768068682431534e-05, + "loss": 0.0155, + "step": 13914 + }, + { + "epoch": 4.22, + "learning_rate": 1.7754422621868185e-05, + "loss": 0.0306, + "step": 13915 + }, + { + "epoch": 4.23, + "learning_rate": 1.774078147384025e-05, + "loss": 0.0291, + "step": 13916 + }, + { + "epoch": 4.23, + "learning_rate": 1.772714523885445e-05, + "loss": 0.0279, + "step": 13917 + }, + { + "epoch": 4.23, + "learning_rate": 1.771351391741727e-05, + "loss": 0.0441, + "step": 13918 + }, + { + "epoch": 4.23, + "learning_rate": 1.769988751003517e-05, + "loss": 0.0467, + "step": 13919 + }, + { + "epoch": 4.23, + "learning_rate": 1.7686266017214245e-05, + "loss": 0.0183, + "step": 13920 + }, + { + "epoch": 4.23, + "learning_rate": 1.7672649439460556e-05, + "loss": 0.0212, + "step": 13921 + }, + { + "epoch": 4.23, + "learning_rate": 1.76590377772799e-05, + "loss": 0.0353, + "step": 13922 + }, + { + "epoch": 4.23, + "learning_rate": 1.7645431031177887e-05, + "loss": 0.0336, + "step": 13923 + }, + { + "epoch": 4.23, + "learning_rate": 1.763182920165998e-05, + "loss": 0.0141, + "step": 13924 + }, + { + "epoch": 4.23, + "learning_rate": 1.761823228923146e-05, + "loss": 0.0145, + "step": 13925 + }, + { + "epoch": 4.23, + "learning_rate": 1.7604640294397387e-05, + "loss": 0.0088, + "step": 13926 + }, + { + "epoch": 4.23, + "learning_rate": 1.759105321766271e-05, + "loss": 0.0241, + "step": 13927 + }, + { + "epoch": 4.23, + "learning_rate": 1.757747105953212e-05, + "loss": 0.0068, + "step": 13928 + }, + { + "epoch": 4.23, + "learning_rate": 1.7563893820510124e-05, + "loss": 0.0125, + "step": 13929 + }, + { + "epoch": 4.23, + "learning_rate": 1.755032150110112e-05, + "loss": 0.0184, + "step": 13930 + }, + { + "epoch": 4.23, + "learning_rate": 1.7536754101809253e-05, + "loss": 0.0212, + "step": 13931 + }, + { + "epoch": 4.23, + "learning_rate": 1.7523191623138562e-05, + "loss": 0.0271, + "step": 13932 + }, + { + "epoch": 4.23, + "learning_rate": 1.750963406559281e-05, + "loss": 0.0257, + "step": 13933 + }, + { + "epoch": 4.23, + "learning_rate": 1.7496081429675597e-05, + "loss": 0.0184, + "step": 13934 + }, + { + "epoch": 4.23, + "learning_rate": 1.748253371589042e-05, + "loss": 0.0231, + "step": 13935 + }, + { + "epoch": 4.23, + "learning_rate": 1.746899092474049e-05, + "loss": 0.0087, + "step": 13936 + }, + { + "epoch": 4.23, + "learning_rate": 1.7455453056728924e-05, + "loss": 0.0049, + "step": 13937 + }, + { + "epoch": 4.23, + "learning_rate": 1.7441920112358603e-05, + "loss": 0.0215, + "step": 13938 + }, + { + "epoch": 4.23, + "learning_rate": 1.742839209213221e-05, + "loss": 0.0273, + "step": 13939 + }, + { + "epoch": 4.23, + "learning_rate": 1.741486899655227e-05, + "loss": 0.019, + "step": 13940 + }, + { + "epoch": 4.23, + "learning_rate": 1.7401350826121173e-05, + "loss": 0.045, + "step": 13941 + }, + { + "epoch": 4.23, + "learning_rate": 1.738783758134103e-05, + "loss": 0.0205, + "step": 13942 + }, + { + "epoch": 4.23, + "learning_rate": 1.7374329262713855e-05, + "loss": 0.0158, + "step": 13943 + }, + { + "epoch": 4.23, + "learning_rate": 1.736082587074143e-05, + "loss": 0.0399, + "step": 13944 + }, + { + "epoch": 4.23, + "learning_rate": 1.7347327405925337e-05, + "loss": 0.0137, + "step": 13945 + }, + { + "epoch": 4.23, + "learning_rate": 1.733383386876706e-05, + "loss": 0.0308, + "step": 13946 + }, + { + "epoch": 4.23, + "learning_rate": 1.732034525976778e-05, + "loss": 0.0203, + "step": 13947 + }, + { + "epoch": 4.24, + "learning_rate": 1.7306861579428648e-05, + "loss": 0.0272, + "step": 13948 + }, + { + "epoch": 4.24, + "learning_rate": 1.7293382828250446e-05, + "loss": 0.0241, + "step": 13949 + }, + { + "epoch": 4.24, + "learning_rate": 1.727990900673389e-05, + "loss": 0.0579, + "step": 13950 + }, + { + "epoch": 4.24, + "learning_rate": 1.726644011537956e-05, + "loss": 0.0134, + "step": 13951 + }, + { + "epoch": 4.24, + "learning_rate": 1.7252976154687708e-05, + "loss": 0.0344, + "step": 13952 + }, + { + "epoch": 4.24, + "learning_rate": 1.7239517125158587e-05, + "loss": 0.0303, + "step": 13953 + }, + { + "epoch": 4.24, + "learning_rate": 1.7226063027292008e-05, + "loss": 0.0328, + "step": 13954 + }, + { + "epoch": 4.24, + "learning_rate": 1.7212613861587878e-05, + "loss": 0.0202, + "step": 13955 + }, + { + "epoch": 4.24, + "learning_rate": 1.7199169628545706e-05, + "loss": 0.0314, + "step": 13956 + }, + { + "epoch": 4.24, + "learning_rate": 1.718573032866498e-05, + "loss": 0.0531, + "step": 13957 + }, + { + "epoch": 4.24, + "learning_rate": 1.7172295962444904e-05, + "loss": 0.0359, + "step": 13958 + }, + { + "epoch": 4.24, + "learning_rate": 1.715886653038449e-05, + "loss": 0.0115, + "step": 13959 + }, + { + "epoch": 4.24, + "learning_rate": 1.714544203298266e-05, + "loss": 0.0259, + "step": 13960 + }, + { + "epoch": 4.24, + "learning_rate": 1.7132022470738027e-05, + "loss": 0.0669, + "step": 13961 + }, + { + "epoch": 4.24, + "learning_rate": 1.7118607844149164e-05, + "loss": 0.0154, + "step": 13962 + }, + { + "epoch": 4.24, + "learning_rate": 1.7105198153714333e-05, + "loss": 0.0234, + "step": 13963 + }, + { + "epoch": 4.24, + "learning_rate": 1.709179339993169e-05, + "loss": 0.0331, + "step": 13964 + }, + { + "epoch": 4.24, + "learning_rate": 1.7078393583299156e-05, + "loss": 0.0119, + "step": 13965 + }, + { + "epoch": 4.24, + "learning_rate": 1.7064998704314524e-05, + "loss": 0.0221, + "step": 13966 + }, + { + "epoch": 4.24, + "learning_rate": 1.705160876347534e-05, + "loss": 0.0391, + "step": 13967 + }, + { + "epoch": 4.24, + "learning_rate": 1.7038223761279046e-05, + "loss": 0.0185, + "step": 13968 + }, + { + "epoch": 4.24, + "learning_rate": 1.7024843698222836e-05, + "loss": 0.0429, + "step": 13969 + }, + { + "epoch": 4.24, + "learning_rate": 1.7011468574803704e-05, + "loss": 0.0172, + "step": 13970 + }, + { + "epoch": 4.24, + "learning_rate": 1.699809839151856e-05, + "loss": 0.0157, + "step": 13971 + }, + { + "epoch": 4.24, + "learning_rate": 1.6984733148864005e-05, + "loss": 0.0089, + "step": 13972 + }, + { + "epoch": 4.24, + "learning_rate": 1.697137284733659e-05, + "loss": 0.0246, + "step": 13973 + }, + { + "epoch": 4.24, + "learning_rate": 1.6958017487432552e-05, + "loss": 0.0199, + "step": 13974 + }, + { + "epoch": 4.24, + "learning_rate": 1.694466706964801e-05, + "loss": 0.0179, + "step": 13975 + }, + { + "epoch": 4.24, + "learning_rate": 1.693132159447892e-05, + "loss": 0.0188, + "step": 13976 + }, + { + "epoch": 4.24, + "learning_rate": 1.6917981062420983e-05, + "loss": 0.0288, + "step": 13977 + }, + { + "epoch": 4.24, + "learning_rate": 1.6904645473969853e-05, + "loss": 0.0703, + "step": 13978 + }, + { + "epoch": 4.24, + "learning_rate": 1.68913148296208e-05, + "loss": 0.0234, + "step": 13979 + }, + { + "epoch": 4.24, + "learning_rate": 1.687798912986908e-05, + "loss": 0.0238, + "step": 13980 + }, + { + "epoch": 4.25, + "learning_rate": 1.6864668375209643e-05, + "loss": 0.0326, + "step": 13981 + }, + { + "epoch": 4.25, + "learning_rate": 1.6851352566137416e-05, + "loss": 0.0148, + "step": 13982 + }, + { + "epoch": 4.25, + "learning_rate": 1.6838041703146965e-05, + "loss": 0.0405, + "step": 13983 + }, + { + "epoch": 4.25, + "learning_rate": 1.6824735786732747e-05, + "loss": 0.0319, + "step": 13984 + }, + { + "epoch": 4.25, + "learning_rate": 1.6811434817389097e-05, + "loss": 0.033, + "step": 13985 + }, + { + "epoch": 4.25, + "learning_rate": 1.679813879561004e-05, + "loss": 0.0304, + "step": 13986 + }, + { + "epoch": 4.25, + "learning_rate": 1.6784847721889543e-05, + "loss": 0.0296, + "step": 13987 + }, + { + "epoch": 4.25, + "learning_rate": 1.67715615967213e-05, + "loss": 0.0504, + "step": 13988 + }, + { + "epoch": 4.25, + "learning_rate": 1.6758280420598862e-05, + "loss": 0.034, + "step": 13989 + }, + { + "epoch": 4.25, + "learning_rate": 1.6745004194015537e-05, + "loss": 0.0275, + "step": 13990 + }, + { + "epoch": 4.25, + "learning_rate": 1.6731732917464545e-05, + "loss": 0.0146, + "step": 13991 + }, + { + "epoch": 4.25, + "learning_rate": 1.6718466591438895e-05, + "loss": 0.0218, + "step": 13992 + }, + { + "epoch": 4.25, + "learning_rate": 1.6705205216431356e-05, + "loss": 0.0583, + "step": 13993 + }, + { + "epoch": 4.25, + "learning_rate": 1.66919487929346e-05, + "loss": 0.0164, + "step": 13994 + }, + { + "epoch": 4.25, + "learning_rate": 1.667869732144097e-05, + "loss": 0.017, + "step": 13995 + }, + { + "epoch": 4.25, + "learning_rate": 1.6665450802442803e-05, + "loss": 0.0486, + "step": 13996 + }, + { + "epoch": 4.25, + "learning_rate": 1.6652209236432108e-05, + "loss": 0.0155, + "step": 13997 + }, + { + "epoch": 4.25, + "learning_rate": 1.663897262390082e-05, + "loss": 0.0254, + "step": 13998 + }, + { + "epoch": 4.25, + "learning_rate": 1.6625740965340633e-05, + "loss": 0.0235, + "step": 13999 + }, + { + "epoch": 4.25, + "learning_rate": 1.6612514261243022e-05, + "loss": 0.022, + "step": 14000 + }, + { + "epoch": 4.25, + "learning_rate": 1.6599292512099373e-05, + "loss": 0.0426, + "step": 14001 + }, + { + "epoch": 4.25, + "learning_rate": 1.6586075718400795e-05, + "loss": 0.0415, + "step": 14002 + }, + { + "epoch": 4.25, + "learning_rate": 1.6572863880638292e-05, + "loss": 0.0304, + "step": 14003 + }, + { + "epoch": 4.25, + "learning_rate": 1.655965699930263e-05, + "loss": 0.0104, + "step": 14004 + }, + { + "epoch": 4.25, + "learning_rate": 1.654645507488439e-05, + "loss": 0.0224, + "step": 14005 + }, + { + "epoch": 4.25, + "learning_rate": 1.653325810787397e-05, + "loss": 0.0113, + "step": 14006 + }, + { + "epoch": 4.25, + "learning_rate": 1.6520066098761658e-05, + "loss": 0.0287, + "step": 14007 + }, + { + "epoch": 4.25, + "learning_rate": 1.650687904803743e-05, + "loss": 0.0117, + "step": 14008 + }, + { + "epoch": 4.25, + "learning_rate": 1.649369695619121e-05, + "loss": 0.0321, + "step": 14009 + }, + { + "epoch": 4.25, + "learning_rate": 1.648051982371264e-05, + "loss": 0.0302, + "step": 14010 + }, + { + "epoch": 4.25, + "learning_rate": 1.6467347651091197e-05, + "loss": 0.0099, + "step": 14011 + }, + { + "epoch": 4.25, + "learning_rate": 1.6454180438816234e-05, + "loss": 0.0316, + "step": 14012 + }, + { + "epoch": 4.25, + "learning_rate": 1.6441018187376813e-05, + "loss": 0.0293, + "step": 14013 + }, + { + "epoch": 4.26, + "learning_rate": 1.642786089726193e-05, + "loss": 0.0193, + "step": 14014 + }, + { + "epoch": 4.26, + "learning_rate": 1.641470856896032e-05, + "loss": 0.0442, + "step": 14015 + }, + { + "epoch": 4.26, + "learning_rate": 1.6401561202960535e-05, + "loss": 0.0381, + "step": 14016 + }, + { + "epoch": 4.26, + "learning_rate": 1.638841879975101e-05, + "loss": 0.0217, + "step": 14017 + }, + { + "epoch": 4.26, + "learning_rate": 1.637528135981987e-05, + "loss": 0.0402, + "step": 14018 + }, + { + "epoch": 4.26, + "learning_rate": 1.6362148883655228e-05, + "loss": 0.0334, + "step": 14019 + }, + { + "epoch": 4.26, + "learning_rate": 1.634902137174483e-05, + "loss": 0.0136, + "step": 14020 + }, + { + "epoch": 4.26, + "learning_rate": 1.6335898824576364e-05, + "loss": 0.0488, + "step": 14021 + }, + { + "epoch": 4.26, + "learning_rate": 1.6322781242637277e-05, + "loss": 0.02, + "step": 14022 + }, + { + "epoch": 4.26, + "learning_rate": 1.6309668626414895e-05, + "loss": 0.0056, + "step": 14023 + }, + { + "epoch": 4.26, + "learning_rate": 1.6296560976396277e-05, + "loss": 0.0465, + "step": 14024 + }, + { + "epoch": 4.26, + "learning_rate": 1.6283458293068303e-05, + "loss": 0.0107, + "step": 14025 + }, + { + "epoch": 4.26, + "learning_rate": 1.6270360576917762e-05, + "loss": 0.013, + "step": 14026 + }, + { + "epoch": 4.26, + "learning_rate": 1.6257267828431135e-05, + "loss": 0.0359, + "step": 14027 + }, + { + "epoch": 4.26, + "learning_rate": 1.6244180048094834e-05, + "loss": 0.0224, + "step": 14028 + }, + { + "epoch": 4.26, + "learning_rate": 1.623109723639502e-05, + "loss": 0.0241, + "step": 14029 + }, + { + "epoch": 4.26, + "learning_rate": 1.6218019393817667e-05, + "loss": 0.0372, + "step": 14030 + }, + { + "epoch": 4.26, + "learning_rate": 1.6204946520848556e-05, + "loss": 0.0249, + "step": 14031 + }, + { + "epoch": 4.26, + "learning_rate": 1.619187861797333e-05, + "loss": 0.0099, + "step": 14032 + }, + { + "epoch": 4.26, + "learning_rate": 1.617881568567744e-05, + "loss": 0.0536, + "step": 14033 + }, + { + "epoch": 4.26, + "learning_rate": 1.6165757724446137e-05, + "loss": 0.0334, + "step": 14034 + }, + { + "epoch": 4.26, + "learning_rate": 1.615270473476446e-05, + "loss": 0.0309, + "step": 14035 + }, + { + "epoch": 4.26, + "learning_rate": 1.6139656717117267e-05, + "loss": 0.0247, + "step": 14036 + }, + { + "epoch": 4.26, + "learning_rate": 1.6126613671989318e-05, + "loss": 0.0843, + "step": 14037 + }, + { + "epoch": 4.26, + "learning_rate": 1.6113575599865063e-05, + "loss": 0.0304, + "step": 14038 + }, + { + "epoch": 4.26, + "learning_rate": 1.610054250122888e-05, + "loss": 0.024, + "step": 14039 + }, + { + "epoch": 4.26, + "learning_rate": 1.60875143765649e-05, + "loss": 0.0151, + "step": 14040 + }, + { + "epoch": 4.26, + "learning_rate": 1.607449122635701e-05, + "loss": 0.0348, + "step": 14041 + }, + { + "epoch": 4.26, + "learning_rate": 1.6061473051089085e-05, + "loss": 0.0108, + "step": 14042 + }, + { + "epoch": 4.26, + "learning_rate": 1.6048459851244634e-05, + "loss": 0.0369, + "step": 14043 + }, + { + "epoch": 4.26, + "learning_rate": 1.603545162730715e-05, + "loss": 0.0401, + "step": 14044 + }, + { + "epoch": 4.26, + "learning_rate": 1.6022448379759733e-05, + "loss": 0.0286, + "step": 14045 + }, + { + "epoch": 4.26, + "learning_rate": 1.6009450109085497e-05, + "loss": 0.011, + "step": 14046 + }, + { + "epoch": 4.27, + "learning_rate": 1.5996456815767238e-05, + "loss": 0.0238, + "step": 14047 + }, + { + "epoch": 4.27, + "learning_rate": 1.598346850028767e-05, + "loss": 0.0061, + "step": 14048 + }, + { + "epoch": 4.27, + "learning_rate": 1.597048516312926e-05, + "loss": 0.0193, + "step": 14049 + }, + { + "epoch": 4.27, + "learning_rate": 1.595750680477425e-05, + "loss": 0.0337, + "step": 14050 + }, + { + "epoch": 4.27, + "learning_rate": 1.594453342570481e-05, + "loss": 0.0251, + "step": 14051 + }, + { + "epoch": 4.27, + "learning_rate": 1.5931565026402816e-05, + "loss": 0.023, + "step": 14052 + }, + { + "epoch": 4.27, + "learning_rate": 1.5918601607350067e-05, + "loss": 0.0499, + "step": 14053 + }, + { + "epoch": 4.27, + "learning_rate": 1.590564316902803e-05, + "loss": 0.024, + "step": 14054 + }, + { + "epoch": 4.27, + "learning_rate": 1.5892689711918156e-05, + "loss": 0.0263, + "step": 14055 + }, + { + "epoch": 4.27, + "learning_rate": 1.5879741236501603e-05, + "loss": 0.0237, + "step": 14056 + }, + { + "epoch": 4.27, + "learning_rate": 1.5866797743259323e-05, + "loss": 0.0214, + "step": 14057 + }, + { + "epoch": 4.27, + "learning_rate": 1.5853859232672184e-05, + "loss": 0.0226, + "step": 14058 + }, + { + "epoch": 4.27, + "learning_rate": 1.584092570522078e-05, + "loss": 0.0426, + "step": 14059 + }, + { + "epoch": 4.27, + "learning_rate": 1.5827997161385614e-05, + "loss": 0.0283, + "step": 14060 + }, + { + "epoch": 4.27, + "learning_rate": 1.581507360164685e-05, + "loss": 0.0144, + "step": 14061 + }, + { + "epoch": 4.27, + "learning_rate": 1.5802155026484636e-05, + "loss": 0.0427, + "step": 14062 + }, + { + "epoch": 4.27, + "learning_rate": 1.578924143637879e-05, + "loss": 0.0409, + "step": 14063 + }, + { + "epoch": 4.27, + "learning_rate": 1.5776332831809096e-05, + "loss": 0.0164, + "step": 14064 + }, + { + "epoch": 4.27, + "learning_rate": 1.576342921325502e-05, + "loss": 0.0179, + "step": 14065 + }, + { + "epoch": 4.27, + "learning_rate": 1.5750530581195892e-05, + "loss": 0.0311, + "step": 14066 + }, + { + "epoch": 4.27, + "learning_rate": 1.573763693611088e-05, + "loss": 0.0147, + "step": 14067 + }, + { + "epoch": 4.27, + "learning_rate": 1.5724748278478904e-05, + "loss": 0.0248, + "step": 14068 + }, + { + "epoch": 4.27, + "learning_rate": 1.571186460877881e-05, + "loss": 0.0435, + "step": 14069 + }, + { + "epoch": 4.27, + "learning_rate": 1.5698985927489153e-05, + "loss": 0.0358, + "step": 14070 + }, + { + "epoch": 4.27, + "learning_rate": 1.568611223508833e-05, + "loss": 0.0166, + "step": 14071 + }, + { + "epoch": 4.27, + "learning_rate": 1.567324353205453e-05, + "loss": 0.0388, + "step": 14072 + }, + { + "epoch": 4.27, + "learning_rate": 1.5660379818865865e-05, + "loss": 0.0203, + "step": 14073 + }, + { + "epoch": 4.27, + "learning_rate": 1.5647521096000105e-05, + "loss": 0.0207, + "step": 14074 + }, + { + "epoch": 4.27, + "learning_rate": 1.5634667363934968e-05, + "loss": 0.0188, + "step": 14075 + }, + { + "epoch": 4.27, + "learning_rate": 1.5621818623147936e-05, + "loss": 0.0152, + "step": 14076 + }, + { + "epoch": 4.27, + "learning_rate": 1.560897487411623e-05, + "loss": 0.0147, + "step": 14077 + }, + { + "epoch": 4.27, + "learning_rate": 1.5596136117317055e-05, + "loss": 0.0113, + "step": 14078 + }, + { + "epoch": 4.27, + "learning_rate": 1.5583302353227256e-05, + "loss": 0.035, + "step": 14079 + }, + { + "epoch": 4.28, + "learning_rate": 1.5570473582323624e-05, + "loss": 0.0299, + "step": 14080 + }, + { + "epoch": 4.28, + "learning_rate": 1.555764980508269e-05, + "loss": 0.0141, + "step": 14081 + }, + { + "epoch": 4.28, + "learning_rate": 1.5544831021980792e-05, + "loss": 0.0102, + "step": 14082 + }, + { + "epoch": 4.28, + "learning_rate": 1.5532017233494166e-05, + "loss": 0.0206, + "step": 14083 + }, + { + "epoch": 4.28, + "learning_rate": 1.551920844009873e-05, + "loss": 0.0424, + "step": 14084 + }, + { + "epoch": 4.28, + "learning_rate": 1.550640464227042e-05, + "loss": 0.0283, + "step": 14085 + }, + { + "epoch": 4.28, + "learning_rate": 1.549360584048471e-05, + "loss": 0.023, + "step": 14086 + }, + { + "epoch": 4.28, + "learning_rate": 1.5480812035217128e-05, + "loss": 0.0067, + "step": 14087 + }, + { + "epoch": 4.28, + "learning_rate": 1.5468023226942882e-05, + "loss": 0.0213, + "step": 14088 + }, + { + "epoch": 4.28, + "learning_rate": 1.5455239416137093e-05, + "loss": 0.0297, + "step": 14089 + }, + { + "epoch": 4.28, + "learning_rate": 1.54424606032746e-05, + "loss": 0.0228, + "step": 14090 + }, + { + "epoch": 4.28, + "learning_rate": 1.5429686788830082e-05, + "loss": 0.0246, + "step": 14091 + }, + { + "epoch": 4.28, + "learning_rate": 1.5416917973278114e-05, + "loss": 0.0181, + "step": 14092 + }, + { + "epoch": 4.28, + "learning_rate": 1.540415415709295e-05, + "loss": 0.0262, + "step": 14093 + }, + { + "epoch": 4.28, + "learning_rate": 1.5391395340748798e-05, + "loss": 0.0181, + "step": 14094 + }, + { + "epoch": 4.28, + "learning_rate": 1.5378641524719554e-05, + "loss": 0.0073, + "step": 14095 + }, + { + "epoch": 4.28, + "learning_rate": 1.5365892709478977e-05, + "loss": 0.0328, + "step": 14096 + }, + { + "epoch": 4.28, + "learning_rate": 1.5353148895500723e-05, + "loss": 0.0103, + "step": 14097 + }, + { + "epoch": 4.28, + "learning_rate": 1.534041008325809e-05, + "loss": 0.0233, + "step": 14098 + }, + { + "epoch": 4.28, + "learning_rate": 1.5327676273224375e-05, + "loss": 0.039, + "step": 14099 + }, + { + "epoch": 4.28, + "learning_rate": 1.5314947465872546e-05, + "loss": 0.0346, + "step": 14100 + }, + { + "epoch": 4.28, + "learning_rate": 1.530222366167547e-05, + "loss": 0.0208, + "step": 14101 + }, + { + "epoch": 4.28, + "learning_rate": 1.528950486110576e-05, + "loss": 0.011, + "step": 14102 + }, + { + "epoch": 4.28, + "learning_rate": 1.527679106463595e-05, + "loss": 0.0224, + "step": 14103 + }, + { + "epoch": 4.28, + "learning_rate": 1.5264082272738232e-05, + "loss": 0.0314, + "step": 14104 + }, + { + "epoch": 4.28, + "learning_rate": 1.5251378485884791e-05, + "loss": 0.015, + "step": 14105 + }, + { + "epoch": 4.28, + "learning_rate": 1.5238679704547485e-05, + "loss": 0.0355, + "step": 14106 + }, + { + "epoch": 4.28, + "learning_rate": 1.5225985929198003e-05, + "loss": 0.0318, + "step": 14107 + }, + { + "epoch": 4.28, + "learning_rate": 1.5213297160307969e-05, + "loss": 0.0283, + "step": 14108 + }, + { + "epoch": 4.28, + "learning_rate": 1.5200613398348655e-05, + "loss": 0.0155, + "step": 14109 + }, + { + "epoch": 4.28, + "learning_rate": 1.51879346437913e-05, + "loss": 0.0285, + "step": 14110 + }, + { + "epoch": 4.28, + "learning_rate": 1.5175260897106795e-05, + "loss": 0.048, + "step": 14111 + }, + { + "epoch": 4.28, + "learning_rate": 1.5162592158766013e-05, + "loss": 0.0259, + "step": 14112 + }, + { + "epoch": 4.29, + "learning_rate": 1.5149928429239477e-05, + "loss": 0.0261, + "step": 14113 + }, + { + "epoch": 4.29, + "learning_rate": 1.5137269708997695e-05, + "loss": 0.0193, + "step": 14114 + }, + { + "epoch": 4.29, + "learning_rate": 1.5124615998510841e-05, + "loss": 0.038, + "step": 14115 + }, + { + "epoch": 4.29, + "learning_rate": 1.5111967298249007e-05, + "loss": 0.0295, + "step": 14116 + }, + { + "epoch": 4.29, + "learning_rate": 1.5099323608682029e-05, + "loss": 0.0382, + "step": 14117 + }, + { + "epoch": 4.29, + "learning_rate": 1.5086684930279552e-05, + "loss": 0.0145, + "step": 14118 + }, + { + "epoch": 4.29, + "learning_rate": 1.5074051263511133e-05, + "loss": 0.0155, + "step": 14119 + }, + { + "epoch": 4.29, + "learning_rate": 1.5061422608846013e-05, + "loss": 0.0171, + "step": 14120 + }, + { + "epoch": 4.29, + "learning_rate": 1.5048798966753367e-05, + "loss": 0.01, + "step": 14121 + }, + { + "epoch": 4.29, + "learning_rate": 1.5036180337702102e-05, + "loss": 0.019, + "step": 14122 + }, + { + "epoch": 4.29, + "learning_rate": 1.502356672216093e-05, + "loss": 0.026, + "step": 14123 + }, + { + "epoch": 4.29, + "learning_rate": 1.5010958120598472e-05, + "loss": 0.0092, + "step": 14124 + }, + { + "epoch": 4.29, + "learning_rate": 1.4998354533483037e-05, + "loss": 0.0212, + "step": 14125 + }, + { + "epoch": 4.29, + "learning_rate": 1.4985755961282886e-05, + "loss": 0.0143, + "step": 14126 + }, + { + "epoch": 4.29, + "learning_rate": 1.4973162404465927e-05, + "loss": 0.0068, + "step": 14127 + }, + { + "epoch": 4.29, + "learning_rate": 1.4960573863500052e-05, + "loss": 0.0277, + "step": 14128 + }, + { + "epoch": 4.29, + "learning_rate": 1.494799033885284e-05, + "loss": 0.015, + "step": 14129 + }, + { + "epoch": 4.29, + "learning_rate": 1.4935411830991762e-05, + "loss": 0.0568, + "step": 14130 + }, + { + "epoch": 4.29, + "learning_rate": 1.4922838340384064e-05, + "loss": 0.0555, + "step": 14131 + }, + { + "epoch": 4.29, + "learning_rate": 1.4910269867496788e-05, + "loss": 0.0185, + "step": 14132 + }, + { + "epoch": 4.29, + "learning_rate": 1.4897706412796861e-05, + "loss": 0.0371, + "step": 14133 + }, + { + "epoch": 4.29, + "learning_rate": 1.4885147976750943e-05, + "loss": 0.0126, + "step": 14134 + }, + { + "epoch": 4.29, + "learning_rate": 1.4872594559825563e-05, + "loss": 0.0518, + "step": 14135 + }, + { + "epoch": 4.29, + "learning_rate": 1.486004616248706e-05, + "loss": 0.0304, + "step": 14136 + }, + { + "epoch": 4.29, + "learning_rate": 1.4847502785201499e-05, + "loss": 0.0561, + "step": 14137 + }, + { + "epoch": 4.29, + "learning_rate": 1.483496442843492e-05, + "loss": 0.0111, + "step": 14138 + }, + { + "epoch": 4.29, + "learning_rate": 1.4822431092653003e-05, + "loss": 0.0205, + "step": 14139 + }, + { + "epoch": 4.29, + "learning_rate": 1.4809902778321408e-05, + "loss": 0.0302, + "step": 14140 + }, + { + "epoch": 4.29, + "learning_rate": 1.479737948590548e-05, + "loss": 0.0272, + "step": 14141 + }, + { + "epoch": 4.29, + "learning_rate": 1.478486121587043e-05, + "loss": 0.0464, + "step": 14142 + }, + { + "epoch": 4.29, + "learning_rate": 1.4772347968681237e-05, + "loss": 0.0303, + "step": 14143 + }, + { + "epoch": 4.29, + "learning_rate": 1.4759839744802776e-05, + "loss": 0.0104, + "step": 14144 + }, + { + "epoch": 4.29, + "learning_rate": 1.4747336544699679e-05, + "loss": 0.03, + "step": 14145 + }, + { + "epoch": 4.3, + "learning_rate": 1.4734838368836422e-05, + "loss": 0.0403, + "step": 14146 + }, + { + "epoch": 4.3, + "learning_rate": 1.472234521767725e-05, + "loss": 0.0404, + "step": 14147 + }, + { + "epoch": 4.3, + "learning_rate": 1.4709857091686228e-05, + "loss": 0.0354, + "step": 14148 + }, + { + "epoch": 4.3, + "learning_rate": 1.4697373991327316e-05, + "loss": 0.0292, + "step": 14149 + }, + { + "epoch": 4.3, + "learning_rate": 1.468489591706416e-05, + "loss": 0.0133, + "step": 14150 + }, + { + "epoch": 4.3, + "learning_rate": 1.4672422869360374e-05, + "loss": 0.029, + "step": 14151 + }, + { + "epoch": 4.3, + "learning_rate": 1.4659954848679167e-05, + "loss": 0.0308, + "step": 14152 + }, + { + "epoch": 4.3, + "learning_rate": 1.464749185548379e-05, + "loss": 0.0373, + "step": 14153 + }, + { + "epoch": 4.3, + "learning_rate": 1.4635033890237135e-05, + "loss": 0.0253, + "step": 14154 + }, + { + "epoch": 4.3, + "learning_rate": 1.4622580953402052e-05, + "loss": 0.0311, + "step": 14155 + }, + { + "epoch": 4.3, + "learning_rate": 1.4610133045441103e-05, + "loss": 0.0176, + "step": 14156 + }, + { + "epoch": 4.3, + "learning_rate": 1.4597690166816651e-05, + "loss": 0.0195, + "step": 14157 + }, + { + "epoch": 4.3, + "learning_rate": 1.4585252317990959e-05, + "loss": 0.0362, + "step": 14158 + }, + { + "epoch": 4.3, + "learning_rate": 1.4572819499426026e-05, + "loss": 0.0204, + "step": 14159 + }, + { + "epoch": 4.3, + "learning_rate": 1.4560391711583747e-05, + "loss": 0.0068, + "step": 14160 + }, + { + "epoch": 4.3, + "learning_rate": 1.4547968954925737e-05, + "loss": 0.0153, + "step": 14161 + }, + { + "epoch": 4.3, + "learning_rate": 1.4535551229913429e-05, + "loss": 0.0163, + "step": 14162 + }, + { + "epoch": 4.3, + "learning_rate": 1.45231385370082e-05, + "loss": 0.0142, + "step": 14163 + }, + { + "epoch": 4.3, + "learning_rate": 1.451073087667105e-05, + "loss": 0.0252, + "step": 14164 + }, + { + "epoch": 4.3, + "learning_rate": 1.4498328249362945e-05, + "loss": 0.0246, + "step": 14165 + }, + { + "epoch": 4.3, + "learning_rate": 1.4485930655544614e-05, + "loss": 0.0224, + "step": 14166 + }, + { + "epoch": 4.3, + "learning_rate": 1.4473538095676556e-05, + "loss": 0.0301, + "step": 14167 + }, + { + "epoch": 4.3, + "learning_rate": 1.4461150570219088e-05, + "loss": 0.0278, + "step": 14168 + }, + { + "epoch": 4.3, + "learning_rate": 1.4448768079632456e-05, + "loss": 0.0288, + "step": 14169 + }, + { + "epoch": 4.3, + "learning_rate": 1.443639062437656e-05, + "loss": 0.0331, + "step": 14170 + }, + { + "epoch": 4.3, + "learning_rate": 1.4424018204911248e-05, + "loss": 0.0263, + "step": 14171 + }, + { + "epoch": 4.3, + "learning_rate": 1.4411650821696069e-05, + "loss": 0.0215, + "step": 14172 + }, + { + "epoch": 4.3, + "learning_rate": 1.439928847519044e-05, + "loss": 0.0123, + "step": 14173 + }, + { + "epoch": 4.3, + "learning_rate": 1.4386931165853628e-05, + "loss": 0.0332, + "step": 14174 + }, + { + "epoch": 4.3, + "learning_rate": 1.437457889414461e-05, + "loss": 0.0126, + "step": 14175 + }, + { + "epoch": 4.3, + "learning_rate": 1.436223166052231e-05, + "loss": 0.0246, + "step": 14176 + }, + { + "epoch": 4.3, + "learning_rate": 1.4349889465445307e-05, + "loss": 0.0289, + "step": 14177 + }, + { + "epoch": 4.3, + "learning_rate": 1.4337552309372152e-05, + "loss": 0.0218, + "step": 14178 + }, + { + "epoch": 4.31, + "learning_rate": 1.4325220192761061e-05, + "loss": 0.0295, + "step": 14179 + }, + { + "epoch": 4.31, + "learning_rate": 1.4312893116070184e-05, + "loss": 0.0138, + "step": 14180 + }, + { + "epoch": 4.31, + "learning_rate": 1.4300571079757456e-05, + "loss": 0.0214, + "step": 14181 + }, + { + "epoch": 4.31, + "learning_rate": 1.4288254084280575e-05, + "loss": 0.022, + "step": 14182 + }, + { + "epoch": 4.31, + "learning_rate": 1.4275942130097096e-05, + "loss": 0.0309, + "step": 14183 + }, + { + "epoch": 4.31, + "learning_rate": 1.4263635217664316e-05, + "loss": 0.0268, + "step": 14184 + }, + { + "epoch": 4.31, + "learning_rate": 1.425133334743947e-05, + "loss": 0.0359, + "step": 14185 + }, + { + "epoch": 4.31, + "learning_rate": 1.4239036519879492e-05, + "loss": 0.0378, + "step": 14186 + }, + { + "epoch": 4.31, + "learning_rate": 1.422674473544122e-05, + "loss": 0.0184, + "step": 14187 + }, + { + "epoch": 4.31, + "learning_rate": 1.421445799458122e-05, + "loss": 0.023, + "step": 14188 + }, + { + "epoch": 4.31, + "learning_rate": 1.4202176297755891e-05, + "loss": 0.0246, + "step": 14189 + }, + { + "epoch": 4.31, + "learning_rate": 1.4189899645421521e-05, + "loss": 0.0494, + "step": 14190 + }, + { + "epoch": 4.31, + "learning_rate": 1.4177628038034095e-05, + "loss": 0.0198, + "step": 14191 + }, + { + "epoch": 4.31, + "learning_rate": 1.4165361476049547e-05, + "loss": 0.0144, + "step": 14192 + }, + { + "epoch": 4.31, + "learning_rate": 1.415309995992343e-05, + "loss": 0.0404, + "step": 14193 + }, + { + "epoch": 4.31, + "learning_rate": 1.4140843490111315e-05, + "loss": 0.0195, + "step": 14194 + }, + { + "epoch": 4.31, + "learning_rate": 1.4128592067068416e-05, + "loss": 0.0263, + "step": 14195 + }, + { + "epoch": 4.31, + "learning_rate": 1.4116345691249924e-05, + "loss": 0.0069, + "step": 14196 + }, + { + "epoch": 4.31, + "learning_rate": 1.4104104363110707e-05, + "loss": 0.0069, + "step": 14197 + }, + { + "epoch": 4.31, + "learning_rate": 1.4091868083105484e-05, + "loss": 0.0302, + "step": 14198 + }, + { + "epoch": 4.31, + "learning_rate": 1.4079636851688842e-05, + "loss": 0.0198, + "step": 14199 + }, + { + "epoch": 4.31, + "learning_rate": 1.4067410669315066e-05, + "loss": 0.034, + "step": 14200 + }, + { + "epoch": 4.31, + "learning_rate": 1.4055189536438394e-05, + "loss": 0.015, + "step": 14201 + }, + { + "epoch": 4.31, + "learning_rate": 1.4042973453512796e-05, + "loss": 0.0264, + "step": 14202 + }, + { + "epoch": 4.31, + "learning_rate": 1.4030762420991993e-05, + "loss": 0.021, + "step": 14203 + }, + { + "epoch": 4.31, + "learning_rate": 1.4018556439329687e-05, + "loss": 0.0272, + "step": 14204 + }, + { + "epoch": 4.31, + "learning_rate": 1.40063555089792e-05, + "loss": 0.0351, + "step": 14205 + }, + { + "epoch": 4.31, + "learning_rate": 1.3994159630393853e-05, + "loss": 0.0189, + "step": 14206 + }, + { + "epoch": 4.31, + "learning_rate": 1.3981968804026633e-05, + "loss": 0.0322, + "step": 14207 + }, + { + "epoch": 4.31, + "learning_rate": 1.3969783030330395e-05, + "loss": 0.0381, + "step": 14208 + }, + { + "epoch": 4.31, + "learning_rate": 1.3957602309757793e-05, + "loss": 0.0232, + "step": 14209 + }, + { + "epoch": 4.31, + "learning_rate": 1.3945426642761348e-05, + "loss": 0.006, + "step": 14210 + }, + { + "epoch": 4.31, + "learning_rate": 1.3933256029793299e-05, + "loss": 0.0187, + "step": 14211 + }, + { + "epoch": 4.32, + "learning_rate": 1.3921090471305802e-05, + "loss": 0.0419, + "step": 14212 + }, + { + "epoch": 4.32, + "learning_rate": 1.3908929967750759e-05, + "loss": 0.0211, + "step": 14213 + }, + { + "epoch": 4.32, + "learning_rate": 1.3896774519579845e-05, + "loss": 0.02, + "step": 14214 + }, + { + "epoch": 4.32, + "learning_rate": 1.3884624127244664e-05, + "loss": 0.0233, + "step": 14215 + }, + { + "epoch": 4.32, + "learning_rate": 1.3872478791196523e-05, + "loss": 0.0095, + "step": 14216 + }, + { + "epoch": 4.32, + "learning_rate": 1.386033851188666e-05, + "loss": 0.0115, + "step": 14217 + }, + { + "epoch": 4.32, + "learning_rate": 1.384820328976593e-05, + "loss": 0.0352, + "step": 14218 + }, + { + "epoch": 4.32, + "learning_rate": 1.3836073125285225e-05, + "loss": 0.0224, + "step": 14219 + }, + { + "epoch": 4.32, + "learning_rate": 1.3823948018895065e-05, + "loss": 0.0159, + "step": 14220 + }, + { + "epoch": 4.32, + "learning_rate": 1.3811827971045907e-05, + "loss": 0.0144, + "step": 14221 + }, + { + "epoch": 4.32, + "learning_rate": 1.3799712982188027e-05, + "loss": 0.0279, + "step": 14222 + }, + { + "epoch": 4.32, + "learning_rate": 1.3787603052771362e-05, + "loss": 0.0525, + "step": 14223 + }, + { + "epoch": 4.32, + "learning_rate": 1.377549818324582e-05, + "loss": 0.0205, + "step": 14224 + }, + { + "epoch": 4.32, + "learning_rate": 1.3763398374061024e-05, + "loss": 0.0081, + "step": 14225 + }, + { + "epoch": 4.32, + "learning_rate": 1.3751303625666482e-05, + "loss": 0.0115, + "step": 14226 + }, + { + "epoch": 4.32, + "learning_rate": 1.3739213938511483e-05, + "loss": 0.0292, + "step": 14227 + }, + { + "epoch": 4.32, + "learning_rate": 1.3727129313045072e-05, + "loss": 0.0316, + "step": 14228 + }, + { + "epoch": 4.32, + "learning_rate": 1.3715049749716201e-05, + "loss": 0.0244, + "step": 14229 + }, + { + "epoch": 4.32, + "learning_rate": 1.3702975248973563e-05, + "loss": 0.0378, + "step": 14230 + }, + { + "epoch": 4.32, + "learning_rate": 1.3690905811265734e-05, + "loss": 0.0313, + "step": 14231 + }, + { + "epoch": 4.32, + "learning_rate": 1.3678841437041038e-05, + "loss": 0.0288, + "step": 14232 + }, + { + "epoch": 4.32, + "learning_rate": 1.3666782126747617e-05, + "loss": 0.0025, + "step": 14233 + }, + { + "epoch": 4.32, + "learning_rate": 1.3654727880833426e-05, + "loss": 0.0059, + "step": 14234 + }, + { + "epoch": 4.32, + "learning_rate": 1.3642678699746295e-05, + "loss": 0.0319, + "step": 14235 + }, + { + "epoch": 4.32, + "learning_rate": 1.3630634583933747e-05, + "loss": 0.0349, + "step": 14236 + }, + { + "epoch": 4.32, + "learning_rate": 1.3618595533843257e-05, + "loss": 0.0335, + "step": 14237 + }, + { + "epoch": 4.32, + "learning_rate": 1.3606561549922002e-05, + "loss": 0.0093, + "step": 14238 + }, + { + "epoch": 4.32, + "learning_rate": 1.359453263261699e-05, + "loss": 0.0272, + "step": 14239 + }, + { + "epoch": 4.32, + "learning_rate": 1.3582508782375112e-05, + "loss": 0.0114, + "step": 14240 + }, + { + "epoch": 4.32, + "learning_rate": 1.3570489999642965e-05, + "loss": 0.0254, + "step": 14241 + }, + { + "epoch": 4.32, + "learning_rate": 1.355847628486707e-05, + "loss": 0.0254, + "step": 14242 + }, + { + "epoch": 4.32, + "learning_rate": 1.3546467638493674e-05, + "loss": 0.0126, + "step": 14243 + }, + { + "epoch": 4.32, + "learning_rate": 1.3534464060968819e-05, + "loss": 0.0233, + "step": 14244 + }, + { + "epoch": 4.33, + "learning_rate": 1.3522465552738482e-05, + "loss": 0.0353, + "step": 14245 + }, + { + "epoch": 4.33, + "learning_rate": 1.3510472114248289e-05, + "loss": 0.0304, + "step": 14246 + }, + { + "epoch": 4.33, + "learning_rate": 1.349848374594385e-05, + "loss": 0.0238, + "step": 14247 + }, + { + "epoch": 4.33, + "learning_rate": 1.3486500448270443e-05, + "loss": 0.0451, + "step": 14248 + }, + { + "epoch": 4.33, + "learning_rate": 1.3474522221673211e-05, + "loss": 0.0074, + "step": 14249 + }, + { + "epoch": 4.33, + "learning_rate": 1.3462549066597118e-05, + "loss": 0.0314, + "step": 14250 + }, + { + "epoch": 4.33, + "learning_rate": 1.3450580983486936e-05, + "loss": 0.0356, + "step": 14251 + }, + { + "epoch": 4.33, + "learning_rate": 1.343861797278723e-05, + "loss": 0.0497, + "step": 14252 + }, + { + "epoch": 4.33, + "learning_rate": 1.3426660034942427e-05, + "loss": 0.0312, + "step": 14253 + }, + { + "epoch": 4.33, + "learning_rate": 1.3414707170396721e-05, + "loss": 0.0523, + "step": 14254 + }, + { + "epoch": 4.33, + "learning_rate": 1.3402759379594057e-05, + "loss": 0.042, + "step": 14255 + }, + { + "epoch": 4.33, + "learning_rate": 1.3390816662978366e-05, + "loss": 0.0196, + "step": 14256 + }, + { + "epoch": 4.33, + "learning_rate": 1.3378879020993189e-05, + "loss": 0.0364, + "step": 14257 + }, + { + "epoch": 4.33, + "learning_rate": 1.3366946454082072e-05, + "loss": 0.0321, + "step": 14258 + }, + { + "epoch": 4.33, + "learning_rate": 1.335501896268818e-05, + "loss": 0.0057, + "step": 14259 + }, + { + "epoch": 4.33, + "learning_rate": 1.334309654725464e-05, + "loss": 0.0222, + "step": 14260 + }, + { + "epoch": 4.33, + "learning_rate": 1.3331179208224296e-05, + "loss": 0.0201, + "step": 14261 + }, + { + "epoch": 4.33, + "learning_rate": 1.3319266946039864e-05, + "loss": 0.0127, + "step": 14262 + }, + { + "epoch": 4.33, + "learning_rate": 1.330735976114392e-05, + "loss": 0.0199, + "step": 14263 + }, + { + "epoch": 4.33, + "learning_rate": 1.3295457653978647e-05, + "loss": 0.0354, + "step": 14264 + }, + { + "epoch": 4.33, + "learning_rate": 1.3283560624986272e-05, + "loss": 0.0506, + "step": 14265 + }, + { + "epoch": 4.33, + "learning_rate": 1.327166867460866e-05, + "loss": 0.0045, + "step": 14266 + }, + { + "epoch": 4.33, + "learning_rate": 1.3259781803287656e-05, + "loss": 0.044, + "step": 14267 + }, + { + "epoch": 4.33, + "learning_rate": 1.3247900011464758e-05, + "loss": 0.0191, + "step": 14268 + }, + { + "epoch": 4.33, + "learning_rate": 1.3236023299581311e-05, + "loss": 0.0115, + "step": 14269 + }, + { + "epoch": 4.33, + "learning_rate": 1.3224151668078581e-05, + "loss": 0.0195, + "step": 14270 + }, + { + "epoch": 4.33, + "learning_rate": 1.3212285117397514e-05, + "loss": 0.032, + "step": 14271 + }, + { + "epoch": 4.33, + "learning_rate": 1.3200423647978925e-05, + "loss": 0.0368, + "step": 14272 + }, + { + "epoch": 4.33, + "learning_rate": 1.318856726026346e-05, + "loss": 0.0196, + "step": 14273 + }, + { + "epoch": 4.33, + "learning_rate": 1.3176715954691519e-05, + "loss": 0.0412, + "step": 14274 + }, + { + "epoch": 4.33, + "learning_rate": 1.3164869731703315e-05, + "loss": 0.0083, + "step": 14275 + }, + { + "epoch": 4.33, + "learning_rate": 1.3153028591738963e-05, + "loss": 0.012, + "step": 14276 + }, + { + "epoch": 4.33, + "learning_rate": 1.3141192535238276e-05, + "loss": 0.0172, + "step": 14277 + }, + { + "epoch": 4.34, + "learning_rate": 1.3129361562640988e-05, + "loss": 0.0268, + "step": 14278 + }, + { + "epoch": 4.34, + "learning_rate": 1.3117535674386531e-05, + "loss": 0.0236, + "step": 14279 + }, + { + "epoch": 4.34, + "learning_rate": 1.3105714870914203e-05, + "loss": 0.0124, + "step": 14280 + }, + { + "epoch": 4.34, + "learning_rate": 1.3093899152663151e-05, + "loss": 0.0067, + "step": 14281 + }, + { + "epoch": 4.34, + "learning_rate": 1.3082088520072243e-05, + "loss": 0.0339, + "step": 14282 + }, + { + "epoch": 4.34, + "learning_rate": 1.3070282973580293e-05, + "loss": 0.0196, + "step": 14283 + }, + { + "epoch": 4.34, + "learning_rate": 1.3058482513625734e-05, + "loss": 0.0548, + "step": 14284 + }, + { + "epoch": 4.34, + "learning_rate": 1.3046687140646966e-05, + "loss": 0.0113, + "step": 14285 + }, + { + "epoch": 4.34, + "learning_rate": 1.3034896855082188e-05, + "loss": 0.0313, + "step": 14286 + }, + { + "epoch": 4.34, + "learning_rate": 1.3023111657369334e-05, + "loss": 0.04, + "step": 14287 + }, + { + "epoch": 4.34, + "learning_rate": 1.3011331547946235e-05, + "loss": 0.0337, + "step": 14288 + }, + { + "epoch": 4.34, + "learning_rate": 1.299955652725041e-05, + "loss": 0.0257, + "step": 14289 + }, + { + "epoch": 4.34, + "learning_rate": 1.2987786595719323e-05, + "loss": 0.0174, + "step": 14290 + }, + { + "epoch": 4.34, + "learning_rate": 1.2976021753790161e-05, + "loss": 0.0276, + "step": 14291 + }, + { + "epoch": 4.34, + "learning_rate": 1.2964262001900005e-05, + "loss": 0.0215, + "step": 14292 + }, + { + "epoch": 4.34, + "learning_rate": 1.2952507340485657e-05, + "loss": 0.026, + "step": 14293 + }, + { + "epoch": 4.34, + "learning_rate": 1.2940757769983751e-05, + "loss": 0.0254, + "step": 14294 + }, + { + "epoch": 4.34, + "learning_rate": 1.2929013290830787e-05, + "loss": 0.0129, + "step": 14295 + }, + { + "epoch": 4.34, + "learning_rate": 1.2917273903462999e-05, + "loss": 0.0368, + "step": 14296 + }, + { + "epoch": 4.34, + "learning_rate": 1.2905539608316524e-05, + "loss": 0.0364, + "step": 14297 + }, + { + "epoch": 4.34, + "learning_rate": 1.289381040582721e-05, + "loss": 0.0115, + "step": 14298 + }, + { + "epoch": 4.34, + "learning_rate": 1.2882086296430794e-05, + "loss": 0.0163, + "step": 14299 + }, + { + "epoch": 4.34, + "learning_rate": 1.287036728056276e-05, + "loss": 0.0348, + "step": 14300 + }, + { + "epoch": 4.34, + "learning_rate": 1.285865335865846e-05, + "loss": 0.0107, + "step": 14301 + }, + { + "epoch": 4.34, + "learning_rate": 1.2846944531153014e-05, + "loss": 0.0221, + "step": 14302 + }, + { + "epoch": 4.34, + "learning_rate": 1.2835240798481373e-05, + "loss": 0.0377, + "step": 14303 + }, + { + "epoch": 4.34, + "learning_rate": 1.2823542161078353e-05, + "loss": 0.0267, + "step": 14304 + }, + { + "epoch": 4.34, + "learning_rate": 1.2811848619378412e-05, + "loss": 0.0085, + "step": 14305 + }, + { + "epoch": 4.34, + "learning_rate": 1.2800160173816032e-05, + "loss": 0.012, + "step": 14306 + }, + { + "epoch": 4.34, + "learning_rate": 1.278847682482535e-05, + "loss": 0.011, + "step": 14307 + }, + { + "epoch": 4.34, + "learning_rate": 1.2776798572840402e-05, + "loss": 0.029, + "step": 14308 + }, + { + "epoch": 4.34, + "learning_rate": 1.2765125418294975e-05, + "loss": 0.0064, + "step": 14309 + }, + { + "epoch": 4.34, + "learning_rate": 1.2753457361622688e-05, + "loss": 0.0248, + "step": 14310 + }, + { + "epoch": 4.35, + "learning_rate": 1.274179440325701e-05, + "loss": 0.032, + "step": 14311 + }, + { + "epoch": 4.35, + "learning_rate": 1.2730136543631131e-05, + "loss": 0.0409, + "step": 14312 + }, + { + "epoch": 4.35, + "learning_rate": 1.2718483783178168e-05, + "loss": 0.0273, + "step": 14313 + }, + { + "epoch": 4.35, + "learning_rate": 1.2706836122330977e-05, + "loss": 0.0298, + "step": 14314 + }, + { + "epoch": 4.35, + "learning_rate": 1.2695193561522194e-05, + "loss": 0.0165, + "step": 14315 + }, + { + "epoch": 4.35, + "learning_rate": 1.2683556101184306e-05, + "loss": 0.0154, + "step": 14316 + }, + { + "epoch": 4.35, + "learning_rate": 1.2671923741749651e-05, + "loss": 0.041, + "step": 14317 + }, + { + "epoch": 4.35, + "learning_rate": 1.2660296483650301e-05, + "loss": 0.0415, + "step": 14318 + }, + { + "epoch": 4.35, + "learning_rate": 1.2648674327318209e-05, + "loss": 0.0122, + "step": 14319 + }, + { + "epoch": 4.35, + "learning_rate": 1.2637057273185097e-05, + "loss": 0.0301, + "step": 14320 + }, + { + "epoch": 4.35, + "learning_rate": 1.2625445321682453e-05, + "loss": 0.021, + "step": 14321 + }, + { + "epoch": 4.35, + "learning_rate": 1.2613838473241716e-05, + "loss": 0.0247, + "step": 14322 + }, + { + "epoch": 4.35, + "learning_rate": 1.2602236728293957e-05, + "loss": 0.029, + "step": 14323 + }, + { + "epoch": 4.35, + "learning_rate": 1.2590640087270232e-05, + "loss": 0.0284, + "step": 14324 + }, + { + "epoch": 4.35, + "learning_rate": 1.2579048550601245e-05, + "loss": 0.0131, + "step": 14325 + }, + { + "epoch": 4.35, + "learning_rate": 1.2567462118717602e-05, + "loss": 0.0099, + "step": 14326 + }, + { + "epoch": 4.35, + "learning_rate": 1.255588079204976e-05, + "loss": 0.0371, + "step": 14327 + }, + { + "epoch": 4.35, + "learning_rate": 1.2544304571027875e-05, + "loss": 0.0155, + "step": 14328 + }, + { + "epoch": 4.35, + "learning_rate": 1.2532733456082034e-05, + "loss": 0.0235, + "step": 14329 + }, + { + "epoch": 4.35, + "learning_rate": 1.2521167447641977e-05, + "loss": 0.0481, + "step": 14330 + }, + { + "epoch": 4.35, + "learning_rate": 1.2509606546137413e-05, + "loss": 0.0038, + "step": 14331 + }, + { + "epoch": 4.35, + "learning_rate": 1.2498050751997763e-05, + "loss": 0.0216, + "step": 14332 + }, + { + "epoch": 4.35, + "learning_rate": 1.2486500065652333e-05, + "loss": 0.0109, + "step": 14333 + }, + { + "epoch": 4.35, + "learning_rate": 1.2474954487530164e-05, + "loss": 0.0193, + "step": 14334 + }, + { + "epoch": 4.35, + "learning_rate": 1.246341401806013e-05, + "loss": 0.0118, + "step": 14335 + }, + { + "epoch": 4.35, + "learning_rate": 1.2451878657670954e-05, + "loss": 0.0166, + "step": 14336 + }, + { + "epoch": 4.35, + "learning_rate": 1.2440348406791112e-05, + "loss": 0.0279, + "step": 14337 + }, + { + "epoch": 4.35, + "learning_rate": 1.2428823265848958e-05, + "loss": 0.0303, + "step": 14338 + }, + { + "epoch": 4.35, + "learning_rate": 1.2417303235272602e-05, + "loss": 0.018, + "step": 14339 + }, + { + "epoch": 4.35, + "learning_rate": 1.2405788315489984e-05, + "loss": 0.0318, + "step": 14340 + }, + { + "epoch": 4.35, + "learning_rate": 1.2394278506928796e-05, + "loss": 0.0302, + "step": 14341 + }, + { + "epoch": 4.35, + "learning_rate": 1.2382773810016661e-05, + "loss": 0.0258, + "step": 14342 + }, + { + "epoch": 4.35, + "learning_rate": 1.2371274225180889e-05, + "loss": 0.0441, + "step": 14343 + }, + { + "epoch": 4.36, + "learning_rate": 1.235977975284872e-05, + "loss": 0.0222, + "step": 14344 + }, + { + "epoch": 4.36, + "learning_rate": 1.2348290393447113e-05, + "loss": 0.0131, + "step": 14345 + }, + { + "epoch": 4.36, + "learning_rate": 1.2336806147402828e-05, + "loss": 0.0253, + "step": 14346 + }, + { + "epoch": 4.36, + "learning_rate": 1.2325327015142522e-05, + "loss": 0.0459, + "step": 14347 + }, + { + "epoch": 4.36, + "learning_rate": 1.2313852997092572e-05, + "loss": 0.0051, + "step": 14348 + }, + { + "epoch": 4.36, + "learning_rate": 1.2302384093679236e-05, + "loss": 0.025, + "step": 14349 + }, + { + "epoch": 4.36, + "learning_rate": 1.2290920305328556e-05, + "loss": 0.03, + "step": 14350 + }, + { + "epoch": 4.36, + "learning_rate": 1.2279461632466308e-05, + "loss": 0.0281, + "step": 14351 + }, + { + "epoch": 4.36, + "learning_rate": 1.2268008075518237e-05, + "loss": 0.0292, + "step": 14352 + }, + { + "epoch": 4.36, + "learning_rate": 1.2256559634909736e-05, + "loss": 0.0229, + "step": 14353 + }, + { + "epoch": 4.36, + "learning_rate": 1.2245116311066178e-05, + "loss": 0.0264, + "step": 14354 + }, + { + "epoch": 4.36, + "learning_rate": 1.2233678104412526e-05, + "loss": 0.0238, + "step": 14355 + }, + { + "epoch": 4.36, + "learning_rate": 1.2222245015373771e-05, + "loss": 0.0071, + "step": 14356 + }, + { + "epoch": 4.36, + "learning_rate": 1.221081704437456e-05, + "loss": 0.0124, + "step": 14357 + }, + { + "epoch": 4.36, + "learning_rate": 1.2199394191839451e-05, + "loss": 0.0282, + "step": 14358 + }, + { + "epoch": 4.36, + "learning_rate": 1.2187976458192739e-05, + "loss": 0.0517, + "step": 14359 + }, + { + "epoch": 4.36, + "learning_rate": 1.2176563843858566e-05, + "loss": 0.0085, + "step": 14360 + }, + { + "epoch": 4.36, + "learning_rate": 1.2165156349260896e-05, + "loss": 0.0305, + "step": 14361 + }, + { + "epoch": 4.36, + "learning_rate": 1.2153753974823455e-05, + "loss": 0.0444, + "step": 14362 + }, + { + "epoch": 4.36, + "learning_rate": 1.2142356720969837e-05, + "loss": 0.0284, + "step": 14363 + }, + { + "epoch": 4.36, + "learning_rate": 1.2130964588123405e-05, + "loss": 0.0232, + "step": 14364 + }, + { + "epoch": 4.36, + "learning_rate": 1.2119577576707334e-05, + "loss": 0.0256, + "step": 14365 + }, + { + "epoch": 4.36, + "learning_rate": 1.2108195687144623e-05, + "loss": 0.0159, + "step": 14366 + }, + { + "epoch": 4.36, + "learning_rate": 1.2096818919858065e-05, + "loss": 0.0167, + "step": 14367 + }, + { + "epoch": 4.36, + "learning_rate": 1.2085447275270305e-05, + "loss": 0.0106, + "step": 14368 + }, + { + "epoch": 4.36, + "learning_rate": 1.2074080753803723e-05, + "loss": 0.0269, + "step": 14369 + }, + { + "epoch": 4.36, + "learning_rate": 1.206271935588063e-05, + "loss": 0.0225, + "step": 14370 + }, + { + "epoch": 4.36, + "learning_rate": 1.2051363081922972e-05, + "loss": 0.0193, + "step": 14371 + }, + { + "epoch": 4.36, + "learning_rate": 1.2040011932352662e-05, + "loss": 0.0406, + "step": 14372 + }, + { + "epoch": 4.36, + "learning_rate": 1.2028665907591329e-05, + "loss": 0.0336, + "step": 14373 + }, + { + "epoch": 4.36, + "learning_rate": 1.2017325008060468e-05, + "loss": 0.0078, + "step": 14374 + }, + { + "epoch": 4.36, + "learning_rate": 1.2005989234181362e-05, + "loss": 0.0166, + "step": 14375 + }, + { + "epoch": 4.36, + "learning_rate": 1.1994658586375072e-05, + "loss": 0.0432, + "step": 14376 + }, + { + "epoch": 4.37, + "learning_rate": 1.1983333065062528e-05, + "loss": 0.0141, + "step": 14377 + }, + { + "epoch": 4.37, + "learning_rate": 1.1972012670664427e-05, + "loss": 0.0217, + "step": 14378 + }, + { + "epoch": 4.37, + "learning_rate": 1.19606974036013e-05, + "loss": 0.0234, + "step": 14379 + }, + { + "epoch": 4.37, + "learning_rate": 1.1949387264293476e-05, + "loss": 0.0184, + "step": 14380 + }, + { + "epoch": 4.37, + "learning_rate": 1.193808225316107e-05, + "loss": 0.0226, + "step": 14381 + }, + { + "epoch": 4.37, + "learning_rate": 1.1926782370624043e-05, + "loss": 0.0282, + "step": 14382 + }, + { + "epoch": 4.37, + "learning_rate": 1.1915487617102165e-05, + "loss": 0.0251, + "step": 14383 + }, + { + "epoch": 4.37, + "learning_rate": 1.1904197993014963e-05, + "loss": 0.0141, + "step": 14384 + }, + { + "epoch": 4.37, + "learning_rate": 1.1892913498781887e-05, + "loss": 0.0198, + "step": 14385 + }, + { + "epoch": 4.37, + "learning_rate": 1.1881634134822066e-05, + "loss": 0.0218, + "step": 14386 + }, + { + "epoch": 4.37, + "learning_rate": 1.1870359901554483e-05, + "loss": 0.0063, + "step": 14387 + }, + { + "epoch": 4.37, + "learning_rate": 1.1859090799397985e-05, + "loss": 0.0192, + "step": 14388 + }, + { + "epoch": 4.37, + "learning_rate": 1.1847826828771157e-05, + "loss": 0.0341, + "step": 14389 + }, + { + "epoch": 4.37, + "learning_rate": 1.1836567990092461e-05, + "loss": 0.0302, + "step": 14390 + }, + { + "epoch": 4.37, + "learning_rate": 1.1825314283780097e-05, + "loss": 0.0217, + "step": 14391 + }, + { + "epoch": 4.37, + "learning_rate": 1.181406571025208e-05, + "loss": 0.0385, + "step": 14392 + }, + { + "epoch": 4.37, + "learning_rate": 1.1802822269926327e-05, + "loss": 0.0064, + "step": 14393 + }, + { + "epoch": 4.37, + "learning_rate": 1.1791583963220452e-05, + "loss": 0.0122, + "step": 14394 + }, + { + "epoch": 4.37, + "learning_rate": 1.1780350790551974e-05, + "loss": 0.0198, + "step": 14395 + }, + { + "epoch": 4.37, + "learning_rate": 1.1769122752338107e-05, + "loss": 0.0266, + "step": 14396 + }, + { + "epoch": 4.37, + "learning_rate": 1.1757899848995984e-05, + "loss": 0.0161, + "step": 14397 + }, + { + "epoch": 4.37, + "learning_rate": 1.1746682080942455e-05, + "loss": 0.0292, + "step": 14398 + }, + { + "epoch": 4.37, + "learning_rate": 1.1735469448594304e-05, + "loss": 0.0447, + "step": 14399 + }, + { + "epoch": 4.37, + "learning_rate": 1.1724261952367997e-05, + "loss": 0.0355, + "step": 14400 + }, + { + "epoch": 4.37, + "learning_rate": 1.1713059592679852e-05, + "loss": 0.03, + "step": 14401 + }, + { + "epoch": 4.37, + "learning_rate": 1.1701862369946036e-05, + "loss": 0.0155, + "step": 14402 + }, + { + "epoch": 4.37, + "learning_rate": 1.1690670284582448e-05, + "loss": 0.0351, + "step": 14403 + }, + { + "epoch": 4.37, + "learning_rate": 1.1679483337004908e-05, + "loss": 0.0345, + "step": 14404 + }, + { + "epoch": 4.37, + "learning_rate": 1.1668301527628931e-05, + "loss": 0.0215, + "step": 14405 + }, + { + "epoch": 4.37, + "learning_rate": 1.1657124856869905e-05, + "loss": 0.021, + "step": 14406 + }, + { + "epoch": 4.37, + "learning_rate": 1.1645953325142977e-05, + "loss": 0.0347, + "step": 14407 + }, + { + "epoch": 4.37, + "learning_rate": 1.1634786932863166e-05, + "loss": 0.0224, + "step": 14408 + }, + { + "epoch": 4.37, + "learning_rate": 1.1623625680445292e-05, + "loss": 0.0079, + "step": 14409 + }, + { + "epoch": 4.38, + "learning_rate": 1.1612469568303923e-05, + "loss": 0.002, + "step": 14410 + }, + { + "epoch": 4.38, + "learning_rate": 1.1601318596853509e-05, + "loss": 0.0185, + "step": 14411 + }, + { + "epoch": 4.38, + "learning_rate": 1.1590172766508222e-05, + "loss": 0.0112, + "step": 14412 + }, + { + "epoch": 4.38, + "learning_rate": 1.1579032077682177e-05, + "loss": 0.0086, + "step": 14413 + }, + { + "epoch": 4.38, + "learning_rate": 1.1567896530789128e-05, + "loss": 0.0152, + "step": 14414 + }, + { + "epoch": 4.38, + "learning_rate": 1.1556766126242812e-05, + "loss": 0.0292, + "step": 14415 + }, + { + "epoch": 4.38, + "learning_rate": 1.1545640864456645e-05, + "loss": 0.0312, + "step": 14416 + }, + { + "epoch": 4.38, + "learning_rate": 1.1534520745843883e-05, + "loss": 0.0044, + "step": 14417 + }, + { + "epoch": 4.38, + "learning_rate": 1.1523405770817662e-05, + "loss": 0.0214, + "step": 14418 + }, + { + "epoch": 4.38, + "learning_rate": 1.15122959397908e-05, + "loss": 0.0336, + "step": 14419 + }, + { + "epoch": 4.38, + "learning_rate": 1.1501191253176084e-05, + "loss": 0.0133, + "step": 14420 + }, + { + "epoch": 4.38, + "learning_rate": 1.1490091711385919e-05, + "loss": 0.0306, + "step": 14421 + }, + { + "epoch": 4.38, + "learning_rate": 1.1478997314832705e-05, + "loss": 0.0133, + "step": 14422 + }, + { + "epoch": 4.38, + "learning_rate": 1.14679080639285e-05, + "loss": 0.0266, + "step": 14423 + }, + { + "epoch": 4.38, + "learning_rate": 1.1456823959085287e-05, + "loss": 0.0302, + "step": 14424 + }, + { + "epoch": 4.38, + "learning_rate": 1.144574500071479e-05, + "loss": 0.0291, + "step": 14425 + }, + { + "epoch": 4.38, + "learning_rate": 1.1434671189228544e-05, + "loss": 0.0313, + "step": 14426 + }, + { + "epoch": 4.38, + "learning_rate": 1.1423602525037939e-05, + "loss": 0.0229, + "step": 14427 + }, + { + "epoch": 4.38, + "learning_rate": 1.141253900855411e-05, + "loss": 0.0629, + "step": 14428 + }, + { + "epoch": 4.38, + "learning_rate": 1.1401480640188082e-05, + "loss": 0.0181, + "step": 14429 + }, + { + "epoch": 4.38, + "learning_rate": 1.1390427420350574e-05, + "loss": 0.0132, + "step": 14430 + }, + { + "epoch": 4.38, + "learning_rate": 1.1379379349452256e-05, + "loss": 0.0123, + "step": 14431 + }, + { + "epoch": 4.38, + "learning_rate": 1.1368336427903485e-05, + "loss": 0.0403, + "step": 14432 + }, + { + "epoch": 4.38, + "learning_rate": 1.1357298656114467e-05, + "loss": 0.0272, + "step": 14433 + }, + { + "epoch": 4.38, + "learning_rate": 1.1346266034495255e-05, + "loss": 0.0242, + "step": 14434 + }, + { + "epoch": 4.38, + "learning_rate": 1.1335238563455624e-05, + "loss": 0.0182, + "step": 14435 + }, + { + "epoch": 4.38, + "learning_rate": 1.132421624340531e-05, + "loss": 0.033, + "step": 14436 + }, + { + "epoch": 4.38, + "learning_rate": 1.1313199074753654e-05, + "loss": 0.0303, + "step": 14437 + }, + { + "epoch": 4.38, + "learning_rate": 1.1302187057909978e-05, + "loss": 0.0077, + "step": 14438 + }, + { + "epoch": 4.38, + "learning_rate": 1.1291180193283306e-05, + "loss": 0.0313, + "step": 14439 + }, + { + "epoch": 4.38, + "learning_rate": 1.1280178481282542e-05, + "loss": 0.0033, + "step": 14440 + }, + { + "epoch": 4.38, + "learning_rate": 1.1269181922316362e-05, + "loss": 0.0383, + "step": 14441 + }, + { + "epoch": 4.39, + "learning_rate": 1.125819051679322e-05, + "loss": 0.0548, + "step": 14442 + }, + { + "epoch": 4.39, + "learning_rate": 1.1247204265121474e-05, + "loss": 0.0199, + "step": 14443 + }, + { + "epoch": 4.39, + "learning_rate": 1.1236223167709163e-05, + "loss": 0.0346, + "step": 14444 + }, + { + "epoch": 4.39, + "learning_rate": 1.1225247224964279e-05, + "loss": 0.02, + "step": 14445 + }, + { + "epoch": 4.39, + "learning_rate": 1.1214276437294511e-05, + "loss": 0.0046, + "step": 14446 + }, + { + "epoch": 4.39, + "learning_rate": 1.1203310805107368e-05, + "loss": 0.045, + "step": 14447 + }, + { + "epoch": 4.39, + "learning_rate": 1.1192350328810207e-05, + "loss": 0.0285, + "step": 14448 + }, + { + "epoch": 4.39, + "learning_rate": 1.1181395008810168e-05, + "loss": 0.0159, + "step": 14449 + }, + { + "epoch": 4.39, + "learning_rate": 1.1170444845514242e-05, + "loss": 0.043, + "step": 14450 + }, + { + "epoch": 4.39, + "learning_rate": 1.1159499839329189e-05, + "loss": 0.0332, + "step": 14451 + }, + { + "epoch": 4.39, + "learning_rate": 1.1148559990661566e-05, + "loss": 0.0214, + "step": 14452 + }, + { + "epoch": 4.39, + "learning_rate": 1.1137625299917729e-05, + "loss": 0.029, + "step": 14453 + }, + { + "epoch": 4.39, + "learning_rate": 1.112669576750394e-05, + "loss": 0.0482, + "step": 14454 + }, + { + "epoch": 4.39, + "learning_rate": 1.1115771393826122e-05, + "loss": 0.0414, + "step": 14455 + }, + { + "epoch": 4.39, + "learning_rate": 1.1104852179290152e-05, + "loss": 0.0109, + "step": 14456 + }, + { + "epoch": 4.39, + "learning_rate": 1.109393812430162e-05, + "loss": 0.0084, + "step": 14457 + }, + { + "epoch": 4.39, + "learning_rate": 1.1083029229265921e-05, + "loss": 0.0264, + "step": 14458 + }, + { + "epoch": 4.39, + "learning_rate": 1.1072125494588347e-05, + "loss": 0.0117, + "step": 14459 + }, + { + "epoch": 4.39, + "learning_rate": 1.1061226920673871e-05, + "loss": 0.0294, + "step": 14460 + }, + { + "epoch": 4.39, + "learning_rate": 1.1050333507927439e-05, + "loss": 0.0102, + "step": 14461 + }, + { + "epoch": 4.39, + "learning_rate": 1.1039445256753594e-05, + "loss": 0.0494, + "step": 14462 + }, + { + "epoch": 4.39, + "learning_rate": 1.1028562167556892e-05, + "loss": 0.052, + "step": 14463 + }, + { + "epoch": 4.39, + "learning_rate": 1.1017684240741549e-05, + "loss": 0.01, + "step": 14464 + }, + { + "epoch": 4.39, + "learning_rate": 1.1006811476711702e-05, + "loss": 0.0178, + "step": 14465 + }, + { + "epoch": 4.39, + "learning_rate": 1.0995943875871199e-05, + "loss": 0.0354, + "step": 14466 + }, + { + "epoch": 4.39, + "learning_rate": 1.0985081438623749e-05, + "loss": 0.0266, + "step": 14467 + }, + { + "epoch": 4.39, + "learning_rate": 1.097422416537288e-05, + "loss": 0.0363, + "step": 14468 + }, + { + "epoch": 4.39, + "learning_rate": 1.0963372056521869e-05, + "loss": 0.0304, + "step": 14469 + }, + { + "epoch": 4.39, + "learning_rate": 1.0952525112473892e-05, + "loss": 0.0133, + "step": 14470 + }, + { + "epoch": 4.39, + "learning_rate": 1.0941683333631845e-05, + "loss": 0.0615, + "step": 14471 + }, + { + "epoch": 4.39, + "learning_rate": 1.0930846720398474e-05, + "loss": 0.0214, + "step": 14472 + }, + { + "epoch": 4.39, + "learning_rate": 1.0920015273176336e-05, + "loss": 0.0457, + "step": 14473 + }, + { + "epoch": 4.39, + "learning_rate": 1.0909188992367763e-05, + "loss": 0.012, + "step": 14474 + }, + { + "epoch": 4.4, + "learning_rate": 1.0898367878374964e-05, + "loss": 0.0156, + "step": 14475 + }, + { + "epoch": 4.4, + "learning_rate": 1.0887551931599886e-05, + "loss": 0.0278, + "step": 14476 + }, + { + "epoch": 4.4, + "learning_rate": 1.0876741152444307e-05, + "loss": 0.0082, + "step": 14477 + }, + { + "epoch": 4.4, + "learning_rate": 1.086593554130979e-05, + "loss": 0.0237, + "step": 14478 + }, + { + "epoch": 4.4, + "learning_rate": 1.0855135098597794e-05, + "loss": 0.0462, + "step": 14479 + }, + { + "epoch": 4.4, + "learning_rate": 1.0844339824709452e-05, + "loss": 0.0063, + "step": 14480 + }, + { + "epoch": 4.4, + "learning_rate": 1.0833549720045859e-05, + "loss": 0.0163, + "step": 14481 + }, + { + "epoch": 4.4, + "learning_rate": 1.0822764785007776e-05, + "loss": 0.0218, + "step": 14482 + }, + { + "epoch": 4.4, + "learning_rate": 1.0811985019995834e-05, + "loss": 0.0284, + "step": 14483 + }, + { + "epoch": 4.4, + "learning_rate": 1.0801210425410495e-05, + "loss": 0.0234, + "step": 14484 + }, + { + "epoch": 4.4, + "learning_rate": 1.079044100165199e-05, + "loss": 0.0299, + "step": 14485 + }, + { + "epoch": 4.4, + "learning_rate": 1.0779676749120398e-05, + "loss": 0.0088, + "step": 14486 + }, + { + "epoch": 4.4, + "learning_rate": 1.0768917668215516e-05, + "loss": 0.0208, + "step": 14487 + }, + { + "epoch": 4.4, + "learning_rate": 1.0758163759337073e-05, + "loss": 0.0188, + "step": 14488 + }, + { + "epoch": 4.4, + "learning_rate": 1.0747415022884503e-05, + "loss": 0.0122, + "step": 14489 + }, + { + "epoch": 4.4, + "learning_rate": 1.07366714592571e-05, + "loss": 0.008, + "step": 14490 + }, + { + "epoch": 4.4, + "learning_rate": 1.0725933068853997e-05, + "loss": 0.0109, + "step": 14491 + }, + { + "epoch": 4.4, + "learning_rate": 1.0715199852074058e-05, + "loss": 0.0172, + "step": 14492 + }, + { + "epoch": 4.4, + "learning_rate": 1.0704471809316011e-05, + "loss": 0.0215, + "step": 14493 + }, + { + "epoch": 4.4, + "learning_rate": 1.0693748940978309e-05, + "loss": 0.016, + "step": 14494 + }, + { + "epoch": 4.4, + "learning_rate": 1.0683031247459362e-05, + "loss": 0.0319, + "step": 14495 + }, + { + "epoch": 4.4, + "learning_rate": 1.0672318729157237e-05, + "loss": 0.0179, + "step": 14496 + }, + { + "epoch": 4.4, + "learning_rate": 1.0661611386469915e-05, + "loss": 0.0222, + "step": 14497 + }, + { + "epoch": 4.4, + "learning_rate": 1.0650909219795112e-05, + "loss": 0.0352, + "step": 14498 + }, + { + "epoch": 4.4, + "learning_rate": 1.0640212229530392e-05, + "loss": 0.015, + "step": 14499 + }, + { + "epoch": 4.4, + "learning_rate": 1.0629520416073123e-05, + "loss": 0.0446, + "step": 14500 + }, + { + "epoch": 4.4, + "learning_rate": 1.0618833779820452e-05, + "loss": 0.0318, + "step": 14501 + }, + { + "epoch": 4.4, + "learning_rate": 1.0608152321169428e-05, + "loss": 0.0237, + "step": 14502 + }, + { + "epoch": 4.4, + "learning_rate": 1.0597476040516734e-05, + "loss": 0.0027, + "step": 14503 + }, + { + "epoch": 4.4, + "learning_rate": 1.0586804938259035e-05, + "loss": 0.032, + "step": 14504 + }, + { + "epoch": 4.4, + "learning_rate": 1.0576139014792667e-05, + "loss": 0.0204, + "step": 14505 + }, + { + "epoch": 4.4, + "learning_rate": 1.0565478270513928e-05, + "loss": 0.0065, + "step": 14506 + }, + { + "epoch": 4.4, + "learning_rate": 1.0554822705818766e-05, + "loss": 0.0102, + "step": 14507 + }, + { + "epoch": 4.41, + "learning_rate": 1.0544172321103e-05, + "loss": 0.0227, + "step": 14508 + }, + { + "epoch": 4.41, + "learning_rate": 1.0533527116762296e-05, + "loss": 0.0208, + "step": 14509 + }, + { + "epoch": 4.41, + "learning_rate": 1.0522887093192073e-05, + "loss": 0.0178, + "step": 14510 + }, + { + "epoch": 4.41, + "learning_rate": 1.0512252250787595e-05, + "loss": 0.035, + "step": 14511 + }, + { + "epoch": 4.41, + "learning_rate": 1.0501622589943897e-05, + "loss": 0.0256, + "step": 14512 + }, + { + "epoch": 4.41, + "learning_rate": 1.0490998111055816e-05, + "loss": 0.0276, + "step": 14513 + }, + { + "epoch": 4.41, + "learning_rate": 1.0480378814518082e-05, + "loss": 0.0169, + "step": 14514 + }, + { + "epoch": 4.41, + "learning_rate": 1.0469764700725115e-05, + "loss": 0.016, + "step": 14515 + }, + { + "epoch": 4.41, + "learning_rate": 1.0459155770071247e-05, + "loss": 0.0268, + "step": 14516 + }, + { + "epoch": 4.41, + "learning_rate": 1.0448552022950534e-05, + "loss": 0.0315, + "step": 14517 + }, + { + "epoch": 4.41, + "learning_rate": 1.0437953459756875e-05, + "loss": 0.0104, + "step": 14518 + }, + { + "epoch": 4.41, + "learning_rate": 1.0427360080883972e-05, + "loss": 0.022, + "step": 14519 + }, + { + "epoch": 4.41, + "learning_rate": 1.041677188672536e-05, + "loss": 0.0285, + "step": 14520 + }, + { + "epoch": 4.41, + "learning_rate": 1.0406188877674343e-05, + "loss": 0.0091, + "step": 14521 + }, + { + "epoch": 4.41, + "learning_rate": 1.039561105412407e-05, + "loss": 0.0309, + "step": 14522 + }, + { + "epoch": 4.41, + "learning_rate": 1.0385038416467445e-05, + "loss": 0.0119, + "step": 14523 + }, + { + "epoch": 4.41, + "learning_rate": 1.037447096509722e-05, + "loss": 0.0288, + "step": 14524 + }, + { + "epoch": 4.41, + "learning_rate": 1.0363908700405965e-05, + "loss": 0.0237, + "step": 14525 + }, + { + "epoch": 4.41, + "learning_rate": 1.0353351622786e-05, + "loss": 0.017, + "step": 14526 + }, + { + "epoch": 4.41, + "learning_rate": 1.0342799732629558e-05, + "loss": 0.0209, + "step": 14527 + }, + { + "epoch": 4.41, + "learning_rate": 1.033225303032853e-05, + "loss": 0.0616, + "step": 14528 + }, + { + "epoch": 4.41, + "learning_rate": 1.0321711516274733e-05, + "loss": 0.022, + "step": 14529 + }, + { + "epoch": 4.41, + "learning_rate": 1.0311175190859739e-05, + "loss": 0.0575, + "step": 14530 + }, + { + "epoch": 4.41, + "learning_rate": 1.0300644054474948e-05, + "loss": 0.0171, + "step": 14531 + }, + { + "epoch": 4.41, + "learning_rate": 1.029011810751162e-05, + "loss": 0.015, + "step": 14532 + }, + { + "epoch": 4.41, + "learning_rate": 1.0279597350360653e-05, + "loss": 0.0368, + "step": 14533 + }, + { + "epoch": 4.41, + "learning_rate": 1.0269081783412952e-05, + "loss": 0.0196, + "step": 14534 + }, + { + "epoch": 4.41, + "learning_rate": 1.0258571407059073e-05, + "loss": 0.0427, + "step": 14535 + }, + { + "epoch": 4.41, + "learning_rate": 1.0248066221689488e-05, + "loss": 0.0577, + "step": 14536 + }, + { + "epoch": 4.41, + "learning_rate": 1.0237566227694449e-05, + "loss": 0.0186, + "step": 14537 + }, + { + "epoch": 4.41, + "learning_rate": 1.0227071425463929e-05, + "loss": 0.0178, + "step": 14538 + }, + { + "epoch": 4.41, + "learning_rate": 1.0216581815387864e-05, + "loss": 0.0217, + "step": 14539 + }, + { + "epoch": 4.41, + "learning_rate": 1.0206097397855845e-05, + "loss": 0.0585, + "step": 14540 + }, + { + "epoch": 4.42, + "learning_rate": 1.0195618173257391e-05, + "loss": 0.024, + "step": 14541 + }, + { + "epoch": 4.42, + "learning_rate": 1.0185144141981743e-05, + "loss": 0.0404, + "step": 14542 + }, + { + "epoch": 4.42, + "learning_rate": 1.0174675304417989e-05, + "loss": 0.0185, + "step": 14543 + }, + { + "epoch": 4.42, + "learning_rate": 1.0164211660955001e-05, + "loss": 0.015, + "step": 14544 + }, + { + "epoch": 4.42, + "learning_rate": 1.0153753211981485e-05, + "loss": 0.0262, + "step": 14545 + }, + { + "epoch": 4.42, + "learning_rate": 1.0143299957885947e-05, + "loss": 0.0353, + "step": 14546 + }, + { + "epoch": 4.42, + "learning_rate": 1.0132851899056693e-05, + "loss": 0.0266, + "step": 14547 + }, + { + "epoch": 4.42, + "learning_rate": 1.012240903588183e-05, + "loss": 0.0141, + "step": 14548 + }, + { + "epoch": 4.42, + "learning_rate": 1.0111971368749278e-05, + "loss": 0.0357, + "step": 14549 + }, + { + "epoch": 4.42, + "learning_rate": 1.0101538898046779e-05, + "loss": 0.0375, + "step": 14550 + }, + { + "epoch": 4.42, + "learning_rate": 1.009111162416184e-05, + "loss": 0.0111, + "step": 14551 + }, + { + "epoch": 4.42, + "learning_rate": 1.0080689547481885e-05, + "loss": 0.05, + "step": 14552 + }, + { + "epoch": 4.42, + "learning_rate": 1.0070272668393937e-05, + "loss": 0.0061, + "step": 14553 + }, + { + "epoch": 4.42, + "learning_rate": 1.0059860987285034e-05, + "loss": 0.0267, + "step": 14554 + }, + { + "epoch": 4.42, + "learning_rate": 1.0049454504541938e-05, + "loss": 0.0258, + "step": 14555 + }, + { + "epoch": 4.42, + "learning_rate": 1.0039053220551186e-05, + "loss": 0.0178, + "step": 14556 + }, + { + "epoch": 4.42, + "learning_rate": 1.0028657135699203e-05, + "loss": 0.021, + "step": 14557 + }, + { + "epoch": 4.42, + "learning_rate": 1.0018266250372132e-05, + "loss": 0.035, + "step": 14558 + }, + { + "epoch": 4.42, + "learning_rate": 1.0007880564955995e-05, + "loss": 0.0226, + "step": 14559 + }, + { + "epoch": 4.42, + "learning_rate": 9.997500079836535e-06, + "loss": 0.0171, + "step": 14560 + }, + { + "epoch": 4.42, + "learning_rate": 9.98712479539941e-06, + "loss": 0.0212, + "step": 14561 + }, + { + "epoch": 4.42, + "learning_rate": 9.97675471203001e-06, + "loss": 0.0227, + "step": 14562 + }, + { + "epoch": 4.42, + "learning_rate": 9.966389830113564e-06, + "loss": 0.0445, + "step": 14563 + }, + { + "epoch": 4.42, + "learning_rate": 9.95603015003511e-06, + "loss": 0.0081, + "step": 14564 + }, + { + "epoch": 4.42, + "learning_rate": 9.945675672179426e-06, + "loss": 0.027, + "step": 14565 + }, + { + "epoch": 4.42, + "learning_rate": 9.935326396931203e-06, + "loss": 0.0328, + "step": 14566 + }, + { + "epoch": 4.42, + "learning_rate": 9.924982324674852e-06, + "loss": 0.0282, + "step": 14567 + }, + { + "epoch": 4.42, + "learning_rate": 9.91464345579468e-06, + "loss": 0.0243, + "step": 14568 + }, + { + "epoch": 4.42, + "learning_rate": 9.904309790674664e-06, + "loss": 0.0196, + "step": 14569 + }, + { + "epoch": 4.42, + "learning_rate": 9.893981329698747e-06, + "loss": 0.0199, + "step": 14570 + }, + { + "epoch": 4.42, + "learning_rate": 9.883658073250522e-06, + "loss": 0.0266, + "step": 14571 + }, + { + "epoch": 4.42, + "learning_rate": 9.873340021713515e-06, + "loss": 0.0556, + "step": 14572 + }, + { + "epoch": 4.42, + "learning_rate": 9.863027175471072e-06, + "loss": 0.016, + "step": 14573 + }, + { + "epoch": 4.43, + "learning_rate": 9.852719534906166e-06, + "loss": 0.013, + "step": 14574 + }, + { + "epoch": 4.43, + "learning_rate": 9.842417100401779e-06, + "loss": 0.0287, + "step": 14575 + }, + { + "epoch": 4.43, + "learning_rate": 9.832119872340549e-06, + "loss": 0.0136, + "step": 14576 + }, + { + "epoch": 4.43, + "learning_rate": 9.821827851105057e-06, + "loss": 0.0682, + "step": 14577 + }, + { + "epoch": 4.43, + "learning_rate": 9.811541037077596e-06, + "loss": 0.0286, + "step": 14578 + }, + { + "epoch": 4.43, + "learning_rate": 9.801259430640263e-06, + "loss": 0.0284, + "step": 14579 + }, + { + "epoch": 4.43, + "learning_rate": 9.790983032175031e-06, + "loss": 0.0321, + "step": 14580 + }, + { + "epoch": 4.43, + "learning_rate": 9.7807118420636e-06, + "loss": 0.0228, + "step": 14581 + }, + { + "epoch": 4.43, + "learning_rate": 9.770445860687559e-06, + "loss": 0.0365, + "step": 14582 + }, + { + "epoch": 4.43, + "learning_rate": 9.760185088428225e-06, + "loss": 0.0094, + "step": 14583 + }, + { + "epoch": 4.43, + "learning_rate": 9.749929525666772e-06, + "loss": 0.0218, + "step": 14584 + }, + { + "epoch": 4.43, + "learning_rate": 9.739679172784149e-06, + "loss": 0.0282, + "step": 14585 + }, + { + "epoch": 4.43, + "learning_rate": 9.729434030161132e-06, + "loss": 0.0219, + "step": 14586 + }, + { + "epoch": 4.43, + "learning_rate": 9.719194098178302e-06, + "loss": 0.025, + "step": 14587 + }, + { + "epoch": 4.43, + "learning_rate": 9.708959377216036e-06, + "loss": 0.0481, + "step": 14588 + }, + { + "epoch": 4.43, + "learning_rate": 9.698729867654547e-06, + "loss": 0.0187, + "step": 14589 + }, + { + "epoch": 4.43, + "learning_rate": 9.6885055698738e-06, + "loss": 0.0349, + "step": 14590 + }, + { + "epoch": 4.43, + "learning_rate": 9.678286484253622e-06, + "loss": 0.0113, + "step": 14591 + }, + { + "epoch": 4.43, + "learning_rate": 9.668072611173577e-06, + "loss": 0.0122, + "step": 14592 + }, + { + "epoch": 4.43, + "learning_rate": 9.657863951013179e-06, + "loss": 0.0283, + "step": 14593 + }, + { + "epoch": 4.43, + "learning_rate": 9.647660504151521e-06, + "loss": 0.0222, + "step": 14594 + }, + { + "epoch": 4.43, + "learning_rate": 9.637462270967705e-06, + "loss": 0.0114, + "step": 14595 + }, + { + "epoch": 4.43, + "learning_rate": 9.627269251840592e-06, + "loss": 0.0322, + "step": 14596 + }, + { + "epoch": 4.43, + "learning_rate": 9.617081447148745e-06, + "loss": 0.0402, + "step": 14597 + }, + { + "epoch": 4.43, + "learning_rate": 9.606898857270712e-06, + "loss": 0.0363, + "step": 14598 + }, + { + "epoch": 4.43, + "learning_rate": 9.596721482584624e-06, + "loss": 0.0317, + "step": 14599 + }, + { + "epoch": 4.43, + "learning_rate": 9.586549323468645e-06, + "loss": 0.0305, + "step": 14600 + }, + { + "epoch": 4.43, + "learning_rate": 9.576382380300574e-06, + "loss": 0.02, + "step": 14601 + }, + { + "epoch": 4.43, + "learning_rate": 9.566220653458124e-06, + "loss": 0.0428, + "step": 14602 + }, + { + "epoch": 4.43, + "learning_rate": 9.556064143318759e-06, + "loss": 0.0248, + "step": 14603 + }, + { + "epoch": 4.43, + "learning_rate": 9.545912850259745e-06, + "loss": 0.0327, + "step": 14604 + }, + { + "epoch": 4.43, + "learning_rate": 9.535766774658198e-06, + "loss": 0.0323, + "step": 14605 + }, + { + "epoch": 4.43, + "learning_rate": 9.525625916891e-06, + "loss": 0.0195, + "step": 14606 + }, + { + "epoch": 4.44, + "learning_rate": 9.51549027733488e-06, + "loss": 0.0173, + "step": 14607 + }, + { + "epoch": 4.44, + "learning_rate": 9.50535985636634e-06, + "loss": 0.021, + "step": 14608 + }, + { + "epoch": 4.44, + "learning_rate": 9.495234654361678e-06, + "loss": 0.015, + "step": 14609 + }, + { + "epoch": 4.44, + "learning_rate": 9.485114671696992e-06, + "loss": 0.0164, + "step": 14610 + }, + { + "epoch": 4.44, + "learning_rate": 9.474999908748265e-06, + "loss": 0.0152, + "step": 14611 + }, + { + "epoch": 4.44, + "learning_rate": 9.464890365891181e-06, + "loss": 0.0329, + "step": 14612 + }, + { + "epoch": 4.44, + "learning_rate": 9.454786043501321e-06, + "loss": 0.0285, + "step": 14613 + }, + { + "epoch": 4.44, + "learning_rate": 9.444686941954022e-06, + "loss": 0.0203, + "step": 14614 + }, + { + "epoch": 4.44, + "learning_rate": 9.434593061624412e-06, + "loss": 0.0236, + "step": 14615 + }, + { + "epoch": 4.44, + "learning_rate": 9.424504402887478e-06, + "loss": 0.0348, + "step": 14616 + }, + { + "epoch": 4.44, + "learning_rate": 9.414420966117952e-06, + "loss": 0.0183, + "step": 14617 + }, + { + "epoch": 4.44, + "learning_rate": 9.404342751690437e-06, + "loss": 0.016, + "step": 14618 + }, + { + "epoch": 4.44, + "learning_rate": 9.394269759979312e-06, + "loss": 0.0063, + "step": 14619 + }, + { + "epoch": 4.44, + "learning_rate": 9.384201991358713e-06, + "loss": 0.0077, + "step": 14620 + }, + { + "epoch": 4.44, + "learning_rate": 9.374139446202678e-06, + "loss": 0.0786, + "step": 14621 + }, + { + "epoch": 4.44, + "learning_rate": 9.364082124884954e-06, + "loss": 0.0137, + "step": 14622 + }, + { + "epoch": 4.44, + "learning_rate": 9.354030027779192e-06, + "loss": 0.0123, + "step": 14623 + }, + { + "epoch": 4.44, + "learning_rate": 9.343983155258777e-06, + "loss": 0.0086, + "step": 14624 + }, + { + "epoch": 4.44, + "learning_rate": 9.333941507696913e-06, + "loss": 0.047, + "step": 14625 + }, + { + "epoch": 4.44, + "learning_rate": 9.323905085466615e-06, + "loss": 0.0058, + "step": 14626 + }, + { + "epoch": 4.44, + "learning_rate": 9.31387388894072e-06, + "loss": 0.0165, + "step": 14627 + }, + { + "epoch": 4.44, + "learning_rate": 9.30384791849183e-06, + "loss": 0.0286, + "step": 14628 + }, + { + "epoch": 4.44, + "learning_rate": 9.293827174492429e-06, + "loss": 0.0264, + "step": 14629 + }, + { + "epoch": 4.44, + "learning_rate": 9.283811657314737e-06, + "loss": 0.0233, + "step": 14630 + }, + { + "epoch": 4.44, + "learning_rate": 9.273801367330758e-06, + "loss": 0.054, + "step": 14631 + }, + { + "epoch": 4.44, + "learning_rate": 9.263796304912425e-06, + "loss": 0.0297, + "step": 14632 + }, + { + "epoch": 4.44, + "learning_rate": 9.253796470431308e-06, + "loss": 0.0125, + "step": 14633 + }, + { + "epoch": 4.44, + "learning_rate": 9.243801864258976e-06, + "loss": 0.046, + "step": 14634 + }, + { + "epoch": 4.44, + "learning_rate": 9.233812486766584e-06, + "loss": 0.0071, + "step": 14635 + }, + { + "epoch": 4.44, + "learning_rate": 9.223828338325284e-06, + "loss": 0.0274, + "step": 14636 + }, + { + "epoch": 4.44, + "learning_rate": 9.213849419305946e-06, + "loss": 0.0272, + "step": 14637 + }, + { + "epoch": 4.44, + "learning_rate": 9.203875730079224e-06, + "loss": 0.0196, + "step": 14638 + }, + { + "epoch": 4.44, + "learning_rate": 9.193907271015704e-06, + "loss": 0.007, + "step": 14639 + }, + { + "epoch": 4.45, + "learning_rate": 9.183944042485559e-06, + "loss": 0.0177, + "step": 14640 + }, + { + "epoch": 4.45, + "learning_rate": 9.173986044858972e-06, + "loss": 0.0237, + "step": 14641 + }, + { + "epoch": 4.45, + "learning_rate": 9.164033278505816e-06, + "loss": 0.0187, + "step": 14642 + }, + { + "epoch": 4.45, + "learning_rate": 9.154085743795847e-06, + "loss": 0.0284, + "step": 14643 + }, + { + "epoch": 4.45, + "learning_rate": 9.144143441098567e-06, + "loss": 0.0206, + "step": 14644 + }, + { + "epoch": 4.45, + "learning_rate": 9.134206370783282e-06, + "loss": 0.0085, + "step": 14645 + }, + { + "epoch": 4.45, + "learning_rate": 9.124274533219162e-06, + "loss": 0.0337, + "step": 14646 + }, + { + "epoch": 4.45, + "learning_rate": 9.114347928775112e-06, + "loss": 0.026, + "step": 14647 + }, + { + "epoch": 4.45, + "learning_rate": 9.104426557819905e-06, + "loss": 0.0537, + "step": 14648 + }, + { + "epoch": 4.45, + "learning_rate": 9.094510420722095e-06, + "loss": 0.024, + "step": 14649 + }, + { + "epoch": 4.45, + "learning_rate": 9.084599517850006e-06, + "loss": 0.0216, + "step": 14650 + }, + { + "epoch": 4.45, + "learning_rate": 9.07469384957179e-06, + "loss": 0.0269, + "step": 14651 + }, + { + "epoch": 4.45, + "learning_rate": 9.064793416255473e-06, + "loss": 0.0279, + "step": 14652 + }, + { + "epoch": 4.45, + "learning_rate": 9.054898218268757e-06, + "loss": 0.0323, + "step": 14653 + }, + { + "epoch": 4.45, + "learning_rate": 9.045008255979285e-06, + "loss": 0.0067, + "step": 14654 + }, + { + "epoch": 4.45, + "learning_rate": 9.035123529754413e-06, + "loss": 0.0448, + "step": 14655 + }, + { + "epoch": 4.45, + "learning_rate": 9.025244039961293e-06, + "loss": 0.0259, + "step": 14656 + }, + { + "epoch": 4.45, + "learning_rate": 9.015369786966987e-06, + "loss": 0.0349, + "step": 14657 + }, + { + "epoch": 4.45, + "learning_rate": 9.00550077113823e-06, + "loss": 0.0062, + "step": 14658 + }, + { + "epoch": 4.45, + "learning_rate": 8.995636992841666e-06, + "loss": 0.0344, + "step": 14659 + }, + { + "epoch": 4.45, + "learning_rate": 8.985778452443715e-06, + "loss": 0.0472, + "step": 14660 + }, + { + "epoch": 4.45, + "learning_rate": 8.975925150310537e-06, + "loss": 0.018, + "step": 14661 + }, + { + "epoch": 4.45, + "learning_rate": 8.966077086808222e-06, + "loss": 0.0102, + "step": 14662 + }, + { + "epoch": 4.45, + "learning_rate": 8.956234262302541e-06, + "loss": 0.0148, + "step": 14663 + }, + { + "epoch": 4.45, + "learning_rate": 8.94639667715919e-06, + "loss": 0.0158, + "step": 14664 + }, + { + "epoch": 4.45, + "learning_rate": 8.936564331743523e-06, + "loss": 0.0276, + "step": 14665 + }, + { + "epoch": 4.45, + "learning_rate": 8.92673722642085e-06, + "loss": 0.0343, + "step": 14666 + }, + { + "epoch": 4.45, + "learning_rate": 8.916915361556176e-06, + "loss": 0.0102, + "step": 14667 + }, + { + "epoch": 4.45, + "learning_rate": 8.907098737514378e-06, + "loss": 0.0486, + "step": 14668 + }, + { + "epoch": 4.45, + "learning_rate": 8.897287354660132e-06, + "loss": 0.0199, + "step": 14669 + }, + { + "epoch": 4.45, + "learning_rate": 8.887481213357844e-06, + "loss": 0.0093, + "step": 14670 + }, + { + "epoch": 4.45, + "learning_rate": 8.877680313971857e-06, + "loss": 0.0381, + "step": 14671 + }, + { + "epoch": 4.45, + "learning_rate": 8.867884656866181e-06, + "loss": 0.0167, + "step": 14672 + }, + { + "epoch": 4.46, + "learning_rate": 8.85809424240474e-06, + "loss": 0.0191, + "step": 14673 + }, + { + "epoch": 4.46, + "learning_rate": 8.84830907095121e-06, + "loss": 0.0188, + "step": 14674 + }, + { + "epoch": 4.46, + "learning_rate": 8.838529142869067e-06, + "loss": 0.0087, + "step": 14675 + }, + { + "epoch": 4.46, + "learning_rate": 8.828754458521586e-06, + "loss": 0.0302, + "step": 14676 + }, + { + "epoch": 4.46, + "learning_rate": 8.818985018271912e-06, + "loss": 0.0244, + "step": 14677 + }, + { + "epoch": 4.46, + "learning_rate": 8.809220822482937e-06, + "loss": 0.0324, + "step": 14678 + }, + { + "epoch": 4.46, + "learning_rate": 8.79946187151737e-06, + "loss": 0.0193, + "step": 14679 + }, + { + "epoch": 4.46, + "learning_rate": 8.789708165737758e-06, + "loss": 0.0274, + "step": 14680 + }, + { + "epoch": 4.46, + "learning_rate": 8.779959705506356e-06, + "loss": 0.0264, + "step": 14681 + }, + { + "epoch": 4.46, + "learning_rate": 8.770216491185328e-06, + "loss": 0.0232, + "step": 14682 + }, + { + "epoch": 4.46, + "learning_rate": 8.7604785231366e-06, + "loss": 0.0618, + "step": 14683 + }, + { + "epoch": 4.46, + "learning_rate": 8.750745801721932e-06, + "loss": 0.0327, + "step": 14684 + }, + { + "epoch": 4.46, + "learning_rate": 8.74101832730285e-06, + "loss": 0.0237, + "step": 14685 + }, + { + "epoch": 4.46, + "learning_rate": 8.731296100240687e-06, + "loss": 0.0034, + "step": 14686 + }, + { + "epoch": 4.46, + "learning_rate": 8.721579120896615e-06, + "loss": 0.0171, + "step": 14687 + }, + { + "epoch": 4.46, + "learning_rate": 8.71186738963155e-06, + "loss": 0.0145, + "step": 14688 + }, + { + "epoch": 4.46, + "learning_rate": 8.702160906806333e-06, + "loss": 0.0126, + "step": 14689 + }, + { + "epoch": 4.46, + "learning_rate": 8.692459672781476e-06, + "loss": 0.0195, + "step": 14690 + }, + { + "epoch": 4.46, + "learning_rate": 8.68276368791736e-06, + "loss": 0.0229, + "step": 14691 + }, + { + "epoch": 4.46, + "learning_rate": 8.673072952574145e-06, + "loss": 0.0191, + "step": 14692 + }, + { + "epoch": 4.46, + "learning_rate": 8.663387467111843e-06, + "loss": 0.0362, + "step": 14693 + }, + { + "epoch": 4.46, + "learning_rate": 8.653707231890217e-06, + "loss": 0.0122, + "step": 14694 + }, + { + "epoch": 4.46, + "learning_rate": 8.644032247268895e-06, + "loss": 0.0302, + "step": 14695 + }, + { + "epoch": 4.46, + "learning_rate": 8.634362513607256e-06, + "loss": 0.0137, + "step": 14696 + }, + { + "epoch": 4.46, + "learning_rate": 8.624698031264466e-06, + "loss": 0.0161, + "step": 14697 + }, + { + "epoch": 4.46, + "learning_rate": 8.6150388005996e-06, + "loss": 0.0116, + "step": 14698 + }, + { + "epoch": 4.46, + "learning_rate": 8.605384821971406e-06, + "loss": 0.0161, + "step": 14699 + }, + { + "epoch": 4.46, + "learning_rate": 8.595736095738548e-06, + "loss": 0.0294, + "step": 14700 + }, + { + "epoch": 4.46, + "learning_rate": 8.586092622259421e-06, + "loss": 0.0185, + "step": 14701 + }, + { + "epoch": 4.46, + "learning_rate": 8.576454401892257e-06, + "loss": 0.0523, + "step": 14702 + }, + { + "epoch": 4.46, + "learning_rate": 8.566821434995103e-06, + "loss": 0.0213, + "step": 14703 + }, + { + "epoch": 4.46, + "learning_rate": 8.55719372192577e-06, + "loss": 0.0235, + "step": 14704 + }, + { + "epoch": 4.46, + "learning_rate": 8.547571263041941e-06, + "loss": 0.0313, + "step": 14705 + }, + { + "epoch": 4.47, + "learning_rate": 8.537954058700997e-06, + "loss": 0.0441, + "step": 14706 + }, + { + "epoch": 4.47, + "learning_rate": 8.52834210926025e-06, + "loss": 0.0177, + "step": 14707 + }, + { + "epoch": 4.47, + "learning_rate": 8.518735415076716e-06, + "loss": 0.029, + "step": 14708 + }, + { + "epoch": 4.47, + "learning_rate": 8.509133976507293e-06, + "loss": 0.0284, + "step": 14709 + }, + { + "epoch": 4.47, + "learning_rate": 8.499537793908611e-06, + "loss": 0.0227, + "step": 14710 + }, + { + "epoch": 4.47, + "learning_rate": 8.489946867637138e-06, + "loss": 0.0174, + "step": 14711 + }, + { + "epoch": 4.47, + "learning_rate": 8.480361198049184e-06, + "loss": 0.0237, + "step": 14712 + }, + { + "epoch": 4.47, + "learning_rate": 8.470780785500787e-06, + "loss": 0.0227, + "step": 14713 + }, + { + "epoch": 4.47, + "learning_rate": 8.461205630347872e-06, + "loss": 0.0345, + "step": 14714 + }, + { + "epoch": 4.47, + "learning_rate": 8.45163573294611e-06, + "loss": 0.0251, + "step": 14715 + }, + { + "epoch": 4.47, + "learning_rate": 8.442071093650999e-06, + "loss": 0.0417, + "step": 14716 + }, + { + "epoch": 4.47, + "learning_rate": 8.432511712817818e-06, + "loss": 0.0194, + "step": 14717 + }, + { + "epoch": 4.47, + "learning_rate": 8.422957590801687e-06, + "loss": 0.0191, + "step": 14718 + }, + { + "epoch": 4.47, + "learning_rate": 8.413408727957504e-06, + "loss": 0.0387, + "step": 14719 + }, + { + "epoch": 4.47, + "learning_rate": 8.403865124640002e-06, + "loss": 0.0162, + "step": 14720 + }, + { + "epoch": 4.47, + "learning_rate": 8.39432678120368e-06, + "loss": 0.0359, + "step": 14721 + }, + { + "epoch": 4.47, + "learning_rate": 8.384793698002839e-06, + "loss": 0.0274, + "step": 14722 + }, + { + "epoch": 4.47, + "learning_rate": 8.375265875391663e-06, + "loss": 0.0118, + "step": 14723 + }, + { + "epoch": 4.47, + "learning_rate": 8.365743313724017e-06, + "loss": 0.0525, + "step": 14724 + }, + { + "epoch": 4.47, + "learning_rate": 8.356226013353684e-06, + "loss": 0.0593, + "step": 14725 + }, + { + "epoch": 4.47, + "learning_rate": 8.3467139746342e-06, + "loss": 0.0291, + "step": 14726 + }, + { + "epoch": 4.47, + "learning_rate": 8.337207197918867e-06, + "loss": 0.0348, + "step": 14727 + }, + { + "epoch": 4.47, + "learning_rate": 8.327705683560898e-06, + "loss": 0.0205, + "step": 14728 + }, + { + "epoch": 4.47, + "learning_rate": 8.318209431913181e-06, + "loss": 0.0163, + "step": 14729 + }, + { + "epoch": 4.47, + "learning_rate": 8.308718443328549e-06, + "loss": 0.0258, + "step": 14730 + }, + { + "epoch": 4.47, + "learning_rate": 8.29923271815947e-06, + "loss": 0.0099, + "step": 14731 + }, + { + "epoch": 4.47, + "learning_rate": 8.289752256758397e-06, + "loss": 0.02, + "step": 14732 + }, + { + "epoch": 4.47, + "learning_rate": 8.28027705947743e-06, + "loss": 0.0239, + "step": 14733 + }, + { + "epoch": 4.47, + "learning_rate": 8.270807126668622e-06, + "loss": 0.0238, + "step": 14734 + }, + { + "epoch": 4.47, + "learning_rate": 8.261342458683689e-06, + "loss": 0.0223, + "step": 14735 + }, + { + "epoch": 4.47, + "learning_rate": 8.251883055874237e-06, + "loss": 0.0178, + "step": 14736 + }, + { + "epoch": 4.47, + "learning_rate": 8.242428918591665e-06, + "loss": 0.0359, + "step": 14737 + }, + { + "epoch": 4.47, + "learning_rate": 8.232980047187143e-06, + "loss": 0.0098, + "step": 14738 + }, + { + "epoch": 4.48, + "learning_rate": 8.223536442011708e-06, + "loss": 0.0328, + "step": 14739 + }, + { + "epoch": 4.48, + "learning_rate": 8.214098103416127e-06, + "loss": 0.0277, + "step": 14740 + }, + { + "epoch": 4.48, + "learning_rate": 8.204665031751007e-06, + "loss": 0.0261, + "step": 14741 + }, + { + "epoch": 4.48, + "learning_rate": 8.19523722736678e-06, + "loss": 0.018, + "step": 14742 + }, + { + "epoch": 4.48, + "learning_rate": 8.185814690613635e-06, + "loss": 0.0185, + "step": 14743 + }, + { + "epoch": 4.48, + "learning_rate": 8.17639742184164e-06, + "loss": 0.0103, + "step": 14744 + }, + { + "epoch": 4.48, + "learning_rate": 8.166985421400552e-06, + "loss": 0.0135, + "step": 14745 + }, + { + "epoch": 4.48, + "learning_rate": 8.157578689640088e-06, + "loss": 0.0265, + "step": 14746 + }, + { + "epoch": 4.48, + "learning_rate": 8.148177226909569e-06, + "loss": 0.015, + "step": 14747 + }, + { + "epoch": 4.48, + "learning_rate": 8.138781033558318e-06, + "loss": 0.0172, + "step": 14748 + }, + { + "epoch": 4.48, + "learning_rate": 8.129390109935318e-06, + "loss": 0.0309, + "step": 14749 + }, + { + "epoch": 4.48, + "learning_rate": 8.120004456389478e-06, + "loss": 0.0158, + "step": 14750 + }, + { + "epoch": 4.48, + "learning_rate": 8.1106240732694e-06, + "loss": 0.0079, + "step": 14751 + }, + { + "epoch": 4.48, + "learning_rate": 8.10124896092354e-06, + "loss": 0.0098, + "step": 14752 + }, + { + "epoch": 4.48, + "learning_rate": 8.091879119700184e-06, + "loss": 0.0366, + "step": 14753 + }, + { + "epoch": 4.48, + "learning_rate": 8.082514549947355e-06, + "loss": 0.0488, + "step": 14754 + }, + { + "epoch": 4.48, + "learning_rate": 8.073155252012959e-06, + "loss": 0.023, + "step": 14755 + }, + { + "epoch": 4.48, + "learning_rate": 8.063801226244648e-06, + "loss": 0.0163, + "step": 14756 + }, + { + "epoch": 4.48, + "learning_rate": 8.054452472989897e-06, + "loss": 0.0331, + "step": 14757 + }, + { + "epoch": 4.48, + "learning_rate": 8.045108992595978e-06, + "loss": 0.0177, + "step": 14758 + }, + { + "epoch": 4.48, + "learning_rate": 8.035770785409979e-06, + "loss": 0.0373, + "step": 14759 + }, + { + "epoch": 4.48, + "learning_rate": 8.02643785177879e-06, + "loss": 0.0406, + "step": 14760 + }, + { + "epoch": 4.48, + "learning_rate": 8.017110192049097e-06, + "loss": 0.0297, + "step": 14761 + }, + { + "epoch": 4.48, + "learning_rate": 8.007787806567428e-06, + "loss": 0.0297, + "step": 14762 + }, + { + "epoch": 4.48, + "learning_rate": 7.998470695680004e-06, + "loss": 0.0213, + "step": 14763 + }, + { + "epoch": 4.48, + "learning_rate": 7.989158859733013e-06, + "loss": 0.0238, + "step": 14764 + }, + { + "epoch": 4.48, + "learning_rate": 7.979852299072314e-06, + "loss": 0.0491, + "step": 14765 + }, + { + "epoch": 4.48, + "learning_rate": 7.97055101404363e-06, + "loss": 0.0525, + "step": 14766 + }, + { + "epoch": 4.48, + "learning_rate": 7.9612550049925e-06, + "loss": 0.015, + "step": 14767 + }, + { + "epoch": 4.48, + "learning_rate": 7.951964272264183e-06, + "loss": 0.0402, + "step": 14768 + }, + { + "epoch": 4.48, + "learning_rate": 7.942678816203885e-06, + "loss": 0.0168, + "step": 14769 + }, + { + "epoch": 4.48, + "learning_rate": 7.933398637156445e-06, + "loss": 0.024, + "step": 14770 + }, + { + "epoch": 4.48, + "learning_rate": 7.924123735466693e-06, + "loss": 0.0326, + "step": 14771 + }, + { + "epoch": 4.49, + "learning_rate": 7.914854111479064e-06, + "loss": 0.0177, + "step": 14772 + }, + { + "epoch": 4.49, + "learning_rate": 7.905589765537967e-06, + "loss": 0.0266, + "step": 14773 + }, + { + "epoch": 4.49, + "learning_rate": 7.896330697987496e-06, + "loss": 0.0041, + "step": 14774 + }, + { + "epoch": 4.49, + "learning_rate": 7.88707690917164e-06, + "loss": 0.0276, + "step": 14775 + }, + { + "epoch": 4.49, + "learning_rate": 7.877828399434144e-06, + "loss": 0.0155, + "step": 14776 + }, + { + "epoch": 4.49, + "learning_rate": 7.868585169118529e-06, + "loss": 0.0164, + "step": 14777 + }, + { + "epoch": 4.49, + "learning_rate": 7.859347218568207e-06, + "loss": 0.0414, + "step": 14778 + }, + { + "epoch": 4.49, + "learning_rate": 7.850114548126285e-06, + "loss": 0.0371, + "step": 14779 + }, + { + "epoch": 4.49, + "learning_rate": 7.840887158135772e-06, + "loss": 0.0319, + "step": 14780 + }, + { + "epoch": 4.49, + "learning_rate": 7.831665048939428e-06, + "loss": 0.0509, + "step": 14781 + }, + { + "epoch": 4.49, + "learning_rate": 7.822448220879795e-06, + "loss": 0.0201, + "step": 14782 + }, + { + "epoch": 4.49, + "learning_rate": 7.813236674299318e-06, + "loss": 0.0373, + "step": 14783 + }, + { + "epoch": 4.49, + "learning_rate": 7.804030409540102e-06, + "loss": 0.043, + "step": 14784 + }, + { + "epoch": 4.49, + "learning_rate": 7.794829426944194e-06, + "loss": 0.0341, + "step": 14785 + }, + { + "epoch": 4.49, + "learning_rate": 7.785633726853352e-06, + "loss": 0.0194, + "step": 14786 + }, + { + "epoch": 4.49, + "learning_rate": 7.776443309609187e-06, + "loss": 0.0062, + "step": 14787 + }, + { + "epoch": 4.49, + "learning_rate": 7.767258175553077e-06, + "loss": 0.0415, + "step": 14788 + }, + { + "epoch": 4.49, + "learning_rate": 7.75807832502623e-06, + "loss": 0.0204, + "step": 14789 + }, + { + "epoch": 4.49, + "learning_rate": 7.748903758369641e-06, + "loss": 0.0307, + "step": 14790 + }, + { + "epoch": 4.49, + "learning_rate": 7.739734475924153e-06, + "loss": 0.0141, + "step": 14791 + }, + { + "epoch": 4.49, + "learning_rate": 7.730570478030345e-06, + "loss": 0.0151, + "step": 14792 + }, + { + "epoch": 4.49, + "learning_rate": 7.721411765028629e-06, + "loss": 0.0219, + "step": 14793 + }, + { + "epoch": 4.49, + "learning_rate": 7.712258337259247e-06, + "loss": 0.0158, + "step": 14794 + }, + { + "epoch": 4.49, + "learning_rate": 7.703110195062213e-06, + "loss": 0.0132, + "step": 14795 + }, + { + "epoch": 4.49, + "learning_rate": 7.693967338777384e-06, + "loss": 0.0074, + "step": 14796 + }, + { + "epoch": 4.49, + "learning_rate": 7.68482976874431e-06, + "loss": 0.0265, + "step": 14797 + }, + { + "epoch": 4.49, + "learning_rate": 7.675697485302501e-06, + "loss": 0.0337, + "step": 14798 + }, + { + "epoch": 4.49, + "learning_rate": 7.666570488791152e-06, + "loss": 0.0607, + "step": 14799 + }, + { + "epoch": 4.49, + "learning_rate": 7.657448779549324e-06, + "loss": 0.0064, + "step": 14800 + }, + { + "epoch": 4.49, + "learning_rate": 7.648332357915881e-06, + "loss": 0.0116, + "step": 14801 + }, + { + "epoch": 4.49, + "learning_rate": 7.63922122422942e-06, + "loss": 0.0367, + "step": 14802 + }, + { + "epoch": 4.49, + "learning_rate": 7.630115378828433e-06, + "loss": 0.0292, + "step": 14803 + }, + { + "epoch": 4.49, + "learning_rate": 7.6210148220511525e-06, + "loss": 0.0253, + "step": 14804 + }, + { + "epoch": 4.5, + "learning_rate": 7.611919554235657e-06, + "loss": 0.0336, + "step": 14805 + }, + { + "epoch": 4.5, + "learning_rate": 7.602829575719793e-06, + "loss": 0.0166, + "step": 14806 + }, + { + "epoch": 4.5, + "learning_rate": 7.593744886841241e-06, + "loss": 0.0302, + "step": 14807 + }, + { + "epoch": 4.5, + "learning_rate": 7.58466548793748e-06, + "loss": 0.0529, + "step": 14808 + }, + { + "epoch": 4.5, + "learning_rate": 7.575591379345741e-06, + "loss": 0.0261, + "step": 14809 + }, + { + "epoch": 4.5, + "learning_rate": 7.566522561403138e-06, + "loss": 0.0085, + "step": 14810 + }, + { + "epoch": 4.5, + "learning_rate": 7.557459034446517e-06, + "loss": 0.0255, + "step": 14811 + }, + { + "epoch": 4.5, + "learning_rate": 7.548400798812643e-06, + "loss": 0.0475, + "step": 14812 + }, + { + "epoch": 4.5, + "learning_rate": 7.539347854837879e-06, + "loss": 0.0185, + "step": 14813 + }, + { + "epoch": 4.5, + "learning_rate": 7.530300202858625e-06, + "loss": 0.0077, + "step": 14814 + }, + { + "epoch": 4.5, + "learning_rate": 7.5212578432108925e-06, + "loss": 0.0233, + "step": 14815 + }, + { + "epoch": 4.5, + "learning_rate": 7.512220776230632e-06, + "loss": 0.0284, + "step": 14816 + }, + { + "epoch": 4.5, + "learning_rate": 7.50318900225354e-06, + "loss": 0.0239, + "step": 14817 + }, + { + "epoch": 4.5, + "learning_rate": 7.494162521615082e-06, + "loss": 0.038, + "step": 14818 + }, + { + "epoch": 4.5, + "learning_rate": 7.485141334650608e-06, + "loss": 0.0134, + "step": 14819 + }, + { + "epoch": 4.5, + "learning_rate": 7.476125441695197e-06, + "loss": 0.0106, + "step": 14820 + }, + { + "epoch": 4.5, + "learning_rate": 7.467114843083799e-06, + "loss": 0.0172, + "step": 14821 + }, + { + "epoch": 4.5, + "learning_rate": 7.458109539151114e-06, + "loss": 0.038, + "step": 14822 + }, + { + "epoch": 4.5, + "learning_rate": 7.449109530231656e-06, + "loss": 0.0324, + "step": 14823 + }, + { + "epoch": 4.5, + "learning_rate": 7.440114816659726e-06, + "loss": 0.048, + "step": 14824 + }, + { + "epoch": 4.5, + "learning_rate": 7.431125398769489e-06, + "loss": 0.0469, + "step": 14825 + }, + { + "epoch": 4.5, + "learning_rate": 7.4221412768948775e-06, + "loss": 0.0187, + "step": 14826 + }, + { + "epoch": 4.5, + "learning_rate": 7.413162451369609e-06, + "loss": 0.0323, + "step": 14827 + }, + { + "epoch": 4.5, + "learning_rate": 7.404188922527216e-06, + "loss": 0.0238, + "step": 14828 + }, + { + "epoch": 4.5, + "learning_rate": 7.395220690701048e-06, + "loss": 0.0234, + "step": 14829 + }, + { + "epoch": 4.5, + "learning_rate": 7.38625775622424e-06, + "loss": 0.0125, + "step": 14830 + }, + { + "epoch": 4.5, + "learning_rate": 7.377300119429741e-06, + "loss": 0.0289, + "step": 14831 + }, + { + "epoch": 4.5, + "learning_rate": 7.368347780650319e-06, + "loss": 0.0693, + "step": 14832 + }, + { + "epoch": 4.5, + "learning_rate": 7.359400740218524e-06, + "loss": 0.0669, + "step": 14833 + }, + { + "epoch": 4.5, + "learning_rate": 7.350458998466674e-06, + "loss": 0.0136, + "step": 14834 + }, + { + "epoch": 4.5, + "learning_rate": 7.34152255572697e-06, + "loss": 0.0337, + "step": 14835 + }, + { + "epoch": 4.5, + "learning_rate": 7.332591412331345e-06, + "loss": 0.0125, + "step": 14836 + }, + { + "epoch": 4.5, + "learning_rate": 7.32366556861162e-06, + "loss": 0.0253, + "step": 14837 + }, + { + "epoch": 4.51, + "learning_rate": 7.3147450248992765e-06, + "loss": 0.0356, + "step": 14838 + }, + { + "epoch": 4.51, + "learning_rate": 7.3058297815257685e-06, + "loss": 0.0225, + "step": 14839 + }, + { + "epoch": 4.51, + "learning_rate": 7.296919838822213e-06, + "loss": 0.0236, + "step": 14840 + }, + { + "epoch": 4.51, + "learning_rate": 7.2880151971196285e-06, + "loss": 0.014, + "step": 14841 + }, + { + "epoch": 4.51, + "learning_rate": 7.279115856748785e-06, + "loss": 0.0366, + "step": 14842 + }, + { + "epoch": 4.51, + "learning_rate": 7.270221818040234e-06, + "loss": 0.0218, + "step": 14843 + }, + { + "epoch": 4.51, + "learning_rate": 7.2613330813244275e-06, + "loss": 0.0027, + "step": 14844 + }, + { + "epoch": 4.51, + "learning_rate": 7.252449646931486e-06, + "loss": 0.0187, + "step": 14845 + }, + { + "epoch": 4.51, + "learning_rate": 7.243571515191443e-06, + "loss": 0.0053, + "step": 14846 + }, + { + "epoch": 4.51, + "learning_rate": 7.234698686434104e-06, + "loss": 0.0123, + "step": 14847 + }, + { + "epoch": 4.51, + "learning_rate": 7.225831160989037e-06, + "loss": 0.0207, + "step": 14848 + }, + { + "epoch": 4.51, + "learning_rate": 7.2169689391856796e-06, + "loss": 0.0117, + "step": 14849 + }, + { + "epoch": 4.51, + "learning_rate": 7.208112021353185e-06, + "loss": 0.0106, + "step": 14850 + }, + { + "epoch": 4.51, + "learning_rate": 7.199260407820639e-06, + "loss": 0.0144, + "step": 14851 + }, + { + "epoch": 4.51, + "learning_rate": 7.190414098916797e-06, + "loss": 0.0214, + "step": 14852 + }, + { + "epoch": 4.51, + "learning_rate": 7.181573094970278e-06, + "loss": 0.0072, + "step": 14853 + }, + { + "epoch": 4.51, + "learning_rate": 7.172737396309486e-06, + "loss": 0.0346, + "step": 14854 + }, + { + "epoch": 4.51, + "learning_rate": 7.163907003262692e-06, + "loss": 0.0267, + "step": 14855 + }, + { + "epoch": 4.51, + "learning_rate": 7.155081916157868e-06, + "loss": 0.0104, + "step": 14856 + }, + { + "epoch": 4.51, + "learning_rate": 7.146262135322883e-06, + "loss": 0.0295, + "step": 14857 + }, + { + "epoch": 4.51, + "learning_rate": 7.137447661085344e-06, + "loss": 0.0162, + "step": 14858 + }, + { + "epoch": 4.51, + "learning_rate": 7.128638493772654e-06, + "loss": 0.0135, + "step": 14859 + }, + { + "epoch": 4.51, + "learning_rate": 7.119834633712101e-06, + "loss": 0.0385, + "step": 14860 + }, + { + "epoch": 4.51, + "learning_rate": 7.111036081230675e-06, + "loss": 0.0069, + "step": 14861 + }, + { + "epoch": 4.51, + "learning_rate": 7.102242836655297e-06, + "loss": 0.035, + "step": 14862 + }, + { + "epoch": 4.51, + "learning_rate": 7.093454900312506e-06, + "loss": 0.0293, + "step": 14863 + }, + { + "epoch": 4.51, + "learning_rate": 7.084672272528807e-06, + "loss": 0.0132, + "step": 14864 + }, + { + "epoch": 4.51, + "learning_rate": 7.075894953630423e-06, + "loss": 0.0237, + "step": 14865 + }, + { + "epoch": 4.51, + "learning_rate": 7.0671229439434265e-06, + "loss": 0.0117, + "step": 14866 + }, + { + "epoch": 4.51, + "learning_rate": 7.058356243793672e-06, + "loss": 0.0337, + "step": 14867 + }, + { + "epoch": 4.51, + "learning_rate": 7.049594853506818e-06, + "loss": 0.0134, + "step": 14868 + }, + { + "epoch": 4.51, + "learning_rate": 7.040838773408319e-06, + "loss": 0.026, + "step": 14869 + }, + { + "epoch": 4.51, + "learning_rate": 7.032088003823417e-06, + "loss": 0.0157, + "step": 14870 + }, + { + "epoch": 4.52, + "learning_rate": 7.023342545077198e-06, + "loss": 0.0338, + "step": 14871 + }, + { + "epoch": 4.52, + "learning_rate": 7.014602397494523e-06, + "loss": 0.0373, + "step": 14872 + }, + { + "epoch": 4.52, + "learning_rate": 7.0058675614000804e-06, + "loss": 0.0261, + "step": 14873 + }, + { + "epoch": 4.52, + "learning_rate": 6.997138037118344e-06, + "loss": 0.024, + "step": 14874 + }, + { + "epoch": 4.52, + "learning_rate": 6.988413824973554e-06, + "loss": 0.0244, + "step": 14875 + }, + { + "epoch": 4.52, + "learning_rate": 6.9796949252898195e-06, + "loss": 0.0218, + "step": 14876 + }, + { + "epoch": 4.52, + "learning_rate": 6.970981338391013e-06, + "loss": 0.0268, + "step": 14877 + }, + { + "epoch": 4.52, + "learning_rate": 6.962273064600843e-06, + "loss": 0.0354, + "step": 14878 + }, + { + "epoch": 4.52, + "learning_rate": 6.953570104242734e-06, + "loss": 0.0178, + "step": 14879 + }, + { + "epoch": 4.52, + "learning_rate": 6.944872457640044e-06, + "loss": 0.005, + "step": 14880 + }, + { + "epoch": 4.52, + "learning_rate": 6.936180125115831e-06, + "loss": 0.0361, + "step": 14881 + }, + { + "epoch": 4.52, + "learning_rate": 6.927493106992987e-06, + "loss": 0.0237, + "step": 14882 + }, + { + "epoch": 4.52, + "learning_rate": 6.918811403594237e-06, + "loss": 0.0382, + "step": 14883 + }, + { + "epoch": 4.52, + "learning_rate": 6.910135015242024e-06, + "loss": 0.0527, + "step": 14884 + }, + { + "epoch": 4.52, + "learning_rate": 6.901463942258723e-06, + "loss": 0.0351, + "step": 14885 + }, + { + "epoch": 4.52, + "learning_rate": 6.892798184966375e-06, + "loss": 0.0232, + "step": 14886 + }, + { + "epoch": 4.52, + "learning_rate": 6.8841377436869404e-06, + "loss": 0.0547, + "step": 14887 + }, + { + "epoch": 4.52, + "learning_rate": 6.875482618742112e-06, + "loss": 0.0306, + "step": 14888 + }, + { + "epoch": 4.52, + "learning_rate": 6.866832810453365e-06, + "loss": 0.0014, + "step": 14889 + }, + { + "epoch": 4.52, + "learning_rate": 6.8581883191420605e-06, + "loss": 0.0438, + "step": 14890 + }, + { + "epoch": 4.52, + "learning_rate": 6.84954914512929e-06, + "loss": 0.0292, + "step": 14891 + }, + { + "epoch": 4.52, + "learning_rate": 6.840915288735998e-06, + "loss": 0.0213, + "step": 14892 + }, + { + "epoch": 4.52, + "learning_rate": 6.832286750282895e-06, + "loss": 0.0325, + "step": 14893 + }, + { + "epoch": 4.52, + "learning_rate": 6.823663530090506e-06, + "loss": 0.0246, + "step": 14894 + }, + { + "epoch": 4.52, + "learning_rate": 6.815045628479127e-06, + "loss": 0.0165, + "step": 14895 + }, + { + "epoch": 4.52, + "learning_rate": 6.80643304576895e-06, + "loss": 0.0387, + "step": 14896 + }, + { + "epoch": 4.52, + "learning_rate": 6.797825782279853e-06, + "loss": 0.0251, + "step": 14897 + }, + { + "epoch": 4.52, + "learning_rate": 6.789223838331598e-06, + "loss": 0.0171, + "step": 14898 + }, + { + "epoch": 4.52, + "learning_rate": 6.7806272142437276e-06, + "loss": 0.0252, + "step": 14899 + }, + { + "epoch": 4.52, + "learning_rate": 6.772035910335538e-06, + "loss": 0.0376, + "step": 14900 + }, + { + "epoch": 4.52, + "learning_rate": 6.763449926926223e-06, + "loss": 0.0329, + "step": 14901 + }, + { + "epoch": 4.52, + "learning_rate": 6.754869264334695e-06, + "loss": 0.0365, + "step": 14902 + }, + { + "epoch": 4.52, + "learning_rate": 6.746293922879748e-06, + "loss": 0.0118, + "step": 14903 + }, + { + "epoch": 4.53, + "learning_rate": 6.737723902879843e-06, + "loss": 0.0361, + "step": 14904 + }, + { + "epoch": 4.53, + "learning_rate": 6.729159204653412e-06, + "loss": 0.0113, + "step": 14905 + }, + { + "epoch": 4.53, + "learning_rate": 6.720599828518547e-06, + "loss": 0.038, + "step": 14906 + }, + { + "epoch": 4.53, + "learning_rate": 6.712045774793245e-06, + "loss": 0.0255, + "step": 14907 + }, + { + "epoch": 4.53, + "learning_rate": 6.703497043795303e-06, + "loss": 0.0206, + "step": 14908 + }, + { + "epoch": 4.53, + "learning_rate": 6.694953635842182e-06, + "loss": 0.0246, + "step": 14909 + }, + { + "epoch": 4.53, + "learning_rate": 6.6864155512513115e-06, + "loss": 0.034, + "step": 14910 + }, + { + "epoch": 4.53, + "learning_rate": 6.6778827903398225e-06, + "loss": 0.014, + "step": 14911 + }, + { + "epoch": 4.53, + "learning_rate": 6.669355353424727e-06, + "loss": 0.0194, + "step": 14912 + }, + { + "epoch": 4.53, + "learning_rate": 6.6608332408227546e-06, + "loss": 0.0274, + "step": 14913 + }, + { + "epoch": 4.53, + "learning_rate": 6.65231645285047e-06, + "loss": 0.0492, + "step": 14914 + }, + { + "epoch": 4.53, + "learning_rate": 6.643804989824286e-06, + "loss": 0.006, + "step": 14915 + }, + { + "epoch": 4.53, + "learning_rate": 6.63529885206035e-06, + "loss": 0.0231, + "step": 14916 + }, + { + "epoch": 4.53, + "learning_rate": 6.626798039874659e-06, + "loss": 0.0103, + "step": 14917 + }, + { + "epoch": 4.53, + "learning_rate": 6.618302553582977e-06, + "loss": 0.0448, + "step": 14918 + }, + { + "epoch": 4.53, + "learning_rate": 6.609812393500902e-06, + "loss": 0.0199, + "step": 14919 + }, + { + "epoch": 4.53, + "learning_rate": 6.6013275599437806e-06, + "loss": 0.0469, + "step": 14920 + }, + { + "epoch": 4.53, + "learning_rate": 6.592848053226846e-06, + "loss": 0.034, + "step": 14921 + }, + { + "epoch": 4.53, + "learning_rate": 6.584373873665061e-06, + "loss": 0.0498, + "step": 14922 + }, + { + "epoch": 4.53, + "learning_rate": 6.575905021573225e-06, + "loss": 0.0189, + "step": 14923 + }, + { + "epoch": 4.53, + "learning_rate": 6.5674414972659515e-06, + "loss": 0.0305, + "step": 14924 + }, + { + "epoch": 4.53, + "learning_rate": 6.558983301057574e-06, + "loss": 0.0141, + "step": 14925 + }, + { + "epoch": 4.53, + "learning_rate": 6.550530433262357e-06, + "loss": 0.021, + "step": 14926 + }, + { + "epoch": 4.53, + "learning_rate": 6.5420828941942674e-06, + "loss": 0.0111, + "step": 14927 + }, + { + "epoch": 4.53, + "learning_rate": 6.533640684167102e-06, + "loss": 0.035, + "step": 14928 + }, + { + "epoch": 4.53, + "learning_rate": 6.525203803494495e-06, + "loss": 0.0262, + "step": 14929 + }, + { + "epoch": 4.53, + "learning_rate": 6.51677225248981e-06, + "loss": 0.0224, + "step": 14930 + }, + { + "epoch": 4.53, + "learning_rate": 6.508346031466283e-06, + "loss": 0.0479, + "step": 14931 + }, + { + "epoch": 4.53, + "learning_rate": 6.499925140736911e-06, + "loss": 0.0113, + "step": 14932 + }, + { + "epoch": 4.53, + "learning_rate": 6.491509580614512e-06, + "loss": 0.0061, + "step": 14933 + }, + { + "epoch": 4.53, + "learning_rate": 6.483099351411703e-06, + "loss": 0.0314, + "step": 14934 + }, + { + "epoch": 4.53, + "learning_rate": 6.4746944534408986e-06, + "loss": 0.0052, + "step": 14935 + }, + { + "epoch": 4.53, + "learning_rate": 6.4662948870142835e-06, + "loss": 0.0437, + "step": 14936 + }, + { + "epoch": 4.54, + "learning_rate": 6.457900652443926e-06, + "loss": 0.0087, + "step": 14937 + }, + { + "epoch": 4.54, + "learning_rate": 6.449511750041608e-06, + "loss": 0.0121, + "step": 14938 + }, + { + "epoch": 4.54, + "learning_rate": 6.441128180118998e-06, + "loss": 0.0078, + "step": 14939 + }, + { + "epoch": 4.54, + "learning_rate": 6.432749942987481e-06, + "loss": 0.0152, + "step": 14940 + }, + { + "epoch": 4.54, + "learning_rate": 6.424377038958273e-06, + "loss": 0.0326, + "step": 14941 + }, + { + "epoch": 4.54, + "learning_rate": 6.416009468342459e-06, + "loss": 0.0197, + "step": 14942 + }, + { + "epoch": 4.54, + "learning_rate": 6.407647231450807e-06, + "loss": 0.0299, + "step": 14943 + }, + { + "epoch": 4.54, + "learning_rate": 6.39929032859402e-06, + "loss": 0.0252, + "step": 14944 + }, + { + "epoch": 4.54, + "learning_rate": 6.390938760082465e-06, + "loss": 0.0637, + "step": 14945 + }, + { + "epoch": 4.54, + "learning_rate": 6.38259252622641e-06, + "loss": 0.0302, + "step": 14946 + }, + { + "epoch": 4.54, + "learning_rate": 6.374251627335875e-06, + "loss": 0.0202, + "step": 14947 + }, + { + "epoch": 4.54, + "learning_rate": 6.3659160637207105e-06, + "loss": 0.0196, + "step": 14948 + }, + { + "epoch": 4.54, + "learning_rate": 6.357585835690604e-06, + "loss": 0.0468, + "step": 14949 + }, + { + "epoch": 4.54, + "learning_rate": 6.349260943554907e-06, + "loss": 0.0206, + "step": 14950 + }, + { + "epoch": 4.54, + "learning_rate": 6.3409413876229386e-06, + "loss": 0.041, + "step": 14951 + }, + { + "epoch": 4.54, + "learning_rate": 6.3326271682037026e-06, + "loss": 0.0211, + "step": 14952 + }, + { + "epoch": 4.54, + "learning_rate": 6.324318285606083e-06, + "loss": 0.0266, + "step": 14953 + }, + { + "epoch": 4.54, + "learning_rate": 6.316014740138719e-06, + "loss": 0.0153, + "step": 14954 + }, + { + "epoch": 4.54, + "learning_rate": 6.3077165321100465e-06, + "loss": 0.0127, + "step": 14955 + }, + { + "epoch": 4.54, + "learning_rate": 6.299423661828334e-06, + "loss": 0.0495, + "step": 14956 + }, + { + "epoch": 4.54, + "learning_rate": 6.29113612960162e-06, + "loss": 0.0489, + "step": 14957 + }, + { + "epoch": 4.54, + "learning_rate": 6.282853935737792e-06, + "loss": 0.0271, + "step": 14958 + }, + { + "epoch": 4.54, + "learning_rate": 6.2745770805445015e-06, + "loss": 0.0297, + "step": 14959 + }, + { + "epoch": 4.54, + "learning_rate": 6.266305564329188e-06, + "loss": 0.0072, + "step": 14960 + }, + { + "epoch": 4.54, + "learning_rate": 6.258039387399122e-06, + "loss": 0.026, + "step": 14961 + }, + { + "epoch": 4.54, + "learning_rate": 6.249778550061374e-06, + "loss": 0.0296, + "step": 14962 + }, + { + "epoch": 4.54, + "learning_rate": 6.241523052622815e-06, + "loss": 0.0267, + "step": 14963 + }, + { + "epoch": 4.54, + "learning_rate": 6.233272895390102e-06, + "loss": 0.0361, + "step": 14964 + }, + { + "epoch": 4.54, + "learning_rate": 6.225028078669719e-06, + "loss": 0.0108, + "step": 14965 + }, + { + "epoch": 4.54, + "learning_rate": 6.2167886027679085e-06, + "loss": 0.0412, + "step": 14966 + }, + { + "epoch": 4.54, + "learning_rate": 6.208554467990773e-06, + "loss": 0.0159, + "step": 14967 + }, + { + "epoch": 4.54, + "learning_rate": 6.200325674644169e-06, + "loss": 0.0145, + "step": 14968 + }, + { + "epoch": 4.55, + "learning_rate": 6.192102223033801e-06, + "loss": 0.0349, + "step": 14969 + }, + { + "epoch": 4.55, + "learning_rate": 6.183884113465093e-06, + "loss": 0.0097, + "step": 14970 + }, + { + "epoch": 4.55, + "learning_rate": 6.175671346243349e-06, + "loss": 0.0329, + "step": 14971 + }, + { + "epoch": 4.55, + "learning_rate": 6.167463921673676e-06, + "loss": 0.0234, + "step": 14972 + }, + { + "epoch": 4.55, + "learning_rate": 6.159261840060914e-06, + "loss": 0.028, + "step": 14973 + }, + { + "epoch": 4.55, + "learning_rate": 6.1510651017098015e-06, + "loss": 0.0336, + "step": 14974 + }, + { + "epoch": 4.55, + "learning_rate": 6.142873706924745e-06, + "loss": 0.0297, + "step": 14975 + }, + { + "epoch": 4.55, + "learning_rate": 6.134687656010085e-06, + "loss": 0.0176, + "step": 14976 + }, + { + "epoch": 4.55, + "learning_rate": 6.126506949269894e-06, + "loss": 0.0263, + "step": 14977 + }, + { + "epoch": 4.55, + "learning_rate": 6.11833158700808e-06, + "loss": 0.04, + "step": 14978 + }, + { + "epoch": 4.55, + "learning_rate": 6.110161569528299e-06, + "loss": 0.0444, + "step": 14979 + }, + { + "epoch": 4.55, + "learning_rate": 6.101996897134059e-06, + "loss": 0.0395, + "step": 14980 + }, + { + "epoch": 4.55, + "learning_rate": 6.093837570128684e-06, + "loss": 0.0282, + "step": 14981 + }, + { + "epoch": 4.55, + "learning_rate": 6.085683588815215e-06, + "loss": 0.0232, + "step": 14982 + }, + { + "epoch": 4.55, + "learning_rate": 6.077534953496593e-06, + "loss": 0.0217, + "step": 14983 + }, + { + "epoch": 4.55, + "learning_rate": 6.069391664475492e-06, + "loss": 0.0089, + "step": 14984 + }, + { + "epoch": 4.55, + "learning_rate": 6.0612537220544375e-06, + "loss": 0.0257, + "step": 14985 + }, + { + "epoch": 4.55, + "learning_rate": 6.053121126535671e-06, + "loss": 0.0277, + "step": 14986 + }, + { + "epoch": 4.55, + "learning_rate": 6.044993878221366e-06, + "loss": 0.0222, + "step": 14987 + }, + { + "epoch": 4.55, + "learning_rate": 6.036871977413366e-06, + "loss": 0.0131, + "step": 14988 + }, + { + "epoch": 4.55, + "learning_rate": 6.028755424413428e-06, + "loss": 0.0237, + "step": 14989 + }, + { + "epoch": 4.55, + "learning_rate": 6.02064421952303e-06, + "loss": 0.0088, + "step": 14990 + }, + { + "epoch": 4.55, + "learning_rate": 6.012538363043462e-06, + "loss": 0.0319, + "step": 14991 + }, + { + "epoch": 4.55, + "learning_rate": 6.004437855275884e-06, + "loss": 0.0217, + "step": 14992 + }, + { + "epoch": 4.55, + "learning_rate": 5.996342696521139e-06, + "loss": 0.0241, + "step": 14993 + }, + { + "epoch": 4.55, + "learning_rate": 5.98825288708002e-06, + "loss": 0.0319, + "step": 14994 + }, + { + "epoch": 4.55, + "learning_rate": 5.980168427252968e-06, + "loss": 0.029, + "step": 14995 + }, + { + "epoch": 4.55, + "learning_rate": 5.972089317340329e-06, + "loss": 0.0202, + "step": 14996 + }, + { + "epoch": 4.55, + "learning_rate": 5.96401555764221e-06, + "loss": 0.0516, + "step": 14997 + }, + { + "epoch": 4.55, + "learning_rate": 5.95594714845854e-06, + "loss": 0.0169, + "step": 14998 + }, + { + "epoch": 4.55, + "learning_rate": 5.947884090089028e-06, + "loss": 0.0193, + "step": 14999 + }, + { + "epoch": 4.55, + "learning_rate": 5.939826382833185e-06, + "loss": 0.0195, + "step": 15000 + }, + { + "epoch": 4.55, + "learning_rate": 5.9317740269903544e-06, + "loss": 0.0344, + "step": 15001 + }, + { + "epoch": 4.56, + "learning_rate": 5.9237270228596145e-06, + "loss": 0.0269, + "step": 15002 + }, + { + "epoch": 4.56, + "learning_rate": 5.915685370739942e-06, + "loss": 0.0322, + "step": 15003 + }, + { + "epoch": 4.56, + "learning_rate": 5.907649070930015e-06, + "loss": 0.0321, + "step": 15004 + }, + { + "epoch": 4.56, + "learning_rate": 5.899618123728395e-06, + "loss": 0.0216, + "step": 15005 + }, + { + "epoch": 4.56, + "learning_rate": 5.891592529433392e-06, + "loss": 0.0108, + "step": 15006 + }, + { + "epoch": 4.56, + "learning_rate": 5.883572288343103e-06, + "loss": 0.008, + "step": 15007 + }, + { + "epoch": 4.56, + "learning_rate": 5.875557400755504e-06, + "loss": 0.0271, + "step": 15008 + }, + { + "epoch": 4.56, + "learning_rate": 5.8675478669683085e-06, + "loss": 0.0193, + "step": 15009 + }, + { + "epoch": 4.56, + "learning_rate": 5.859543687279061e-06, + "loss": 0.0159, + "step": 15010 + }, + { + "epoch": 4.56, + "learning_rate": 5.8515448619850415e-06, + "loss": 0.0393, + "step": 15011 + }, + { + "epoch": 4.56, + "learning_rate": 5.843551391383428e-06, + "loss": 0.0141, + "step": 15012 + }, + { + "epoch": 4.56, + "learning_rate": 5.8355632757711654e-06, + "loss": 0.0106, + "step": 15013 + }, + { + "epoch": 4.56, + "learning_rate": 5.827580515444952e-06, + "loss": 0.0189, + "step": 15014 + }, + { + "epoch": 4.56, + "learning_rate": 5.819603110701365e-06, + "loss": 0.0086, + "step": 15015 + }, + { + "epoch": 4.56, + "learning_rate": 5.811631061836686e-06, + "loss": 0.0133, + "step": 15016 + }, + { + "epoch": 4.56, + "learning_rate": 5.803664369147093e-06, + "loss": 0.0148, + "step": 15017 + }, + { + "epoch": 4.56, + "learning_rate": 5.795703032928517e-06, + "loss": 0.0355, + "step": 15018 + }, + { + "epoch": 4.56, + "learning_rate": 5.787747053476704e-06, + "loss": 0.0296, + "step": 15019 + }, + { + "epoch": 4.56, + "learning_rate": 5.7797964310871855e-06, + "loss": 0.0382, + "step": 15020 + }, + { + "epoch": 4.56, + "learning_rate": 5.771851166055292e-06, + "loss": 0.0164, + "step": 15021 + }, + { + "epoch": 4.56, + "learning_rate": 5.763911258676202e-06, + "loss": 0.0206, + "step": 15022 + }, + { + "epoch": 4.56, + "learning_rate": 5.75597670924483e-06, + "loss": 0.0185, + "step": 15023 + }, + { + "epoch": 4.56, + "learning_rate": 5.748047518055943e-06, + "loss": 0.0251, + "step": 15024 + }, + { + "epoch": 4.56, + "learning_rate": 5.740123685404069e-06, + "loss": 0.0148, + "step": 15025 + }, + { + "epoch": 4.56, + "learning_rate": 5.732205211583574e-06, + "loss": 0.0135, + "step": 15026 + }, + { + "epoch": 4.56, + "learning_rate": 5.724292096888555e-06, + "loss": 0.0161, + "step": 15027 + }, + { + "epoch": 4.56, + "learning_rate": 5.716384341613045e-06, + "loss": 0.0296, + "step": 15028 + }, + { + "epoch": 4.56, + "learning_rate": 5.7084819460507085e-06, + "loss": 0.0087, + "step": 15029 + }, + { + "epoch": 4.56, + "learning_rate": 5.700584910495159e-06, + "loss": 0.0139, + "step": 15030 + }, + { + "epoch": 4.56, + "learning_rate": 5.692693235239731e-06, + "loss": 0.0132, + "step": 15031 + }, + { + "epoch": 4.56, + "learning_rate": 5.684806920577556e-06, + "loss": 0.0177, + "step": 15032 + }, + { + "epoch": 4.56, + "learning_rate": 5.676925966801615e-06, + "loss": 0.0137, + "step": 15033 + }, + { + "epoch": 4.56, + "learning_rate": 5.669050374204626e-06, + "loss": 0.0273, + "step": 15034 + }, + { + "epoch": 4.57, + "learning_rate": 5.661180143079202e-06, + "loss": 0.0143, + "step": 15035 + }, + { + "epoch": 4.57, + "learning_rate": 5.653315273717663e-06, + "loss": 0.0241, + "step": 15036 + }, + { + "epoch": 4.57, + "learning_rate": 5.645455766412138e-06, + "loss": 0.0094, + "step": 15037 + }, + { + "epoch": 4.57, + "learning_rate": 5.637601621454646e-06, + "loss": 0.0187, + "step": 15038 + }, + { + "epoch": 4.57, + "learning_rate": 5.629752839136886e-06, + "loss": 0.0158, + "step": 15039 + }, + { + "epoch": 4.57, + "learning_rate": 5.621909419750492e-06, + "loss": 0.0186, + "step": 15040 + }, + { + "epoch": 4.57, + "learning_rate": 5.614071363586731e-06, + "loss": 0.0171, + "step": 15041 + }, + { + "epoch": 4.57, + "learning_rate": 5.606238670936836e-06, + "loss": 0.0308, + "step": 15042 + }, + { + "epoch": 4.57, + "learning_rate": 5.598411342091725e-06, + "loss": 0.0127, + "step": 15043 + }, + { + "epoch": 4.57, + "learning_rate": 5.590589377342197e-06, + "loss": 0.0184, + "step": 15044 + }, + { + "epoch": 4.57, + "learning_rate": 5.5827727769787886e-06, + "loss": 0.0141, + "step": 15045 + }, + { + "epoch": 4.57, + "learning_rate": 5.5749615412918665e-06, + "loss": 0.0124, + "step": 15046 + }, + { + "epoch": 4.57, + "learning_rate": 5.567155670571616e-06, + "loss": 0.019, + "step": 15047 + }, + { + "epoch": 4.57, + "learning_rate": 5.559355165107954e-06, + "loss": 0.0228, + "step": 15048 + }, + { + "epoch": 4.57, + "learning_rate": 5.551560025190716e-06, + "loss": 0.0219, + "step": 15049 + }, + { + "epoch": 4.57, + "learning_rate": 5.54377025110942e-06, + "loss": 0.0156, + "step": 15050 + }, + { + "epoch": 4.57, + "learning_rate": 5.535985843153451e-06, + "loss": 0.0094, + "step": 15051 + }, + { + "epoch": 4.57, + "learning_rate": 5.528206801611945e-06, + "loss": 0.0232, + "step": 15052 + }, + { + "epoch": 4.57, + "learning_rate": 5.520433126773904e-06, + "loss": 0.0177, + "step": 15053 + }, + { + "epoch": 4.57, + "learning_rate": 5.512664818928097e-06, + "loss": 0.0261, + "step": 15054 + }, + { + "epoch": 4.57, + "learning_rate": 5.504901878363077e-06, + "loss": 0.0157, + "step": 15055 + }, + { + "epoch": 4.57, + "learning_rate": 5.497144305367263e-06, + "loss": 0.0472, + "step": 15056 + }, + { + "epoch": 4.57, + "learning_rate": 5.48939210022874e-06, + "loss": 0.0245, + "step": 15057 + }, + { + "epoch": 4.57, + "learning_rate": 5.481645263235546e-06, + "loss": 0.017, + "step": 15058 + }, + { + "epoch": 4.57, + "learning_rate": 5.473903794675416e-06, + "loss": 0.027, + "step": 15059 + }, + { + "epoch": 4.57, + "learning_rate": 5.466167694835971e-06, + "loss": 0.0367, + "step": 15060 + }, + { + "epoch": 4.57, + "learning_rate": 5.458436964004548e-06, + "loss": 0.0377, + "step": 15061 + }, + { + "epoch": 4.57, + "learning_rate": 5.4507116024683e-06, + "loss": 0.0086, + "step": 15062 + }, + { + "epoch": 4.57, + "learning_rate": 5.4429916105142634e-06, + "loss": 0.0028, + "step": 15063 + }, + { + "epoch": 4.57, + "learning_rate": 5.4352769884291436e-06, + "loss": 0.0143, + "step": 15064 + }, + { + "epoch": 4.57, + "learning_rate": 5.427567736499578e-06, + "loss": 0.0181, + "step": 15065 + }, + { + "epoch": 4.57, + "learning_rate": 5.419863855011919e-06, + "loss": 0.0254, + "step": 15066 + }, + { + "epoch": 4.57, + "learning_rate": 5.41216534425234e-06, + "loss": 0.0347, + "step": 15067 + }, + { + "epoch": 4.58, + "learning_rate": 5.4044722045067934e-06, + "loss": 0.0427, + "step": 15068 + }, + { + "epoch": 4.58, + "learning_rate": 5.396784436061102e-06, + "loss": 0.0284, + "step": 15069 + }, + { + "epoch": 4.58, + "learning_rate": 5.3891020392008045e-06, + "loss": 0.0532, + "step": 15070 + }, + { + "epoch": 4.58, + "learning_rate": 5.381425014211321e-06, + "loss": 0.0332, + "step": 15071 + }, + { + "epoch": 4.58, + "learning_rate": 5.373753361377792e-06, + "loss": 0.0179, + "step": 15072 + }, + { + "epoch": 4.58, + "learning_rate": 5.366087080985221e-06, + "loss": 0.0297, + "step": 15073 + }, + { + "epoch": 4.58, + "learning_rate": 5.358426173318381e-06, + "loss": 0.0195, + "step": 15074 + }, + { + "epoch": 4.58, + "learning_rate": 5.350770638661828e-06, + "loss": 0.0389, + "step": 15075 + }, + { + "epoch": 4.58, + "learning_rate": 5.343120477299984e-06, + "loss": 0.0229, + "step": 15076 + }, + { + "epoch": 4.58, + "learning_rate": 5.3354756895170215e-06, + "loss": 0.0426, + "step": 15077 + }, + { + "epoch": 4.58, + "learning_rate": 5.327836275596897e-06, + "loss": 0.0192, + "step": 15078 + }, + { + "epoch": 4.58, + "learning_rate": 5.320202235823418e-06, + "loss": 0.0235, + "step": 15079 + }, + { + "epoch": 4.58, + "learning_rate": 5.312573570480138e-06, + "loss": 0.0272, + "step": 15080 + }, + { + "epoch": 4.58, + "learning_rate": 5.3049502798504835e-06, + "loss": 0.0149, + "step": 15081 + }, + { + "epoch": 4.58, + "learning_rate": 5.297332364217594e-06, + "loss": 0.0441, + "step": 15082 + }, + { + "epoch": 4.58, + "learning_rate": 5.289719823864474e-06, + "loss": 0.0219, + "step": 15083 + }, + { + "epoch": 4.58, + "learning_rate": 5.2821126590739e-06, + "loss": 0.061, + "step": 15084 + }, + { + "epoch": 4.58, + "learning_rate": 5.274510870128462e-06, + "loss": 0.0028, + "step": 15085 + }, + { + "epoch": 4.58, + "learning_rate": 5.266914457310551e-06, + "loss": 0.0475, + "step": 15086 + }, + { + "epoch": 4.58, + "learning_rate": 5.259323420902323e-06, + "loss": 0.0246, + "step": 15087 + }, + { + "epoch": 4.58, + "learning_rate": 5.251737761185787e-06, + "loss": 0.0324, + "step": 15088 + }, + { + "epoch": 4.58, + "learning_rate": 5.244157478442718e-06, + "loss": 0.0278, + "step": 15089 + }, + { + "epoch": 4.58, + "learning_rate": 5.236582572954723e-06, + "loss": 0.0178, + "step": 15090 + }, + { + "epoch": 4.58, + "learning_rate": 5.229013045003161e-06, + "loss": 0.0273, + "step": 15091 + }, + { + "epoch": 4.58, + "learning_rate": 5.22144889486924e-06, + "loss": 0.0236, + "step": 15092 + }, + { + "epoch": 4.58, + "learning_rate": 5.213890122833903e-06, + "loss": 0.0163, + "step": 15093 + }, + { + "epoch": 4.58, + "learning_rate": 5.206336729177974e-06, + "loss": 0.0274, + "step": 15094 + }, + { + "epoch": 4.58, + "learning_rate": 5.198788714182045e-06, + "loss": 0.0277, + "step": 15095 + }, + { + "epoch": 4.58, + "learning_rate": 5.191246078126493e-06, + "loss": 0.0287, + "step": 15096 + }, + { + "epoch": 4.58, + "learning_rate": 5.183708821291493e-06, + "loss": 0.0363, + "step": 15097 + }, + { + "epoch": 4.58, + "learning_rate": 5.176176943957039e-06, + "loss": 0.0232, + "step": 15098 + }, + { + "epoch": 4.58, + "learning_rate": 5.168650446402922e-06, + "loss": 0.0243, + "step": 15099 + }, + { + "epoch": 4.58, + "learning_rate": 5.16112932890872e-06, + "loss": 0.0124, + "step": 15100 + }, + { + "epoch": 4.59, + "learning_rate": 5.1536135917538245e-06, + "loss": 0.0256, + "step": 15101 + }, + { + "epoch": 4.59, + "learning_rate": 5.146103235217447e-06, + "loss": 0.0241, + "step": 15102 + }, + { + "epoch": 4.59, + "learning_rate": 5.13859825957853e-06, + "loss": 0.0171, + "step": 15103 + }, + { + "epoch": 4.59, + "learning_rate": 5.131098665115902e-06, + "loss": 0.0182, + "step": 15104 + }, + { + "epoch": 4.59, + "learning_rate": 5.123604452108104e-06, + "loss": 0.0574, + "step": 15105 + }, + { + "epoch": 4.59, + "learning_rate": 5.116115620833599e-06, + "loss": 0.0117, + "step": 15106 + }, + { + "epoch": 4.59, + "learning_rate": 5.108632171570498e-06, + "loss": 0.0371, + "step": 15107 + }, + { + "epoch": 4.59, + "learning_rate": 5.101154104596844e-06, + "loss": 0.0277, + "step": 15108 + }, + { + "epoch": 4.59, + "learning_rate": 5.093681420190382e-06, + "loss": 0.0506, + "step": 15109 + }, + { + "epoch": 4.59, + "learning_rate": 5.086214118628723e-06, + "loss": 0.0293, + "step": 15110 + }, + { + "epoch": 4.59, + "learning_rate": 5.0787522001892625e-06, + "loss": 0.0338, + "step": 15111 + }, + { + "epoch": 4.59, + "learning_rate": 5.071295665149178e-06, + "loss": 0.0358, + "step": 15112 + }, + { + "epoch": 4.59, + "learning_rate": 5.063844513785464e-06, + "loss": 0.0083, + "step": 15113 + }, + { + "epoch": 4.59, + "learning_rate": 5.056398746374885e-06, + "loss": 0.015, + "step": 15114 + }, + { + "epoch": 4.59, + "learning_rate": 5.048958363194066e-06, + "loss": 0.0038, + "step": 15115 + }, + { + "epoch": 4.59, + "learning_rate": 5.041523364519373e-06, + "loss": 0.0108, + "step": 15116 + }, + { + "epoch": 4.59, + "learning_rate": 5.034093750626999e-06, + "loss": 0.0113, + "step": 15117 + }, + { + "epoch": 4.59, + "learning_rate": 5.026669521792942e-06, + "loss": 0.0272, + "step": 15118 + }, + { + "epoch": 4.59, + "learning_rate": 5.019250678292963e-06, + "loss": 0.0323, + "step": 15119 + }, + { + "epoch": 4.59, + "learning_rate": 5.011837220402692e-06, + "loss": 0.0437, + "step": 15120 + }, + { + "epoch": 4.59, + "learning_rate": 5.004429148397459e-06, + "loss": 0.0277, + "step": 15121 + }, + { + "epoch": 4.59, + "learning_rate": 4.997026462552544e-06, + "loss": 0.0114, + "step": 15122 + }, + { + "epoch": 4.59, + "learning_rate": 4.989629163142827e-06, + "loss": 0.0174, + "step": 15123 + }, + { + "epoch": 4.59, + "learning_rate": 4.982237250443172e-06, + "loss": 0.0229, + "step": 15124 + }, + { + "epoch": 4.59, + "learning_rate": 4.974850724728125e-06, + "loss": 0.0246, + "step": 15125 + }, + { + "epoch": 4.59, + "learning_rate": 4.967469586272116e-06, + "loss": 0.0252, + "step": 15126 + }, + { + "epoch": 4.59, + "learning_rate": 4.9600938353492934e-06, + "loss": 0.0308, + "step": 15127 + }, + { + "epoch": 4.59, + "learning_rate": 4.952723472233655e-06, + "loss": 0.0274, + "step": 15128 + }, + { + "epoch": 4.59, + "learning_rate": 4.9453584971989975e-06, + "loss": 0.0156, + "step": 15129 + }, + { + "epoch": 4.59, + "learning_rate": 4.937998910518887e-06, + "loss": 0.0406, + "step": 15130 + }, + { + "epoch": 4.59, + "learning_rate": 4.930644712466753e-06, + "loss": 0.003, + "step": 15131 + }, + { + "epoch": 4.59, + "learning_rate": 4.923295903315743e-06, + "loss": 0.0169, + "step": 15132 + }, + { + "epoch": 4.59, + "learning_rate": 4.915952483338859e-06, + "loss": 0.0125, + "step": 15133 + }, + { + "epoch": 4.6, + "learning_rate": 4.9086144528088786e-06, + "loss": 0.0324, + "step": 15134 + }, + { + "epoch": 4.6, + "learning_rate": 4.901281811998387e-06, + "loss": 0.0794, + "step": 15135 + }, + { + "epoch": 4.6, + "learning_rate": 4.89395456117978e-06, + "loss": 0.0169, + "step": 15136 + }, + { + "epoch": 4.6, + "learning_rate": 4.886632700625259e-06, + "loss": 0.0156, + "step": 15137 + }, + { + "epoch": 4.6, + "learning_rate": 4.879316230606789e-06, + "loss": 0.025, + "step": 15138 + }, + { + "epoch": 4.6, + "learning_rate": 4.872005151396136e-06, + "loss": 0.0225, + "step": 15139 + }, + { + "epoch": 4.6, + "learning_rate": 4.864699463264932e-06, + "loss": 0.0412, + "step": 15140 + }, + { + "epoch": 4.6, + "learning_rate": 4.8573991664845105e-06, + "loss": 0.0423, + "step": 15141 + }, + { + "epoch": 4.6, + "learning_rate": 4.850104261326104e-06, + "loss": 0.029, + "step": 15142 + }, + { + "epoch": 4.6, + "learning_rate": 4.842814748060681e-06, + "loss": 0.031, + "step": 15143 + }, + { + "epoch": 4.6, + "learning_rate": 4.835530626959005e-06, + "loss": 0.0315, + "step": 15144 + }, + { + "epoch": 4.6, + "learning_rate": 4.82825189829168e-06, + "loss": 0.0372, + "step": 15145 + }, + { + "epoch": 4.6, + "learning_rate": 4.820978562329087e-06, + "loss": 0.0228, + "step": 15146 + }, + { + "epoch": 4.6, + "learning_rate": 4.813710619341427e-06, + "loss": 0.0348, + "step": 15147 + }, + { + "epoch": 4.6, + "learning_rate": 4.806448069598618e-06, + "loss": 0.0207, + "step": 15148 + }, + { + "epoch": 4.6, + "learning_rate": 4.79919091337051e-06, + "loss": 0.0166, + "step": 15149 + }, + { + "epoch": 4.6, + "learning_rate": 4.791939150926655e-06, + "loss": 0.0158, + "step": 15150 + }, + { + "epoch": 4.6, + "learning_rate": 4.7846927825364535e-06, + "loss": 0.0177, + "step": 15151 + }, + { + "epoch": 4.6, + "learning_rate": 4.777451808469074e-06, + "loss": 0.032, + "step": 15152 + }, + { + "epoch": 4.6, + "learning_rate": 4.770216228993485e-06, + "loss": 0.0154, + "step": 15153 + }, + { + "epoch": 4.6, + "learning_rate": 4.762986044378486e-06, + "loss": 0.0246, + "step": 15154 + }, + { + "epoch": 4.6, + "learning_rate": 4.755761254892648e-06, + "loss": 0.0504, + "step": 15155 + }, + { + "epoch": 4.6, + "learning_rate": 4.748541860804355e-06, + "loss": 0.0258, + "step": 15156 + }, + { + "epoch": 4.6, + "learning_rate": 4.7413278623817945e-06, + "loss": 0.0247, + "step": 15157 + }, + { + "epoch": 4.6, + "learning_rate": 4.734119259892933e-06, + "loss": 0.0228, + "step": 15158 + }, + { + "epoch": 4.6, + "learning_rate": 4.726916053605556e-06, + "loss": 0.014, + "step": 15159 + }, + { + "epoch": 4.6, + "learning_rate": 4.719718243787218e-06, + "loss": 0.0039, + "step": 15160 + }, + { + "epoch": 4.6, + "learning_rate": 4.712525830705338e-06, + "loss": 0.0388, + "step": 15161 + }, + { + "epoch": 4.6, + "learning_rate": 4.705338814627069e-06, + "loss": 0.0382, + "step": 15162 + }, + { + "epoch": 4.6, + "learning_rate": 4.69815719581938e-06, + "loss": 0.0349, + "step": 15163 + }, + { + "epoch": 4.6, + "learning_rate": 4.690980974549058e-06, + "loss": 0.0302, + "step": 15164 + }, + { + "epoch": 4.6, + "learning_rate": 4.68381015108269e-06, + "loss": 0.0346, + "step": 15165 + }, + { + "epoch": 4.6, + "learning_rate": 4.676644725686612e-06, + "loss": 0.0392, + "step": 15166 + }, + { + "epoch": 4.61, + "learning_rate": 4.669484698627046e-06, + "loss": 0.0213, + "step": 15167 + }, + { + "epoch": 4.61, + "learning_rate": 4.662330070169962e-06, + "loss": 0.0641, + "step": 15168 + }, + { + "epoch": 4.61, + "learning_rate": 4.65518084058108e-06, + "loss": 0.0239, + "step": 15169 + }, + { + "epoch": 4.61, + "learning_rate": 4.648037010126038e-06, + "loss": 0.0363, + "step": 15170 + }, + { + "epoch": 4.61, + "learning_rate": 4.640898579070157e-06, + "loss": 0.0472, + "step": 15171 + }, + { + "epoch": 4.61, + "learning_rate": 4.633765547678658e-06, + "loss": 0.0102, + "step": 15172 + }, + { + "epoch": 4.61, + "learning_rate": 4.626637916216463e-06, + "loss": 0.0313, + "step": 15173 + }, + { + "epoch": 4.61, + "learning_rate": 4.619515684948377e-06, + "loss": 0.0106, + "step": 15174 + }, + { + "epoch": 4.61, + "learning_rate": 4.6123988541389544e-06, + "loss": 0.0195, + "step": 15175 + }, + { + "epoch": 4.61, + "learning_rate": 4.605287424052551e-06, + "loss": 0.0267, + "step": 15176 + }, + { + "epoch": 4.61, + "learning_rate": 4.598181394953404e-06, + "loss": 0.0413, + "step": 15177 + }, + { + "epoch": 4.61, + "learning_rate": 4.591080767105387e-06, + "loss": 0.0294, + "step": 15178 + }, + { + "epoch": 4.61, + "learning_rate": 4.583985540772322e-06, + "loss": 0.0508, + "step": 15179 + }, + { + "epoch": 4.61, + "learning_rate": 4.576895716217766e-06, + "loss": 0.014, + "step": 15180 + }, + { + "epoch": 4.61, + "learning_rate": 4.569811293705089e-06, + "loss": 0.0027, + "step": 15181 + }, + { + "epoch": 4.61, + "learning_rate": 4.562732273497433e-06, + "loss": 0.0208, + "step": 15182 + }, + { + "epoch": 4.61, + "learning_rate": 4.555658655857802e-06, + "loss": 0.0355, + "step": 15183 + }, + { + "epoch": 4.61, + "learning_rate": 4.548590441048955e-06, + "loss": 0.0212, + "step": 15184 + }, + { + "epoch": 4.61, + "learning_rate": 4.541527629333397e-06, + "loss": 0.0221, + "step": 15185 + }, + { + "epoch": 4.61, + "learning_rate": 4.534470220973568e-06, + "loss": 0.0078, + "step": 15186 + }, + { + "epoch": 4.61, + "learning_rate": 4.527418216231576e-06, + "loss": 0.0032, + "step": 15187 + }, + { + "epoch": 4.61, + "learning_rate": 4.520371615369428e-06, + "loss": 0.0433, + "step": 15188 + }, + { + "epoch": 4.61, + "learning_rate": 4.513330418648847e-06, + "loss": 0.004, + "step": 15189 + }, + { + "epoch": 4.61, + "learning_rate": 4.506294626331408e-06, + "loss": 0.024, + "step": 15190 + }, + { + "epoch": 4.61, + "learning_rate": 4.499264238678435e-06, + "loss": 0.0202, + "step": 15191 + }, + { + "epoch": 4.61, + "learning_rate": 4.4922392559511525e-06, + "loss": 0.0185, + "step": 15192 + }, + { + "epoch": 4.61, + "learning_rate": 4.485219678410484e-06, + "loss": 0.0204, + "step": 15193 + }, + { + "epoch": 4.61, + "learning_rate": 4.478205506317173e-06, + "loss": 0.0141, + "step": 15194 + }, + { + "epoch": 4.61, + "learning_rate": 4.471196739931793e-06, + "loss": 0.014, + "step": 15195 + }, + { + "epoch": 4.61, + "learning_rate": 4.464193379514685e-06, + "loss": 0.0189, + "step": 15196 + }, + { + "epoch": 4.61, + "learning_rate": 4.4571954253260265e-06, + "loss": 0.013, + "step": 15197 + }, + { + "epoch": 4.61, + "learning_rate": 4.450202877625758e-06, + "loss": 0.0297, + "step": 15198 + }, + { + "epoch": 4.61, + "learning_rate": 4.443215736673605e-06, + "loss": 0.0131, + "step": 15199 + }, + { + "epoch": 4.62, + "learning_rate": 4.436234002729178e-06, + "loss": 0.0128, + "step": 15200 + }, + { + "epoch": 4.62, + "learning_rate": 4.4292576760517676e-06, + "loss": 0.0355, + "step": 15201 + }, + { + "epoch": 4.62, + "learning_rate": 4.422286756900584e-06, + "loss": 0.0119, + "step": 15202 + }, + { + "epoch": 4.62, + "learning_rate": 4.41532124553452e-06, + "loss": 0.0214, + "step": 15203 + }, + { + "epoch": 4.62, + "learning_rate": 4.408361142212369e-06, + "loss": 0.0284, + "step": 15204 + }, + { + "epoch": 4.62, + "learning_rate": 4.40140644719264e-06, + "loss": 0.0456, + "step": 15205 + }, + { + "epoch": 4.62, + "learning_rate": 4.3944571607336945e-06, + "loss": 0.0682, + "step": 15206 + }, + { + "epoch": 4.62, + "learning_rate": 4.387513283093674e-06, + "loss": 0.0334, + "step": 15207 + }, + { + "epoch": 4.62, + "learning_rate": 4.380574814530557e-06, + "loss": 0.0254, + "step": 15208 + }, + { + "epoch": 4.62, + "learning_rate": 4.373641755302054e-06, + "loss": 0.0235, + "step": 15209 + }, + { + "epoch": 4.62, + "learning_rate": 4.3667141056656915e-06, + "loss": 0.0234, + "step": 15210 + }, + { + "epoch": 4.62, + "learning_rate": 4.359791865878864e-06, + "loss": 0.0268, + "step": 15211 + }, + { + "epoch": 4.62, + "learning_rate": 4.3528750361986495e-06, + "loss": 0.0169, + "step": 15212 + }, + { + "epoch": 4.62, + "learning_rate": 4.345963616882059e-06, + "loss": 0.02, + "step": 15213 + }, + { + "epoch": 4.62, + "learning_rate": 4.33905760818577e-06, + "loss": 0.0214, + "step": 15214 + }, + { + "epoch": 4.62, + "learning_rate": 4.332157010366361e-06, + "loss": 0.007, + "step": 15215 + }, + { + "epoch": 4.62, + "learning_rate": 4.325261823680126e-06, + "loss": 0.0158, + "step": 15216 + }, + { + "epoch": 4.62, + "learning_rate": 4.3183720483832285e-06, + "loss": 0.0425, + "step": 15217 + }, + { + "epoch": 4.62, + "learning_rate": 4.31148768473163e-06, + "loss": 0.027, + "step": 15218 + }, + { + "epoch": 4.62, + "learning_rate": 4.3046087329810075e-06, + "loss": 0.0217, + "step": 15219 + }, + { + "epoch": 4.62, + "learning_rate": 4.297735193386925e-06, + "loss": 0.0246, + "step": 15220 + }, + { + "epoch": 4.62, + "learning_rate": 4.290867066204711e-06, + "loss": 0.0278, + "step": 15221 + }, + { + "epoch": 4.62, + "learning_rate": 4.284004351689513e-06, + "loss": 0.018, + "step": 15222 + }, + { + "epoch": 4.62, + "learning_rate": 4.277147050096241e-06, + "loss": 0.052, + "step": 15223 + }, + { + "epoch": 4.62, + "learning_rate": 4.27029516167961e-06, + "loss": 0.0277, + "step": 15224 + }, + { + "epoch": 4.62, + "learning_rate": 4.263448686694199e-06, + "loss": 0.0083, + "step": 15225 + }, + { + "epoch": 4.62, + "learning_rate": 4.256607625394254e-06, + "loss": 0.0218, + "step": 15226 + }, + { + "epoch": 4.62, + "learning_rate": 4.249771978033989e-06, + "loss": 0.0164, + "step": 15227 + }, + { + "epoch": 4.62, + "learning_rate": 4.242941744867267e-06, + "loss": 0.036, + "step": 15228 + }, + { + "epoch": 4.62, + "learning_rate": 4.236116926147837e-06, + "loss": 0.0331, + "step": 15229 + }, + { + "epoch": 4.62, + "learning_rate": 4.22929752212921e-06, + "loss": 0.0357, + "step": 15230 + }, + { + "epoch": 4.62, + "learning_rate": 4.222483533064702e-06, + "loss": 0.042, + "step": 15231 + }, + { + "epoch": 4.62, + "learning_rate": 4.215674959207443e-06, + "loss": 0.0463, + "step": 15232 + }, + { + "epoch": 4.63, + "learning_rate": 4.208871800810365e-06, + "loss": 0.0191, + "step": 15233 + }, + { + "epoch": 4.63, + "learning_rate": 4.202074058126164e-06, + "loss": 0.0257, + "step": 15234 + }, + { + "epoch": 4.63, + "learning_rate": 4.195281731407357e-06, + "loss": 0.0371, + "step": 15235 + }, + { + "epoch": 4.63, + "learning_rate": 4.1884948209062745e-06, + "loss": 0.0121, + "step": 15236 + }, + { + "epoch": 4.63, + "learning_rate": 4.181713326875013e-06, + "loss": 0.026, + "step": 15237 + }, + { + "epoch": 4.63, + "learning_rate": 4.1749372495655075e-06, + "loss": 0.0249, + "step": 15238 + }, + { + "epoch": 4.63, + "learning_rate": 4.168166589229438e-06, + "loss": 0.0399, + "step": 15239 + }, + { + "epoch": 4.63, + "learning_rate": 4.1614013461183204e-06, + "loss": 0.0242, + "step": 15240 + }, + { + "epoch": 4.63, + "learning_rate": 4.154641520483504e-06, + "loss": 0.0612, + "step": 15241 + }, + { + "epoch": 4.63, + "learning_rate": 4.147887112576037e-06, + "loss": 0.0219, + "step": 15242 + }, + { + "epoch": 4.63, + "learning_rate": 4.141138122646887e-06, + "loss": 0.0319, + "step": 15243 + }, + { + "epoch": 4.63, + "learning_rate": 4.134394550946718e-06, + "loss": 0.0428, + "step": 15244 + }, + { + "epoch": 4.63, + "learning_rate": 4.127656397726048e-06, + "loss": 0.0284, + "step": 15245 + }, + { + "epoch": 4.63, + "learning_rate": 4.12092366323516e-06, + "loss": 0.0338, + "step": 15246 + }, + { + "epoch": 4.63, + "learning_rate": 4.114196347724186e-06, + "loss": 0.0307, + "step": 15247 + }, + { + "epoch": 4.63, + "learning_rate": 4.107474451442994e-06, + "loss": 0.0234, + "step": 15248 + }, + { + "epoch": 4.63, + "learning_rate": 4.100757974641317e-06, + "loss": 0.0265, + "step": 15249 + }, + { + "epoch": 4.63, + "learning_rate": 4.094046917568622e-06, + "loss": 0.0198, + "step": 15250 + }, + { + "epoch": 4.63, + "learning_rate": 4.087341280474227e-06, + "loss": 0.0096, + "step": 15251 + }, + { + "epoch": 4.63, + "learning_rate": 4.080641063607215e-06, + "loss": 0.0267, + "step": 15252 + }, + { + "epoch": 4.63, + "learning_rate": 4.0739462672164714e-06, + "loss": 0.0198, + "step": 15253 + }, + { + "epoch": 4.63, + "learning_rate": 4.067256891550729e-06, + "loss": 0.029, + "step": 15254 + }, + { + "epoch": 4.63, + "learning_rate": 4.060572936858425e-06, + "loss": 0.0243, + "step": 15255 + }, + { + "epoch": 4.63, + "learning_rate": 4.0538944033878915e-06, + "loss": 0.0266, + "step": 15256 + }, + { + "epoch": 4.63, + "learning_rate": 4.047221291387165e-06, + "loss": 0.0177, + "step": 15257 + }, + { + "epoch": 4.63, + "learning_rate": 4.04055360110418e-06, + "loss": 0.0096, + "step": 15258 + }, + { + "epoch": 4.63, + "learning_rate": 4.033891332786621e-06, + "loss": 0.0258, + "step": 15259 + }, + { + "epoch": 4.63, + "learning_rate": 4.02723448668194e-06, + "loss": 0.0207, + "step": 15260 + }, + { + "epoch": 4.63, + "learning_rate": 4.020583063037458e-06, + "loss": 0.0261, + "step": 15261 + }, + { + "epoch": 4.63, + "learning_rate": 4.013937062100209e-06, + "loss": 0.0605, + "step": 15262 + }, + { + "epoch": 4.63, + "learning_rate": 4.007296484117112e-06, + "loss": 0.0334, + "step": 15263 + }, + { + "epoch": 4.63, + "learning_rate": 4.00066132933482e-06, + "loss": 0.0393, + "step": 15264 + }, + { + "epoch": 4.63, + "learning_rate": 3.9940315979998194e-06, + "loss": 0.0168, + "step": 15265 + }, + { + "epoch": 4.64, + "learning_rate": 3.987407290358397e-06, + "loss": 0.0185, + "step": 15266 + }, + { + "epoch": 4.64, + "learning_rate": 3.9807884066566044e-06, + "loss": 0.0382, + "step": 15267 + }, + { + "epoch": 4.64, + "learning_rate": 3.9741749471403295e-06, + "loss": 0.0158, + "step": 15268 + }, + { + "epoch": 4.64, + "learning_rate": 3.967566912055259e-06, + "loss": 0.0272, + "step": 15269 + }, + { + "epoch": 4.64, + "learning_rate": 3.960964301646813e-06, + "loss": 0.0222, + "step": 15270 + }, + { + "epoch": 4.64, + "learning_rate": 3.954367116160295e-06, + "loss": 0.0152, + "step": 15271 + }, + { + "epoch": 4.64, + "learning_rate": 3.947775355840776e-06, + "loss": 0.0177, + "step": 15272 + }, + { + "epoch": 4.64, + "learning_rate": 3.941189020933078e-06, + "loss": 0.0214, + "step": 15273 + }, + { + "epoch": 4.64, + "learning_rate": 3.93460811168192e-06, + "loss": 0.0066, + "step": 15274 + }, + { + "epoch": 4.64, + "learning_rate": 3.92803262833174e-06, + "loss": 0.0402, + "step": 15275 + }, + { + "epoch": 4.64, + "learning_rate": 3.921462571126777e-06, + "loss": 0.0475, + "step": 15276 + }, + { + "epoch": 4.64, + "learning_rate": 3.914897940311118e-06, + "loss": 0.0161, + "step": 15277 + }, + { + "epoch": 4.64, + "learning_rate": 3.908338736128602e-06, + "loss": 0.0169, + "step": 15278 + }, + { + "epoch": 4.64, + "learning_rate": 3.901784958822917e-06, + "loss": 0.0125, + "step": 15279 + }, + { + "epoch": 4.64, + "learning_rate": 3.8952366086374535e-06, + "loss": 0.0192, + "step": 15280 + }, + { + "epoch": 4.64, + "learning_rate": 3.8886936858155136e-06, + "loss": 0.0232, + "step": 15281 + }, + { + "epoch": 4.64, + "learning_rate": 3.8821561906001385e-06, + "loss": 0.0186, + "step": 15282 + }, + { + "epoch": 4.64, + "learning_rate": 3.875624123234167e-06, + "loss": 0.0406, + "step": 15283 + }, + { + "epoch": 4.64, + "learning_rate": 3.869097483960287e-06, + "loss": 0.0329, + "step": 15284 + }, + { + "epoch": 4.64, + "learning_rate": 3.862576273020873e-06, + "loss": 0.0331, + "step": 15285 + }, + { + "epoch": 4.64, + "learning_rate": 3.85606049065823e-06, + "loss": 0.032, + "step": 15286 + }, + { + "epoch": 4.64, + "learning_rate": 3.849550137114349e-06, + "loss": 0.0487, + "step": 15287 + }, + { + "epoch": 4.64, + "learning_rate": 3.843045212631119e-06, + "loss": 0.0237, + "step": 15288 + }, + { + "epoch": 4.64, + "learning_rate": 3.836545717450163e-06, + "loss": 0.0155, + "step": 15289 + }, + { + "epoch": 4.64, + "learning_rate": 3.830051651812887e-06, + "loss": 0.0262, + "step": 15290 + }, + { + "epoch": 4.64, + "learning_rate": 3.823563015960585e-06, + "loss": 0.0141, + "step": 15291 + }, + { + "epoch": 4.64, + "learning_rate": 3.817079810134227e-06, + "loss": 0.0576, + "step": 15292 + }, + { + "epoch": 4.64, + "learning_rate": 3.810602034574689e-06, + "loss": 0.0164, + "step": 15293 + }, + { + "epoch": 4.64, + "learning_rate": 3.804129689522595e-06, + "loss": 0.0539, + "step": 15294 + }, + { + "epoch": 4.64, + "learning_rate": 3.7976627752183687e-06, + "loss": 0.0093, + "step": 15295 + }, + { + "epoch": 4.64, + "learning_rate": 3.7912012919022183e-06, + "loss": 0.0174, + "step": 15296 + }, + { + "epoch": 4.64, + "learning_rate": 3.7847452398142018e-06, + "loss": 0.0142, + "step": 15297 + }, + { + "epoch": 4.64, + "learning_rate": 3.778294619194111e-06, + "loss": 0.0251, + "step": 15298 + }, + { + "epoch": 4.65, + "learning_rate": 3.7718494302816037e-06, + "loss": 0.039, + "step": 15299 + }, + { + "epoch": 4.65, + "learning_rate": 3.7654096733160723e-06, + "loss": 0.0542, + "step": 15300 + }, + { + "epoch": 4.65, + "learning_rate": 3.7589753485367413e-06, + "loss": 0.0079, + "step": 15301 + }, + { + "epoch": 4.65, + "learning_rate": 3.7525464561826203e-06, + "loss": 0.0378, + "step": 15302 + }, + { + "epoch": 4.65, + "learning_rate": 3.7461229964925344e-06, + "loss": 0.0089, + "step": 15303 + }, + { + "epoch": 4.65, + "learning_rate": 3.7397049697050928e-06, + "loss": 0.0415, + "step": 15304 + }, + { + "epoch": 4.65, + "learning_rate": 3.733292376058722e-06, + "loss": 0.044, + "step": 15305 + }, + { + "epoch": 4.65, + "learning_rate": 3.7268852157915973e-06, + "loss": 0.0259, + "step": 15306 + }, + { + "epoch": 4.65, + "learning_rate": 3.720483489141746e-06, + "loss": 0.0223, + "step": 15307 + }, + { + "epoch": 4.65, + "learning_rate": 3.7140871963469763e-06, + "loss": 0.0266, + "step": 15308 + }, + { + "epoch": 4.65, + "learning_rate": 3.7076963376449e-06, + "loss": 0.0201, + "step": 15309 + }, + { + "epoch": 4.65, + "learning_rate": 3.7013109132728925e-06, + "loss": 0.022, + "step": 15310 + }, + { + "epoch": 4.65, + "learning_rate": 3.6949309234681814e-06, + "loss": 0.0263, + "step": 15311 + }, + { + "epoch": 4.65, + "learning_rate": 3.6885563684677268e-06, + "loss": 0.0237, + "step": 15312 + }, + { + "epoch": 4.65, + "learning_rate": 3.6821872485083557e-06, + "loss": 0.0246, + "step": 15313 + }, + { + "epoch": 4.65, + "learning_rate": 3.6758235638266466e-06, + "loss": 0.0348, + "step": 15314 + }, + { + "epoch": 4.65, + "learning_rate": 3.669465314659026e-06, + "loss": 0.0287, + "step": 15315 + }, + { + "epoch": 4.65, + "learning_rate": 3.663112501241655e-06, + "loss": 0.0212, + "step": 15316 + }, + { + "epoch": 4.65, + "learning_rate": 3.6567651238104954e-06, + "loss": 0.0233, + "step": 15317 + }, + { + "epoch": 4.65, + "learning_rate": 3.650423182601392e-06, + "loss": 0.0339, + "step": 15318 + }, + { + "epoch": 4.65, + "learning_rate": 3.644086677849889e-06, + "loss": 0.0403, + "step": 15319 + }, + { + "epoch": 4.65, + "learning_rate": 3.6377556097913985e-06, + "loss": 0.0343, + "step": 15320 + }, + { + "epoch": 4.65, + "learning_rate": 3.631429978661049e-06, + "loss": 0.0219, + "step": 15321 + }, + { + "epoch": 4.65, + "learning_rate": 3.6251097846938692e-06, + "loss": 0.0181, + "step": 15322 + }, + { + "epoch": 4.65, + "learning_rate": 3.618795028124638e-06, + "loss": 0.012, + "step": 15323 + }, + { + "epoch": 4.65, + "learning_rate": 3.6124857091878845e-06, + "loss": 0.0332, + "step": 15324 + }, + { + "epoch": 4.65, + "learning_rate": 3.606181828118038e-06, + "loss": 0.0202, + "step": 15325 + }, + { + "epoch": 4.65, + "learning_rate": 3.5998833851492106e-06, + "loss": 0.0169, + "step": 15326 + }, + { + "epoch": 4.65, + "learning_rate": 3.593590380515432e-06, + "loss": 0.0122, + "step": 15327 + }, + { + "epoch": 4.65, + "learning_rate": 3.587302814450399e-06, + "loss": 0.0242, + "step": 15328 + }, + { + "epoch": 4.65, + "learning_rate": 3.5810206871877413e-06, + "loss": 0.0478, + "step": 15329 + }, + { + "epoch": 4.65, + "learning_rate": 3.574743998960805e-06, + "loss": 0.0322, + "step": 15330 + }, + { + "epoch": 4.65, + "learning_rate": 3.568472750002721e-06, + "loss": 0.0333, + "step": 15331 + }, + { + "epoch": 4.66, + "learning_rate": 3.5622069405464694e-06, + "loss": 0.0138, + "step": 15332 + }, + { + "epoch": 4.66, + "learning_rate": 3.555946570824797e-06, + "loss": 0.0163, + "step": 15333 + }, + { + "epoch": 4.66, + "learning_rate": 3.549691641070285e-06, + "loss": 0.0252, + "step": 15334 + }, + { + "epoch": 4.66, + "learning_rate": 3.543442151515263e-06, + "loss": 0.0214, + "step": 15335 + }, + { + "epoch": 4.66, + "learning_rate": 3.5371981023918805e-06, + "loss": 0.0176, + "step": 15336 + }, + { + "epoch": 4.66, + "learning_rate": 3.530959493932084e-06, + "loss": 0.0169, + "step": 15337 + }, + { + "epoch": 4.66, + "learning_rate": 3.5247263263676384e-06, + "loss": 0.023, + "step": 15338 + }, + { + "epoch": 4.66, + "learning_rate": 3.5184985999300417e-06, + "loss": 0.0232, + "step": 15339 + }, + { + "epoch": 4.66, + "learning_rate": 3.5122763148507096e-06, + "loss": 0.0258, + "step": 15340 + }, + { + "epoch": 4.66, + "learning_rate": 3.5060594713607228e-06, + "loss": 0.0253, + "step": 15341 + }, + { + "epoch": 4.66, + "learning_rate": 3.4998480696910304e-06, + "loss": 0.0392, + "step": 15342 + }, + { + "epoch": 4.66, + "learning_rate": 3.4936421100723976e-06, + "loss": 0.0413, + "step": 15343 + }, + { + "epoch": 4.66, + "learning_rate": 3.487441592735307e-06, + "loss": 0.0374, + "step": 15344 + }, + { + "epoch": 4.66, + "learning_rate": 3.4812465179101245e-06, + "loss": 0.0052, + "step": 15345 + }, + { + "epoch": 4.66, + "learning_rate": 3.475056885826982e-06, + "loss": 0.0096, + "step": 15346 + }, + { + "epoch": 4.66, + "learning_rate": 3.4688726967157797e-06, + "loss": 0.0224, + "step": 15347 + }, + { + "epoch": 4.66, + "learning_rate": 3.4626939508062667e-06, + "loss": 0.0323, + "step": 15348 + }, + { + "epoch": 4.66, + "learning_rate": 3.4565206483279595e-06, + "loss": 0.0285, + "step": 15349 + }, + { + "epoch": 4.66, + "learning_rate": 3.4503527895101914e-06, + "loss": 0.0246, + "step": 15350 + }, + { + "epoch": 4.66, + "learning_rate": 3.4441903745820285e-06, + "loss": 0.0195, + "step": 15351 + }, + { + "epoch": 4.66, + "learning_rate": 3.4380334037724387e-06, + "loss": 0.0092, + "step": 15352 + }, + { + "epoch": 4.66, + "learning_rate": 3.4318818773101055e-06, + "loss": 0.0258, + "step": 15353 + }, + { + "epoch": 4.66, + "learning_rate": 3.4257357954235787e-06, + "loss": 0.0161, + "step": 15354 + }, + { + "epoch": 4.66, + "learning_rate": 3.4195951583411263e-06, + "loss": 0.0205, + "step": 15355 + }, + { + "epoch": 4.66, + "learning_rate": 3.413459966290866e-06, + "loss": 0.0408, + "step": 15356 + }, + { + "epoch": 4.66, + "learning_rate": 3.4073302195007146e-06, + "loss": 0.02, + "step": 15357 + }, + { + "epoch": 4.66, + "learning_rate": 3.4012059181983576e-06, + "loss": 0.0222, + "step": 15358 + }, + { + "epoch": 4.66, + "learning_rate": 3.3950870626113123e-06, + "loss": 0.0072, + "step": 15359 + }, + { + "epoch": 4.66, + "learning_rate": 3.388973652966864e-06, + "loss": 0.0328, + "step": 15360 + }, + { + "epoch": 4.66, + "learning_rate": 3.3828656894921145e-06, + "loss": 0.0216, + "step": 15361 + }, + { + "epoch": 4.66, + "learning_rate": 3.3767631724139486e-06, + "loss": 0.0274, + "step": 15362 + }, + { + "epoch": 4.66, + "learning_rate": 3.370666101959052e-06, + "loss": 0.0223, + "step": 15363 + }, + { + "epoch": 4.66, + "learning_rate": 3.364574478353943e-06, + "loss": 0.0298, + "step": 15364 + }, + { + "epoch": 4.67, + "learning_rate": 3.358488301824891e-06, + "loss": 0.0312, + "step": 15365 + }, + { + "epoch": 4.67, + "learning_rate": 3.352407572597965e-06, + "loss": 0.021, + "step": 15366 + }, + { + "epoch": 4.67, + "learning_rate": 3.346332290899051e-06, + "loss": 0.0266, + "step": 15367 + }, + { + "epoch": 4.67, + "learning_rate": 3.340262456953835e-06, + "loss": 0.0318, + "step": 15368 + }, + { + "epoch": 4.67, + "learning_rate": 3.3341980709877867e-06, + "loss": 0.0204, + "step": 15369 + }, + { + "epoch": 4.67, + "learning_rate": 3.32813913322621e-06, + "loss": 0.0272, + "step": 15370 + }, + { + "epoch": 4.67, + "learning_rate": 3.3220856438941403e-06, + "loss": 0.0297, + "step": 15371 + }, + { + "epoch": 4.67, + "learning_rate": 3.316037603216448e-06, + "loss": 0.0165, + "step": 15372 + }, + { + "epoch": 4.67, + "learning_rate": 3.309995011417821e-06, + "loss": 0.02, + "step": 15373 + }, + { + "epoch": 4.67, + "learning_rate": 3.303957868722712e-06, + "loss": 0.0173, + "step": 15374 + }, + { + "epoch": 4.67, + "learning_rate": 3.297926175355392e-06, + "loss": 0.019, + "step": 15375 + }, + { + "epoch": 4.67, + "learning_rate": 3.2918999315399154e-06, + "loss": 0.0425, + "step": 15376 + }, + { + "epoch": 4.67, + "learning_rate": 3.285879137500136e-06, + "loss": 0.0214, + "step": 15377 + }, + { + "epoch": 4.67, + "learning_rate": 3.2798637934597085e-06, + "loss": 0.0231, + "step": 15378 + }, + { + "epoch": 4.67, + "learning_rate": 3.2738538996420872e-06, + "loss": 0.0386, + "step": 15379 + }, + { + "epoch": 4.67, + "learning_rate": 3.2678494562705103e-06, + "loss": 0.0166, + "step": 15380 + }, + { + "epoch": 4.67, + "learning_rate": 3.2618504635680665e-06, + "loss": 0.0262, + "step": 15381 + }, + { + "epoch": 4.67, + "learning_rate": 3.2558569217575604e-06, + "loss": 0.037, + "step": 15382 + }, + { + "epoch": 4.67, + "learning_rate": 3.2498688310616307e-06, + "loss": 0.0214, + "step": 15383 + }, + { + "epoch": 4.67, + "learning_rate": 3.2438861917027493e-06, + "loss": 0.0236, + "step": 15384 + }, + { + "epoch": 4.67, + "learning_rate": 3.237909003903122e-06, + "loss": 0.0194, + "step": 15385 + }, + { + "epoch": 4.67, + "learning_rate": 3.231937267884821e-06, + "loss": 0.0366, + "step": 15386 + }, + { + "epoch": 4.67, + "learning_rate": 3.2259709838696356e-06, + "loss": 0.0195, + "step": 15387 + }, + { + "epoch": 4.67, + "learning_rate": 3.2200101520792222e-06, + "loss": 0.0366, + "step": 15388 + }, + { + "epoch": 4.67, + "learning_rate": 3.2140547727350197e-06, + "loss": 0.0227, + "step": 15389 + }, + { + "epoch": 4.67, + "learning_rate": 3.208104846058218e-06, + "loss": 0.033, + "step": 15390 + }, + { + "epoch": 4.67, + "learning_rate": 3.2021603722698743e-06, + "loss": 0.0276, + "step": 15391 + }, + { + "epoch": 4.67, + "learning_rate": 3.1962213515907776e-06, + "loss": 0.0181, + "step": 15392 + }, + { + "epoch": 4.67, + "learning_rate": 3.190287784241552e-06, + "loss": 0.0167, + "step": 15393 + }, + { + "epoch": 4.67, + "learning_rate": 3.1843596704426213e-06, + "loss": 0.0178, + "step": 15394 + }, + { + "epoch": 4.67, + "learning_rate": 3.1784370104142086e-06, + "loss": 0.037, + "step": 15395 + }, + { + "epoch": 4.67, + "learning_rate": 3.1725198043763223e-06, + "loss": 0.0171, + "step": 15396 + }, + { + "epoch": 4.67, + "learning_rate": 3.1666080525487193e-06, + "loss": 0.0129, + "step": 15397 + }, + { + "epoch": 4.68, + "learning_rate": 3.160701755151074e-06, + "loss": 0.0143, + "step": 15398 + }, + { + "epoch": 4.68, + "learning_rate": 3.1548009124027275e-06, + "loss": 0.012, + "step": 15399 + }, + { + "epoch": 4.68, + "learning_rate": 3.1489055245229376e-06, + "loss": 0.0354, + "step": 15400 + }, + { + "epoch": 4.68, + "learning_rate": 3.143015591730663e-06, + "loss": 0.0197, + "step": 15401 + }, + { + "epoch": 4.68, + "learning_rate": 3.137131114244695e-06, + "loss": 0.0314, + "step": 15402 + }, + { + "epoch": 4.68, + "learning_rate": 3.131252092283643e-06, + "loss": 0.0215, + "step": 15403 + }, + { + "epoch": 4.68, + "learning_rate": 3.1253785260658805e-06, + "loss": 0.03, + "step": 15404 + }, + { + "epoch": 4.68, + "learning_rate": 3.119510415809584e-06, + "loss": 0.0422, + "step": 15405 + }, + { + "epoch": 4.68, + "learning_rate": 3.1136477617327793e-06, + "loss": 0.0309, + "step": 15406 + }, + { + "epoch": 4.68, + "learning_rate": 3.1077905640532087e-06, + "loss": 0.022, + "step": 15407 + }, + { + "epoch": 4.68, + "learning_rate": 3.101938822988448e-06, + "loss": 0.0219, + "step": 15408 + }, + { + "epoch": 4.68, + "learning_rate": 3.0960925387559067e-06, + "loss": 0.0262, + "step": 15409 + }, + { + "epoch": 4.68, + "learning_rate": 3.090251711572728e-06, + "loss": 0.0357, + "step": 15410 + }, + { + "epoch": 4.68, + "learning_rate": 3.0844163416558876e-06, + "loss": 0.0222, + "step": 15411 + }, + { + "epoch": 4.68, + "learning_rate": 3.078586429222163e-06, + "loss": 0.0241, + "step": 15412 + }, + { + "epoch": 4.68, + "learning_rate": 3.0727619744880807e-06, + "loss": 0.0242, + "step": 15413 + }, + { + "epoch": 4.68, + "learning_rate": 3.066942977670067e-06, + "loss": 0.0503, + "step": 15414 + }, + { + "epoch": 4.68, + "learning_rate": 3.0611294389842168e-06, + "loss": 0.0081, + "step": 15415 + }, + { + "epoch": 4.68, + "learning_rate": 3.05532135864654e-06, + "loss": 0.021, + "step": 15416 + }, + { + "epoch": 4.68, + "learning_rate": 3.04951873687273e-06, + "loss": 0.0154, + "step": 15417 + }, + { + "epoch": 4.68, + "learning_rate": 3.0437215738783994e-06, + "loss": 0.0395, + "step": 15418 + }, + { + "epoch": 4.68, + "learning_rate": 3.037929869878841e-06, + "loss": 0.0239, + "step": 15419 + }, + { + "epoch": 4.68, + "learning_rate": 3.032143625089234e-06, + "loss": 0.0091, + "step": 15420 + }, + { + "epoch": 4.68, + "learning_rate": 3.0263628397245223e-06, + "loss": 0.0336, + "step": 15421 + }, + { + "epoch": 4.68, + "learning_rate": 3.0205875139994174e-06, + "loss": 0.0247, + "step": 15422 + }, + { + "epoch": 4.68, + "learning_rate": 3.014817648128498e-06, + "loss": 0.0248, + "step": 15423 + }, + { + "epoch": 4.68, + "learning_rate": 3.0090532423260426e-06, + "loss": 0.0306, + "step": 15424 + }, + { + "epoch": 4.68, + "learning_rate": 3.0032942968062303e-06, + "loss": 0.01, + "step": 15425 + }, + { + "epoch": 4.68, + "learning_rate": 2.9975408117829723e-06, + "loss": 0.0547, + "step": 15426 + }, + { + "epoch": 4.68, + "learning_rate": 2.9917927874699822e-06, + "loss": 0.0347, + "step": 15427 + }, + { + "epoch": 4.68, + "learning_rate": 2.9860502240808048e-06, + "loss": 0.0235, + "step": 15428 + }, + { + "epoch": 4.68, + "learning_rate": 2.9803131218287202e-06, + "loss": 0.0302, + "step": 15429 + }, + { + "epoch": 4.68, + "learning_rate": 2.9745814809269073e-06, + "loss": 0.0239, + "step": 15430 + }, + { + "epoch": 4.69, + "learning_rate": 2.968855301588213e-06, + "loss": 0.0425, + "step": 15431 + }, + { + "epoch": 4.69, + "learning_rate": 2.963134584025417e-06, + "loss": 0.0365, + "step": 15432 + }, + { + "epoch": 4.69, + "learning_rate": 2.9574193284509494e-06, + "loss": 0.013, + "step": 15433 + }, + { + "epoch": 4.69, + "learning_rate": 2.9517095350771736e-06, + "loss": 0.0242, + "step": 15434 + }, + { + "epoch": 4.69, + "learning_rate": 2.94600520411617e-06, + "loss": 0.0087, + "step": 15435 + }, + { + "epoch": 4.69, + "learning_rate": 2.940306335779835e-06, + "loss": 0.0351, + "step": 15436 + }, + { + "epoch": 4.69, + "learning_rate": 2.9346129302798838e-06, + "loss": 0.0026, + "step": 15437 + }, + { + "epoch": 4.69, + "learning_rate": 2.92892498782778e-06, + "loss": 0.0267, + "step": 15438 + }, + { + "epoch": 4.69, + "learning_rate": 2.9232425086348375e-06, + "loss": 0.0158, + "step": 15439 + }, + { + "epoch": 4.69, + "learning_rate": 2.9175654929121384e-06, + "loss": 0.0228, + "step": 15440 + }, + { + "epoch": 4.69, + "learning_rate": 2.911893940870563e-06, + "loss": 0.0328, + "step": 15441 + }, + { + "epoch": 4.69, + "learning_rate": 2.9062278527208107e-06, + "loss": 0.0071, + "step": 15442 + }, + { + "epoch": 4.69, + "learning_rate": 2.9005672286733284e-06, + "loss": 0.0242, + "step": 15443 + }, + { + "epoch": 4.69, + "learning_rate": 2.8949120689384153e-06, + "loss": 0.0128, + "step": 15444 + }, + { + "epoch": 4.69, + "learning_rate": 2.8892623737261366e-06, + "loss": 0.013, + "step": 15445 + }, + { + "epoch": 4.69, + "learning_rate": 2.8836181432463414e-06, + "loss": 0.0344, + "step": 15446 + }, + { + "epoch": 4.69, + "learning_rate": 2.8779793777087445e-06, + "loss": 0.0179, + "step": 15447 + }, + { + "epoch": 4.69, + "learning_rate": 2.872346077322779e-06, + "loss": 0.0317, + "step": 15448 + }, + { + "epoch": 4.69, + "learning_rate": 2.8667182422976943e-06, + "loss": 0.0279, + "step": 15449 + }, + { + "epoch": 4.69, + "learning_rate": 2.8610958728425728e-06, + "loss": 0.0205, + "step": 15450 + }, + { + "epoch": 4.69, + "learning_rate": 2.855478969166247e-06, + "loss": 0.0187, + "step": 15451 + }, + { + "epoch": 4.69, + "learning_rate": 2.8498675314773845e-06, + "loss": 0.0104, + "step": 15452 + }, + { + "epoch": 4.69, + "learning_rate": 2.8442615599844344e-06, + "loss": 0.0228, + "step": 15453 + }, + { + "epoch": 4.69, + "learning_rate": 2.83866105489563e-06, + "loss": 0.0395, + "step": 15454 + }, + { + "epoch": 4.69, + "learning_rate": 2.833066016419039e-06, + "loss": 0.0236, + "step": 15455 + }, + { + "epoch": 4.69, + "learning_rate": 2.827476444762461e-06, + "loss": 0.0368, + "step": 15456 + }, + { + "epoch": 4.69, + "learning_rate": 2.8218923401335637e-06, + "loss": 0.0075, + "step": 15457 + }, + { + "epoch": 4.69, + "learning_rate": 2.8163137027397643e-06, + "loss": 0.0358, + "step": 15458 + }, + { + "epoch": 4.69, + "learning_rate": 2.810740532788297e-06, + "loss": 0.0117, + "step": 15459 + }, + { + "epoch": 4.69, + "learning_rate": 2.8051728304861967e-06, + "loss": 0.0364, + "step": 15460 + }, + { + "epoch": 4.69, + "learning_rate": 2.7996105960402804e-06, + "loss": 0.05, + "step": 15461 + }, + { + "epoch": 4.69, + "learning_rate": 2.794053829657167e-06, + "loss": 0.0347, + "step": 15462 + }, + { + "epoch": 4.7, + "learning_rate": 2.7885025315432743e-06, + "loss": 0.0315, + "step": 15463 + }, + { + "epoch": 4.7, + "learning_rate": 2.7829567019048203e-06, + "loss": 0.0314, + "step": 15464 + }, + { + "epoch": 4.7, + "learning_rate": 2.777416340947791e-06, + "loss": 0.0456, + "step": 15465 + }, + { + "epoch": 4.7, + "learning_rate": 2.771881448878055e-06, + "loss": 0.0216, + "step": 15466 + }, + { + "epoch": 4.7, + "learning_rate": 2.766352025901164e-06, + "loss": 0.0213, + "step": 15467 + }, + { + "epoch": 4.7, + "learning_rate": 2.7608280722225372e-06, + "loss": 0.0145, + "step": 15468 + }, + { + "epoch": 4.7, + "learning_rate": 2.7553095880473773e-06, + "loss": 0.0188, + "step": 15469 + }, + { + "epoch": 4.7, + "learning_rate": 2.749796573580654e-06, + "loss": 0.0128, + "step": 15470 + }, + { + "epoch": 4.7, + "learning_rate": 2.7442890290272025e-06, + "loss": 0.0342, + "step": 15471 + }, + { + "epoch": 4.7, + "learning_rate": 2.738786954591593e-06, + "loss": 0.0188, + "step": 15472 + }, + { + "epoch": 4.7, + "learning_rate": 2.7332903504781955e-06, + "loss": 0.0308, + "step": 15473 + }, + { + "epoch": 4.7, + "learning_rate": 2.7277992168911965e-06, + "loss": 0.0149, + "step": 15474 + }, + { + "epoch": 4.7, + "learning_rate": 2.7223135540345996e-06, + "loss": 0.0181, + "step": 15475 + }, + { + "epoch": 4.7, + "learning_rate": 2.7168333621121587e-06, + "loss": 0.0559, + "step": 15476 + }, + { + "epoch": 4.7, + "learning_rate": 2.71135864132746e-06, + "loss": 0.0839, + "step": 15477 + }, + { + "epoch": 4.7, + "learning_rate": 2.7058893918838754e-06, + "loss": 0.0372, + "step": 15478 + }, + { + "epoch": 4.7, + "learning_rate": 2.7004256139845416e-06, + "loss": 0.0094, + "step": 15479 + }, + { + "epoch": 4.7, + "learning_rate": 2.694967307832463e-06, + "loss": 0.0252, + "step": 15480 + }, + { + "epoch": 4.7, + "learning_rate": 2.6895144736303608e-06, + "loss": 0.0181, + "step": 15481 + }, + { + "epoch": 4.7, + "learning_rate": 2.68406711158084e-06, + "loss": 0.0206, + "step": 15482 + }, + { + "epoch": 4.7, + "learning_rate": 2.678625221886205e-06, + "loss": 0.0214, + "step": 15483 + }, + { + "epoch": 4.7, + "learning_rate": 2.673188804748644e-06, + "loss": 0.0171, + "step": 15484 + }, + { + "epoch": 4.7, + "learning_rate": 2.667757860370062e-06, + "loss": 0.0219, + "step": 15485 + }, + { + "epoch": 4.7, + "learning_rate": 2.662332388952232e-06, + "loss": 0.0197, + "step": 15486 + }, + { + "epoch": 4.7, + "learning_rate": 2.656912390696708e-06, + "loss": 0.0204, + "step": 15487 + }, + { + "epoch": 4.7, + "learning_rate": 2.651497865804797e-06, + "loss": 0.0274, + "step": 15488 + }, + { + "epoch": 4.7, + "learning_rate": 2.646088814477637e-06, + "loss": 0.0347, + "step": 15489 + }, + { + "epoch": 4.7, + "learning_rate": 2.640685236916168e-06, + "loss": 0.0219, + "step": 15490 + }, + { + "epoch": 4.7, + "learning_rate": 2.635287133321129e-06, + "loss": 0.016, + "step": 15491 + }, + { + "epoch": 4.7, + "learning_rate": 2.6298945038930265e-06, + "loss": 0.0213, + "step": 15492 + }, + { + "epoch": 4.7, + "learning_rate": 2.6245073488321667e-06, + "loss": 0.0367, + "step": 15493 + }, + { + "epoch": 4.7, + "learning_rate": 2.6191256683387064e-06, + "loss": 0.0209, + "step": 15494 + }, + { + "epoch": 4.7, + "learning_rate": 2.613749462612519e-06, + "loss": 0.0097, + "step": 15495 + }, + { + "epoch": 4.71, + "learning_rate": 2.6083787318533445e-06, + "loss": 0.03, + "step": 15496 + }, + { + "epoch": 4.71, + "learning_rate": 2.603013476260657e-06, + "loss": 0.0527, + "step": 15497 + }, + { + "epoch": 4.71, + "learning_rate": 2.5976536960338134e-06, + "loss": 0.0245, + "step": 15498 + }, + { + "epoch": 4.71, + "learning_rate": 2.592299391371855e-06, + "loss": 0.0467, + "step": 15499 + }, + { + "epoch": 4.71, + "learning_rate": 2.586950562473722e-06, + "loss": 0.0244, + "step": 15500 + }, + { + "epoch": 4.71, + "learning_rate": 2.5816072095380724e-06, + "loss": 0.0502, + "step": 15501 + }, + { + "epoch": 4.71, + "learning_rate": 2.576269332763431e-06, + "loss": 0.0099, + "step": 15502 + }, + { + "epoch": 4.71, + "learning_rate": 2.5709369323480555e-06, + "loss": 0.0134, + "step": 15503 + }, + { + "epoch": 4.71, + "learning_rate": 2.5656100084900377e-06, + "loss": 0.038, + "step": 15504 + }, + { + "epoch": 4.71, + "learning_rate": 2.560288561387269e-06, + "loss": 0.0052, + "step": 15505 + }, + { + "epoch": 4.71, + "learning_rate": 2.5549725912374084e-06, + "loss": 0.0388, + "step": 15506 + }, + { + "epoch": 4.71, + "learning_rate": 2.5496620982379644e-06, + "loss": 0.0215, + "step": 15507 + }, + { + "epoch": 4.71, + "learning_rate": 2.5443570825861626e-06, + "loss": 0.0274, + "step": 15508 + }, + { + "epoch": 4.71, + "learning_rate": 2.5390575444790786e-06, + "loss": 0.0425, + "step": 15509 + }, + { + "epoch": 4.71, + "learning_rate": 2.5337634841135713e-06, + "loss": 0.0073, + "step": 15510 + }, + { + "epoch": 4.71, + "learning_rate": 2.5284749016863173e-06, + "loss": 0.0199, + "step": 15511 + }, + { + "epoch": 4.71, + "learning_rate": 2.5231917973937755e-06, + "loss": 0.0348, + "step": 15512 + }, + { + "epoch": 4.71, + "learning_rate": 2.517914171432189e-06, + "loss": 0.0193, + "step": 15513 + }, + { + "epoch": 4.71, + "learning_rate": 2.512642023997602e-06, + "loss": 0.0236, + "step": 15514 + }, + { + "epoch": 4.71, + "learning_rate": 2.50737535528584e-06, + "loss": 0.0215, + "step": 15515 + }, + { + "epoch": 4.71, + "learning_rate": 2.5021141654925803e-06, + "loss": 0.0152, + "step": 15516 + }, + { + "epoch": 4.71, + "learning_rate": 2.496858454813233e-06, + "loss": 0.0, + "step": 15517 + }, + { + "epoch": 4.71, + "learning_rate": 2.491608223443059e-06, + "loss": 0.0176, + "step": 15518 + }, + { + "epoch": 4.71, + "learning_rate": 2.4863634715770854e-06, + "loss": 0.0191, + "step": 15519 + }, + { + "epoch": 4.71, + "learning_rate": 2.481124199410106e-06, + "loss": 0.0113, + "step": 15520 + }, + { + "epoch": 4.71, + "learning_rate": 2.4758904071367657e-06, + "loss": 0.0195, + "step": 15521 + }, + { + "epoch": 4.71, + "learning_rate": 2.4706620949514923e-06, + "loss": 0.0549, + "step": 15522 + }, + { + "epoch": 4.71, + "learning_rate": 2.4654392630485133e-06, + "loss": 0.036, + "step": 15523 + }, + { + "epoch": 4.71, + "learning_rate": 2.460221911621807e-06, + "loss": 0.0447, + "step": 15524 + }, + { + "epoch": 4.71, + "learning_rate": 2.455010040865202e-06, + "loss": 0.0672, + "step": 15525 + }, + { + "epoch": 4.71, + "learning_rate": 2.4498036509722927e-06, + "loss": 0.0248, + "step": 15526 + }, + { + "epoch": 4.71, + "learning_rate": 2.444602742136509e-06, + "loss": 0.0201, + "step": 15527 + }, + { + "epoch": 4.71, + "learning_rate": 2.4394073145510283e-06, + "loss": 0.0472, + "step": 15528 + }, + { + "epoch": 4.72, + "learning_rate": 2.4342173684088304e-06, + "loss": 0.0322, + "step": 15529 + }, + { + "epoch": 4.72, + "learning_rate": 2.4290329039027444e-06, + "loss": 0.0043, + "step": 15530 + }, + { + "epoch": 4.72, + "learning_rate": 2.423853921225316e-06, + "loss": 0.026, + "step": 15531 + }, + { + "epoch": 4.72, + "learning_rate": 2.418680420568975e-06, + "loss": 0.0175, + "step": 15532 + }, + { + "epoch": 4.72, + "learning_rate": 2.4135124021258667e-06, + "loss": 0.0342, + "step": 15533 + }, + { + "epoch": 4.72, + "learning_rate": 2.4083498660879717e-06, + "loss": 0.0389, + "step": 15534 + }, + { + "epoch": 4.72, + "learning_rate": 2.403192812647087e-06, + "loss": 0.045, + "step": 15535 + }, + { + "epoch": 4.72, + "learning_rate": 2.3980412419947413e-06, + "loss": 0.0224, + "step": 15536 + }, + { + "epoch": 4.72, + "learning_rate": 2.3928951543223494e-06, + "loss": 0.0337, + "step": 15537 + }, + { + "epoch": 4.72, + "learning_rate": 2.3877545498210413e-06, + "loss": 0.0314, + "step": 15538 + }, + { + "epoch": 4.72, + "learning_rate": 2.382619428681781e-06, + "loss": 0.0269, + "step": 15539 + }, + { + "epoch": 4.72, + "learning_rate": 2.3774897910953162e-06, + "loss": 0.0202, + "step": 15540 + }, + { + "epoch": 4.72, + "learning_rate": 2.3723656372522105e-06, + "loss": 0.0043, + "step": 15541 + }, + { + "epoch": 4.72, + "learning_rate": 2.3672469673427953e-06, + "loss": 0.0211, + "step": 15542 + }, + { + "epoch": 4.72, + "learning_rate": 2.362133781557252e-06, + "loss": 0.0214, + "step": 15543 + }, + { + "epoch": 4.72, + "learning_rate": 2.3570260800854777e-06, + "loss": 0.0222, + "step": 15544 + }, + { + "epoch": 4.72, + "learning_rate": 2.3519238631172223e-06, + "loss": 0.0205, + "step": 15545 + }, + { + "epoch": 4.72, + "learning_rate": 2.346827130842033e-06, + "loss": 0.0434, + "step": 15546 + }, + { + "epoch": 4.72, + "learning_rate": 2.3417358834492085e-06, + "loss": 0.0205, + "step": 15547 + }, + { + "epoch": 4.72, + "learning_rate": 2.336650121127931e-06, + "loss": 0.0344, + "step": 15548 + }, + { + "epoch": 4.72, + "learning_rate": 2.3315698440670496e-06, + "loss": 0.0193, + "step": 15549 + }, + { + "epoch": 4.72, + "learning_rate": 2.3264950524553127e-06, + "loss": 0.0321, + "step": 15550 + }, + { + "epoch": 4.72, + "learning_rate": 2.321425746481237e-06, + "loss": 0.0302, + "step": 15551 + }, + { + "epoch": 4.72, + "learning_rate": 2.3163619263331378e-06, + "loss": 0.0281, + "step": 15552 + }, + { + "epoch": 4.72, + "learning_rate": 2.311303592199132e-06, + "loss": 0.0257, + "step": 15553 + }, + { + "epoch": 4.72, + "learning_rate": 2.3062507442670685e-06, + "loss": 0.0337, + "step": 15554 + }, + { + "epoch": 4.72, + "learning_rate": 2.3012033827247145e-06, + "loss": 0.0155, + "step": 15555 + }, + { + "epoch": 4.72, + "learning_rate": 2.296161507759503e-06, + "loss": 0.0289, + "step": 15556 + }, + { + "epoch": 4.72, + "learning_rate": 2.2911251195587674e-06, + "loss": 0.0095, + "step": 15557 + }, + { + "epoch": 4.72, + "learning_rate": 2.2860942183095753e-06, + "loss": 0.0343, + "step": 15558 + }, + { + "epoch": 4.72, + "learning_rate": 2.2810688041988266e-06, + "loss": 0.0281, + "step": 15559 + }, + { + "epoch": 4.72, + "learning_rate": 2.276048877413189e-06, + "loss": 0.0145, + "step": 15560 + }, + { + "epoch": 4.72, + "learning_rate": 2.2710344381391134e-06, + "loss": 0.0341, + "step": 15561 + }, + { + "epoch": 4.73, + "learning_rate": 2.2660254865629333e-06, + "loss": 0.017, + "step": 15562 + }, + { + "epoch": 4.73, + "learning_rate": 2.261022022870651e-06, + "loss": 0.0272, + "step": 15563 + }, + { + "epoch": 4.73, + "learning_rate": 2.256024047248184e-06, + "loss": 0.0548, + "step": 15564 + }, + { + "epoch": 4.73, + "learning_rate": 2.25103155988115e-06, + "loss": 0.0292, + "step": 15565 + }, + { + "epoch": 4.73, + "learning_rate": 2.2460445609550348e-06, + "loss": 0.013, + "step": 15566 + }, + { + "epoch": 4.73, + "learning_rate": 2.2410630506550733e-06, + "loss": 0.02, + "step": 15567 + }, + { + "epoch": 4.73, + "learning_rate": 2.236087029166317e-06, + "loss": 0.0253, + "step": 15568 + }, + { + "epoch": 4.73, + "learning_rate": 2.2311164966736184e-06, + "loss": 0.0335, + "step": 15569 + }, + { + "epoch": 4.73, + "learning_rate": 2.226151453361613e-06, + "loss": 0.0236, + "step": 15570 + }, + { + "epoch": 4.73, + "learning_rate": 2.221191899414737e-06, + "loss": 0.0077, + "step": 15571 + }, + { + "epoch": 4.73, + "learning_rate": 2.2162378350172094e-06, + "loss": 0.0447, + "step": 15572 + }, + { + "epoch": 4.73, + "learning_rate": 2.2112892603530996e-06, + "loss": 0.0124, + "step": 15573 + }, + { + "epoch": 4.73, + "learning_rate": 2.206346175606194e-06, + "loss": 0.0234, + "step": 15574 + }, + { + "epoch": 4.73, + "learning_rate": 2.2014085809601288e-06, + "loss": 0.0205, + "step": 15575 + }, + { + "epoch": 4.73, + "learning_rate": 2.1964764765983244e-06, + "loss": 0.0219, + "step": 15576 + }, + { + "epoch": 4.73, + "learning_rate": 2.1915498627039673e-06, + "loss": 0.0306, + "step": 15577 + }, + { + "epoch": 4.73, + "learning_rate": 2.1866287394601114e-06, + "loss": 0.053, + "step": 15578 + }, + { + "epoch": 4.73, + "learning_rate": 2.181713107049543e-06, + "loss": 0.0211, + "step": 15579 + }, + { + "epoch": 4.73, + "learning_rate": 2.17680296565485e-06, + "loss": 0.0272, + "step": 15580 + }, + { + "epoch": 4.73, + "learning_rate": 2.17189831545842e-06, + "loss": 0.0248, + "step": 15581 + }, + { + "epoch": 4.73, + "learning_rate": 2.1669991566424907e-06, + "loss": 0.0293, + "step": 15582 + }, + { + "epoch": 4.73, + "learning_rate": 2.162105489388999e-06, + "loss": 0.0416, + "step": 15583 + }, + { + "epoch": 4.73, + "learning_rate": 2.1572173138797834e-06, + "loss": 0.0052, + "step": 15584 + }, + { + "epoch": 4.73, + "learning_rate": 2.1523346302963986e-06, + "loss": 0.0264, + "step": 15585 + }, + { + "epoch": 4.73, + "learning_rate": 2.147457438820216e-06, + "loss": 0.0133, + "step": 15586 + }, + { + "epoch": 4.73, + "learning_rate": 2.1425857396324076e-06, + "loss": 0.0143, + "step": 15587 + }, + { + "epoch": 4.73, + "learning_rate": 2.137719532913945e-06, + "loss": 0.0134, + "step": 15588 + }, + { + "epoch": 4.73, + "learning_rate": 2.1328588188456343e-06, + "loss": 0.0113, + "step": 15589 + }, + { + "epoch": 4.73, + "learning_rate": 2.12800359760798e-06, + "loss": 0.0183, + "step": 15590 + }, + { + "epoch": 4.73, + "learning_rate": 2.1231538693813555e-06, + "loss": 0.016, + "step": 15591 + }, + { + "epoch": 4.73, + "learning_rate": 2.118309634345916e-06, + "loss": 0.0186, + "step": 15592 + }, + { + "epoch": 4.73, + "learning_rate": 2.113470892681618e-06, + "loss": 0.0308, + "step": 15593 + }, + { + "epoch": 4.73, + "learning_rate": 2.1086376445682174e-06, + "loss": 0.014, + "step": 15594 + }, + { + "epoch": 4.74, + "learning_rate": 2.103809890185221e-06, + "loss": 0.0342, + "step": 15595 + }, + { + "epoch": 4.74, + "learning_rate": 2.098987629712001e-06, + "loss": 0.0196, + "step": 15596 + }, + { + "epoch": 4.74, + "learning_rate": 2.0941708633276655e-06, + "loss": 0.0349, + "step": 15597 + }, + { + "epoch": 4.74, + "learning_rate": 2.0893595912111538e-06, + "loss": 0.0217, + "step": 15598 + }, + { + "epoch": 4.74, + "learning_rate": 2.08455381354119e-06, + "loss": 0.028, + "step": 15599 + }, + { + "epoch": 4.74, + "learning_rate": 2.0797535304962975e-06, + "loss": 0.025, + "step": 15600 + }, + { + "epoch": 4.74, + "learning_rate": 2.074958742254784e-06, + "loss": 0.0247, + "step": 15601 + }, + { + "epoch": 4.74, + "learning_rate": 2.0701694489947574e-06, + "loss": 0.0263, + "step": 15602 + }, + { + "epoch": 4.74, + "learning_rate": 2.065385650894158e-06, + "loss": 0.0145, + "step": 15603 + }, + { + "epoch": 4.74, + "learning_rate": 2.060607348130644e-06, + "loss": 0.0434, + "step": 15604 + }, + { + "epoch": 4.74, + "learning_rate": 2.055834540881757e-06, + "loss": 0.0268, + "step": 15605 + }, + { + "epoch": 4.74, + "learning_rate": 2.0510672293247543e-06, + "loss": 0.0308, + "step": 15606 + }, + { + "epoch": 4.74, + "learning_rate": 2.046305413636762e-06, + "loss": 0.028, + "step": 15607 + }, + { + "epoch": 4.74, + "learning_rate": 2.0415490939946542e-06, + "loss": 0.0257, + "step": 15608 + }, + { + "epoch": 4.74, + "learning_rate": 2.0367982705751074e-06, + "loss": 0.0238, + "step": 15609 + }, + { + "epoch": 4.74, + "learning_rate": 2.032052943554613e-06, + "loss": 0.0254, + "step": 15610 + }, + { + "epoch": 4.74, + "learning_rate": 2.02731311310943e-06, + "loss": 0.0166, + "step": 15611 + }, + { + "epoch": 4.74, + "learning_rate": 2.0225787794156345e-06, + "loss": 0.0244, + "step": 15612 + }, + { + "epoch": 4.74, + "learning_rate": 2.017849942649102e-06, + "loss": 0.0184, + "step": 15613 + }, + { + "epoch": 4.74, + "learning_rate": 2.0131266029855097e-06, + "loss": 0.0346, + "step": 15614 + }, + { + "epoch": 4.74, + "learning_rate": 2.0084087606002665e-06, + "loss": 0.0171, + "step": 15615 + }, + { + "epoch": 4.74, + "learning_rate": 2.0036964156686654e-06, + "loss": 0.0171, + "step": 15616 + }, + { + "epoch": 4.74, + "learning_rate": 1.9989895683657498e-06, + "loss": 0.0276, + "step": 15617 + }, + { + "epoch": 4.74, + "learning_rate": 1.9942882188663635e-06, + "loss": 0.0203, + "step": 15618 + }, + { + "epoch": 4.74, + "learning_rate": 1.9895923673451498e-06, + "loss": 0.0179, + "step": 15619 + }, + { + "epoch": 4.74, + "learning_rate": 1.984902013976536e-06, + "loss": 0.0257, + "step": 15620 + }, + { + "epoch": 4.74, + "learning_rate": 1.980217158934766e-06, + "loss": 0.018, + "step": 15621 + }, + { + "epoch": 4.74, + "learning_rate": 1.9755378023938674e-06, + "loss": 0.0158, + "step": 15622 + }, + { + "epoch": 4.74, + "learning_rate": 1.970863944527651e-06, + "loss": 0.0143, + "step": 15623 + }, + { + "epoch": 4.74, + "learning_rate": 1.9661955855097443e-06, + "loss": 0.0256, + "step": 15624 + }, + { + "epoch": 4.74, + "learning_rate": 1.9615327255135927e-06, + "loss": 0.0073, + "step": 15625 + }, + { + "epoch": 4.74, + "learning_rate": 1.956875364712357e-06, + "loss": 0.0396, + "step": 15626 + }, + { + "epoch": 4.74, + "learning_rate": 1.9522235032790655e-06, + "loss": 0.033, + "step": 15627 + }, + { + "epoch": 4.75, + "learning_rate": 1.947577141386547e-06, + "loss": 0.0258, + "step": 15628 + }, + { + "epoch": 4.75, + "learning_rate": 1.9429362792073465e-06, + "loss": 0.013, + "step": 15629 + }, + { + "epoch": 4.75, + "learning_rate": 1.938300916913926e-06, + "loss": 0.0169, + "step": 15630 + }, + { + "epoch": 4.75, + "learning_rate": 1.933671054678415e-06, + "loss": 0.0157, + "step": 15631 + }, + { + "epoch": 4.75, + "learning_rate": 1.9290466926728254e-06, + "loss": 0.0102, + "step": 15632 + }, + { + "epoch": 4.75, + "learning_rate": 1.9244278310689366e-06, + "loss": 0.0135, + "step": 15633 + }, + { + "epoch": 4.75, + "learning_rate": 1.919814470038328e-06, + "loss": 0.0265, + "step": 15634 + }, + { + "epoch": 4.75, + "learning_rate": 1.9152066097523788e-06, + "loss": 0.023, + "step": 15635 + }, + { + "epoch": 4.75, + "learning_rate": 1.9106042503822363e-06, + "loss": 0.0009, + "step": 15636 + }, + { + "epoch": 4.75, + "learning_rate": 1.9060073920988961e-06, + "loss": 0.024, + "step": 15637 + }, + { + "epoch": 4.75, + "learning_rate": 1.9014160350730722e-06, + "loss": 0.0172, + "step": 15638 + }, + { + "epoch": 4.75, + "learning_rate": 1.8968301794753615e-06, + "loss": 0.0382, + "step": 15639 + }, + { + "epoch": 4.75, + "learning_rate": 1.892249825476111e-06, + "loss": 0.0186, + "step": 15640 + }, + { + "epoch": 4.75, + "learning_rate": 1.8876749732454345e-06, + "loss": 0.0246, + "step": 15641 + }, + { + "epoch": 4.75, + "learning_rate": 1.8831056229533126e-06, + "loss": 0.0268, + "step": 15642 + }, + { + "epoch": 4.75, + "learning_rate": 1.8785417747694598e-06, + "loss": 0.0254, + "step": 15643 + }, + { + "epoch": 4.75, + "learning_rate": 1.8739834288634237e-06, + "loss": 0.0044, + "step": 15644 + }, + { + "epoch": 4.75, + "learning_rate": 1.8694305854045355e-06, + "loss": 0.0134, + "step": 15645 + }, + { + "epoch": 4.75, + "learning_rate": 1.8648832445618934e-06, + "loss": 0.0315, + "step": 15646 + }, + { + "epoch": 4.75, + "learning_rate": 1.8603414065044452e-06, + "loss": 0.011, + "step": 15647 + }, + { + "epoch": 4.75, + "learning_rate": 1.8558050714009066e-06, + "loss": 0.0041, + "step": 15648 + }, + { + "epoch": 4.75, + "learning_rate": 1.8512742394197588e-06, + "loss": 0.0176, + "step": 15649 + }, + { + "epoch": 4.75, + "learning_rate": 1.8467489107293509e-06, + "loss": 0.0457, + "step": 15650 + }, + { + "epoch": 4.75, + "learning_rate": 1.8422290854977484e-06, + "loss": 0.0271, + "step": 15651 + }, + { + "epoch": 4.75, + "learning_rate": 1.8377147638928668e-06, + "loss": 0.0361, + "step": 15652 + }, + { + "epoch": 4.75, + "learning_rate": 1.8332059460824223e-06, + "loss": 0.0315, + "step": 15653 + }, + { + "epoch": 4.75, + "learning_rate": 1.8287026322338471e-06, + "loss": 0.0286, + "step": 15654 + }, + { + "epoch": 4.75, + "learning_rate": 1.8242048225144912e-06, + "loss": 0.0276, + "step": 15655 + }, + { + "epoch": 4.75, + "learning_rate": 1.8197125170913873e-06, + "loss": 0.0426, + "step": 15656 + }, + { + "epoch": 4.75, + "learning_rate": 1.8152257161314355e-06, + "loss": 0.0145, + "step": 15657 + }, + { + "epoch": 4.75, + "learning_rate": 1.8107444198013022e-06, + "loss": 0.0337, + "step": 15658 + }, + { + "epoch": 4.75, + "learning_rate": 1.8062686282674376e-06, + "loss": 0.0283, + "step": 15659 + }, + { + "epoch": 4.75, + "learning_rate": 1.8017983416961424e-06, + "loss": 0.0312, + "step": 15660 + }, + { + "epoch": 4.76, + "learning_rate": 1.7973335602534334e-06, + "loss": 0.0221, + "step": 15661 + }, + { + "epoch": 4.76, + "learning_rate": 1.7928742841051946e-06, + "loss": 0.0274, + "step": 15662 + }, + { + "epoch": 4.76, + "learning_rate": 1.7884205134170437e-06, + "loss": 0.0317, + "step": 15663 + }, + { + "epoch": 4.76, + "learning_rate": 1.7839722483544483e-06, + "loss": 0.0039, + "step": 15664 + }, + { + "epoch": 4.76, + "learning_rate": 1.7795294890826594e-06, + "loss": 0.0195, + "step": 15665 + }, + { + "epoch": 4.76, + "learning_rate": 1.7750922357666786e-06, + "loss": 0.0208, + "step": 15666 + }, + { + "epoch": 4.76, + "learning_rate": 1.7706604885713738e-06, + "loss": 0.0342, + "step": 15667 + }, + { + "epoch": 4.76, + "learning_rate": 1.76623424766133e-06, + "loss": 0.0132, + "step": 15668 + }, + { + "epoch": 4.76, + "learning_rate": 1.7618135132010158e-06, + "loss": 0.0424, + "step": 15669 + }, + { + "epoch": 4.76, + "learning_rate": 1.7573982853546164e-06, + "loss": 0.0513, + "step": 15670 + }, + { + "epoch": 4.76, + "learning_rate": 1.7529885642861508e-06, + "loss": 0.0458, + "step": 15671 + }, + { + "epoch": 4.76, + "learning_rate": 1.748584350159421e-06, + "loss": 0.0398, + "step": 15672 + }, + { + "epoch": 4.76, + "learning_rate": 1.744185643138063e-06, + "loss": 0.0404, + "step": 15673 + }, + { + "epoch": 4.76, + "learning_rate": 1.7397924433854293e-06, + "loss": 0.0291, + "step": 15674 + }, + { + "epoch": 4.76, + "learning_rate": 1.7354047510647395e-06, + "loss": 0.0047, + "step": 15675 + }, + { + "epoch": 4.76, + "learning_rate": 1.7310225663389965e-06, + "loss": 0.0357, + "step": 15676 + }, + { + "epoch": 4.76, + "learning_rate": 1.7266458893709535e-06, + "loss": 0.0125, + "step": 15677 + }, + { + "epoch": 4.76, + "learning_rate": 1.7222747203232134e-06, + "loss": 0.0129, + "step": 15678 + }, + { + "epoch": 4.76, + "learning_rate": 1.7179090593581302e-06, + "loss": 0.0251, + "step": 15679 + }, + { + "epoch": 4.76, + "learning_rate": 1.7135489066379237e-06, + "loss": 0.0169, + "step": 15680 + }, + { + "epoch": 4.76, + "learning_rate": 1.7091942623245147e-06, + "loss": 0.0151, + "step": 15681 + }, + { + "epoch": 4.76, + "learning_rate": 1.7048451265796737e-06, + "loss": 0.0304, + "step": 15682 + }, + { + "epoch": 4.76, + "learning_rate": 1.7005014995649713e-06, + "loss": 0.0297, + "step": 15683 + }, + { + "epoch": 4.76, + "learning_rate": 1.6961633814417453e-06, + "loss": 0.0179, + "step": 15684 + }, + { + "epoch": 4.76, + "learning_rate": 1.69183077237115e-06, + "loss": 0.0328, + "step": 15685 + }, + { + "epoch": 4.76, + "learning_rate": 1.68750367251414e-06, + "loss": 0.0312, + "step": 15686 + }, + { + "epoch": 4.76, + "learning_rate": 1.6831820820314533e-06, + "loss": 0.0382, + "step": 15687 + }, + { + "epoch": 4.76, + "learning_rate": 1.6788660010836118e-06, + "loss": 0.0361, + "step": 15688 + }, + { + "epoch": 4.76, + "learning_rate": 1.6745554298309372e-06, + "loss": 0.025, + "step": 15689 + }, + { + "epoch": 4.76, + "learning_rate": 1.6702503684335844e-06, + "loss": 0.0332, + "step": 15690 + }, + { + "epoch": 4.76, + "learning_rate": 1.6659508170514424e-06, + "loss": 0.0276, + "step": 15691 + }, + { + "epoch": 4.76, + "learning_rate": 1.6616567758442501e-06, + "loss": 0.0085, + "step": 15692 + }, + { + "epoch": 4.76, + "learning_rate": 1.6573682449715131e-06, + "loss": 0.0118, + "step": 15693 + }, + { + "epoch": 4.77, + "learning_rate": 1.6530852245925375e-06, + "loss": 0.0284, + "step": 15694 + }, + { + "epoch": 4.77, + "learning_rate": 1.648807714866396e-06, + "loss": 0.0513, + "step": 15695 + }, + { + "epoch": 4.77, + "learning_rate": 1.6445357159520444e-06, + "loss": 0.0217, + "step": 15696 + }, + { + "epoch": 4.77, + "learning_rate": 1.640269228008123e-06, + "loss": 0.0233, + "step": 15697 + }, + { + "epoch": 4.77, + "learning_rate": 1.6360082511931382e-06, + "loss": 0.0224, + "step": 15698 + }, + { + "epoch": 4.77, + "learning_rate": 1.63175278566538e-06, + "loss": 0.0343, + "step": 15699 + }, + { + "epoch": 4.77, + "learning_rate": 1.627502831582922e-06, + "loss": 0.0129, + "step": 15700 + }, + { + "epoch": 4.77, + "learning_rate": 1.6232583891036378e-06, + "loss": 0.0107, + "step": 15701 + }, + { + "epoch": 4.77, + "learning_rate": 1.6190194583851846e-06, + "loss": 0.0152, + "step": 15702 + }, + { + "epoch": 4.77, + "learning_rate": 1.6147860395850364e-06, + "loss": 0.0185, + "step": 15703 + }, + { + "epoch": 4.77, + "learning_rate": 1.610558132860451e-06, + "loss": 0.022, + "step": 15704 + }, + { + "epoch": 4.77, + "learning_rate": 1.606335738368486e-06, + "loss": 0.022, + "step": 15705 + }, + { + "epoch": 4.77, + "learning_rate": 1.6021188562659826e-06, + "loss": 0.0145, + "step": 15706 + }, + { + "epoch": 4.77, + "learning_rate": 1.597907486709582e-06, + "loss": 0.0237, + "step": 15707 + }, + { + "epoch": 4.77, + "learning_rate": 1.5937016298557593e-06, + "loss": 0.0168, + "step": 15708 + }, + { + "epoch": 4.77, + "learning_rate": 1.5895012858606893e-06, + "loss": 0.0118, + "step": 15709 + }, + { + "epoch": 4.77, + "learning_rate": 1.5853064548804639e-06, + "loss": 0.0171, + "step": 15710 + }, + { + "epoch": 4.77, + "learning_rate": 1.5811171370708753e-06, + "loss": 0.0356, + "step": 15711 + }, + { + "epoch": 4.77, + "learning_rate": 1.5769333325875487e-06, + "loss": 0.0412, + "step": 15712 + }, + { + "epoch": 4.77, + "learning_rate": 1.5727550415858936e-06, + "loss": 0.0337, + "step": 15713 + }, + { + "epoch": 4.77, + "learning_rate": 1.5685822642211355e-06, + "loss": 0.0122, + "step": 15714 + }, + { + "epoch": 4.77, + "learning_rate": 1.564415000648267e-06, + "loss": 0.0093, + "step": 15715 + }, + { + "epoch": 4.77, + "learning_rate": 1.5602532510221144e-06, + "loss": 0.018, + "step": 15716 + }, + { + "epoch": 4.77, + "learning_rate": 1.5560970154972542e-06, + "loss": 0.0295, + "step": 15717 + }, + { + "epoch": 4.77, + "learning_rate": 1.5519462942280625e-06, + "loss": 0.0097, + "step": 15718 + }, + { + "epoch": 4.77, + "learning_rate": 1.5478010873687662e-06, + "loss": 0.0198, + "step": 15719 + }, + { + "epoch": 4.77, + "learning_rate": 1.543661395073309e-06, + "loss": 0.0297, + "step": 15720 + }, + { + "epoch": 4.77, + "learning_rate": 1.539527217495501e-06, + "loss": 0.0331, + "step": 15721 + }, + { + "epoch": 4.77, + "learning_rate": 1.5353985547889026e-06, + "loss": 0.0223, + "step": 15722 + }, + { + "epoch": 4.77, + "learning_rate": 1.5312754071068745e-06, + "loss": 0.0194, + "step": 15723 + }, + { + "epoch": 4.77, + "learning_rate": 1.5271577746025776e-06, + "loss": 0.0222, + "step": 15724 + }, + { + "epoch": 4.77, + "learning_rate": 1.5230456574289728e-06, + "loss": 0.0135, + "step": 15725 + }, + { + "epoch": 4.77, + "learning_rate": 1.5189390557388381e-06, + "loss": 0.0397, + "step": 15726 + }, + { + "epoch": 4.78, + "learning_rate": 1.5148379696846845e-06, + "loss": 0.0232, + "step": 15727 + }, + { + "epoch": 4.78, + "learning_rate": 1.5107423994188738e-06, + "loss": 0.0419, + "step": 15728 + }, + { + "epoch": 4.78, + "learning_rate": 1.5066523450935342e-06, + "loss": 0.0126, + "step": 15729 + }, + { + "epoch": 4.78, + "learning_rate": 1.5025678068606273e-06, + "loss": 0.0428, + "step": 15730 + }, + { + "epoch": 4.78, + "learning_rate": 1.4984887848718486e-06, + "loss": 0.0436, + "step": 15731 + }, + { + "epoch": 4.78, + "learning_rate": 1.4944152792787267e-06, + "loss": 0.0332, + "step": 15732 + }, + { + "epoch": 4.78, + "learning_rate": 1.4903472902326075e-06, + "loss": 0.0241, + "step": 15733 + }, + { + "epoch": 4.78, + "learning_rate": 1.4862848178845698e-06, + "loss": 0.0289, + "step": 15734 + }, + { + "epoch": 4.78, + "learning_rate": 1.4822278623855432e-06, + "loss": 0.019, + "step": 15735 + }, + { + "epoch": 4.78, + "learning_rate": 1.4781764238862237e-06, + "loss": 0.0173, + "step": 15736 + }, + { + "epoch": 4.78, + "learning_rate": 1.4741305025371242e-06, + "loss": 0.0212, + "step": 15737 + }, + { + "epoch": 4.78, + "learning_rate": 1.4700900984885078e-06, + "loss": 0.0541, + "step": 15738 + }, + { + "epoch": 4.78, + "learning_rate": 1.4660552118904877e-06, + "loss": 0.0178, + "step": 15739 + }, + { + "epoch": 4.78, + "learning_rate": 1.462025842892961e-06, + "loss": 0.0107, + "step": 15740 + }, + { + "epoch": 4.78, + "learning_rate": 1.458001991645591e-06, + "loss": 0.0275, + "step": 15741 + }, + { + "epoch": 4.78, + "learning_rate": 1.4539836582978415e-06, + "loss": 0.0265, + "step": 15742 + }, + { + "epoch": 4.78, + "learning_rate": 1.4499708429989765e-06, + "loss": 0.0307, + "step": 15743 + }, + { + "epoch": 4.78, + "learning_rate": 1.4459635458980934e-06, + "loss": 0.02, + "step": 15744 + }, + { + "epoch": 4.78, + "learning_rate": 1.4419617671440397e-06, + "loss": 0.0259, + "step": 15745 + }, + { + "epoch": 4.78, + "learning_rate": 1.4379655068854468e-06, + "loss": 0.0348, + "step": 15746 + }, + { + "epoch": 4.78, + "learning_rate": 1.4339747652707956e-06, + "loss": 0.0292, + "step": 15747 + }, + { + "epoch": 4.78, + "learning_rate": 1.429989542448301e-06, + "loss": 0.0312, + "step": 15748 + }, + { + "epoch": 4.78, + "learning_rate": 1.426009838566028e-06, + "loss": 0.0324, + "step": 15749 + }, + { + "epoch": 4.78, + "learning_rate": 1.4220356537717914e-06, + "loss": 0.0369, + "step": 15750 + }, + { + "epoch": 4.78, + "learning_rate": 1.41806698821324e-06, + "loss": 0.0378, + "step": 15751 + }, + { + "epoch": 4.78, + "learning_rate": 1.4141038420377892e-06, + "loss": 0.0263, + "step": 15752 + }, + { + "epoch": 4.78, + "learning_rate": 1.410146215392638e-06, + "loss": 0.0118, + "step": 15753 + }, + { + "epoch": 4.78, + "learning_rate": 1.406194108424835e-06, + "loss": 0.0215, + "step": 15754 + }, + { + "epoch": 4.78, + "learning_rate": 1.4022475212811635e-06, + "loss": 0.0322, + "step": 15755 + }, + { + "epoch": 4.78, + "learning_rate": 1.3983064541082389e-06, + "loss": 0.0221, + "step": 15756 + }, + { + "epoch": 4.78, + "learning_rate": 1.3943709070524612e-06, + "loss": 0.0361, + "step": 15757 + }, + { + "epoch": 4.78, + "learning_rate": 1.3904408802600297e-06, + "loss": 0.0138, + "step": 15758 + }, + { + "epoch": 4.78, + "learning_rate": 1.3865163738769114e-06, + "loss": 0.0134, + "step": 15759 + }, + { + "epoch": 4.79, + "learning_rate": 1.3825973880489062e-06, + "loss": 0.0153, + "step": 15760 + }, + { + "epoch": 4.79, + "learning_rate": 1.3786839229215974e-06, + "loss": 0.033, + "step": 15761 + }, + { + "epoch": 4.79, + "learning_rate": 1.3747759786403523e-06, + "loss": 0.0265, + "step": 15762 + }, + { + "epoch": 4.79, + "learning_rate": 1.370873555350338e-06, + "loss": 0.0155, + "step": 15763 + }, + { + "epoch": 4.79, + "learning_rate": 1.3669766531965054e-06, + "loss": 0.0152, + "step": 15764 + }, + { + "epoch": 4.79, + "learning_rate": 1.3630852723236552e-06, + "loss": 0.0194, + "step": 15765 + }, + { + "epoch": 4.79, + "learning_rate": 1.3591994128762884e-06, + "loss": 0.0425, + "step": 15766 + }, + { + "epoch": 4.79, + "learning_rate": 1.3553190749988063e-06, + "loss": 0.042, + "step": 15767 + }, + { + "epoch": 4.79, + "learning_rate": 1.3514442588352936e-06, + "loss": 0.0222, + "step": 15768 + }, + { + "epoch": 4.79, + "learning_rate": 1.347574964529735e-06, + "loss": 0.0242, + "step": 15769 + }, + { + "epoch": 4.79, + "learning_rate": 1.3437111922258326e-06, + "loss": 0.0226, + "step": 15770 + }, + { + "epoch": 4.79, + "learning_rate": 1.3398529420671378e-06, + "loss": 0.0527, + "step": 15771 + }, + { + "epoch": 4.79, + "learning_rate": 1.3360002141969694e-06, + "loss": 0.019, + "step": 15772 + }, + { + "epoch": 4.79, + "learning_rate": 1.3321530087584298e-06, + "loss": 0.0254, + "step": 15773 + }, + { + "epoch": 4.79, + "learning_rate": 1.3283113258944545e-06, + "loss": 0.0535, + "step": 15774 + }, + { + "epoch": 4.79, + "learning_rate": 1.3244751657477293e-06, + "loss": 0.0296, + "step": 15775 + }, + { + "epoch": 4.79, + "learning_rate": 1.320644528460757e-06, + "loss": 0.021, + "step": 15776 + }, + { + "epoch": 4.79, + "learning_rate": 1.3168194141758571e-06, + "loss": 0.0276, + "step": 15777 + }, + { + "epoch": 4.79, + "learning_rate": 1.312999823035099e-06, + "loss": 0.0176, + "step": 15778 + }, + { + "epoch": 4.79, + "learning_rate": 1.3091857551803697e-06, + "loss": 0.0124, + "step": 15779 + }, + { + "epoch": 4.79, + "learning_rate": 1.3053772107533555e-06, + "loss": 0.0253, + "step": 15780 + }, + { + "epoch": 4.79, + "learning_rate": 1.3015741898955434e-06, + "loss": 0.0284, + "step": 15781 + }, + { + "epoch": 4.79, + "learning_rate": 1.2977766927482037e-06, + "loss": 0.0231, + "step": 15782 + }, + { + "epoch": 4.79, + "learning_rate": 1.29398471945239e-06, + "loss": 0.0398, + "step": 15783 + }, + { + "epoch": 4.79, + "learning_rate": 1.2901982701489567e-06, + "loss": 0.0335, + "step": 15784 + }, + { + "epoch": 4.79, + "learning_rate": 1.286417344978574e-06, + "loss": 0.023, + "step": 15785 + }, + { + "epoch": 4.79, + "learning_rate": 1.2826419440816804e-06, + "loss": 0.0259, + "step": 15786 + }, + { + "epoch": 4.79, + "learning_rate": 1.2788720675985465e-06, + "loss": 0.0257, + "step": 15787 + }, + { + "epoch": 4.79, + "learning_rate": 1.275107715669177e-06, + "loss": 0.0202, + "step": 15788 + }, + { + "epoch": 4.79, + "learning_rate": 1.2713488884334267e-06, + "loss": 0.0259, + "step": 15789 + }, + { + "epoch": 4.79, + "learning_rate": 1.2675955860309339e-06, + "loss": 0.0141, + "step": 15790 + }, + { + "epoch": 4.79, + "learning_rate": 1.263847808601104e-06, + "loss": 0.0323, + "step": 15791 + }, + { + "epoch": 4.79, + "learning_rate": 1.2601055562831752e-06, + "loss": 0.0267, + "step": 15792 + }, + { + "epoch": 4.8, + "learning_rate": 1.25636882921612e-06, + "loss": 0.0594, + "step": 15793 + }, + { + "epoch": 4.8, + "learning_rate": 1.252637627538794e-06, + "loss": 0.009, + "step": 15794 + }, + { + "epoch": 4.8, + "learning_rate": 1.248911951389786e-06, + "loss": 0.0303, + "step": 15795 + }, + { + "epoch": 4.8, + "learning_rate": 1.2451918009074858e-06, + "loss": 0.0192, + "step": 15796 + }, + { + "epoch": 4.8, + "learning_rate": 1.2414771762300824e-06, + "loss": 0.0309, + "step": 15797 + }, + { + "epoch": 4.8, + "learning_rate": 1.2377680774955822e-06, + "loss": 0.0207, + "step": 15798 + }, + { + "epoch": 4.8, + "learning_rate": 1.2340645048417586e-06, + "loss": 0.0209, + "step": 15799 + }, + { + "epoch": 4.8, + "learning_rate": 1.230366458406168e-06, + "loss": 0.0304, + "step": 15800 + }, + { + "epoch": 4.8, + "learning_rate": 1.226673938326217e-06, + "loss": 0.0296, + "step": 15801 + }, + { + "epoch": 4.8, + "learning_rate": 1.222986944739046e-06, + "loss": 0.0418, + "step": 15802 + }, + { + "epoch": 4.8, + "learning_rate": 1.219305477781629e-06, + "loss": 0.0408, + "step": 15803 + }, + { + "epoch": 4.8, + "learning_rate": 1.2156295375907233e-06, + "loss": 0.0296, + "step": 15804 + }, + { + "epoch": 4.8, + "learning_rate": 1.2119591243028526e-06, + "loss": 0.0171, + "step": 15805 + }, + { + "epoch": 4.8, + "learning_rate": 1.208294238054408e-06, + "loss": 0.0308, + "step": 15806 + }, + { + "epoch": 4.8, + "learning_rate": 1.2046348789814808e-06, + "loss": 0.0263, + "step": 15807 + }, + { + "epoch": 4.8, + "learning_rate": 1.2009810472200621e-06, + "loss": 0.0207, + "step": 15808 + }, + { + "epoch": 4.8, + "learning_rate": 1.1973327429058265e-06, + "loss": 0.0485, + "step": 15809 + }, + { + "epoch": 4.8, + "learning_rate": 1.193689966174316e-06, + "loss": 0.0443, + "step": 15810 + }, + { + "epoch": 4.8, + "learning_rate": 1.1900527171608553e-06, + "loss": 0.0228, + "step": 15811 + }, + { + "epoch": 4.8, + "learning_rate": 1.1864209960005532e-06, + "loss": 0.038, + "step": 15812 + }, + { + "epoch": 4.8, + "learning_rate": 1.1827948028283352e-06, + "loss": 0.0135, + "step": 15813 + }, + { + "epoch": 4.8, + "learning_rate": 1.1791741377788599e-06, + "loss": 0.0093, + "step": 15814 + }, + { + "epoch": 4.8, + "learning_rate": 1.17555900098667e-06, + "loss": 0.0088, + "step": 15815 + }, + { + "epoch": 4.8, + "learning_rate": 1.1719493925860413e-06, + "loss": 0.0171, + "step": 15816 + }, + { + "epoch": 4.8, + "learning_rate": 1.1683453127110498e-06, + "loss": 0.0435, + "step": 15817 + }, + { + "epoch": 4.8, + "learning_rate": 1.1647467614955885e-06, + "loss": 0.0275, + "step": 15818 + }, + { + "epoch": 4.8, + "learning_rate": 1.1611537390733338e-06, + "loss": 0.0164, + "step": 15819 + }, + { + "epoch": 4.8, + "learning_rate": 1.1575662455777457e-06, + "loss": 0.024, + "step": 15820 + }, + { + "epoch": 4.8, + "learning_rate": 1.153984281142084e-06, + "loss": 0.0257, + "step": 15821 + }, + { + "epoch": 4.8, + "learning_rate": 1.1504078458994425e-06, + "loss": 0.032, + "step": 15822 + }, + { + "epoch": 4.8, + "learning_rate": 1.1468369399826315e-06, + "loss": 0.027, + "step": 15823 + }, + { + "epoch": 4.8, + "learning_rate": 1.1432715635243284e-06, + "loss": 0.0253, + "step": 15824 + }, + { + "epoch": 4.8, + "learning_rate": 1.139711716656977e-06, + "loss": 0.035, + "step": 15825 + }, + { + "epoch": 4.81, + "learning_rate": 1.1361573995127882e-06, + "loss": 0.0219, + "step": 15826 + }, + { + "epoch": 4.81, + "learning_rate": 1.1326086122238232e-06, + "loss": 0.0061, + "step": 15827 + }, + { + "epoch": 4.81, + "learning_rate": 1.129065354921893e-06, + "loss": 0.0219, + "step": 15828 + }, + { + "epoch": 4.81, + "learning_rate": 1.125527627738626e-06, + "loss": 0.0541, + "step": 15829 + }, + { + "epoch": 4.81, + "learning_rate": 1.1219954308054336e-06, + "loss": 0.0369, + "step": 15830 + }, + { + "epoch": 4.81, + "learning_rate": 1.118468764253544e-06, + "loss": 0.0517, + "step": 15831 + }, + { + "epoch": 4.81, + "learning_rate": 1.1149476282139358e-06, + "loss": 0.0377, + "step": 15832 + }, + { + "epoch": 4.81, + "learning_rate": 1.1114320228174378e-06, + "loss": 0.0196, + "step": 15833 + }, + { + "epoch": 4.81, + "learning_rate": 1.1079219481945957e-06, + "loss": 0.0229, + "step": 15834 + }, + { + "epoch": 4.81, + "learning_rate": 1.1044174044758546e-06, + "loss": 0.0132, + "step": 15835 + }, + { + "epoch": 4.81, + "learning_rate": 1.1009183917913778e-06, + "loss": 0.0263, + "step": 15836 + }, + { + "epoch": 4.81, + "learning_rate": 1.0974249102711274e-06, + "loss": 0.015, + "step": 15837 + }, + { + "epoch": 4.81, + "learning_rate": 1.0939369600449e-06, + "loss": 0.0144, + "step": 15838 + }, + { + "epoch": 4.81, + "learning_rate": 1.0904545412422249e-06, + "loss": 0.0105, + "step": 15839 + }, + { + "epoch": 4.81, + "learning_rate": 1.0869776539925156e-06, + "loss": 0.0175, + "step": 15840 + }, + { + "epoch": 4.81, + "learning_rate": 1.083506298424902e-06, + "loss": 0.0201, + "step": 15841 + }, + { + "epoch": 4.81, + "learning_rate": 1.0800404746683311e-06, + "loss": 0.0241, + "step": 15842 + }, + { + "epoch": 4.81, + "learning_rate": 1.0765801828515496e-06, + "loss": 0.0334, + "step": 15843 + }, + { + "epoch": 4.81, + "learning_rate": 1.0731254231030883e-06, + "loss": 0.0261, + "step": 15844 + }, + { + "epoch": 4.81, + "learning_rate": 1.0696761955513111e-06, + "loss": 0.005, + "step": 15845 + }, + { + "epoch": 4.81, + "learning_rate": 1.0662325003243156e-06, + "loss": 0.0043, + "step": 15846 + }, + { + "epoch": 4.81, + "learning_rate": 1.0627943375500491e-06, + "loss": 0.0067, + "step": 15847 + }, + { + "epoch": 4.81, + "learning_rate": 1.0593617073562266e-06, + "loss": 0.0108, + "step": 15848 + }, + { + "epoch": 4.81, + "learning_rate": 1.055934609870329e-06, + "loss": 0.0249, + "step": 15849 + }, + { + "epoch": 4.81, + "learning_rate": 1.052513045219705e-06, + "loss": 0.0331, + "step": 15850 + }, + { + "epoch": 4.81, + "learning_rate": 1.0490970135314193e-06, + "loss": 0.0049, + "step": 15851 + }, + { + "epoch": 4.81, + "learning_rate": 1.0456865149324035e-06, + "loss": 0.0096, + "step": 15852 + }, + { + "epoch": 4.81, + "learning_rate": 1.0422815495493064e-06, + "loss": 0.0157, + "step": 15853 + }, + { + "epoch": 4.81, + "learning_rate": 1.0388821175086604e-06, + "loss": 0.015, + "step": 15854 + }, + { + "epoch": 4.81, + "learning_rate": 1.0354882189366974e-06, + "loss": 0.0151, + "step": 15855 + }, + { + "epoch": 4.81, + "learning_rate": 1.032099853959517e-06, + "loss": 0.0066, + "step": 15856 + }, + { + "epoch": 4.81, + "learning_rate": 1.0287170227029852e-06, + "loss": 0.0374, + "step": 15857 + }, + { + "epoch": 4.81, + "learning_rate": 1.0253397252927676e-06, + "loss": 0.0134, + "step": 15858 + }, + { + "epoch": 4.82, + "learning_rate": 1.021967961854314e-06, + "loss": 0.0224, + "step": 15859 + }, + { + "epoch": 4.82, + "learning_rate": 1.0186017325128749e-06, + "loss": 0.0271, + "step": 15860 + }, + { + "epoch": 4.82, + "learning_rate": 1.0152410373934827e-06, + "loss": 0.0, + "step": 15861 + }, + { + "epoch": 4.82, + "learning_rate": 1.0118858766210048e-06, + "loss": 0.0024, + "step": 15862 + }, + { + "epoch": 4.82, + "learning_rate": 1.0085362503200744e-06, + "loss": 0.0212, + "step": 15863 + }, + { + "epoch": 4.82, + "learning_rate": 1.0051921586150758e-06, + "loss": 0.0182, + "step": 15864 + }, + { + "epoch": 4.82, + "learning_rate": 1.0018536016302925e-06, + "loss": 0.0303, + "step": 15865 + }, + { + "epoch": 4.82, + "learning_rate": 9.985205794896923e-07, + "loss": 0.0308, + "step": 15866 + }, + { + "epoch": 4.82, + "learning_rate": 9.95193092317126e-07, + "loss": 0.012, + "step": 15867 + }, + { + "epoch": 4.82, + "learning_rate": 9.918711402361946e-07, + "loss": 0.0151, + "step": 15868 + }, + { + "epoch": 4.82, + "learning_rate": 9.885547233702662e-07, + "loss": 0.0513, + "step": 15869 + }, + { + "epoch": 4.82, + "learning_rate": 9.85243841842559e-07, + "loss": 0.031, + "step": 15870 + }, + { + "epoch": 4.82, + "learning_rate": 9.819384957760578e-07, + "loss": 0.0171, + "step": 15871 + }, + { + "epoch": 4.82, + "learning_rate": 9.786386852935645e-07, + "loss": 0.0352, + "step": 15872 + }, + { + "epoch": 4.82, + "learning_rate": 9.75344410517631e-07, + "loss": 0.0309, + "step": 15873 + }, + { + "epoch": 4.82, + "learning_rate": 9.720556715706597e-07, + "loss": 0.0336, + "step": 15874 + }, + { + "epoch": 4.82, + "learning_rate": 9.687724685747856e-07, + "loss": 0.018, + "step": 15875 + }, + { + "epoch": 4.82, + "learning_rate": 9.654948016519782e-07, + "loss": 0.042, + "step": 15876 + }, + { + "epoch": 4.82, + "learning_rate": 9.622226709240066e-07, + "loss": 0.0543, + "step": 15877 + }, + { + "epoch": 4.82, + "learning_rate": 9.58956076512407e-07, + "loss": 0.0235, + "step": 15878 + }, + { + "epoch": 4.82, + "learning_rate": 9.556950185385492e-07, + "loss": 0.0108, + "step": 15879 + }, + { + "epoch": 4.82, + "learning_rate": 9.524394971235194e-07, + "loss": 0.0149, + "step": 15880 + }, + { + "epoch": 4.82, + "learning_rate": 9.491895123883042e-07, + "loss": 0.0279, + "step": 15881 + }, + { + "epoch": 4.82, + "learning_rate": 9.459450644536071e-07, + "loss": 0.0322, + "step": 15882 + }, + { + "epoch": 4.82, + "learning_rate": 9.427061534399649e-07, + "loss": 0.0158, + "step": 15883 + }, + { + "epoch": 4.82, + "learning_rate": 9.394727794676815e-07, + "loss": 0.0229, + "step": 15884 + }, + { + "epoch": 4.82, + "learning_rate": 9.362449426568441e-07, + "loss": 0.0295, + "step": 15885 + }, + { + "epoch": 4.82, + "learning_rate": 9.330226431274069e-07, + "loss": 0.0398, + "step": 15886 + }, + { + "epoch": 4.82, + "learning_rate": 9.298058809990238e-07, + "loss": 0.0368, + "step": 15887 + }, + { + "epoch": 4.82, + "learning_rate": 9.265946563912163e-07, + "loss": 0.0448, + "step": 15888 + }, + { + "epoch": 4.82, + "learning_rate": 9.233889694232721e-07, + "loss": 0.0553, + "step": 15889 + }, + { + "epoch": 4.82, + "learning_rate": 9.20188820214246e-07, + "loss": 0.0292, + "step": 15890 + }, + { + "epoch": 4.82, + "learning_rate": 9.169942088830429e-07, + "loss": 0.022, + "step": 15891 + }, + { + "epoch": 4.83, + "learning_rate": 9.138051355483178e-07, + "loss": 0.0288, + "step": 15892 + }, + { + "epoch": 4.83, + "learning_rate": 9.106216003285427e-07, + "loss": 0.0324, + "step": 15893 + }, + { + "epoch": 4.83, + "learning_rate": 9.07443603341973e-07, + "loss": 0.0273, + "step": 15894 + }, + { + "epoch": 4.83, + "learning_rate": 9.042711447066809e-07, + "loss": 0.0083, + "step": 15895 + }, + { + "epoch": 4.83, + "learning_rate": 9.01104224540472e-07, + "loss": 0.0378, + "step": 15896 + }, + { + "epoch": 4.83, + "learning_rate": 8.979428429610191e-07, + "loss": 0.0172, + "step": 15897 + }, + { + "epoch": 4.83, + "learning_rate": 8.947870000857449e-07, + "loss": 0.0281, + "step": 15898 + }, + { + "epoch": 4.83, + "learning_rate": 8.916366960319055e-07, + "loss": 0.0135, + "step": 15899 + }, + { + "epoch": 4.83, + "learning_rate": 8.884919309164906e-07, + "loss": 0.0226, + "step": 15900 + }, + { + "epoch": 4.83, + "learning_rate": 8.853527048563402e-07, + "loss": 0.0113, + "step": 15901 + }, + { + "epoch": 4.83, + "learning_rate": 8.822190179680611e-07, + "loss": 0.0366, + "step": 15902 + }, + { + "epoch": 4.83, + "learning_rate": 8.7909087036806e-07, + "loss": 0.0485, + "step": 15903 + }, + { + "epoch": 4.83, + "learning_rate": 8.759682621725439e-07, + "loss": 0.0332, + "step": 15904 + }, + { + "epoch": 4.83, + "learning_rate": 8.728511934975036e-07, + "loss": 0.0012, + "step": 15905 + }, + { + "epoch": 4.83, + "learning_rate": 8.697396644587295e-07, + "loss": 0.0229, + "step": 15906 + }, + { + "epoch": 4.83, + "learning_rate": 8.666336751717961e-07, + "loss": 0.0265, + "step": 15907 + }, + { + "epoch": 4.83, + "learning_rate": 8.635332257520944e-07, + "loss": 0.0082, + "step": 15908 + }, + { + "epoch": 4.83, + "learning_rate": 8.604383163148154e-07, + "loss": 0.0224, + "step": 15909 + }, + { + "epoch": 4.83, + "learning_rate": 8.57348946974884e-07, + "loss": 0.0232, + "step": 15910 + }, + { + "epoch": 4.83, + "learning_rate": 8.54265117847075e-07, + "loss": 0.0408, + "step": 15911 + }, + { + "epoch": 4.83, + "learning_rate": 8.511868290459634e-07, + "loss": 0.0146, + "step": 15912 + }, + { + "epoch": 4.83, + "learning_rate": 8.481140806858911e-07, + "loss": 0.0126, + "step": 15913 + }, + { + "epoch": 4.83, + "learning_rate": 8.450468728809833e-07, + "loss": 0.0404, + "step": 15914 + }, + { + "epoch": 4.83, + "learning_rate": 8.419852057451992e-07, + "loss": 0.015, + "step": 15915 + }, + { + "epoch": 4.83, + "learning_rate": 8.389290793922643e-07, + "loss": 0.0276, + "step": 15916 + }, + { + "epoch": 4.83, + "learning_rate": 8.358784939357044e-07, + "loss": 0.0351, + "step": 15917 + }, + { + "epoch": 4.83, + "learning_rate": 8.32833449488829e-07, + "loss": 0.0253, + "step": 15918 + }, + { + "epoch": 4.83, + "learning_rate": 8.29793946164764e-07, + "loss": 0.029, + "step": 15919 + }, + { + "epoch": 4.83, + "learning_rate": 8.267599840764194e-07, + "loss": 0.0161, + "step": 15920 + }, + { + "epoch": 4.83, + "learning_rate": 8.237315633364883e-07, + "loss": 0.0482, + "step": 15921 + }, + { + "epoch": 4.83, + "learning_rate": 8.207086840574806e-07, + "loss": 0.0322, + "step": 15922 + }, + { + "epoch": 4.83, + "learning_rate": 8.176913463516732e-07, + "loss": 0.0126, + "step": 15923 + }, + { + "epoch": 4.83, + "learning_rate": 8.146795503311765e-07, + "loss": 0.0437, + "step": 15924 + }, + { + "epoch": 4.84, + "learning_rate": 8.116732961078343e-07, + "loss": 0.0291, + "step": 15925 + }, + { + "epoch": 4.84, + "learning_rate": 8.086725837933405e-07, + "loss": 0.0348, + "step": 15926 + }, + { + "epoch": 4.84, + "learning_rate": 8.056774134991562e-07, + "loss": 0.0286, + "step": 15927 + }, + { + "epoch": 4.84, + "learning_rate": 8.026877853365587e-07, + "loss": 0.0113, + "step": 15928 + }, + { + "epoch": 4.84, + "learning_rate": 7.997036994165928e-07, + "loss": 0.0235, + "step": 15929 + }, + { + "epoch": 4.84, + "learning_rate": 7.967251558501031e-07, + "loss": 0.0173, + "step": 15930 + }, + { + "epoch": 4.84, + "learning_rate": 7.937521547477343e-07, + "loss": 0.0222, + "step": 15931 + }, + { + "epoch": 4.84, + "learning_rate": 7.907846962199316e-07, + "loss": 0.0334, + "step": 15932 + }, + { + "epoch": 4.84, + "learning_rate": 7.878227803769399e-07, + "loss": 0.0467, + "step": 15933 + }, + { + "epoch": 4.84, + "learning_rate": 7.848664073287548e-07, + "loss": 0.0349, + "step": 15934 + }, + { + "epoch": 4.84, + "learning_rate": 7.819155771852215e-07, + "loss": 0.0204, + "step": 15935 + }, + { + "epoch": 4.84, + "learning_rate": 7.789702900559358e-07, + "loss": 0.025, + "step": 15936 + }, + { + "epoch": 4.84, + "learning_rate": 7.760305460503269e-07, + "loss": 0.038, + "step": 15937 + }, + { + "epoch": 4.84, + "learning_rate": 7.73096345277574e-07, + "loss": 0.0104, + "step": 15938 + }, + { + "epoch": 4.84, + "learning_rate": 7.7016768784669e-07, + "loss": 0.0156, + "step": 15939 + }, + { + "epoch": 4.84, + "learning_rate": 7.672445738664879e-07, + "loss": 0.0272, + "step": 15940 + }, + { + "epoch": 4.84, + "learning_rate": 7.643270034454973e-07, + "loss": 0.0158, + "step": 15941 + }, + { + "epoch": 4.84, + "learning_rate": 7.614149766921485e-07, + "loss": 0.0161, + "step": 15942 + }, + { + "epoch": 4.84, + "learning_rate": 7.58508493714588e-07, + "loss": 0.0347, + "step": 15943 + }, + { + "epoch": 4.84, + "learning_rate": 7.556075546207796e-07, + "loss": 0.0263, + "step": 15944 + }, + { + "epoch": 4.84, + "learning_rate": 7.527121595185204e-07, + "loss": 0.0237, + "step": 15945 + }, + { + "epoch": 4.84, + "learning_rate": 7.498223085153243e-07, + "loss": 0.0191, + "step": 15946 + }, + { + "epoch": 4.84, + "learning_rate": 7.469380017185555e-07, + "loss": 0.0001, + "step": 15947 + }, + { + "epoch": 4.84, + "learning_rate": 7.440592392353617e-07, + "loss": 0.0215, + "step": 15948 + }, + { + "epoch": 4.84, + "learning_rate": 7.411860211726739e-07, + "loss": 0.036, + "step": 15949 + }, + { + "epoch": 4.84, + "learning_rate": 7.383183476372401e-07, + "loss": 0.0189, + "step": 15950 + }, + { + "epoch": 4.84, + "learning_rate": 7.354562187355584e-07, + "loss": 0.0162, + "step": 15951 + }, + { + "epoch": 4.84, + "learning_rate": 7.325996345739771e-07, + "loss": 0.0156, + "step": 15952 + }, + { + "epoch": 4.84, + "learning_rate": 7.29748595258578e-07, + "loss": 0.0185, + "step": 15953 + }, + { + "epoch": 4.84, + "learning_rate": 7.269031008953097e-07, + "loss": 0.0347, + "step": 15954 + }, + { + "epoch": 4.84, + "learning_rate": 7.240631515898377e-07, + "loss": 0.0008, + "step": 15955 + }, + { + "epoch": 4.84, + "learning_rate": 7.212287474476774e-07, + "loss": 0.0281, + "step": 15956 + }, + { + "epoch": 4.84, + "learning_rate": 7.183998885740949e-07, + "loss": 0.0099, + "step": 15957 + }, + { + "epoch": 4.85, + "learning_rate": 7.155765750742227e-07, + "loss": 0.0313, + "step": 15958 + }, + { + "epoch": 4.85, + "learning_rate": 7.127588070528767e-07, + "loss": 0.0558, + "step": 15959 + }, + { + "epoch": 4.85, + "learning_rate": 7.099465846147734e-07, + "loss": 0.0271, + "step": 15960 + }, + { + "epoch": 4.85, + "learning_rate": 7.071399078643791e-07, + "loss": 0.0051, + "step": 15961 + }, + { + "epoch": 4.85, + "learning_rate": 7.043387769059106e-07, + "loss": 0.0124, + "step": 15962 + }, + { + "epoch": 4.85, + "learning_rate": 7.015431918434844e-07, + "loss": 0.0306, + "step": 15963 + }, + { + "epoch": 4.85, + "learning_rate": 6.987531527808843e-07, + "loss": 0.017, + "step": 15964 + }, + { + "epoch": 4.85, + "learning_rate": 6.959686598218106e-07, + "loss": 0.0345, + "step": 15965 + }, + { + "epoch": 4.85, + "learning_rate": 6.931897130696473e-07, + "loss": 0.0365, + "step": 15966 + }, + { + "epoch": 4.85, + "learning_rate": 6.904163126276451e-07, + "loss": 0.0096, + "step": 15967 + }, + { + "epoch": 4.85, + "learning_rate": 6.876484585988384e-07, + "loss": 0.0276, + "step": 15968 + }, + { + "epoch": 4.85, + "learning_rate": 6.848861510860448e-07, + "loss": 0.0082, + "step": 15969 + }, + { + "epoch": 4.85, + "learning_rate": 6.821293901918657e-07, + "loss": 0.0482, + "step": 15970 + }, + { + "epoch": 4.85, + "learning_rate": 6.793781760187022e-07, + "loss": 0.0222, + "step": 15971 + }, + { + "epoch": 4.85, + "learning_rate": 6.766325086687563e-07, + "loss": 0.0284, + "step": 15972 + }, + { + "epoch": 4.85, + "learning_rate": 6.738923882440295e-07, + "loss": 0.0457, + "step": 15973 + }, + { + "epoch": 4.85, + "learning_rate": 6.711578148463237e-07, + "loss": 0.0315, + "step": 15974 + }, + { + "epoch": 4.85, + "learning_rate": 6.684287885771745e-07, + "loss": 0.0296, + "step": 15975 + }, + { + "epoch": 4.85, + "learning_rate": 6.657053095380005e-07, + "loss": 0.0238, + "step": 15976 + }, + { + "epoch": 4.85, + "learning_rate": 6.629873778299543e-07, + "loss": 0.0094, + "step": 15977 + }, + { + "epoch": 4.85, + "learning_rate": 6.602749935539886e-07, + "loss": 0.0179, + "step": 15978 + }, + { + "epoch": 4.85, + "learning_rate": 6.575681568108893e-07, + "loss": 0.032, + "step": 15979 + }, + { + "epoch": 4.85, + "learning_rate": 6.548668677011759e-07, + "loss": 0.0267, + "step": 15980 + }, + { + "epoch": 4.85, + "learning_rate": 6.521711263252183e-07, + "loss": 0.0269, + "step": 15981 + }, + { + "epoch": 4.85, + "learning_rate": 6.494809327831196e-07, + "loss": 0.005, + "step": 15982 + }, + { + "epoch": 4.85, + "learning_rate": 6.467962871748666e-07, + "loss": 0.0299, + "step": 15983 + }, + { + "epoch": 4.85, + "learning_rate": 6.441171896001462e-07, + "loss": 0.0092, + "step": 15984 + }, + { + "epoch": 4.85, + "learning_rate": 6.414436401584788e-07, + "loss": 0.0277, + "step": 15985 + }, + { + "epoch": 4.85, + "learning_rate": 6.387756389492016e-07, + "loss": 0.0153, + "step": 15986 + }, + { + "epoch": 4.85, + "learning_rate": 6.361131860714185e-07, + "loss": 0.0429, + "step": 15987 + }, + { + "epoch": 4.85, + "learning_rate": 6.334562816240174e-07, + "loss": 0.0492, + "step": 15988 + }, + { + "epoch": 4.85, + "learning_rate": 6.308049257056857e-07, + "loss": 0.0559, + "step": 15989 + }, + { + "epoch": 4.86, + "learning_rate": 6.281591184149615e-07, + "loss": 0.0572, + "step": 15990 + }, + { + "epoch": 4.86, + "learning_rate": 6.25518859850066e-07, + "loss": 0.0227, + "step": 15991 + }, + { + "epoch": 4.86, + "learning_rate": 6.228841501091208e-07, + "loss": 0.0135, + "step": 15992 + }, + { + "epoch": 4.86, + "learning_rate": 6.202549892899977e-07, + "loss": 0.0118, + "step": 15993 + }, + { + "epoch": 4.86, + "learning_rate": 6.176313774903186e-07, + "loss": 0.0139, + "step": 15994 + }, + { + "epoch": 4.86, + "learning_rate": 6.150133148076053e-07, + "loss": 0.0305, + "step": 15995 + }, + { + "epoch": 4.86, + "learning_rate": 6.124008013390636e-07, + "loss": 0.0362, + "step": 15996 + }, + { + "epoch": 4.86, + "learning_rate": 6.097938371817657e-07, + "loss": 0.0293, + "step": 15997 + }, + { + "epoch": 4.86, + "learning_rate": 6.071924224325342e-07, + "loss": 0.0519, + "step": 15998 + }, + { + "epoch": 4.86, + "learning_rate": 6.045965571880085e-07, + "loss": 0.0269, + "step": 15999 + }, + { + "epoch": 4.86, + "learning_rate": 6.020062415446281e-07, + "loss": 0.0264, + "step": 16000 + }, + { + "epoch": 4.86, + "learning_rate": 5.99421475598616e-07, + "loss": 0.0086, + "step": 16001 + }, + { + "epoch": 4.86, + "learning_rate": 5.968422594459788e-07, + "loss": 0.0337, + "step": 16002 + }, + { + "epoch": 4.86, + "learning_rate": 5.942685931825231e-07, + "loss": 0.035, + "step": 16003 + }, + { + "epoch": 4.86, + "learning_rate": 5.917004769038725e-07, + "loss": 0.0386, + "step": 16004 + }, + { + "epoch": 4.86, + "learning_rate": 5.891379107054007e-07, + "loss": 0.0071, + "step": 16005 + }, + { + "epoch": 4.86, + "learning_rate": 5.865808946823314e-07, + "loss": 0.0323, + "step": 16006 + }, + { + "epoch": 4.86, + "learning_rate": 5.840294289296221e-07, + "loss": 0.0124, + "step": 16007 + }, + { + "epoch": 4.86, + "learning_rate": 5.814835135420471e-07, + "loss": 0.0408, + "step": 16008 + }, + { + "epoch": 4.86, + "learning_rate": 5.789431486141971e-07, + "loss": 0.0316, + "step": 16009 + }, + { + "epoch": 4.86, + "learning_rate": 5.764083342404469e-07, + "loss": 0.0071, + "step": 16010 + }, + { + "epoch": 4.86, + "learning_rate": 5.738790705149543e-07, + "loss": 0.0407, + "step": 16011 + }, + { + "epoch": 4.86, + "learning_rate": 5.713553575316442e-07, + "loss": 0.0235, + "step": 16012 + }, + { + "epoch": 4.86, + "learning_rate": 5.688371953842919e-07, + "loss": 0.0379, + "step": 16013 + }, + { + "epoch": 4.86, + "learning_rate": 5.663245841664388e-07, + "loss": 0.0412, + "step": 16014 + }, + { + "epoch": 4.86, + "learning_rate": 5.638175239714105e-07, + "loss": 0.0305, + "step": 16015 + }, + { + "epoch": 4.86, + "learning_rate": 5.61316014892349e-07, + "loss": 0.0542, + "step": 16016 + }, + { + "epoch": 4.86, + "learning_rate": 5.588200570221634e-07, + "loss": 0.0249, + "step": 16017 + }, + { + "epoch": 4.86, + "learning_rate": 5.563296504535964e-07, + "loss": 0.0173, + "step": 16018 + }, + { + "epoch": 4.86, + "learning_rate": 5.538447952791236e-07, + "loss": 0.0256, + "step": 16019 + }, + { + "epoch": 4.86, + "learning_rate": 5.513654915910881e-07, + "loss": 0.0306, + "step": 16020 + }, + { + "epoch": 4.86, + "learning_rate": 5.488917394815661e-07, + "loss": 0.0327, + "step": 16021 + }, + { + "epoch": 4.86, + "learning_rate": 5.46423539042451e-07, + "loss": 0.0165, + "step": 16022 + }, + { + "epoch": 4.87, + "learning_rate": 5.439608903654357e-07, + "loss": 0.0315, + "step": 16023 + }, + { + "epoch": 4.87, + "learning_rate": 5.415037935419975e-07, + "loss": 0.0146, + "step": 16024 + }, + { + "epoch": 4.87, + "learning_rate": 5.390522486633964e-07, + "loss": 0.0315, + "step": 16025 + }, + { + "epoch": 4.87, + "learning_rate": 5.366062558207262e-07, + "loss": 0.0111, + "step": 16026 + }, + { + "epoch": 4.87, + "learning_rate": 5.341658151048311e-07, + "loss": 0.024, + "step": 16027 + }, + { + "epoch": 4.87, + "learning_rate": 5.317309266063718e-07, + "loss": 0.0119, + "step": 16028 + }, + { + "epoch": 4.87, + "learning_rate": 5.293015904157927e-07, + "loss": 0.0126, + "step": 16029 + }, + { + "epoch": 4.87, + "learning_rate": 5.268778066233382e-07, + "loss": 0.0012, + "step": 16030 + }, + { + "epoch": 4.87, + "learning_rate": 5.244595753190695e-07, + "loss": 0.015, + "step": 16031 + }, + { + "epoch": 4.87, + "learning_rate": 5.220468965927815e-07, + "loss": 0.0362, + "step": 16032 + }, + { + "epoch": 4.87, + "learning_rate": 5.196397705341027e-07, + "loss": 0.0122, + "step": 16033 + }, + { + "epoch": 4.87, + "learning_rate": 5.172381972324613e-07, + "loss": 0.0213, + "step": 16034 + }, + { + "epoch": 4.87, + "learning_rate": 5.148421767770694e-07, + "loss": 0.0306, + "step": 16035 + }, + { + "epoch": 4.87, + "learning_rate": 5.124517092569225e-07, + "loss": 0.0207, + "step": 16036 + }, + { + "epoch": 4.87, + "learning_rate": 5.100667947608328e-07, + "loss": 0.0144, + "step": 16037 + }, + { + "epoch": 4.87, + "learning_rate": 5.076874333773961e-07, + "loss": 0.0354, + "step": 16038 + }, + { + "epoch": 4.87, + "learning_rate": 5.053136251949752e-07, + "loss": 0.0315, + "step": 16039 + }, + { + "epoch": 4.87, + "learning_rate": 5.029453703017661e-07, + "loss": 0.0286, + "step": 16040 + }, + { + "epoch": 4.87, + "learning_rate": 5.005826687857317e-07, + "loss": 0.0443, + "step": 16041 + }, + { + "epoch": 4.87, + "learning_rate": 4.98225520734652e-07, + "loss": 0.0245, + "step": 16042 + }, + { + "epoch": 4.87, + "learning_rate": 4.958739262360901e-07, + "loss": 0.013, + "step": 16043 + }, + { + "epoch": 4.87, + "learning_rate": 4.935278853773927e-07, + "loss": 0.0233, + "step": 16044 + }, + { + "epoch": 4.87, + "learning_rate": 4.911873982457071e-07, + "loss": 0.0287, + "step": 16045 + }, + { + "epoch": 4.87, + "learning_rate": 4.888524649279968e-07, + "loss": 0.0319, + "step": 16046 + }, + { + "epoch": 4.87, + "learning_rate": 4.865230855109592e-07, + "loss": 0.0297, + "step": 16047 + }, + { + "epoch": 4.87, + "learning_rate": 4.841992600811418e-07, + "loss": 0.0308, + "step": 16048 + }, + { + "epoch": 4.87, + "learning_rate": 4.818809887248754e-07, + "loss": 0.0269, + "step": 16049 + }, + { + "epoch": 4.87, + "learning_rate": 4.795682715282911e-07, + "loss": 0.0062, + "step": 16050 + }, + { + "epoch": 4.87, + "learning_rate": 4.772611085772537e-07, + "loss": 0.0276, + "step": 16051 + }, + { + "epoch": 4.87, + "learning_rate": 4.7495949995751124e-07, + "loss": 0.013, + "step": 16052 + }, + { + "epoch": 4.87, + "learning_rate": 4.7266344575452864e-07, + "loss": 0.014, + "step": 16053 + }, + { + "epoch": 4.87, + "learning_rate": 4.703729460536376e-07, + "loss": 0.0281, + "step": 16054 + }, + { + "epoch": 4.87, + "learning_rate": 4.680880009398702e-07, + "loss": 0.0449, + "step": 16055 + }, + { + "epoch": 4.88, + "learning_rate": 4.6580861049815845e-07, + "loss": 0.0303, + "step": 16056 + }, + { + "epoch": 4.88, + "learning_rate": 4.635347748131513e-07, + "loss": 0.0143, + "step": 16057 + }, + { + "epoch": 4.88, + "learning_rate": 4.6126649396929785e-07, + "loss": 0.01, + "step": 16058 + }, + { + "epoch": 4.88, + "learning_rate": 4.5900376805088066e-07, + "loss": 0.0305, + "step": 16059 + }, + { + "epoch": 4.88, + "learning_rate": 4.567465971419493e-07, + "loss": 0.042, + "step": 16060 + }, + { + "epoch": 4.88, + "learning_rate": 4.5449498132636985e-07, + "loss": 0.0278, + "step": 16061 + }, + { + "epoch": 4.88, + "learning_rate": 4.522489206877422e-07, + "loss": 0.0114, + "step": 16062 + }, + { + "epoch": 4.88, + "learning_rate": 4.500084153095329e-07, + "loss": 0.0677, + "step": 16063 + }, + { + "epoch": 4.88, + "learning_rate": 4.477734652749421e-07, + "loss": 0.0495, + "step": 16064 + }, + { + "epoch": 4.88, + "learning_rate": 4.455440706670199e-07, + "loss": 0.0349, + "step": 16065 + }, + { + "epoch": 4.88, + "learning_rate": 4.4332023156856686e-07, + "loss": 0.0087, + "step": 16066 + }, + { + "epoch": 4.88, + "learning_rate": 4.4110194806220025e-07, + "loss": 0.0368, + "step": 16067 + }, + { + "epoch": 4.88, + "learning_rate": 4.388892202303207e-07, + "loss": 0.0377, + "step": 16068 + }, + { + "epoch": 4.88, + "learning_rate": 4.366820481551125e-07, + "loss": 0.0236, + "step": 16069 + }, + { + "epoch": 4.88, + "learning_rate": 4.344804319185935e-07, + "loss": 0.0166, + "step": 16070 + }, + { + "epoch": 4.88, + "learning_rate": 4.3228437160251484e-07, + "loss": 0.0221, + "step": 16071 + }, + { + "epoch": 4.88, + "learning_rate": 4.300938672884613e-07, + "loss": 0.0304, + "step": 16072 + }, + { + "epoch": 4.88, + "learning_rate": 4.2790891905781775e-07, + "loss": 0.0055, + "step": 16073 + }, + { + "epoch": 4.88, + "learning_rate": 4.2572952699175267e-07, + "loss": 0.0446, + "step": 16074 + }, + { + "epoch": 4.88, + "learning_rate": 4.2355569117120124e-07, + "loss": 0.0297, + "step": 16075 + }, + { + "epoch": 4.88, + "learning_rate": 4.2138741167693224e-07, + "loss": 0.0219, + "step": 16076 + }, + { + "epoch": 4.88, + "learning_rate": 4.1922468858948123e-07, + "loss": 0.0346, + "step": 16077 + }, + { + "epoch": 4.88, + "learning_rate": 4.170675219891839e-07, + "loss": 0.0175, + "step": 16078 + }, + { + "epoch": 4.88, + "learning_rate": 4.149159119561929e-07, + "loss": 0.0387, + "step": 16079 + }, + { + "epoch": 4.88, + "learning_rate": 4.127698585704109e-07, + "loss": 0.0297, + "step": 16080 + }, + { + "epoch": 4.88, + "learning_rate": 4.1062936191157413e-07, + "loss": 0.0214, + "step": 16081 + }, + { + "epoch": 4.88, + "learning_rate": 4.0849442205920234e-07, + "loss": 0.0071, + "step": 16082 + }, + { + "epoch": 4.88, + "learning_rate": 4.0636503909256546e-07, + "loss": 0.0172, + "step": 16083 + }, + { + "epoch": 4.88, + "learning_rate": 4.0424121309080016e-07, + "loss": 0.0313, + "step": 16084 + }, + { + "epoch": 4.88, + "learning_rate": 4.021229441327767e-07, + "loss": 0.0437, + "step": 16085 + }, + { + "epoch": 4.88, + "learning_rate": 4.0001023229719876e-07, + "loss": 0.0284, + "step": 16086 + }, + { + "epoch": 4.88, + "learning_rate": 3.979030776625536e-07, + "loss": 0.027, + "step": 16087 + }, + { + "epoch": 4.88, + "learning_rate": 3.9580148030709524e-07, + "loss": 0.0187, + "step": 16088 + }, + { + "epoch": 4.89, + "learning_rate": 3.9370544030891125e-07, + "loss": 0.0325, + "step": 16089 + }, + { + "epoch": 4.89, + "learning_rate": 3.916149577458394e-07, + "loss": 0.0112, + "step": 16090 + }, + { + "epoch": 4.89, + "learning_rate": 3.895300326955508e-07, + "loss": 0.0128, + "step": 16091 + }, + { + "epoch": 4.89, + "learning_rate": 3.874506652355003e-07, + "loss": 0.0343, + "step": 16092 + }, + { + "epoch": 4.89, + "learning_rate": 3.853768554429093e-07, + "loss": 0.0186, + "step": 16093 + }, + { + "epoch": 4.89, + "learning_rate": 3.8330860339483294e-07, + "loss": 0.0138, + "step": 16094 + }, + { + "epoch": 4.89, + "learning_rate": 3.812459091681097e-07, + "loss": 0.0114, + "step": 16095 + }, + { + "epoch": 4.89, + "learning_rate": 3.791887728393284e-07, + "loss": 0.0491, + "step": 16096 + }, + { + "epoch": 4.89, + "learning_rate": 3.771371944849277e-07, + "loss": 0.0593, + "step": 16097 + }, + { + "epoch": 4.89, + "learning_rate": 3.750911741811302e-07, + "loss": 0.0066, + "step": 16098 + }, + { + "epoch": 4.89, + "learning_rate": 3.730507120039083e-07, + "loss": 0.0407, + "step": 16099 + }, + { + "epoch": 4.89, + "learning_rate": 3.710158080290848e-07, + "loss": 0.043, + "step": 16100 + }, + { + "epoch": 4.89, + "learning_rate": 3.6898646233223253e-07, + "loss": 0.0258, + "step": 16101 + }, + { + "epoch": 4.89, + "learning_rate": 3.669626749887411e-07, + "loss": 0.0284, + "step": 16102 + }, + { + "epoch": 4.89, + "learning_rate": 3.6494444607380046e-07, + "loss": 0.031, + "step": 16103 + }, + { + "epoch": 4.89, + "learning_rate": 3.629317756623673e-07, + "loss": 0.0419, + "step": 16104 + }, + { + "epoch": 4.89, + "learning_rate": 3.6092466382921514e-07, + "loss": 0.0257, + "step": 16105 + }, + { + "epoch": 4.89, + "learning_rate": 3.5892311064888435e-07, + "loss": 0.0364, + "step": 16106 + }, + { + "epoch": 4.89, + "learning_rate": 3.5692711619576544e-07, + "loss": 0.0321, + "step": 16107 + }, + { + "epoch": 4.89, + "learning_rate": 3.549366805439657e-07, + "loss": 0.0247, + "step": 16108 + }, + { + "epoch": 4.89, + "learning_rate": 3.529518037674428e-07, + "loss": 0.0207, + "step": 16109 + }, + { + "epoch": 4.89, + "learning_rate": 3.509724859399043e-07, + "loss": 0.0259, + "step": 16110 + }, + { + "epoch": 4.89, + "learning_rate": 3.4899872713492483e-07, + "loss": 0.0267, + "step": 16111 + }, + { + "epoch": 4.89, + "learning_rate": 3.4703052742577896e-07, + "loss": 0.0181, + "step": 16112 + }, + { + "epoch": 4.89, + "learning_rate": 3.450678868855916e-07, + "loss": 0.0189, + "step": 16113 + }, + { + "epoch": 4.89, + "learning_rate": 3.4311080558728777e-07, + "loss": 0.0534, + "step": 16114 + }, + { + "epoch": 4.89, + "learning_rate": 3.411592836035426e-07, + "loss": 0.0457, + "step": 16115 + }, + { + "epoch": 4.89, + "learning_rate": 3.3921332100686484e-07, + "loss": 0.0068, + "step": 16116 + }, + { + "epoch": 4.89, + "learning_rate": 3.372729178695299e-07, + "loss": 0.0497, + "step": 16117 + }, + { + "epoch": 4.89, + "learning_rate": 3.3533807426363026e-07, + "loss": 0.023, + "step": 16118 + }, + { + "epoch": 4.89, + "learning_rate": 3.33408790261025e-07, + "loss": 0.0225, + "step": 16119 + }, + { + "epoch": 4.89, + "learning_rate": 3.314850659333901e-07, + "loss": 0.0172, + "step": 16120 + }, + { + "epoch": 4.89, + "learning_rate": 3.295669013521851e-07, + "loss": 0.0292, + "step": 16121 + }, + { + "epoch": 4.9, + "learning_rate": 3.276542965886697e-07, + "loss": 0.0304, + "step": 16122 + }, + { + "epoch": 4.9, + "learning_rate": 3.2574725171388704e-07, + "loss": 0.0135, + "step": 16123 + }, + { + "epoch": 4.9, + "learning_rate": 3.2384576679868047e-07, + "loss": 0.0291, + "step": 16124 + }, + { + "epoch": 4.9, + "learning_rate": 3.219498419136768e-07, + "loss": 0.0443, + "step": 16125 + }, + { + "epoch": 4.9, + "learning_rate": 3.20059477129303e-07, + "loss": 0.0292, + "step": 16126 + }, + { + "epoch": 4.9, + "learning_rate": 3.181746725157863e-07, + "loss": 0.0402, + "step": 16127 + }, + { + "epoch": 4.9, + "learning_rate": 3.16295428143154e-07, + "loss": 0.0355, + "step": 16128 + }, + { + "epoch": 4.9, + "learning_rate": 3.1442174408120024e-07, + "loss": 0.0127, + "step": 16129 + }, + { + "epoch": 4.9, + "learning_rate": 3.12553620399536e-07, + "loss": 0.027, + "step": 16130 + }, + { + "epoch": 4.9, + "learning_rate": 3.1069105716753915e-07, + "loss": 0.0343, + "step": 16131 + }, + { + "epoch": 4.9, + "learning_rate": 3.0883405445440436e-07, + "loss": 0.0218, + "step": 16132 + }, + { + "epoch": 4.9, + "learning_rate": 3.0698261232912636e-07, + "loss": 0.0407, + "step": 16133 + }, + { + "epoch": 4.9, + "learning_rate": 3.051367308604835e-07, + "loss": 0.0053, + "step": 16134 + }, + { + "epoch": 4.9, + "learning_rate": 3.03296410117021e-07, + "loss": 0.024, + "step": 16135 + }, + { + "epoch": 4.9, + "learning_rate": 3.0146165016711743e-07, + "loss": 0.0104, + "step": 16136 + }, + { + "epoch": 4.9, + "learning_rate": 2.996324510789183e-07, + "loss": 0.044, + "step": 16137 + }, + { + "epoch": 4.9, + "learning_rate": 2.978088129203859e-07, + "loss": 0.0176, + "step": 16138 + }, + { + "epoch": 4.9, + "learning_rate": 2.959907357592661e-07, + "loss": 0.0086, + "step": 16139 + }, + { + "epoch": 4.9, + "learning_rate": 2.941782196630882e-07, + "loss": 0.025, + "step": 16140 + }, + { + "epoch": 4.9, + "learning_rate": 2.923712646991649e-07, + "loss": 0.0327, + "step": 16141 + }, + { + "epoch": 4.9, + "learning_rate": 2.9056987093464267e-07, + "loss": 0.027, + "step": 16142 + }, + { + "epoch": 4.9, + "learning_rate": 2.887740384364345e-07, + "loss": 0.0297, + "step": 16143 + }, + { + "epoch": 4.9, + "learning_rate": 2.8698376727125383e-07, + "loss": 0.0423, + "step": 16144 + }, + { + "epoch": 4.9, + "learning_rate": 2.851990575055807e-07, + "loss": 0.0126, + "step": 16145 + }, + { + "epoch": 4.9, + "learning_rate": 2.8341990920572876e-07, + "loss": 0.0279, + "step": 16146 + }, + { + "epoch": 4.9, + "learning_rate": 2.8164632243779516e-07, + "loss": 0.0197, + "step": 16147 + }, + { + "epoch": 4.9, + "learning_rate": 2.798782972676605e-07, + "loss": 0.0316, + "step": 16148 + }, + { + "epoch": 4.9, + "learning_rate": 2.78115833760989e-07, + "loss": 0.0149, + "step": 16149 + }, + { + "epoch": 4.9, + "learning_rate": 2.7635893198326153e-07, + "loss": 0.0604, + "step": 16150 + }, + { + "epoch": 4.9, + "learning_rate": 2.7460759199972593e-07, + "loss": 0.0647, + "step": 16151 + }, + { + "epoch": 4.9, + "learning_rate": 2.728618138754635e-07, + "loss": 0.0153, + "step": 16152 + }, + { + "epoch": 4.9, + "learning_rate": 2.711215976753056e-07, + "loss": 0.0503, + "step": 16153 + }, + { + "epoch": 4.9, + "learning_rate": 2.6938694346391733e-07, + "loss": 0.0501, + "step": 16154 + }, + { + "epoch": 4.91, + "learning_rate": 2.676578513057137e-07, + "loss": 0.0146, + "step": 16155 + }, + { + "epoch": 4.91, + "learning_rate": 2.6593432126492676e-07, + "loss": 0.0112, + "step": 16156 + }, + { + "epoch": 4.91, + "learning_rate": 2.642163534056052e-07, + "loss": 0.0243, + "step": 16157 + }, + { + "epoch": 4.91, + "learning_rate": 2.6250394779154805e-07, + "loss": 0.0399, + "step": 16158 + }, + { + "epoch": 4.91, + "learning_rate": 2.607971044863544e-07, + "loss": 0.0234, + "step": 16159 + }, + { + "epoch": 4.91, + "learning_rate": 2.590958235534568e-07, + "loss": 0.0374, + "step": 16160 + }, + { + "epoch": 4.91, + "learning_rate": 2.574001050560215e-07, + "loss": 0.0316, + "step": 16161 + }, + { + "epoch": 4.91, + "learning_rate": 2.5570994905706464e-07, + "loss": 0.0349, + "step": 16162 + }, + { + "epoch": 4.91, + "learning_rate": 2.5402535561936943e-07, + "loss": 0.0191, + "step": 16163 + }, + { + "epoch": 4.91, + "learning_rate": 2.523463248054858e-07, + "loss": 0.0251, + "step": 16164 + }, + { + "epoch": 4.91, + "learning_rate": 2.506728566778138e-07, + "loss": 0.0426, + "step": 16165 + }, + { + "epoch": 4.91, + "learning_rate": 2.490049512985204e-07, + "loss": 0.0071, + "step": 16166 + }, + { + "epoch": 4.91, + "learning_rate": 2.473426087295394e-07, + "loss": 0.0233, + "step": 16167 + }, + { + "epoch": 4.91, + "learning_rate": 2.45685829032638e-07, + "loss": 0.0326, + "step": 16168 + }, + { + "epoch": 4.91, + "learning_rate": 2.4403461226936706e-07, + "loss": 0.0427, + "step": 16169 + }, + { + "epoch": 4.91, + "learning_rate": 2.423889585010441e-07, + "loss": 0.0333, + "step": 16170 + }, + { + "epoch": 4.91, + "learning_rate": 2.4074886778880367e-07, + "loss": 0.0083, + "step": 16171 + }, + { + "epoch": 4.91, + "learning_rate": 2.391143401935969e-07, + "loss": 0.0263, + "step": 16172 + }, + { + "epoch": 4.91, + "learning_rate": 2.374853757761086e-07, + "loss": 0.0088, + "step": 16173 + }, + { + "epoch": 4.91, + "learning_rate": 2.3586197459685706e-07, + "loss": 0.0162, + "step": 16174 + }, + { + "epoch": 4.91, + "learning_rate": 2.3424413671616073e-07, + "loss": 0.0198, + "step": 16175 + }, + { + "epoch": 4.91, + "learning_rate": 2.3263186219412144e-07, + "loss": 0.036, + "step": 16176 + }, + { + "epoch": 4.91, + "learning_rate": 2.3102515109060805e-07, + "loss": 0.0362, + "step": 16177 + }, + { + "epoch": 4.91, + "learning_rate": 2.2942400346532275e-07, + "loss": 0.0192, + "step": 16178 + }, + { + "epoch": 4.91, + "learning_rate": 2.2782841937771802e-07, + "loss": 0.0488, + "step": 16179 + }, + { + "epoch": 4.91, + "learning_rate": 2.2623839888711304e-07, + "loss": 0.0259, + "step": 16180 + }, + { + "epoch": 4.91, + "learning_rate": 2.246539420525273e-07, + "loss": 0.0333, + "step": 16181 + }, + { + "epoch": 4.91, + "learning_rate": 2.2307504893283034e-07, + "loss": 0.0165, + "step": 16182 + }, + { + "epoch": 4.91, + "learning_rate": 2.2150171958667528e-07, + "loss": 0.0131, + "step": 16183 + }, + { + "epoch": 4.91, + "learning_rate": 2.19933954072532e-07, + "loss": 0.0514, + "step": 16184 + }, + { + "epoch": 4.91, + "learning_rate": 2.1837175244860394e-07, + "loss": 0.0156, + "step": 16185 + }, + { + "epoch": 4.91, + "learning_rate": 2.16815114772928e-07, + "loss": 0.0152, + "step": 16186 + }, + { + "epoch": 4.91, + "learning_rate": 2.1526404110334128e-07, + "loss": 0.0283, + "step": 16187 + }, + { + "epoch": 4.92, + "learning_rate": 2.1371853149744766e-07, + "loss": 0.0283, + "step": 16188 + }, + { + "epoch": 4.92, + "learning_rate": 2.1217858601268455e-07, + "loss": 0.0385, + "step": 16189 + }, + { + "epoch": 4.92, + "learning_rate": 2.106442047062229e-07, + "loss": 0.0407, + "step": 16190 + }, + { + "epoch": 4.92, + "learning_rate": 2.0911538763508374e-07, + "loss": 0.0266, + "step": 16191 + }, + { + "epoch": 4.92, + "learning_rate": 2.0759213485603832e-07, + "loss": 0.0241, + "step": 16192 + }, + { + "epoch": 4.92, + "learning_rate": 2.0607444642569136e-07, + "loss": 0.0211, + "step": 16193 + }, + { + "epoch": 4.92, + "learning_rate": 2.0456232240041448e-07, + "loss": 0.0096, + "step": 16194 + }, + { + "epoch": 4.92, + "learning_rate": 2.030557628363627e-07, + "loss": 0.0164, + "step": 16195 + }, + { + "epoch": 4.92, + "learning_rate": 2.015547677895246e-07, + "loss": 0.0269, + "step": 16196 + }, + { + "epoch": 4.92, + "learning_rate": 2.0005933731563895e-07, + "loss": 0.0373, + "step": 16197 + }, + { + "epoch": 4.92, + "learning_rate": 1.9856947147026125e-07, + "loss": 0.0205, + "step": 16198 + }, + { + "epoch": 4.92, + "learning_rate": 1.9708517030874727e-07, + "loss": 0.0128, + "step": 16199 + }, + { + "epoch": 4.92, + "learning_rate": 1.956064338862362e-07, + "loss": 0.029, + "step": 16200 + }, + { + "epoch": 4.92, + "learning_rate": 1.9413326225763416e-07, + "loss": 0.01, + "step": 16201 + }, + { + "epoch": 4.92, + "learning_rate": 1.9266565547768064e-07, + "loss": 0.0127, + "step": 16202 + }, + { + "epoch": 4.92, + "learning_rate": 1.912036136008821e-07, + "loss": 0.033, + "step": 16203 + }, + { + "epoch": 4.92, + "learning_rate": 1.8974713668157838e-07, + "loss": 0.0415, + "step": 16204 + }, + { + "epoch": 4.92, + "learning_rate": 1.8829622477384287e-07, + "loss": 0.0355, + "step": 16205 + }, + { + "epoch": 4.92, + "learning_rate": 1.868508779315825e-07, + "loss": 0.0255, + "step": 16206 + }, + { + "epoch": 4.92, + "learning_rate": 1.854110962084876e-07, + "loss": 0.0216, + "step": 16207 + }, + { + "epoch": 4.92, + "learning_rate": 1.8397687965804874e-07, + "loss": 0.0341, + "step": 16208 + }, + { + "epoch": 4.92, + "learning_rate": 1.8254822833353998e-07, + "loss": 0.0177, + "step": 16209 + }, + { + "epoch": 4.92, + "learning_rate": 1.8112514228801888e-07, + "loss": 0.0365, + "step": 16210 + }, + { + "epoch": 4.92, + "learning_rate": 1.7970762157435982e-07, + "loss": 0.0152, + "step": 16211 + }, + { + "epoch": 4.92, + "learning_rate": 1.7829566624522062e-07, + "loss": 0.0439, + "step": 16212 + }, + { + "epoch": 4.92, + "learning_rate": 1.7688927635305938e-07, + "loss": 0.0542, + "step": 16213 + }, + { + "epoch": 4.92, + "learning_rate": 1.7548845195010096e-07, + "loss": 0.0094, + "step": 16214 + }, + { + "epoch": 4.92, + "learning_rate": 1.740931930883871e-07, + "loss": 0.0224, + "step": 16215 + }, + { + "epoch": 4.92, + "learning_rate": 1.7270349981974297e-07, + "loss": 0.0242, + "step": 16216 + }, + { + "epoch": 4.92, + "learning_rate": 1.713193721958106e-07, + "loss": 0.026, + "step": 16217 + }, + { + "epoch": 4.92, + "learning_rate": 1.699408102679989e-07, + "loss": 0.0436, + "step": 16218 + }, + { + "epoch": 4.92, + "learning_rate": 1.685678140875002e-07, + "loss": 0.0139, + "step": 16219 + }, + { + "epoch": 4.92, + "learning_rate": 1.6720038370532373e-07, + "loss": 0.0319, + "step": 16220 + }, + { + "epoch": 4.93, + "learning_rate": 1.658385191722955e-07, + "loss": 0.0075, + "step": 16221 + }, + { + "epoch": 4.93, + "learning_rate": 1.6448222053895844e-07, + "loss": 0.0098, + "step": 16222 + }, + { + "epoch": 4.93, + "learning_rate": 1.6313148785573883e-07, + "loss": 0.0195, + "step": 16223 + }, + { + "epoch": 4.93, + "learning_rate": 1.617863211727799e-07, + "loss": 0.0299, + "step": 16224 + }, + { + "epoch": 4.93, + "learning_rate": 1.6044672054005836e-07, + "loss": 0.044, + "step": 16225 + }, + { + "epoch": 4.93, + "learning_rate": 1.5911268600735106e-07, + "loss": 0.0353, + "step": 16226 + }, + { + "epoch": 4.93, + "learning_rate": 1.5778421762420169e-07, + "loss": 0.0208, + "step": 16227 + }, + { + "epoch": 4.93, + "learning_rate": 1.564613154399541e-07, + "loss": 0.0059, + "step": 16228 + }, + { + "epoch": 4.93, + "learning_rate": 1.5514397950375234e-07, + "loss": 0.0152, + "step": 16229 + }, + { + "epoch": 4.93, + "learning_rate": 1.5383220986454058e-07, + "loss": 0.0357, + "step": 16230 + }, + { + "epoch": 4.93, + "learning_rate": 1.5252600657104653e-07, + "loss": 0.0142, + "step": 16231 + }, + { + "epoch": 4.93, + "learning_rate": 1.5122536967178135e-07, + "loss": 0.0476, + "step": 16232 + }, + { + "epoch": 4.93, + "learning_rate": 1.499302992150564e-07, + "loss": 0.0096, + "step": 16233 + }, + { + "epoch": 4.93, + "learning_rate": 1.486407952490165e-07, + "loss": 0.0272, + "step": 16234 + }, + { + "epoch": 4.93, + "learning_rate": 1.4735685782150673e-07, + "loss": 0.0227, + "step": 16235 + }, + { + "epoch": 4.93, + "learning_rate": 1.4607848698027224e-07, + "loss": 0.0422, + "step": 16236 + }, + { + "epoch": 4.93, + "learning_rate": 1.448056827727584e-07, + "loss": 0.0271, + "step": 16237 + }, + { + "epoch": 4.93, + "learning_rate": 1.4353844524627734e-07, + "loss": 0.0221, + "step": 16238 + }, + { + "epoch": 4.93, + "learning_rate": 1.4227677444789143e-07, + "loss": 0.0399, + "step": 16239 + }, + { + "epoch": 4.93, + "learning_rate": 1.4102067042447984e-07, + "loss": 0.039, + "step": 16240 + }, + { + "epoch": 4.93, + "learning_rate": 1.397701332226886e-07, + "loss": 0.0196, + "step": 16241 + }, + { + "epoch": 4.93, + "learning_rate": 1.3852516288896387e-07, + "loss": 0.0148, + "step": 16242 + }, + { + "epoch": 4.93, + "learning_rate": 1.3728575946956865e-07, + "loss": 0.0359, + "step": 16243 + }, + { + "epoch": 4.93, + "learning_rate": 1.3605192301054947e-07, + "loss": 0.0449, + "step": 16244 + }, + { + "epoch": 4.93, + "learning_rate": 1.3482365355770296e-07, + "loss": 0.0194, + "step": 16245 + }, + { + "epoch": 4.93, + "learning_rate": 1.3360095115670931e-07, + "loss": 0.0257, + "step": 16246 + }, + { + "epoch": 4.93, + "learning_rate": 1.3238381585294888e-07, + "loss": 0.0417, + "step": 16247 + }, + { + "epoch": 4.93, + "learning_rate": 1.3117224769165214e-07, + "loss": 0.024, + "step": 16248 + }, + { + "epoch": 4.93, + "learning_rate": 1.299662467178164e-07, + "loss": 0.0332, + "step": 16249 + }, + { + "epoch": 4.93, + "learning_rate": 1.2876581297623923e-07, + "loss": 0.0443, + "step": 16250 + }, + { + "epoch": 4.93, + "learning_rate": 1.2757094651151823e-07, + "loss": 0.0194, + "step": 16251 + }, + { + "epoch": 4.93, + "learning_rate": 1.2638164736805124e-07, + "loss": 0.0321, + "step": 16252 + }, + { + "epoch": 4.93, + "learning_rate": 1.2519791558998626e-07, + "loss": 0.0118, + "step": 16253 + }, + { + "epoch": 4.94, + "learning_rate": 1.2401975122133812e-07, + "loss": 0.0113, + "step": 16254 + }, + { + "epoch": 4.94, + "learning_rate": 1.2284715430583847e-07, + "loss": 0.0373, + "step": 16255 + }, + { + "epoch": 4.94, + "learning_rate": 1.216801248870525e-07, + "loss": 0.0067, + "step": 16256 + }, + { + "epoch": 4.94, + "learning_rate": 1.2051866300832879e-07, + "loss": 0.0469, + "step": 16257 + }, + { + "epoch": 4.94, + "learning_rate": 1.193627687128329e-07, + "loss": 0.0142, + "step": 16258 + }, + { + "epoch": 4.94, + "learning_rate": 1.1821244204348047e-07, + "loss": 0.0177, + "step": 16259 + }, + { + "epoch": 4.94, + "learning_rate": 1.170676830430206e-07, + "loss": 0.0104, + "step": 16260 + }, + { + "epoch": 4.94, + "learning_rate": 1.159284917539527e-07, + "loss": 0.0272, + "step": 16261 + }, + { + "epoch": 4.94, + "learning_rate": 1.1479486821862615e-07, + "loss": 0.0326, + "step": 16262 + }, + { + "epoch": 4.94, + "learning_rate": 1.1366681247912402e-07, + "loss": 0.0656, + "step": 16263 + }, + { + "epoch": 4.94, + "learning_rate": 1.1254432457736273e-07, + "loss": 0.0201, + "step": 16264 + }, + { + "epoch": 4.94, + "learning_rate": 1.1142740455504229e-07, + "loss": 0.0325, + "step": 16265 + }, + { + "epoch": 4.94, + "learning_rate": 1.1031605245364616e-07, + "loss": 0.0157, + "step": 16266 + }, + { + "epoch": 4.94, + "learning_rate": 1.09210268314458e-07, + "loss": 0.0117, + "step": 16267 + }, + { + "epoch": 4.94, + "learning_rate": 1.0811005217856161e-07, + "loss": 0.0085, + "step": 16268 + }, + { + "epoch": 4.94, + "learning_rate": 1.070154040868243e-07, + "loss": 0.0399, + "step": 16269 + }, + { + "epoch": 4.94, + "learning_rate": 1.0592632407991352e-07, + "loss": 0.0412, + "step": 16270 + }, + { + "epoch": 4.94, + "learning_rate": 1.0484281219826363e-07, + "loss": 0.0399, + "step": 16271 + }, + { + "epoch": 4.94, + "learning_rate": 1.0376486848215904e-07, + "loss": 0.0437, + "step": 16272 + }, + { + "epoch": 4.94, + "learning_rate": 1.026924929716011e-07, + "loss": 0.0372, + "step": 16273 + }, + { + "epoch": 4.94, + "learning_rate": 1.016256857064579e-07, + "loss": 0.0366, + "step": 16274 + }, + { + "epoch": 4.94, + "learning_rate": 1.0056444672634778e-07, + "loss": 0.0371, + "step": 16275 + }, + { + "epoch": 4.94, + "learning_rate": 9.950877607068919e-08, + "loss": 0.0573, + "step": 16276 + }, + { + "epoch": 4.94, + "learning_rate": 9.845867377870076e-08, + "loss": 0.0304, + "step": 16277 + }, + { + "epoch": 4.94, + "learning_rate": 9.741413988938462e-08, + "loss": 0.0123, + "step": 16278 + }, + { + "epoch": 4.94, + "learning_rate": 9.637517444155973e-08, + "loss": 0.0244, + "step": 16279 + }, + { + "epoch": 4.94, + "learning_rate": 9.534177747379523e-08, + "loss": 0.0462, + "step": 16280 + }, + { + "epoch": 4.94, + "learning_rate": 9.431394902451039e-08, + "loss": 0.0361, + "step": 16281 + }, + { + "epoch": 4.94, + "learning_rate": 9.329168913184138e-08, + "loss": 0.0342, + "step": 16282 + }, + { + "epoch": 4.94, + "learning_rate": 9.227499783380776e-08, + "loss": 0.0156, + "step": 16283 + }, + { + "epoch": 4.94, + "learning_rate": 9.126387516814604e-08, + "loss": 0.027, + "step": 16284 + }, + { + "epoch": 4.94, + "learning_rate": 9.02583211724428e-08, + "loss": 0.0382, + "step": 16285 + }, + { + "epoch": 4.94, + "learning_rate": 8.92583358840182e-08, + "loss": 0.0285, + "step": 16286 + }, + { + "epoch": 4.95, + "learning_rate": 8.826391934004251e-08, + "loss": 0.0261, + "step": 16287 + }, + { + "epoch": 4.95, + "learning_rate": 8.727507157745284e-08, + "loss": 0.0117, + "step": 16288 + }, + { + "epoch": 4.95, + "learning_rate": 8.629179263298647e-08, + "loss": 0.0323, + "step": 16289 + }, + { + "epoch": 4.95, + "learning_rate": 8.531408254314753e-08, + "loss": 0.0283, + "step": 16290 + }, + { + "epoch": 4.95, + "learning_rate": 8.434194134429029e-08, + "loss": 0.045, + "step": 16291 + }, + { + "epoch": 4.95, + "learning_rate": 8.337536907248588e-08, + "loss": 0.0264, + "step": 16292 + }, + { + "epoch": 4.95, + "learning_rate": 8.241436576365556e-08, + "loss": 0.0449, + "step": 16293 + }, + { + "epoch": 4.95, + "learning_rate": 8.145893145352078e-08, + "loss": 0.0116, + "step": 16294 + }, + { + "epoch": 4.95, + "learning_rate": 8.05090661775365e-08, + "loss": 0.0151, + "step": 16295 + }, + { + "epoch": 4.95, + "learning_rate": 7.956476997102446e-08, + "loss": 0.0275, + "step": 16296 + }, + { + "epoch": 4.95, + "learning_rate": 7.86260428690233e-08, + "loss": 0.0229, + "step": 16297 + }, + { + "epoch": 4.95, + "learning_rate": 7.769288490643843e-08, + "loss": 0.0395, + "step": 16298 + }, + { + "epoch": 4.95, + "learning_rate": 7.676529611790883e-08, + "loss": 0.0243, + "step": 16299 + }, + { + "epoch": 4.95, + "learning_rate": 7.584327653790689e-08, + "loss": 0.0219, + "step": 16300 + }, + { + "epoch": 4.95, + "learning_rate": 7.492682620067192e-08, + "loss": 0.014, + "step": 16301 + }, + { + "epoch": 4.95, + "learning_rate": 7.401594514025999e-08, + "loss": 0.0364, + "step": 16302 + }, + { + "epoch": 4.95, + "learning_rate": 7.311063339051071e-08, + "loss": 0.0136, + "step": 16303 + }, + { + "epoch": 4.95, + "learning_rate": 7.221089098503052e-08, + "loss": 0.0198, + "step": 16304 + }, + { + "epoch": 4.95, + "learning_rate": 7.131671795725935e-08, + "loss": 0.0157, + "step": 16305 + }, + { + "epoch": 4.95, + "learning_rate": 7.042811434042061e-08, + "loss": 0.0308, + "step": 16306 + }, + { + "epoch": 4.95, + "learning_rate": 6.95450801675046e-08, + "loss": 0.0232, + "step": 16307 + }, + { + "epoch": 4.95, + "learning_rate": 6.866761547131838e-08, + "loss": 0.0115, + "step": 16308 + }, + { + "epoch": 4.95, + "learning_rate": 6.779572028445257e-08, + "loss": 0.0334, + "step": 16309 + }, + { + "epoch": 4.95, + "learning_rate": 6.692939463931457e-08, + "loss": 0.0529, + "step": 16310 + }, + { + "epoch": 4.95, + "learning_rate": 6.606863856807865e-08, + "loss": 0.0051, + "step": 16311 + }, + { + "epoch": 4.95, + "learning_rate": 6.521345210270257e-08, + "loss": 0.0468, + "step": 16312 + }, + { + "epoch": 4.95, + "learning_rate": 6.436383527497758e-08, + "loss": 0.0279, + "step": 16313 + }, + { + "epoch": 4.95, + "learning_rate": 6.351978811644509e-08, + "loss": 0.0347, + "step": 16314 + }, + { + "epoch": 4.95, + "learning_rate": 6.268131065848003e-08, + "loss": 0.0661, + "step": 16315 + }, + { + "epoch": 4.95, + "learning_rate": 6.18484029322075e-08, + "loss": 0.03, + "step": 16316 + }, + { + "epoch": 4.95, + "learning_rate": 6.10210649685694e-08, + "loss": 0.0149, + "step": 16317 + }, + { + "epoch": 4.95, + "learning_rate": 6.019929679832447e-08, + "loss": 0.0199, + "step": 16318 + }, + { + "epoch": 4.95, + "learning_rate": 5.938309845196498e-08, + "loss": 0.0107, + "step": 16319 + }, + { + "epoch": 4.96, + "learning_rate": 5.8572469959833335e-08, + "loss": 0.0307, + "step": 16320 + }, + { + "epoch": 4.96, + "learning_rate": 5.776741135203877e-08, + "loss": 0.0171, + "step": 16321 + }, + { + "epoch": 4.96, + "learning_rate": 5.6967922658474055e-08, + "loss": 0.0242, + "step": 16322 + }, + { + "epoch": 4.96, + "learning_rate": 5.617400390884874e-08, + "loss": 0.037, + "step": 16323 + }, + { + "epoch": 4.96, + "learning_rate": 5.538565513265592e-08, + "loss": 0.0266, + "step": 16324 + }, + { + "epoch": 4.96, + "learning_rate": 5.4602876359172155e-08, + "loss": 0.0206, + "step": 16325 + }, + { + "epoch": 4.96, + "learning_rate": 5.3825667617490855e-08, + "loss": 0.017, + "step": 16326 + }, + { + "epoch": 4.96, + "learning_rate": 5.305402893645561e-08, + "loss": 0.0156, + "step": 16327 + }, + { + "epoch": 4.96, + "learning_rate": 5.228796034476013e-08, + "loss": 0.0091, + "step": 16328 + }, + { + "epoch": 4.96, + "learning_rate": 5.152746187084833e-08, + "loss": 0.0203, + "step": 16329 + }, + { + "epoch": 4.96, + "learning_rate": 5.0772533542964286e-08, + "loss": 0.05, + "step": 16330 + }, + { + "epoch": 4.96, + "learning_rate": 5.002317538916889e-08, + "loss": 0.0528, + "step": 16331 + }, + { + "epoch": 4.96, + "learning_rate": 4.9279387437273223e-08, + "loss": 0.025, + "step": 16332 + }, + { + "epoch": 4.96, + "learning_rate": 4.8541169714938485e-08, + "loss": 0.0347, + "step": 16333 + }, + { + "epoch": 4.96, + "learning_rate": 4.7808522249559446e-08, + "loss": 0.0272, + "step": 16334 + }, + { + "epoch": 4.96, + "learning_rate": 4.708144506836431e-08, + "loss": 0.0304, + "step": 16335 + }, + { + "epoch": 4.96, + "learning_rate": 4.6359938198348156e-08, + "loss": 0.029, + "step": 16336 + }, + { + "epoch": 4.96, + "learning_rate": 4.564400166633952e-08, + "loss": 0.027, + "step": 16337 + }, + { + "epoch": 4.96, + "learning_rate": 4.493363549891715e-08, + "loss": 0.0165, + "step": 16338 + }, + { + "epoch": 4.96, + "learning_rate": 4.42288397224766e-08, + "loss": 0.0317, + "step": 16339 + }, + { + "epoch": 4.96, + "learning_rate": 4.3529614363180255e-08, + "loss": 0.0119, + "step": 16340 + }, + { + "epoch": 4.96, + "learning_rate": 4.2835959447024004e-08, + "loss": 0.02, + "step": 16341 + }, + { + "epoch": 4.96, + "learning_rate": 4.214787499977057e-08, + "loss": 0.0106, + "step": 16342 + }, + { + "epoch": 4.96, + "learning_rate": 4.1465361046966184e-08, + "loss": 0.0376, + "step": 16343 + }, + { + "epoch": 4.96, + "learning_rate": 4.078841761397389e-08, + "loss": 0.0221, + "step": 16344 + }, + { + "epoch": 4.96, + "learning_rate": 4.011704472594024e-08, + "loss": 0.0429, + "step": 16345 + }, + { + "epoch": 4.96, + "learning_rate": 3.9451242407811945e-08, + "loss": 0.0195, + "step": 16346 + }, + { + "epoch": 4.96, + "learning_rate": 3.879101068430257e-08, + "loss": 0.0228, + "step": 16347 + }, + { + "epoch": 4.96, + "learning_rate": 3.8136349579959145e-08, + "loss": 0.0146, + "step": 16348 + }, + { + "epoch": 4.96, + "learning_rate": 3.7487259119078906e-08, + "loss": 0.0244, + "step": 16349 + }, + { + "epoch": 4.96, + "learning_rate": 3.6843739325809193e-08, + "loss": 0.0388, + "step": 16350 + }, + { + "epoch": 4.96, + "learning_rate": 3.620579022401427e-08, + "loss": 0.0355, + "step": 16351 + }, + { + "epoch": 4.96, + "learning_rate": 3.5573411837408475e-08, + "loss": 0.0431, + "step": 16352 + }, + { + "epoch": 4.97, + "learning_rate": 3.494660418948969e-08, + "loss": 0.0333, + "step": 16353 + }, + { + "epoch": 4.97, + "learning_rate": 3.432536730352264e-08, + "loss": 0.0348, + "step": 16354 + }, + { + "epoch": 4.97, + "learning_rate": 3.370970120260552e-08, + "loss": 0.0151, + "step": 16355 + }, + { + "epoch": 4.97, + "learning_rate": 3.3099605909603365e-08, + "loss": 0.0239, + "step": 16356 + }, + { + "epoch": 4.97, + "learning_rate": 3.249508144718138e-08, + "loss": 0.0218, + "step": 16357 + }, + { + "epoch": 4.97, + "learning_rate": 3.189612783778828e-08, + "loss": 0.0364, + "step": 16358 + }, + { + "epoch": 4.97, + "learning_rate": 3.130274510367292e-08, + "loss": 0.0173, + "step": 16359 + }, + { + "epoch": 4.97, + "learning_rate": 3.071493326688435e-08, + "loss": 0.0387, + "step": 16360 + }, + { + "epoch": 4.97, + "learning_rate": 3.013269234923843e-08, + "loss": 0.0396, + "step": 16361 + }, + { + "epoch": 4.97, + "learning_rate": 2.955602237240118e-08, + "loss": 0.0271, + "step": 16362 + }, + { + "epoch": 4.97, + "learning_rate": 2.8984923357755486e-08, + "loss": 0.0328, + "step": 16363 + }, + { + "epoch": 4.97, + "learning_rate": 2.841939532655102e-08, + "loss": 0.0347, + "step": 16364 + }, + { + "epoch": 4.97, + "learning_rate": 2.7859438299770997e-08, + "loss": 0.039, + "step": 16365 + }, + { + "epoch": 4.97, + "learning_rate": 2.7305052298215448e-08, + "loss": 0.021, + "step": 16366 + }, + { + "epoch": 4.97, + "learning_rate": 2.6756237342501208e-08, + "loss": 0.0297, + "step": 16367 + }, + { + "epoch": 4.97, + "learning_rate": 2.621299345297867e-08, + "loss": 0.0534, + "step": 16368 + }, + { + "epoch": 4.97, + "learning_rate": 2.5675320649864995e-08, + "loss": 0.0271, + "step": 16369 + }, + { + "epoch": 4.97, + "learning_rate": 2.514321895311089e-08, + "loss": 0.0304, + "step": 16370 + }, + { + "epoch": 4.97, + "learning_rate": 2.4616688382500526e-08, + "loss": 0.025, + "step": 16371 + }, + { + "epoch": 4.97, + "learning_rate": 2.4095728957568284e-08, + "loss": 0.0366, + "step": 16372 + }, + { + "epoch": 4.97, + "learning_rate": 2.3580340697682e-08, + "loss": 0.019, + "step": 16373 + }, + { + "epoch": 4.97, + "learning_rate": 2.307052362199302e-08, + "loss": 0.0013, + "step": 16374 + }, + { + "epoch": 4.97, + "learning_rate": 2.2566277749419548e-08, + "loss": 0.0369, + "step": 16375 + }, + { + "epoch": 4.97, + "learning_rate": 2.2067603098696595e-08, + "loss": 0.0253, + "step": 16376 + }, + { + "epoch": 4.97, + "learning_rate": 2.157449968837599e-08, + "loss": 0.0202, + "step": 16377 + }, + { + "epoch": 4.97, + "learning_rate": 2.1086967536743105e-08, + "loss": 0.0184, + "step": 16378 + }, + { + "epoch": 4.97, + "learning_rate": 2.060500666191678e-08, + "loss": 0.0261, + "step": 16379 + }, + { + "epoch": 4.97, + "learning_rate": 2.012861708179936e-08, + "loss": 0.0106, + "step": 16380 + }, + { + "epoch": 4.97, + "learning_rate": 1.9657798814110006e-08, + "loss": 0.0282, + "step": 16381 + }, + { + "epoch": 4.97, + "learning_rate": 1.919255187630142e-08, + "loss": 0.0112, + "step": 16382 + }, + { + "epoch": 4.97, + "learning_rate": 1.8732876285676436e-08, + "loss": 0.0038, + "step": 16383 + }, + { + "epoch": 4.97, + "learning_rate": 1.8278772059321378e-08, + "loss": 0.0168, + "step": 16384 + }, + { + "epoch": 4.97, + "learning_rate": 1.7830239214089435e-08, + "loss": 0.019, + "step": 16385 + }, + { + "epoch": 4.98, + "learning_rate": 1.7387277766633957e-08, + "loss": 0.0452, + "step": 16386 + }, + { + "epoch": 4.98, + "learning_rate": 1.6949887733425093e-08, + "loss": 0.0132, + "step": 16387 + }, + { + "epoch": 4.98, + "learning_rate": 1.6518069130716515e-08, + "loss": 0.0231, + "step": 16388 + }, + { + "epoch": 4.98, + "learning_rate": 1.609182197452874e-08, + "loss": 0.0208, + "step": 16389 + }, + { + "epoch": 4.98, + "learning_rate": 1.5671146280699097e-08, + "loss": 0.0387, + "step": 16390 + }, + { + "epoch": 4.98, + "learning_rate": 1.5256042064881736e-08, + "loss": 0.0085, + "step": 16391 + }, + { + "epoch": 4.98, + "learning_rate": 1.484650934246434e-08, + "loss": 0.02, + "step": 16392 + }, + { + "epoch": 4.98, + "learning_rate": 1.4442548128668074e-08, + "loss": 0.0337, + "step": 16393 + }, + { + "epoch": 4.98, + "learning_rate": 1.4044158438497599e-08, + "loss": 0.015, + "step": 16394 + }, + { + "epoch": 4.98, + "learning_rate": 1.3651340286757739e-08, + "loss": 0.0185, + "step": 16395 + }, + { + "epoch": 4.98, + "learning_rate": 1.326409368805348e-08, + "loss": 0.0352, + "step": 16396 + }, + { + "epoch": 4.98, + "learning_rate": 1.2882418656740001e-08, + "loss": 0.032, + "step": 16397 + }, + { + "epoch": 4.98, + "learning_rate": 1.250631520702261e-08, + "loss": 0.0123, + "step": 16398 + }, + { + "epoch": 4.98, + "learning_rate": 1.2135783352856809e-08, + "loss": 0.0192, + "step": 16399 + }, + { + "epoch": 4.98, + "learning_rate": 1.1770823108014915e-08, + "loss": 0.0232, + "step": 16400 + }, + { + "epoch": 4.98, + "learning_rate": 1.1411434486036098e-08, + "loss": 0.037, + "step": 16401 + }, + { + "epoch": 4.98, + "learning_rate": 1.1057617500309646e-08, + "loss": 0.0178, + "step": 16402 + }, + { + "epoch": 4.98, + "learning_rate": 1.070937216392509e-08, + "loss": 0.0426, + "step": 16403 + }, + { + "epoch": 4.98, + "learning_rate": 1.0366698489872038e-08, + "loss": 0.07, + "step": 16404 + }, + { + "epoch": 4.98, + "learning_rate": 1.0029596490840341e-08, + "loss": 0.0066, + "step": 16405 + }, + { + "epoch": 4.98, + "learning_rate": 9.698066179369968e-09, + "loss": 0.0195, + "step": 16406 + }, + { + "epoch": 4.98, + "learning_rate": 9.372107567767738e-09, + "loss": 0.0255, + "step": 16407 + }, + { + "epoch": 4.98, + "learning_rate": 9.051720668157291e-09, + "loss": 0.0061, + "step": 16408 + }, + { + "epoch": 4.98, + "learning_rate": 8.736905492429114e-09, + "loss": 0.0323, + "step": 16409 + }, + { + "epoch": 4.98, + "learning_rate": 8.427662052273853e-09, + "loss": 0.009, + "step": 16410 + }, + { + "epoch": 4.98, + "learning_rate": 8.123990359198973e-09, + "loss": 0.032, + "step": 16411 + }, + { + "epoch": 4.98, + "learning_rate": 7.825890424462133e-09, + "loss": 0.0343, + "step": 16412 + }, + { + "epoch": 4.98, + "learning_rate": 7.533362259137809e-09, + "loss": 0.0417, + "step": 16413 + }, + { + "epoch": 4.98, + "learning_rate": 7.246405874100636e-09, + "loss": 0.0342, + "step": 16414 + }, + { + "epoch": 4.98, + "learning_rate": 6.9650212800087536e-09, + "loss": 0.0264, + "step": 16415 + }, + { + "epoch": 4.98, + "learning_rate": 6.689208487320463e-09, + "loss": 0.0325, + "step": 16416 + }, + { + "epoch": 4.98, + "learning_rate": 6.418967506277573e-09, + "loss": 0.0315, + "step": 16417 + }, + { + "epoch": 4.98, + "learning_rate": 6.1542983469220486e-09, + "loss": 0.0216, + "step": 16418 + }, + { + "epoch": 4.99, + "learning_rate": 5.8952010190793654e-09, + "loss": 0.0148, + "step": 16419 + }, + { + "epoch": 4.99, + "learning_rate": 5.6416755323585025e-09, + "loss": 0.0209, + "step": 16420 + }, + { + "epoch": 4.99, + "learning_rate": 5.393721896218561e-09, + "loss": 0.0096, + "step": 16421 + }, + { + "epoch": 4.99, + "learning_rate": 5.151340119835534e-09, + "loss": 0.0221, + "step": 16422 + }, + { + "epoch": 4.99, + "learning_rate": 4.914530212218881e-09, + "loss": 0.0253, + "step": 16423 + }, + { + "epoch": 4.99, + "learning_rate": 4.683292182178222e-09, + "loss": 0.0459, + "step": 16424 + }, + { + "epoch": 4.99, + "learning_rate": 4.45762603829003e-09, + "loss": 0.0214, + "step": 16425 + }, + { + "epoch": 4.99, + "learning_rate": 4.23753178894759e-09, + "loss": 0.0225, + "step": 16426 + }, + { + "epoch": 4.99, + "learning_rate": 4.023009442311042e-09, + "loss": 0.0192, + "step": 16427 + }, + { + "epoch": 4.99, + "learning_rate": 3.8140590063739925e-09, + "loss": 0.0476, + "step": 16428 + }, + { + "epoch": 4.99, + "learning_rate": 3.6106804888802465e-09, + "loss": 0.0324, + "step": 16429 + }, + { + "epoch": 4.99, + "learning_rate": 3.4128738973904222e-09, + "loss": 0.0044, + "step": 16430 + }, + { + "epoch": 4.99, + "learning_rate": 3.220639239248646e-09, + "loss": 0.0141, + "step": 16431 + }, + { + "epoch": 4.99, + "learning_rate": 3.0339765215992016e-09, + "loss": 0.0135, + "step": 16432 + }, + { + "epoch": 4.99, + "learning_rate": 2.852885751369882e-09, + "loss": 0.007, + "step": 16433 + }, + { + "epoch": 4.99, + "learning_rate": 2.6773669352886384e-09, + "loss": 0.0095, + "step": 16434 + }, + { + "epoch": 4.99, + "learning_rate": 2.5074200799002354e-09, + "loss": 0.0364, + "step": 16435 + }, + { + "epoch": 4.99, + "learning_rate": 2.343045191482984e-09, + "loss": 0.0335, + "step": 16436 + }, + { + "epoch": 4.99, + "learning_rate": 2.184242276148662e-09, + "loss": 0.0219, + "step": 16437 + }, + { + "epoch": 4.99, + "learning_rate": 2.031011339825861e-09, + "loss": 0.0142, + "step": 16438 + }, + { + "epoch": 4.99, + "learning_rate": 1.8833523881767175e-09, + "loss": 0.011, + "step": 16439 + }, + { + "epoch": 4.99, + "learning_rate": 1.7412654266801828e-09, + "loss": 0.0068, + "step": 16440 + }, + { + "epoch": 4.99, + "learning_rate": 1.6047504606486738e-09, + "loss": 0.0472, + "step": 16441 + }, + { + "epoch": 4.99, + "learning_rate": 1.4738074951281543e-09, + "loss": 0.0435, + "step": 16442 + }, + { + "epoch": 4.99, + "learning_rate": 1.3484365349980542e-09, + "loss": 0.0266, + "step": 16443 + }, + { + "epoch": 4.99, + "learning_rate": 1.2286375848880038e-09, + "loss": 0.0189, + "step": 16444 + }, + { + "epoch": 4.99, + "learning_rate": 1.1144106492777527e-09, + "loss": 0.0172, + "step": 16445 + }, + { + "epoch": 4.99, + "learning_rate": 1.005755732397251e-09, + "loss": 0.0138, + "step": 16446 + }, + { + "epoch": 4.99, + "learning_rate": 9.02672838276608e-10, + "loss": 0.0243, + "step": 16447 + }, + { + "epoch": 4.99, + "learning_rate": 8.051619707627466e-10, + "loss": 0.0116, + "step": 16448 + }, + { + "epoch": 4.99, + "learning_rate": 7.132231334694427e-10, + "loss": 0.024, + "step": 16449 + }, + { + "epoch": 4.99, + "learning_rate": 6.268563298106321e-10, + "loss": 0.0286, + "step": 16450 + }, + { + "epoch": 4.99, + "learning_rate": 5.460615629837572e-10, + "loss": 0.0314, + "step": 16451 + }, + { + "epoch": 5.0, + "learning_rate": 4.708388360030735e-10, + "loss": 0.0269, + "step": 16452 + }, + { + "epoch": 5.0, + "learning_rate": 4.011881516663429e-10, + "loss": 0.0308, + "step": 16453 + }, + { + "epoch": 5.0, + "learning_rate": 3.37109512554834e-10, + "loss": 0.0575, + "step": 16454 + }, + { + "epoch": 5.0, + "learning_rate": 2.786029210499752e-10, + "loss": 0.0372, + "step": 16455 + }, + { + "epoch": 5.0, + "learning_rate": 2.2566837931670133e-10, + "loss": 0.0347, + "step": 16456 + }, + { + "epoch": 5.0, + "learning_rate": 1.783058893367606e-10, + "loss": 0.0179, + "step": 16457 + }, + { + "epoch": 5.0, + "learning_rate": 1.3651545287540754e-10, + "loss": 0.0193, + "step": 16458 + }, + { + "epoch": 5.0, + "learning_rate": 1.0029707144809662e-10, + "loss": 0.0301, + "step": 16459 + }, + { + "epoch": 5.0, + "learning_rate": 6.965074643705549e-11, + "loss": 0.006, + "step": 16460 + }, + { + "epoch": 5.0, + "learning_rate": 4.457647895805827e-11, + "loss": 0.0242, + "step": 16461 + }, + { + "epoch": 5.0, + "learning_rate": 2.5074269960345672e-11, + "loss": 0.0493, + "step": 16462 + }, + { + "epoch": 5.0, + "learning_rate": 1.1144120160011538e-11, + "loss": 0.0081, + "step": 16463 + }, + { + "epoch": 5.0, + "learning_rate": 2.78603005665623e-12, + "loss": 0.023, + "step": 16464 + }, + { + "epoch": 5.0, + "learning_rate": 0.0, + "loss": 0.0655, + "step": 16465 + }, + { + "epoch": 5.0, + "step": 16465, + "total_flos": 5.860962792017756e+18, + "train_loss": 0.12529179257165163, + "train_runtime": 58502.878, + "train_samples_per_second": 2.252, + "train_steps_per_second": 0.281 + } + ], + "logging_steps": 1.0, + "max_steps": 16465, + "num_train_epochs": 5, + "save_steps": 0, + "total_flos": 5.860962792017756e+18, + "trial_name": null, + "trial_params": null +}