{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9998457504241863, "eval_steps": 500, "global_step": 3241, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00030849915162733303, "grad_norm": 0.0, "learning_rate": 2.0408163265306121e-07, "loss": 2.1748, "step": 1 }, { "epoch": 0.0006169983032546661, "grad_norm": 0.0, "learning_rate": 4.0816326530612243e-07, "loss": 2.3846, "step": 2 }, { "epoch": 0.0009254974548819991, "grad_norm": 0.0, "learning_rate": 6.122448979591837e-07, "loss": 2.3896, "step": 3 }, { "epoch": 0.0012339966065093321, "grad_norm": 0.0, "learning_rate": 8.163265306122449e-07, "loss": 2.2741, "step": 4 }, { "epoch": 0.001542495758136665, "grad_norm": 0.0, "learning_rate": 1.0204081632653063e-06, "loss": 2.4158, "step": 5 }, { "epoch": 0.0018509949097639982, "grad_norm": 0.0, "learning_rate": 1.2244897959183673e-06, "loss": 1.4049, "step": 6 }, { "epoch": 0.0021594940613913313, "grad_norm": 0.0, "learning_rate": 1.4285714285714286e-06, "loss": 2.1787, "step": 7 }, { "epoch": 0.0024679932130186643, "grad_norm": 0.0, "learning_rate": 1.6326530612244897e-06, "loss": 2.5618, "step": 8 }, { "epoch": 0.002776492364645997, "grad_norm": 0.0, "learning_rate": 1.8367346938775512e-06, "loss": 2.3799, "step": 9 }, { "epoch": 0.00308499151627333, "grad_norm": 0.0, "learning_rate": 2.0408163265306125e-06, "loss": 2.1604, "step": 10 }, { "epoch": 0.0033934906679006635, "grad_norm": 0.0, "learning_rate": 2.244897959183674e-06, "loss": 1.9946, "step": 11 }, { "epoch": 0.0037019898195279964, "grad_norm": 0.0, "learning_rate": 2.4489795918367347e-06, "loss": 1.996, "step": 12 }, { "epoch": 0.004010488971155329, "grad_norm": 0.0, "learning_rate": 2.6530612244897964e-06, "loss": 2.084, "step": 13 }, { "epoch": 0.004318988122782663, "grad_norm": 0.0, "learning_rate": 2.8571428571428573e-06, "loss": 1.7759, "step": 14 }, { "epoch": 0.004627487274409995, "grad_norm": 0.0, "learning_rate": 3.0612244897959185e-06, "loss": 1.6765, "step": 15 }, { "epoch": 0.0049359864260373285, "grad_norm": 0.0, "learning_rate": 3.2653061224489794e-06, "loss": 1.6134, "step": 16 }, { "epoch": 0.005244485577664661, "grad_norm": 0.0, "learning_rate": 3.469387755102041e-06, "loss": 1.6945, "step": 17 }, { "epoch": 0.005552984729291994, "grad_norm": 0.0, "learning_rate": 3.6734693877551024e-06, "loss": 1.367, "step": 18 }, { "epoch": 0.005861483880919328, "grad_norm": 0.0, "learning_rate": 3.877551020408164e-06, "loss": 1.5206, "step": 19 }, { "epoch": 0.00616998303254666, "grad_norm": 0.0, "learning_rate": 4.081632653061225e-06, "loss": 1.4788, "step": 20 }, { "epoch": 0.006478482184173994, "grad_norm": 0.0, "learning_rate": 4.2857142857142855e-06, "loss": 1.4541, "step": 21 }, { "epoch": 0.006786981335801327, "grad_norm": 0.0, "learning_rate": 4.489795918367348e-06, "loss": 1.3626, "step": 22 }, { "epoch": 0.007095480487428659, "grad_norm": 0.0, "learning_rate": 4.693877551020409e-06, "loss": 1.3584, "step": 23 }, { "epoch": 0.007403979639055993, "grad_norm": 0.0, "learning_rate": 4.897959183673469e-06, "loss": 1.3817, "step": 24 }, { "epoch": 0.007712478790683325, "grad_norm": 0.0, "learning_rate": 5.1020408163265315e-06, "loss": 1.4, "step": 25 }, { "epoch": 0.008020977942310659, "grad_norm": 0.0, "learning_rate": 5.306122448979593e-06, "loss": 1.2967, "step": 26 }, { "epoch": 0.008329477093937991, "grad_norm": 0.0, "learning_rate": 5.510204081632653e-06, "loss": 1.3896, "step": 27 }, { "epoch": 0.008637976245565325, "grad_norm": 0.0, "learning_rate": 5.7142857142857145e-06, "loss": 1.3068, "step": 28 }, { "epoch": 0.008946475397192658, "grad_norm": 0.0, "learning_rate": 5.918367346938776e-06, "loss": 1.2201, "step": 29 }, { "epoch": 0.00925497454881999, "grad_norm": 0.0, "learning_rate": 6.122448979591837e-06, "loss": 1.262, "step": 30 }, { "epoch": 0.009563473700447325, "grad_norm": 0.0, "learning_rate": 6.326530612244899e-06, "loss": 1.3774, "step": 31 }, { "epoch": 0.009871972852074657, "grad_norm": 0.0, "learning_rate": 6.530612244897959e-06, "loss": 1.2003, "step": 32 }, { "epoch": 0.01018047200370199, "grad_norm": 0.0, "learning_rate": 6.734693877551021e-06, "loss": 1.2146, "step": 33 }, { "epoch": 0.010488971155329322, "grad_norm": 0.0, "learning_rate": 6.938775510204082e-06, "loss": 1.1563, "step": 34 }, { "epoch": 0.010797470306956656, "grad_norm": 0.0, "learning_rate": 7.1428571428571436e-06, "loss": 1.2303, "step": 35 }, { "epoch": 0.011105969458583989, "grad_norm": 0.0, "learning_rate": 7.346938775510205e-06, "loss": 1.0958, "step": 36 }, { "epoch": 0.011414468610211321, "grad_norm": 0.0, "learning_rate": 7.551020408163265e-06, "loss": 1.1453, "step": 37 }, { "epoch": 0.011722967761838655, "grad_norm": 0.0, "learning_rate": 7.755102040816327e-06, "loss": 1.1907, "step": 38 }, { "epoch": 0.012031466913465988, "grad_norm": 0.0, "learning_rate": 7.959183673469388e-06, "loss": 1.181, "step": 39 }, { "epoch": 0.01233996606509332, "grad_norm": 0.0, "learning_rate": 8.16326530612245e-06, "loss": 1.147, "step": 40 }, { "epoch": 0.012648465216720655, "grad_norm": 0.0, "learning_rate": 8.36734693877551e-06, "loss": 1.1413, "step": 41 }, { "epoch": 0.012956964368347987, "grad_norm": 0.0, "learning_rate": 8.571428571428571e-06, "loss": 1.1279, "step": 42 }, { "epoch": 0.01326546351997532, "grad_norm": 0.0, "learning_rate": 8.775510204081633e-06, "loss": 1.1662, "step": 43 }, { "epoch": 0.013573962671602654, "grad_norm": 0.0, "learning_rate": 8.979591836734695e-06, "loss": 1.2089, "step": 44 }, { "epoch": 0.013882461823229986, "grad_norm": 0.0, "learning_rate": 9.183673469387756e-06, "loss": 1.1917, "step": 45 }, { "epoch": 0.014190960974857319, "grad_norm": 0.0, "learning_rate": 9.387755102040818e-06, "loss": 1.1331, "step": 46 }, { "epoch": 0.014499460126484651, "grad_norm": 0.0, "learning_rate": 9.591836734693878e-06, "loss": 1.0763, "step": 47 }, { "epoch": 0.014807959278111986, "grad_norm": 0.0, "learning_rate": 9.795918367346939e-06, "loss": 1.1443, "step": 48 }, { "epoch": 0.015116458429739318, "grad_norm": 0.0, "learning_rate": 1e-05, "loss": 1.1878, "step": 49 }, { "epoch": 0.01542495758136665, "grad_norm": 0.0, "learning_rate": 1.0204081632653063e-05, "loss": 0.9932, "step": 50 }, { "epoch": 0.015733456732993985, "grad_norm": 0.0, "learning_rate": 1.0408163265306123e-05, "loss": 1.1024, "step": 51 }, { "epoch": 0.016041955884621317, "grad_norm": 0.0, "learning_rate": 1.0612244897959186e-05, "loss": 1.132, "step": 52 }, { "epoch": 0.01635045503624865, "grad_norm": 0.0, "learning_rate": 1.0816326530612246e-05, "loss": 1.0862, "step": 53 }, { "epoch": 0.016658954187875982, "grad_norm": 0.0, "learning_rate": 1.1020408163265306e-05, "loss": 1.0846, "step": 54 }, { "epoch": 0.016967453339503315, "grad_norm": 0.0, "learning_rate": 1.1224489795918367e-05, "loss": 1.1018, "step": 55 }, { "epoch": 0.01727595249113065, "grad_norm": 0.0, "learning_rate": 1.1428571428571429e-05, "loss": 1.06, "step": 56 }, { "epoch": 0.017584451642757983, "grad_norm": 0.0, "learning_rate": 1.1632653061224491e-05, "loss": 1.0426, "step": 57 }, { "epoch": 0.017892950794385316, "grad_norm": 0.0, "learning_rate": 1.1836734693877552e-05, "loss": 1.1386, "step": 58 }, { "epoch": 0.018201449946012648, "grad_norm": 0.0, "learning_rate": 1.2040816326530614e-05, "loss": 1.0103, "step": 59 }, { "epoch": 0.01850994909763998, "grad_norm": 0.0, "learning_rate": 1.2244897959183674e-05, "loss": 1.0551, "step": 60 }, { "epoch": 0.018818448249267313, "grad_norm": 0.0, "learning_rate": 1.2448979591836736e-05, "loss": 0.9686, "step": 61 }, { "epoch": 0.01912694740089465, "grad_norm": 0.0, "learning_rate": 1.2653061224489798e-05, "loss": 1.0008, "step": 62 }, { "epoch": 0.01943544655252198, "grad_norm": 0.0, "learning_rate": 1.2857142857142859e-05, "loss": 1.0213, "step": 63 }, { "epoch": 0.019743945704149314, "grad_norm": 0.0, "learning_rate": 1.3061224489795918e-05, "loss": 1.0082, "step": 64 }, { "epoch": 0.020052444855776647, "grad_norm": 0.0, "learning_rate": 1.326530612244898e-05, "loss": 1.0559, "step": 65 }, { "epoch": 0.02036094400740398, "grad_norm": 0.0, "learning_rate": 1.3469387755102042e-05, "loss": 1.0025, "step": 66 }, { "epoch": 0.02066944315903131, "grad_norm": 0.0, "learning_rate": 1.3673469387755102e-05, "loss": 0.8757, "step": 67 }, { "epoch": 0.020977942310658644, "grad_norm": 0.0, "learning_rate": 1.3877551020408165e-05, "loss": 1.0191, "step": 68 }, { "epoch": 0.02128644146228598, "grad_norm": 0.0, "learning_rate": 1.4081632653061225e-05, "loss": 1.0413, "step": 69 }, { "epoch": 0.021594940613913313, "grad_norm": 0.0, "learning_rate": 1.4285714285714287e-05, "loss": 1.002, "step": 70 }, { "epoch": 0.021903439765540645, "grad_norm": 0.0, "learning_rate": 1.448979591836735e-05, "loss": 1.0391, "step": 71 }, { "epoch": 0.022211938917167977, "grad_norm": 0.0, "learning_rate": 1.469387755102041e-05, "loss": 1.0641, "step": 72 }, { "epoch": 0.02252043806879531, "grad_norm": 0.0, "learning_rate": 1.4897959183673472e-05, "loss": 1.0567, "step": 73 }, { "epoch": 0.022828937220422642, "grad_norm": 0.0, "learning_rate": 1.510204081632653e-05, "loss": 1.0336, "step": 74 }, { "epoch": 0.02313743637204998, "grad_norm": 0.0, "learning_rate": 1.530612244897959e-05, "loss": 1.0058, "step": 75 }, { "epoch": 0.02344593552367731, "grad_norm": 0.0, "learning_rate": 1.5510204081632655e-05, "loss": 1.119, "step": 76 }, { "epoch": 0.023754434675304643, "grad_norm": 0.0, "learning_rate": 1.5714285714285715e-05, "loss": 1.0142, "step": 77 }, { "epoch": 0.024062933826931976, "grad_norm": 0.0, "learning_rate": 1.5918367346938776e-05, "loss": 1.0398, "step": 78 }, { "epoch": 0.02437143297855931, "grad_norm": 0.0, "learning_rate": 1.612244897959184e-05, "loss": 0.9653, "step": 79 }, { "epoch": 0.02467993213018664, "grad_norm": 0.0, "learning_rate": 1.63265306122449e-05, "loss": 1.2841, "step": 80 }, { "epoch": 0.024988431281813973, "grad_norm": 0.0, "learning_rate": 1.653061224489796e-05, "loss": 1.02, "step": 81 }, { "epoch": 0.02529693043344131, "grad_norm": 0.0, "learning_rate": 1.673469387755102e-05, "loss": 0.9886, "step": 82 }, { "epoch": 0.025605429585068642, "grad_norm": 0.0, "learning_rate": 1.6938775510204085e-05, "loss": 0.9234, "step": 83 }, { "epoch": 0.025913928736695974, "grad_norm": 0.0, "learning_rate": 1.7142857142857142e-05, "loss": 0.9129, "step": 84 }, { "epoch": 0.026222427888323307, "grad_norm": 0.0, "learning_rate": 1.7346938775510206e-05, "loss": 0.9701, "step": 85 }, { "epoch": 0.02653092703995064, "grad_norm": 0.0, "learning_rate": 1.7551020408163266e-05, "loss": 1.0317, "step": 86 }, { "epoch": 0.026839426191577972, "grad_norm": 0.0, "learning_rate": 1.7755102040816327e-05, "loss": 0.9648, "step": 87 }, { "epoch": 0.027147925343205308, "grad_norm": 0.0, "learning_rate": 1.795918367346939e-05, "loss": 1.2338, "step": 88 }, { "epoch": 0.02745642449483264, "grad_norm": 0.0, "learning_rate": 1.816326530612245e-05, "loss": 0.9986, "step": 89 }, { "epoch": 0.027764923646459973, "grad_norm": 0.0, "learning_rate": 1.836734693877551e-05, "loss": 1.0245, "step": 90 }, { "epoch": 0.028073422798087305, "grad_norm": 0.0, "learning_rate": 1.8571428571428575e-05, "loss": 0.9307, "step": 91 }, { "epoch": 0.028381921949714638, "grad_norm": 0.0, "learning_rate": 1.8775510204081636e-05, "loss": 1.0043, "step": 92 }, { "epoch": 0.02869042110134197, "grad_norm": 0.0, "learning_rate": 1.8979591836734696e-05, "loss": 1.0452, "step": 93 }, { "epoch": 0.028998920252969303, "grad_norm": 0.0, "learning_rate": 1.9183673469387756e-05, "loss": 0.9908, "step": 94 }, { "epoch": 0.02930741940459664, "grad_norm": 0.0, "learning_rate": 1.9387755102040817e-05, "loss": 0.933, "step": 95 }, { "epoch": 0.02961591855622397, "grad_norm": 0.0, "learning_rate": 1.9591836734693877e-05, "loss": 0.9802, "step": 96 }, { "epoch": 0.029924417707851304, "grad_norm": 0.0, "learning_rate": 1.979591836734694e-05, "loss": 1.0628, "step": 97 }, { "epoch": 0.030232916859478636, "grad_norm": 0.0, "learning_rate": 2e-05, "loss": 1.0759, "step": 98 }, { "epoch": 0.03054141601110597, "grad_norm": 0.0, "learning_rate": 1.999999500447713e-05, "loss": 1.026, "step": 99 }, { "epoch": 0.0308499151627333, "grad_norm": 0.0, "learning_rate": 1.9999980017913514e-05, "loss": 0.9354, "step": 100 }, { "epoch": 0.031158414314360637, "grad_norm": 0.0, "learning_rate": 1.999995504032412e-05, "loss": 1.0366, "step": 101 }, { "epoch": 0.03146691346598797, "grad_norm": 0.0, "learning_rate": 1.999992007173391e-05, "loss": 0.9773, "step": 102 }, { "epoch": 0.0317754126176153, "grad_norm": 0.0, "learning_rate": 1.999987511217781e-05, "loss": 0.9804, "step": 103 }, { "epoch": 0.032083911769242635, "grad_norm": 0.0, "learning_rate": 1.999982016170075e-05, "loss": 0.9758, "step": 104 }, { "epoch": 0.03239241092086997, "grad_norm": 0.0, "learning_rate": 1.999975522035763e-05, "loss": 0.9966, "step": 105 }, { "epoch": 0.0327009100724973, "grad_norm": 0.0, "learning_rate": 1.999968028821333e-05, "loss": 0.9426, "step": 106 }, { "epoch": 0.03300940922412463, "grad_norm": 0.0, "learning_rate": 1.9999595365342715e-05, "loss": 1.0034, "step": 107 }, { "epoch": 0.033317908375751965, "grad_norm": 0.0, "learning_rate": 1.9999500451830634e-05, "loss": 0.9326, "step": 108 }, { "epoch": 0.0336264075273793, "grad_norm": 0.0, "learning_rate": 1.9999395547771914e-05, "loss": 1.0403, "step": 109 }, { "epoch": 0.03393490667900663, "grad_norm": 0.0, "learning_rate": 1.9999280653271364e-05, "loss": 1.0244, "step": 110 }, { "epoch": 0.03424340583063397, "grad_norm": 0.0, "learning_rate": 1.9999155768443777e-05, "loss": 0.9906, "step": 111 }, { "epoch": 0.0345519049822613, "grad_norm": 0.0, "learning_rate": 1.999902089341393e-05, "loss": 0.9533, "step": 112 }, { "epoch": 0.034860404133888634, "grad_norm": 0.0, "learning_rate": 1.999887602831657e-05, "loss": 1.0805, "step": 113 }, { "epoch": 0.035168903285515966, "grad_norm": 0.0, "learning_rate": 1.999872117329644e-05, "loss": 1.3153, "step": 114 }, { "epoch": 0.0354774024371433, "grad_norm": 0.0, "learning_rate": 1.999855632850825e-05, "loss": 0.9861, "step": 115 }, { "epoch": 0.03578590158877063, "grad_norm": 0.0, "learning_rate": 1.9998381494116693e-05, "loss": 1.0677, "step": 116 }, { "epoch": 0.036094400740397964, "grad_norm": 0.0, "learning_rate": 1.999819667029646e-05, "loss": 0.9798, "step": 117 }, { "epoch": 0.036402899892025296, "grad_norm": 0.0, "learning_rate": 1.99980018572322e-05, "loss": 0.9872, "step": 118 }, { "epoch": 0.03671139904365263, "grad_norm": 0.0, "learning_rate": 1.999779705511856e-05, "loss": 0.9897, "step": 119 }, { "epoch": 0.03701989819527996, "grad_norm": 0.0, "learning_rate": 1.9997582264160147e-05, "loss": 1.0007, "step": 120 }, { "epoch": 0.037328397346907294, "grad_norm": 0.0, "learning_rate": 1.9997357484571566e-05, "loss": 0.9305, "step": 121 }, { "epoch": 0.037636896498534626, "grad_norm": 0.0, "learning_rate": 1.9997122716577397e-05, "loss": 0.9443, "step": 122 }, { "epoch": 0.03794539565016196, "grad_norm": 0.0, "learning_rate": 1.9996877960412192e-05, "loss": 0.9924, "step": 123 }, { "epoch": 0.0382538948017893, "grad_norm": 0.0, "learning_rate": 1.9996623216320496e-05, "loss": 1.0336, "step": 124 }, { "epoch": 0.03856239395341663, "grad_norm": 0.0, "learning_rate": 1.9996358484556818e-05, "loss": 0.9855, "step": 125 }, { "epoch": 0.03887089310504396, "grad_norm": 0.0, "learning_rate": 1.9996083765385656e-05, "loss": 0.9731, "step": 126 }, { "epoch": 0.039179392256671296, "grad_norm": 0.0, "learning_rate": 1.999579905908148e-05, "loss": 1.0336, "step": 127 }, { "epoch": 0.03948789140829863, "grad_norm": 0.0, "learning_rate": 1.9995504365928746e-05, "loss": 0.9698, "step": 128 }, { "epoch": 0.03979639055992596, "grad_norm": 0.0, "learning_rate": 1.999519968622188e-05, "loss": 1.0549, "step": 129 }, { "epoch": 0.04010488971155329, "grad_norm": 0.0, "learning_rate": 1.9994885020265293e-05, "loss": 0.906, "step": 130 }, { "epoch": 0.040413388863180626, "grad_norm": 0.0, "learning_rate": 1.9994560368373366e-05, "loss": 1.0536, "step": 131 }, { "epoch": 0.04072188801480796, "grad_norm": 0.0, "learning_rate": 1.999422573087046e-05, "loss": 0.9394, "step": 132 }, { "epoch": 0.04103038716643529, "grad_norm": 0.0, "learning_rate": 1.999388110809091e-05, "loss": 1.0359, "step": 133 }, { "epoch": 0.04133888631806262, "grad_norm": 0.0, "learning_rate": 1.9993526500379037e-05, "loss": 1.0034, "step": 134 }, { "epoch": 0.041647385469689956, "grad_norm": 0.0, "learning_rate": 1.9993161908089125e-05, "loss": 0.9976, "step": 135 }, { "epoch": 0.04195588462131729, "grad_norm": 0.0, "learning_rate": 1.9992787331585447e-05, "loss": 0.9835, "step": 136 }, { "epoch": 0.04226438377294463, "grad_norm": 0.0, "learning_rate": 1.9992402771242236e-05, "loss": 0.9441, "step": 137 }, { "epoch": 0.04257288292457196, "grad_norm": 0.0, "learning_rate": 1.999200822744371e-05, "loss": 0.873, "step": 138 }, { "epoch": 0.04288138207619929, "grad_norm": 0.0, "learning_rate": 1.9991603700584063e-05, "loss": 1.0707, "step": 139 }, { "epoch": 0.043189881227826625, "grad_norm": 0.0, "learning_rate": 1.9991189191067457e-05, "loss": 0.9523, "step": 140 }, { "epoch": 0.04349838037945396, "grad_norm": 0.0, "learning_rate": 1.9990764699308033e-05, "loss": 0.9397, "step": 141 }, { "epoch": 0.04380687953108129, "grad_norm": 0.0, "learning_rate": 1.99903302257299e-05, "loss": 0.9902, "step": 142 }, { "epoch": 0.04411537868270862, "grad_norm": 0.0, "learning_rate": 1.9989885770767143e-05, "loss": 0.9969, "step": 143 }, { "epoch": 0.044423877834335955, "grad_norm": 0.0, "learning_rate": 1.9989431334863817e-05, "loss": 1.0595, "step": 144 }, { "epoch": 0.04473237698596329, "grad_norm": 0.0, "learning_rate": 1.9988966918473957e-05, "loss": 0.9438, "step": 145 }, { "epoch": 0.04504087613759062, "grad_norm": 0.0, "learning_rate": 1.998849252206156e-05, "loss": 0.9605, "step": 146 }, { "epoch": 0.04534937528921795, "grad_norm": 0.0, "learning_rate": 1.9988008146100594e-05, "loss": 1.1187, "step": 147 }, { "epoch": 0.045657874440845285, "grad_norm": 0.0, "learning_rate": 1.9987513791075007e-05, "loss": 0.9843, "step": 148 }, { "epoch": 0.04596637359247262, "grad_norm": 0.0, "learning_rate": 1.998700945747871e-05, "loss": 0.9245, "step": 149 }, { "epoch": 0.04627487274409996, "grad_norm": 0.0, "learning_rate": 1.998649514581558e-05, "loss": 0.9899, "step": 150 }, { "epoch": 0.04658337189572729, "grad_norm": 0.0, "learning_rate": 1.9985970856599476e-05, "loss": 1.0891, "step": 151 }, { "epoch": 0.04689187104735462, "grad_norm": 0.0, "learning_rate": 1.9985436590354207e-05, "loss": 1.0649, "step": 152 }, { "epoch": 0.047200370198981954, "grad_norm": 0.0, "learning_rate": 1.9984892347613577e-05, "loss": 1.0248, "step": 153 }, { "epoch": 0.04750886935060929, "grad_norm": 0.0, "learning_rate": 1.9984338128921327e-05, "loss": 0.9197, "step": 154 }, { "epoch": 0.04781736850223662, "grad_norm": 0.0, "learning_rate": 1.9983773934831185e-05, "loss": 1.0391, "step": 155 }, { "epoch": 0.04812586765386395, "grad_norm": 0.0, "learning_rate": 1.998319976590684e-05, "loss": 1.0306, "step": 156 }, { "epoch": 0.048434366805491284, "grad_norm": 0.0, "learning_rate": 1.9982615622721948e-05, "loss": 0.8968, "step": 157 }, { "epoch": 0.04874286595711862, "grad_norm": 0.0, "learning_rate": 1.9982021505860128e-05, "loss": 0.9711, "step": 158 }, { "epoch": 0.04905136510874595, "grad_norm": 0.0, "learning_rate": 1.9981417415914957e-05, "loss": 0.963, "step": 159 }, { "epoch": 0.04935986426037328, "grad_norm": 0.0, "learning_rate": 1.9980803353490002e-05, "loss": 1.0086, "step": 160 }, { "epoch": 0.049668363412000614, "grad_norm": 0.0, "learning_rate": 1.9980179319198757e-05, "loss": 0.9816, "step": 161 }, { "epoch": 0.04997686256362795, "grad_norm": 0.0, "learning_rate": 1.997954531366471e-05, "loss": 1.0639, "step": 162 }, { "epoch": 0.050285361715255286, "grad_norm": 0.0, "learning_rate": 1.997890133752129e-05, "loss": 0.9421, "step": 163 }, { "epoch": 0.05059386086688262, "grad_norm": 0.0, "learning_rate": 1.9978247391411905e-05, "loss": 0.9511, "step": 164 }, { "epoch": 0.05090236001850995, "grad_norm": 0.0, "learning_rate": 1.9977583475989907e-05, "loss": 0.9865, "step": 165 }, { "epoch": 0.051210859170137284, "grad_norm": 0.0, "learning_rate": 1.9976909591918624e-05, "loss": 0.9379, "step": 166 }, { "epoch": 0.051519358321764616, "grad_norm": 0.0, "learning_rate": 1.9976225739871335e-05, "loss": 1.0527, "step": 167 }, { "epoch": 0.05182785747339195, "grad_norm": 0.0, "learning_rate": 1.9975531920531277e-05, "loss": 0.9074, "step": 168 }, { "epoch": 0.05213635662501928, "grad_norm": 0.0, "learning_rate": 1.997482813459165e-05, "loss": 0.949, "step": 169 }, { "epoch": 0.052444855776646614, "grad_norm": 0.0, "learning_rate": 1.997411438275561e-05, "loss": 1.0138, "step": 170 }, { "epoch": 0.052753354928273946, "grad_norm": 0.0, "learning_rate": 1.997339066573627e-05, "loss": 0.9684, "step": 171 }, { "epoch": 0.05306185407990128, "grad_norm": 0.0, "learning_rate": 1.9972656984256695e-05, "loss": 0.8981, "step": 172 }, { "epoch": 0.05337035323152861, "grad_norm": 0.0, "learning_rate": 1.9971913339049914e-05, "loss": 0.9672, "step": 173 }, { "epoch": 0.053678852383155944, "grad_norm": 0.0, "learning_rate": 1.9971159730858903e-05, "loss": 1.0605, "step": 174 }, { "epoch": 0.053987351534783276, "grad_norm": 0.0, "learning_rate": 1.9970396160436602e-05, "loss": 0.8753, "step": 175 }, { "epoch": 0.054295850686410616, "grad_norm": 0.0, "learning_rate": 1.996962262854589e-05, "loss": 0.9577, "step": 176 }, { "epoch": 0.05460434983803795, "grad_norm": 0.0, "learning_rate": 1.996883913595961e-05, "loss": 1.0418, "step": 177 }, { "epoch": 0.05491284898966528, "grad_norm": 0.0, "learning_rate": 1.9968045683460554e-05, "loss": 0.8894, "step": 178 }, { "epoch": 0.05522134814129261, "grad_norm": 0.0, "learning_rate": 1.9967242271841458e-05, "loss": 0.9815, "step": 179 }, { "epoch": 0.055529847292919945, "grad_norm": 0.0, "learning_rate": 1.9966428901905025e-05, "loss": 0.9228, "step": 180 }, { "epoch": 0.05583834644454728, "grad_norm": 0.0, "learning_rate": 1.9965605574463885e-05, "loss": 0.9451, "step": 181 }, { "epoch": 0.05614684559617461, "grad_norm": 0.0, "learning_rate": 1.9964772290340633e-05, "loss": 1.0085, "step": 182 }, { "epoch": 0.05645534474780194, "grad_norm": 0.0, "learning_rate": 1.996392905036781e-05, "loss": 0.9334, "step": 183 }, { "epoch": 0.056763843899429275, "grad_norm": 0.0, "learning_rate": 1.9963075855387898e-05, "loss": 1.0261, "step": 184 }, { "epoch": 0.05707234305105661, "grad_norm": 0.0, "learning_rate": 1.9962212706253327e-05, "loss": 1.0683, "step": 185 }, { "epoch": 0.05738084220268394, "grad_norm": 0.0, "learning_rate": 1.9961339603826476e-05, "loss": 1.0197, "step": 186 }, { "epoch": 0.05768934135431127, "grad_norm": 0.0, "learning_rate": 1.9960456548979664e-05, "loss": 0.9587, "step": 187 }, { "epoch": 0.057997840505938605, "grad_norm": 0.0, "learning_rate": 1.9959563542595157e-05, "loss": 0.9745, "step": 188 }, { "epoch": 0.058306339657565945, "grad_norm": 0.0, "learning_rate": 1.995866058556516e-05, "loss": 0.9802, "step": 189 }, { "epoch": 0.05861483880919328, "grad_norm": 0.0, "learning_rate": 1.995774767879182e-05, "loss": 0.9877, "step": 190 }, { "epoch": 0.05892333796082061, "grad_norm": 0.0, "learning_rate": 1.995682482318723e-05, "loss": 0.967, "step": 191 }, { "epoch": 0.05923183711244794, "grad_norm": 0.0, "learning_rate": 1.9955892019673412e-05, "loss": 1.065, "step": 192 }, { "epoch": 0.059540336264075275, "grad_norm": 0.0, "learning_rate": 1.9954949269182343e-05, "loss": 0.9254, "step": 193 }, { "epoch": 0.05984883541570261, "grad_norm": 0.0, "learning_rate": 1.9953996572655928e-05, "loss": 0.947, "step": 194 }, { "epoch": 0.06015733456732994, "grad_norm": 0.0, "learning_rate": 1.9953033931046005e-05, "loss": 1.0769, "step": 195 }, { "epoch": 0.06046583371895727, "grad_norm": 0.0, "learning_rate": 1.9952061345314355e-05, "loss": 1.0375, "step": 196 }, { "epoch": 0.060774332870584605, "grad_norm": 0.0, "learning_rate": 1.99510788164327e-05, "loss": 0.9665, "step": 197 }, { "epoch": 0.06108283202221194, "grad_norm": 0.0, "learning_rate": 1.995008634538268e-05, "loss": 0.9598, "step": 198 }, { "epoch": 0.06139133117383927, "grad_norm": 0.0, "learning_rate": 1.994908393315588e-05, "loss": 0.9486, "step": 199 }, { "epoch": 0.0616998303254666, "grad_norm": 0.0, "learning_rate": 1.994807158075382e-05, "loss": 0.9422, "step": 200 }, { "epoch": 0.062008329477093935, "grad_norm": 0.0, "learning_rate": 1.9947049289187942e-05, "loss": 0.8275, "step": 201 }, { "epoch": 0.062316828628721274, "grad_norm": 0.0, "learning_rate": 1.9946017059479617e-05, "loss": 0.9641, "step": 202 }, { "epoch": 0.0626253277803486, "grad_norm": 0.0, "learning_rate": 1.9944974892660158e-05, "loss": 1.0779, "step": 203 }, { "epoch": 0.06293382693197594, "grad_norm": 0.0, "learning_rate": 1.9943922789770797e-05, "loss": 1.0373, "step": 204 }, { "epoch": 0.06324232608360326, "grad_norm": 0.0, "learning_rate": 1.9942860751862696e-05, "loss": 1.0035, "step": 205 }, { "epoch": 0.0635508252352306, "grad_norm": 0.0, "learning_rate": 1.9941788779996937e-05, "loss": 1.0022, "step": 206 }, { "epoch": 0.06385932438685793, "grad_norm": 0.0, "learning_rate": 1.994070687524453e-05, "loss": 0.9576, "step": 207 }, { "epoch": 0.06416782353848527, "grad_norm": 0.0, "learning_rate": 1.9939615038686423e-05, "loss": 0.9669, "step": 208 }, { "epoch": 0.06447632269011261, "grad_norm": 0.0, "learning_rate": 1.9938513271413464e-05, "loss": 0.9042, "step": 209 }, { "epoch": 0.06478482184173993, "grad_norm": 0.0, "learning_rate": 1.993740157452644e-05, "loss": 0.8647, "step": 210 }, { "epoch": 0.06509332099336727, "grad_norm": 0.0, "learning_rate": 1.9936279949136047e-05, "loss": 0.9643, "step": 211 }, { "epoch": 0.0654018201449946, "grad_norm": 0.0, "learning_rate": 1.993514839636291e-05, "loss": 0.8882, "step": 212 }, { "epoch": 0.06571031929662194, "grad_norm": 0.0, "learning_rate": 1.993400691733757e-05, "loss": 1.0237, "step": 213 }, { "epoch": 0.06601881844824926, "grad_norm": 0.0, "learning_rate": 1.993285551320048e-05, "loss": 0.96, "step": 214 }, { "epoch": 0.0663273175998766, "grad_norm": 0.0, "learning_rate": 1.993169418510201e-05, "loss": 0.9605, "step": 215 }, { "epoch": 0.06663581675150393, "grad_norm": 0.0, "learning_rate": 1.9930522934202455e-05, "loss": 0.837, "step": 216 }, { "epoch": 0.06694431590313127, "grad_norm": 0.0, "learning_rate": 1.9929341761672017e-05, "loss": 0.9478, "step": 217 }, { "epoch": 0.0672528150547586, "grad_norm": 0.0, "learning_rate": 1.9928150668690807e-05, "loss": 1.013, "step": 218 }, { "epoch": 0.06756131420638593, "grad_norm": 0.0, "learning_rate": 1.992694965644885e-05, "loss": 0.8715, "step": 219 }, { "epoch": 0.06786981335801326, "grad_norm": 0.0, "learning_rate": 1.9925738726146094e-05, "loss": 0.9242, "step": 220 }, { "epoch": 0.0681783125096406, "grad_norm": 0.0, "learning_rate": 1.992451787899237e-05, "loss": 0.9094, "step": 221 }, { "epoch": 0.06848681166126794, "grad_norm": 0.0, "learning_rate": 1.9923287116207442e-05, "loss": 0.9811, "step": 222 }, { "epoch": 0.06879531081289526, "grad_norm": 0.0, "learning_rate": 1.9922046439020967e-05, "loss": 0.927, "step": 223 }, { "epoch": 0.0691038099645226, "grad_norm": 0.0, "learning_rate": 1.9920795848672512e-05, "loss": 0.9411, "step": 224 }, { "epoch": 0.06941230911614993, "grad_norm": 0.0, "learning_rate": 1.9919535346411546e-05, "loss": 0.9485, "step": 225 }, { "epoch": 0.06972080826777727, "grad_norm": 0.0, "learning_rate": 1.9918264933497444e-05, "loss": 0.97, "step": 226 }, { "epoch": 0.0700293074194046, "grad_norm": 0.0, "learning_rate": 1.9916984611199485e-05, "loss": 0.9, "step": 227 }, { "epoch": 0.07033780657103193, "grad_norm": 0.0, "learning_rate": 1.9915694380796838e-05, "loss": 1.0275, "step": 228 }, { "epoch": 0.07064630572265926, "grad_norm": 0.0, "learning_rate": 1.9914394243578582e-05, "loss": 0.8988, "step": 229 }, { "epoch": 0.0709548048742866, "grad_norm": 0.0, "learning_rate": 1.991308420084369e-05, "loss": 1.0184, "step": 230 }, { "epoch": 0.07126330402591392, "grad_norm": 0.0, "learning_rate": 1.9911764253901032e-05, "loss": 0.9032, "step": 231 }, { "epoch": 0.07157180317754126, "grad_norm": 0.0, "learning_rate": 1.9910434404069368e-05, "loss": 0.9098, "step": 232 }, { "epoch": 0.07188030232916859, "grad_norm": 0.0, "learning_rate": 1.9909094652677364e-05, "loss": 0.9501, "step": 233 }, { "epoch": 0.07218880148079593, "grad_norm": 0.0, "learning_rate": 1.990774500106357e-05, "loss": 0.9101, "step": 234 }, { "epoch": 0.07249730063242327, "grad_norm": 0.0, "learning_rate": 1.9906385450576424e-05, "loss": 0.9228, "step": 235 }, { "epoch": 0.07280579978405059, "grad_norm": 0.0, "learning_rate": 1.9905016002574266e-05, "loss": 1.0251, "step": 236 }, { "epoch": 0.07311429893567793, "grad_norm": 0.0, "learning_rate": 1.9903636658425316e-05, "loss": 0.9231, "step": 237 }, { "epoch": 0.07342279808730526, "grad_norm": 0.0, "learning_rate": 1.990224741950768e-05, "loss": 0.9371, "step": 238 }, { "epoch": 0.0737312972389326, "grad_norm": 0.0, "learning_rate": 1.9900848287209356e-05, "loss": 1.0318, "step": 239 }, { "epoch": 0.07403979639055992, "grad_norm": 0.0, "learning_rate": 1.989943926292822e-05, "loss": 0.9119, "step": 240 }, { "epoch": 0.07434829554218726, "grad_norm": 0.0, "learning_rate": 1.989802034807204e-05, "loss": 1.0122, "step": 241 }, { "epoch": 0.07465679469381459, "grad_norm": 0.0, "learning_rate": 1.9896591544058458e-05, "loss": 1.006, "step": 242 }, { "epoch": 0.07496529384544193, "grad_norm": 0.0, "learning_rate": 1.9895152852314995e-05, "loss": 0.9525, "step": 243 }, { "epoch": 0.07527379299706925, "grad_norm": 0.0, "learning_rate": 1.9893704274279057e-05, "loss": 0.8847, "step": 244 }, { "epoch": 0.07558229214869659, "grad_norm": 0.0, "learning_rate": 1.9892245811397924e-05, "loss": 1.0279, "step": 245 }, { "epoch": 0.07589079130032392, "grad_norm": 0.0, "learning_rate": 1.989077746512876e-05, "loss": 0.9846, "step": 246 }, { "epoch": 0.07619929045195126, "grad_norm": 0.0, "learning_rate": 1.9889299236938585e-05, "loss": 1.023, "step": 247 }, { "epoch": 0.0765077896035786, "grad_norm": 0.0, "learning_rate": 1.9887811128304312e-05, "loss": 0.9629, "step": 248 }, { "epoch": 0.07681628875520592, "grad_norm": 0.0, "learning_rate": 1.9886313140712717e-05, "loss": 1.0206, "step": 249 }, { "epoch": 0.07712478790683326, "grad_norm": 0.0, "learning_rate": 1.988480527566044e-05, "loss": 0.9065, "step": 250 }, { "epoch": 0.07743328705846059, "grad_norm": 0.0, "learning_rate": 1.9883287534654e-05, "loss": 0.951, "step": 251 }, { "epoch": 0.07774178621008793, "grad_norm": 0.0, "learning_rate": 1.988175991920978e-05, "loss": 1.0458, "step": 252 }, { "epoch": 0.07805028536171525, "grad_norm": 0.0, "learning_rate": 1.9880222430854025e-05, "loss": 0.9257, "step": 253 }, { "epoch": 0.07835878451334259, "grad_norm": 0.0, "learning_rate": 1.9878675071122848e-05, "loss": 0.9411, "step": 254 }, { "epoch": 0.07866728366496992, "grad_norm": 0.0, "learning_rate": 1.9877117841562222e-05, "loss": 1.002, "step": 255 }, { "epoch": 0.07897578281659726, "grad_norm": 0.0, "learning_rate": 1.9875550743727982e-05, "loss": 0.8932, "step": 256 }, { "epoch": 0.07928428196822458, "grad_norm": 0.0, "learning_rate": 1.9873973779185828e-05, "loss": 0.9698, "step": 257 }, { "epoch": 0.07959278111985192, "grad_norm": 0.0, "learning_rate": 1.9872386949511308e-05, "loss": 0.8699, "step": 258 }, { "epoch": 0.07990128027147925, "grad_norm": 0.0, "learning_rate": 1.9870790256289827e-05, "loss": 0.9997, "step": 259 }, { "epoch": 0.08020977942310659, "grad_norm": 0.0, "learning_rate": 1.9869183701116655e-05, "loss": 0.9943, "step": 260 }, { "epoch": 0.08051827857473393, "grad_norm": 0.0, "learning_rate": 1.9867567285596903e-05, "loss": 1.0419, "step": 261 }, { "epoch": 0.08082677772636125, "grad_norm": 0.0, "learning_rate": 1.9865941011345547e-05, "loss": 1.3233, "step": 262 }, { "epoch": 0.08113527687798859, "grad_norm": 0.0, "learning_rate": 1.9864304879987397e-05, "loss": 0.8918, "step": 263 }, { "epoch": 0.08144377602961592, "grad_norm": 0.0, "learning_rate": 1.9862658893157124e-05, "loss": 0.9424, "step": 264 }, { "epoch": 0.08175227518124326, "grad_norm": 0.0, "learning_rate": 1.986100305249924e-05, "loss": 0.9601, "step": 265 }, { "epoch": 0.08206077433287058, "grad_norm": 0.0, "learning_rate": 1.9859337359668102e-05, "loss": 0.9477, "step": 266 }, { "epoch": 0.08236927348449792, "grad_norm": 0.0, "learning_rate": 1.9857661816327913e-05, "loss": 0.9606, "step": 267 }, { "epoch": 0.08267777263612525, "grad_norm": 0.0, "learning_rate": 1.9855976424152713e-05, "loss": 1.0228, "step": 268 }, { "epoch": 0.08298627178775259, "grad_norm": 0.0, "learning_rate": 1.9854281184826386e-05, "loss": 0.9454, "step": 269 }, { "epoch": 0.08329477093937991, "grad_norm": 0.0, "learning_rate": 1.9852576100042656e-05, "loss": 0.9233, "step": 270 }, { "epoch": 0.08360327009100725, "grad_norm": 0.0, "learning_rate": 1.985086117150508e-05, "loss": 0.9667, "step": 271 }, { "epoch": 0.08391176924263458, "grad_norm": 0.0, "learning_rate": 1.9849136400927048e-05, "loss": 0.9803, "step": 272 }, { "epoch": 0.08422026839426192, "grad_norm": 0.0, "learning_rate": 1.9847401790031792e-05, "loss": 0.861, "step": 273 }, { "epoch": 0.08452876754588926, "grad_norm": 0.0, "learning_rate": 1.9845657340552366e-05, "loss": 0.9085, "step": 274 }, { "epoch": 0.08483726669751658, "grad_norm": 0.0, "learning_rate": 1.9843903054231653e-05, "loss": 0.996, "step": 275 }, { "epoch": 0.08514576584914392, "grad_norm": 0.0, "learning_rate": 1.9842138932822377e-05, "loss": 0.9158, "step": 276 }, { "epoch": 0.08545426500077125, "grad_norm": 0.0, "learning_rate": 1.9840364978087073e-05, "loss": 0.842, "step": 277 }, { "epoch": 0.08576276415239859, "grad_norm": 0.0, "learning_rate": 1.9838581191798117e-05, "loss": 0.9036, "step": 278 }, { "epoch": 0.08607126330402591, "grad_norm": 0.0, "learning_rate": 1.9836787575737683e-05, "loss": 0.8667, "step": 279 }, { "epoch": 0.08637976245565325, "grad_norm": 0.0, "learning_rate": 1.9834984131697796e-05, "loss": 0.9167, "step": 280 }, { "epoch": 0.08668826160728058, "grad_norm": 0.0, "learning_rate": 1.983317086148027e-05, "loss": 0.9307, "step": 281 }, { "epoch": 0.08699676075890792, "grad_norm": 0.0, "learning_rate": 1.9831347766896766e-05, "loss": 1.3152, "step": 282 }, { "epoch": 0.08730525991053524, "grad_norm": 0.0, "learning_rate": 1.9829514849768737e-05, "loss": 0.9292, "step": 283 }, { "epoch": 0.08761375906216258, "grad_norm": 0.0, "learning_rate": 1.9827672111927466e-05, "loss": 0.9636, "step": 284 }, { "epoch": 0.0879222582137899, "grad_norm": 0.0, "learning_rate": 1.9825819555214035e-05, "loss": 0.9255, "step": 285 }, { "epoch": 0.08823075736541725, "grad_norm": 0.0, "learning_rate": 1.982395718147934e-05, "loss": 0.8859, "step": 286 }, { "epoch": 0.08853925651704458, "grad_norm": 0.0, "learning_rate": 1.9822084992584098e-05, "loss": 0.8519, "step": 287 }, { "epoch": 0.08884775566867191, "grad_norm": 0.0, "learning_rate": 1.982020299039881e-05, "loss": 0.9121, "step": 288 }, { "epoch": 0.08915625482029925, "grad_norm": 0.0, "learning_rate": 1.9818311176803796e-05, "loss": 0.9241, "step": 289 }, { "epoch": 0.08946475397192657, "grad_norm": 0.0, "learning_rate": 1.9816409553689178e-05, "loss": 0.9331, "step": 290 }, { "epoch": 0.08977325312355391, "grad_norm": 0.0, "learning_rate": 1.9814498122954875e-05, "loss": 0.9826, "step": 291 }, { "epoch": 0.09008175227518124, "grad_norm": 0.0, "learning_rate": 1.9812576886510607e-05, "loss": 0.9549, "step": 292 }, { "epoch": 0.09039025142680858, "grad_norm": 0.0, "learning_rate": 1.981064584627589e-05, "loss": 0.9691, "step": 293 }, { "epoch": 0.0906987505784359, "grad_norm": 0.0, "learning_rate": 1.9808705004180032e-05, "loss": 0.907, "step": 294 }, { "epoch": 0.09100724973006324, "grad_norm": 0.0, "learning_rate": 1.9806754362162143e-05, "loss": 0.8657, "step": 295 }, { "epoch": 0.09131574888169057, "grad_norm": 0.0, "learning_rate": 1.9804793922171114e-05, "loss": 1.0198, "step": 296 }, { "epoch": 0.09162424803331791, "grad_norm": 0.0, "learning_rate": 1.980282368616563e-05, "loss": 0.8932, "step": 297 }, { "epoch": 0.09193274718494523, "grad_norm": 0.0, "learning_rate": 1.9800843656114167e-05, "loss": 0.8682, "step": 298 }, { "epoch": 0.09224124633657257, "grad_norm": 0.0, "learning_rate": 1.9798853833994975e-05, "loss": 0.9221, "step": 299 }, { "epoch": 0.09254974548819991, "grad_norm": 0.0, "learning_rate": 1.9796854221796097e-05, "loss": 1.0219, "step": 300 }, { "epoch": 0.09285824463982724, "grad_norm": 0.0, "learning_rate": 1.979484482151536e-05, "loss": 1.0476, "step": 301 }, { "epoch": 0.09316674379145458, "grad_norm": 0.0, "learning_rate": 1.9792825635160357e-05, "loss": 0.8485, "step": 302 }, { "epoch": 0.0934752429430819, "grad_norm": 0.0, "learning_rate": 1.979079666474847e-05, "loss": 0.9505, "step": 303 }, { "epoch": 0.09378374209470924, "grad_norm": 0.0, "learning_rate": 1.9788757912306856e-05, "loss": 0.926, "step": 304 }, { "epoch": 0.09409224124633657, "grad_norm": 0.0, "learning_rate": 1.9786709379872436e-05, "loss": 0.9169, "step": 305 }, { "epoch": 0.09440074039796391, "grad_norm": 0.0, "learning_rate": 1.9784651069491914e-05, "loss": 0.8907, "step": 306 }, { "epoch": 0.09470923954959123, "grad_norm": 0.0, "learning_rate": 1.978258298322175e-05, "loss": 0.9022, "step": 307 }, { "epoch": 0.09501773870121857, "grad_norm": 0.0, "learning_rate": 1.9780505123128187e-05, "loss": 0.8888, "step": 308 }, { "epoch": 0.0953262378528459, "grad_norm": 0.0, "learning_rate": 1.9778417491287217e-05, "loss": 0.967, "step": 309 }, { "epoch": 0.09563473700447324, "grad_norm": 0.0, "learning_rate": 1.9776320089784605e-05, "loss": 0.9804, "step": 310 }, { "epoch": 0.09594323615610056, "grad_norm": 0.0, "learning_rate": 1.9774212920715876e-05, "loss": 0.9284, "step": 311 }, { "epoch": 0.0962517353077279, "grad_norm": 0.0, "learning_rate": 1.977209598618631e-05, "loss": 0.9764, "step": 312 }, { "epoch": 0.09656023445935524, "grad_norm": 0.0, "learning_rate": 1.976996928831095e-05, "loss": 0.9649, "step": 313 }, { "epoch": 0.09686873361098257, "grad_norm": 0.0, "learning_rate": 1.9767832829214588e-05, "loss": 0.9338, "step": 314 }, { "epoch": 0.09717723276260991, "grad_norm": 0.0, "learning_rate": 1.976568661103176e-05, "loss": 0.9494, "step": 315 }, { "epoch": 0.09748573191423723, "grad_norm": 0.0, "learning_rate": 1.976353063590678e-05, "loss": 0.9908, "step": 316 }, { "epoch": 0.09779423106586457, "grad_norm": 0.0, "learning_rate": 1.976136490599368e-05, "loss": 0.9387, "step": 317 }, { "epoch": 0.0981027302174919, "grad_norm": 0.0, "learning_rate": 1.975918942345626e-05, "loss": 0.8793, "step": 318 }, { "epoch": 0.09841122936911924, "grad_norm": 0.0, "learning_rate": 1.975700419046804e-05, "loss": 0.8569, "step": 319 }, { "epoch": 0.09871972852074656, "grad_norm": 0.0, "learning_rate": 1.975480920921231e-05, "loss": 1.0324, "step": 320 }, { "epoch": 0.0990282276723739, "grad_norm": 0.0, "learning_rate": 1.975260448188208e-05, "loss": 0.9234, "step": 321 }, { "epoch": 0.09933672682400123, "grad_norm": 0.0, "learning_rate": 1.975039001068011e-05, "loss": 0.9475, "step": 322 }, { "epoch": 0.09964522597562857, "grad_norm": 0.0, "learning_rate": 1.9748165797818875e-05, "loss": 0.9887, "step": 323 }, { "epoch": 0.0999537251272559, "grad_norm": 0.0, "learning_rate": 1.974593184552061e-05, "loss": 0.9338, "step": 324 }, { "epoch": 0.10026222427888323, "grad_norm": 0.0, "learning_rate": 1.9743688156017254e-05, "loss": 0.9126, "step": 325 }, { "epoch": 0.10057072343051057, "grad_norm": 0.0, "learning_rate": 1.9741434731550498e-05, "loss": 0.9219, "step": 326 }, { "epoch": 0.1008792225821379, "grad_norm": 0.0, "learning_rate": 1.9739171574371744e-05, "loss": 0.8622, "step": 327 }, { "epoch": 0.10118772173376524, "grad_norm": 0.0, "learning_rate": 1.9736898686742125e-05, "loss": 0.9795, "step": 328 }, { "epoch": 0.10149622088539256, "grad_norm": 0.0, "learning_rate": 1.973461607093249e-05, "loss": 0.8908, "step": 329 }, { "epoch": 0.1018047200370199, "grad_norm": 0.0, "learning_rate": 1.9732323729223414e-05, "loss": 0.9311, "step": 330 }, { "epoch": 0.10211321918864723, "grad_norm": 0.0, "learning_rate": 1.9730021663905186e-05, "loss": 0.9054, "step": 331 }, { "epoch": 0.10242171834027457, "grad_norm": 0.0, "learning_rate": 1.972770987727781e-05, "loss": 0.9287, "step": 332 }, { "epoch": 0.10273021749190189, "grad_norm": 0.0, "learning_rate": 1.9725388371651e-05, "loss": 0.9404, "step": 333 }, { "epoch": 0.10303871664352923, "grad_norm": 0.0, "learning_rate": 1.9723057149344184e-05, "loss": 0.9188, "step": 334 }, { "epoch": 0.10334721579515656, "grad_norm": 0.0, "learning_rate": 1.97207162126865e-05, "loss": 1.0049, "step": 335 }, { "epoch": 0.1036557149467839, "grad_norm": 0.0, "learning_rate": 1.9718365564016785e-05, "loss": 0.9919, "step": 336 }, { "epoch": 0.10396421409841122, "grad_norm": 0.0, "learning_rate": 1.971600520568358e-05, "loss": 0.8696, "step": 337 }, { "epoch": 0.10427271325003856, "grad_norm": 0.0, "learning_rate": 1.9713635140045134e-05, "loss": 0.9094, "step": 338 }, { "epoch": 0.1045812124016659, "grad_norm": 0.0, "learning_rate": 1.9711255369469394e-05, "loss": 0.9393, "step": 339 }, { "epoch": 0.10488971155329323, "grad_norm": 0.0, "learning_rate": 1.9708865896333993e-05, "loss": 0.9828, "step": 340 }, { "epoch": 0.10519821070492057, "grad_norm": 0.0, "learning_rate": 1.970646672302627e-05, "loss": 0.8656, "step": 341 }, { "epoch": 0.10550670985654789, "grad_norm": 0.0, "learning_rate": 1.9704057851943244e-05, "loss": 0.9252, "step": 342 }, { "epoch": 0.10581520900817523, "grad_norm": 0.0, "learning_rate": 1.9701639285491633e-05, "loss": 0.8904, "step": 343 }, { "epoch": 0.10612370815980256, "grad_norm": 0.0, "learning_rate": 1.969921102608784e-05, "loss": 0.906, "step": 344 }, { "epoch": 0.1064322073114299, "grad_norm": 0.0, "learning_rate": 1.9696773076157942e-05, "loss": 1.0674, "step": 345 }, { "epoch": 0.10674070646305722, "grad_norm": 0.0, "learning_rate": 1.9694325438137716e-05, "loss": 0.9022, "step": 346 }, { "epoch": 0.10704920561468456, "grad_norm": 0.0, "learning_rate": 1.9691868114472602e-05, "loss": 0.9366, "step": 347 }, { "epoch": 0.10735770476631189, "grad_norm": 0.0, "learning_rate": 1.9689401107617722e-05, "loss": 0.8698, "step": 348 }, { "epoch": 0.10766620391793923, "grad_norm": 0.0, "learning_rate": 1.9686924420037877e-05, "loss": 0.9393, "step": 349 }, { "epoch": 0.10797470306956655, "grad_norm": 0.0, "learning_rate": 1.968443805420754e-05, "loss": 0.9386, "step": 350 }, { "epoch": 0.10828320222119389, "grad_norm": 0.0, "learning_rate": 1.9681942012610847e-05, "loss": 0.8884, "step": 351 }, { "epoch": 0.10859170137282123, "grad_norm": 0.0, "learning_rate": 1.96794362977416e-05, "loss": 0.9948, "step": 352 }, { "epoch": 0.10890020052444856, "grad_norm": 0.0, "learning_rate": 1.9676920912103278e-05, "loss": 0.9183, "step": 353 }, { "epoch": 0.1092086996760759, "grad_norm": 0.0, "learning_rate": 1.9674395858209014e-05, "loss": 0.9598, "step": 354 }, { "epoch": 0.10951719882770322, "grad_norm": 0.0, "learning_rate": 1.9671861138581594e-05, "loss": 0.9646, "step": 355 }, { "epoch": 0.10982569797933056, "grad_norm": 0.0, "learning_rate": 1.966931675575347e-05, "loss": 0.9969, "step": 356 }, { "epoch": 0.11013419713095789, "grad_norm": 0.0, "learning_rate": 1.966676271226675e-05, "loss": 0.9328, "step": 357 }, { "epoch": 0.11044269628258523, "grad_norm": 0.0, "learning_rate": 1.9664199010673192e-05, "loss": 0.8916, "step": 358 }, { "epoch": 0.11075119543421255, "grad_norm": 0.0, "learning_rate": 1.9661625653534196e-05, "loss": 0.9142, "step": 359 }, { "epoch": 0.11105969458583989, "grad_norm": 0.0, "learning_rate": 1.9659042643420817e-05, "loss": 0.9221, "step": 360 }, { "epoch": 0.11136819373746722, "grad_norm": 0.0, "learning_rate": 1.9656449982913757e-05, "loss": 0.9171, "step": 361 }, { "epoch": 0.11167669288909456, "grad_norm": 0.0, "learning_rate": 1.9653847674603348e-05, "loss": 0.8394, "step": 362 }, { "epoch": 0.11198519204072188, "grad_norm": 0.0, "learning_rate": 1.9651235721089575e-05, "loss": 0.8754, "step": 363 }, { "epoch": 0.11229369119234922, "grad_norm": 0.0, "learning_rate": 1.9648614124982044e-05, "loss": 0.9579, "step": 364 }, { "epoch": 0.11260219034397656, "grad_norm": 0.0, "learning_rate": 1.964598288890001e-05, "loss": 0.9248, "step": 365 }, { "epoch": 0.11291068949560389, "grad_norm": 0.0, "learning_rate": 1.964334201547235e-05, "loss": 1.0278, "step": 366 }, { "epoch": 0.11321918864723123, "grad_norm": 0.0, "learning_rate": 1.964069150733758e-05, "loss": 0.8592, "step": 367 }, { "epoch": 0.11352768779885855, "grad_norm": 0.0, "learning_rate": 1.963803136714382e-05, "loss": 0.9319, "step": 368 }, { "epoch": 0.11383618695048589, "grad_norm": 0.0, "learning_rate": 1.9635361597548844e-05, "loss": 1.0014, "step": 369 }, { "epoch": 0.11414468610211322, "grad_norm": 0.0, "learning_rate": 1.9632682201220022e-05, "loss": 0.9013, "step": 370 }, { "epoch": 0.11445318525374056, "grad_norm": 0.0, "learning_rate": 1.9629993180834356e-05, "loss": 0.8994, "step": 371 }, { "epoch": 0.11476168440536788, "grad_norm": 0.0, "learning_rate": 1.9627294539078454e-05, "loss": 0.901, "step": 372 }, { "epoch": 0.11507018355699522, "grad_norm": 0.0, "learning_rate": 1.9624586278648544e-05, "loss": 1.0434, "step": 373 }, { "epoch": 0.11537868270862255, "grad_norm": 0.0, "learning_rate": 1.962186840225046e-05, "loss": 0.9425, "step": 374 }, { "epoch": 0.11568718186024989, "grad_norm": 0.0, "learning_rate": 1.9619140912599643e-05, "loss": 0.89, "step": 375 }, { "epoch": 0.11599568101187721, "grad_norm": 0.0, "learning_rate": 1.9616403812421147e-05, "loss": 0.997, "step": 376 }, { "epoch": 0.11630418016350455, "grad_norm": 0.0, "learning_rate": 1.9613657104449615e-05, "loss": 0.866, "step": 377 }, { "epoch": 0.11661267931513189, "grad_norm": 0.0, "learning_rate": 1.96109007914293e-05, "loss": 0.9479, "step": 378 }, { "epoch": 0.11692117846675922, "grad_norm": 0.0, "learning_rate": 1.960813487611404e-05, "loss": 0.9128, "step": 379 }, { "epoch": 0.11722967761838655, "grad_norm": 0.0, "learning_rate": 1.9605359361267282e-05, "loss": 0.9501, "step": 380 }, { "epoch": 0.11753817677001388, "grad_norm": 0.0, "learning_rate": 1.960257424966205e-05, "loss": 0.9546, "step": 381 }, { "epoch": 0.11784667592164122, "grad_norm": 0.0, "learning_rate": 1.9599779544080966e-05, "loss": 0.9422, "step": 382 }, { "epoch": 0.11815517507326855, "grad_norm": 0.0, "learning_rate": 1.9596975247316226e-05, "loss": 0.949, "step": 383 }, { "epoch": 0.11846367422489588, "grad_norm": 0.0, "learning_rate": 1.9594161362169627e-05, "loss": 0.9639, "step": 384 }, { "epoch": 0.11877217337652321, "grad_norm": 0.0, "learning_rate": 1.9591337891452525e-05, "loss": 0.8488, "step": 385 }, { "epoch": 0.11908067252815055, "grad_norm": 0.0, "learning_rate": 1.958850483798586e-05, "loss": 0.9729, "step": 386 }, { "epoch": 0.11938917167977788, "grad_norm": 0.0, "learning_rate": 1.958566220460016e-05, "loss": 0.944, "step": 387 }, { "epoch": 0.11969767083140521, "grad_norm": 0.0, "learning_rate": 1.9582809994135505e-05, "loss": 0.996, "step": 388 }, { "epoch": 0.12000616998303254, "grad_norm": 0.0, "learning_rate": 1.9579948209441558e-05, "loss": 0.891, "step": 389 }, { "epoch": 0.12031466913465988, "grad_norm": 0.0, "learning_rate": 1.957707685337753e-05, "loss": 0.933, "step": 390 }, { "epoch": 0.12062316828628722, "grad_norm": 0.0, "learning_rate": 1.957419592881222e-05, "loss": 0.9058, "step": 391 }, { "epoch": 0.12093166743791454, "grad_norm": 0.0, "learning_rate": 1.957130543862396e-05, "loss": 0.9237, "step": 392 }, { "epoch": 0.12124016658954188, "grad_norm": 0.0, "learning_rate": 1.9568405385700658e-05, "loss": 0.9637, "step": 393 }, { "epoch": 0.12154866574116921, "grad_norm": 0.0, "learning_rate": 1.956549577293977e-05, "loss": 1.0328, "step": 394 }, { "epoch": 0.12185716489279655, "grad_norm": 0.0, "learning_rate": 1.9562576603248306e-05, "loss": 0.9848, "step": 395 }, { "epoch": 0.12216566404442387, "grad_norm": 0.0, "learning_rate": 1.9559647879542817e-05, "loss": 0.973, "step": 396 }, { "epoch": 0.12247416319605121, "grad_norm": 0.0, "learning_rate": 1.9556709604749408e-05, "loss": 0.9057, "step": 397 }, { "epoch": 0.12278266234767854, "grad_norm": 0.0, "learning_rate": 1.9553761781803718e-05, "loss": 0.8314, "step": 398 }, { "epoch": 0.12309116149930588, "grad_norm": 0.0, "learning_rate": 1.9550804413650935e-05, "loss": 0.9103, "step": 399 }, { "epoch": 0.1233996606509332, "grad_norm": 0.0, "learning_rate": 1.954783750324578e-05, "loss": 0.9178, "step": 400 }, { "epoch": 0.12370815980256054, "grad_norm": 0.0, "learning_rate": 1.9544861053552503e-05, "loss": 0.9996, "step": 401 }, { "epoch": 0.12401665895418787, "grad_norm": 0.0, "learning_rate": 1.954187506754489e-05, "loss": 0.9381, "step": 402 }, { "epoch": 0.12432515810581521, "grad_norm": 0.0, "learning_rate": 1.953887954820625e-05, "loss": 0.9707, "step": 403 }, { "epoch": 0.12463365725744255, "grad_norm": 0.0, "learning_rate": 1.9535874498529423e-05, "loss": 0.8752, "step": 404 }, { "epoch": 0.12494215640906987, "grad_norm": 0.0, "learning_rate": 1.953285992151677e-05, "loss": 0.9752, "step": 405 }, { "epoch": 0.1252506555606972, "grad_norm": 0.0, "learning_rate": 1.9529835820180166e-05, "loss": 0.9104, "step": 406 }, { "epoch": 0.12555915471232454, "grad_norm": 0.0, "learning_rate": 1.952680219754101e-05, "loss": 0.9053, "step": 407 }, { "epoch": 0.12586765386395188, "grad_norm": 0.0, "learning_rate": 1.9523759056630196e-05, "loss": 0.8727, "step": 408 }, { "epoch": 0.12617615301557922, "grad_norm": 0.0, "learning_rate": 1.952070640048815e-05, "loss": 0.871, "step": 409 }, { "epoch": 0.12648465216720653, "grad_norm": 0.0, "learning_rate": 1.9517644232164793e-05, "loss": 0.8591, "step": 410 }, { "epoch": 0.12679315131883387, "grad_norm": 0.0, "learning_rate": 1.951457255471955e-05, "loss": 0.9535, "step": 411 }, { "epoch": 0.1271016504704612, "grad_norm": 0.0, "learning_rate": 1.9511491371221347e-05, "loss": 0.9831, "step": 412 }, { "epoch": 0.12741014962208855, "grad_norm": 0.0, "learning_rate": 1.9508400684748615e-05, "loss": 0.9473, "step": 413 }, { "epoch": 0.12771864877371586, "grad_norm": 0.0, "learning_rate": 1.950530049838926e-05, "loss": 1.0712, "step": 414 }, { "epoch": 0.1280271479253432, "grad_norm": 0.0, "learning_rate": 1.9502190815240708e-05, "loss": 0.9336, "step": 415 }, { "epoch": 0.12833564707697054, "grad_norm": 0.0, "learning_rate": 1.9499071638409845e-05, "loss": 0.8945, "step": 416 }, { "epoch": 0.12864414622859788, "grad_norm": 0.0, "learning_rate": 1.949594297101306e-05, "loss": 0.8821, "step": 417 }, { "epoch": 0.12895264538022522, "grad_norm": 0.0, "learning_rate": 1.9492804816176223e-05, "loss": 0.8441, "step": 418 }, { "epoch": 0.12926114453185253, "grad_norm": 0.0, "learning_rate": 1.9489657177034673e-05, "loss": 0.8545, "step": 419 }, { "epoch": 0.12956964368347987, "grad_norm": 0.0, "learning_rate": 1.948650005673323e-05, "loss": 0.9616, "step": 420 }, { "epoch": 0.1298781428351072, "grad_norm": 0.0, "learning_rate": 1.9483333458426192e-05, "loss": 1.0049, "step": 421 }, { "epoch": 0.13018664198673455, "grad_norm": 0.0, "learning_rate": 1.948015738527732e-05, "loss": 0.9311, "step": 422 }, { "epoch": 0.13049514113836186, "grad_norm": 0.0, "learning_rate": 1.947697184045984e-05, "loss": 0.9357, "step": 423 }, { "epoch": 0.1308036402899892, "grad_norm": 0.0, "learning_rate": 1.947377682715645e-05, "loss": 0.8408, "step": 424 }, { "epoch": 0.13111213944161654, "grad_norm": 0.0, "learning_rate": 1.9470572348559295e-05, "loss": 0.885, "step": 425 }, { "epoch": 0.13142063859324388, "grad_norm": 0.0, "learning_rate": 1.946735840786999e-05, "loss": 0.9128, "step": 426 }, { "epoch": 0.1317291377448712, "grad_norm": 0.0, "learning_rate": 1.94641350082996e-05, "loss": 0.9241, "step": 427 }, { "epoch": 0.13203763689649853, "grad_norm": 0.0, "learning_rate": 1.9460902153068633e-05, "loss": 0.975, "step": 428 }, { "epoch": 0.13234613604812587, "grad_norm": 0.0, "learning_rate": 1.945765984540705e-05, "loss": 0.8414, "step": 429 }, { "epoch": 0.1326546351997532, "grad_norm": 0.0, "learning_rate": 1.945440808855426e-05, "loss": 0.9713, "step": 430 }, { "epoch": 0.13296313435138055, "grad_norm": 0.0, "learning_rate": 1.94511468857591e-05, "loss": 0.9718, "step": 431 }, { "epoch": 0.13327163350300786, "grad_norm": 0.0, "learning_rate": 1.944787624027986e-05, "loss": 0.8874, "step": 432 }, { "epoch": 0.1335801326546352, "grad_norm": 0.0, "learning_rate": 1.9444596155384253e-05, "loss": 0.9717, "step": 433 }, { "epoch": 0.13388863180626254, "grad_norm": 0.0, "learning_rate": 1.944130663434943e-05, "loss": 0.9274, "step": 434 }, { "epoch": 0.13419713095788988, "grad_norm": 0.0, "learning_rate": 1.9438007680461965e-05, "loss": 0.879, "step": 435 }, { "epoch": 0.1345056301095172, "grad_norm": 0.0, "learning_rate": 1.9434699297017855e-05, "loss": 0.8611, "step": 436 }, { "epoch": 0.13481412926114453, "grad_norm": 0.0, "learning_rate": 1.9431381487322527e-05, "loss": 0.9236, "step": 437 }, { "epoch": 0.13512262841277187, "grad_norm": 0.0, "learning_rate": 1.9428054254690812e-05, "loss": 0.9343, "step": 438 }, { "epoch": 0.1354311275643992, "grad_norm": 0.0, "learning_rate": 1.9424717602446973e-05, "loss": 0.831, "step": 439 }, { "epoch": 0.13573962671602652, "grad_norm": 0.0, "learning_rate": 1.9421371533924664e-05, "loss": 0.8447, "step": 440 }, { "epoch": 0.13604812586765386, "grad_norm": 0.0, "learning_rate": 1.9418016052466965e-05, "loss": 0.9319, "step": 441 }, { "epoch": 0.1363566250192812, "grad_norm": 0.0, "learning_rate": 1.941465116142635e-05, "loss": 1.0648, "step": 442 }, { "epoch": 0.13666512417090854, "grad_norm": 0.0, "learning_rate": 1.94112768641647e-05, "loss": 0.8477, "step": 443 }, { "epoch": 0.13697362332253588, "grad_norm": 0.0, "learning_rate": 1.9407893164053285e-05, "loss": 0.8829, "step": 444 }, { "epoch": 0.1372821224741632, "grad_norm": 0.0, "learning_rate": 1.940450006447278e-05, "loss": 0.9168, "step": 445 }, { "epoch": 0.13759062162579053, "grad_norm": 0.0, "learning_rate": 1.9401097568813244e-05, "loss": 0.936, "step": 446 }, { "epoch": 0.13789912077741787, "grad_norm": 0.0, "learning_rate": 1.939768568047413e-05, "loss": 0.8714, "step": 447 }, { "epoch": 0.1382076199290452, "grad_norm": 0.0, "learning_rate": 1.9394264402864265e-05, "loss": 0.9071, "step": 448 }, { "epoch": 0.13851611908067252, "grad_norm": 0.0, "learning_rate": 1.939083373940187e-05, "loss": 0.8958, "step": 449 }, { "epoch": 0.13882461823229986, "grad_norm": 0.0, "learning_rate": 1.9387393693514532e-05, "loss": 0.9762, "step": 450 }, { "epoch": 0.1391331173839272, "grad_norm": 0.0, "learning_rate": 1.9383944268639213e-05, "loss": 0.942, "step": 451 }, { "epoch": 0.13944161653555454, "grad_norm": 0.0, "learning_rate": 1.9380485468222257e-05, "loss": 0.9979, "step": 452 }, { "epoch": 0.13975011568718185, "grad_norm": 0.0, "learning_rate": 1.9377017295719362e-05, "loss": 0.9849, "step": 453 }, { "epoch": 0.1400586148388092, "grad_norm": 0.0, "learning_rate": 1.9373539754595598e-05, "loss": 0.9884, "step": 454 }, { "epoch": 0.14036711399043653, "grad_norm": 0.0, "learning_rate": 1.9370052848325392e-05, "loss": 0.9648, "step": 455 }, { "epoch": 0.14067561314206387, "grad_norm": 0.0, "learning_rate": 1.9366556580392527e-05, "loss": 0.893, "step": 456 }, { "epoch": 0.1409841122936912, "grad_norm": 0.0, "learning_rate": 1.936305095429014e-05, "loss": 0.8304, "step": 457 }, { "epoch": 0.14129261144531852, "grad_norm": 0.0, "learning_rate": 1.935953597352072e-05, "loss": 0.936, "step": 458 }, { "epoch": 0.14160111059694586, "grad_norm": 0.0, "learning_rate": 1.9356011641596096e-05, "loss": 0.8423, "step": 459 }, { "epoch": 0.1419096097485732, "grad_norm": 0.0, "learning_rate": 1.9352477962037448e-05, "loss": 0.9692, "step": 460 }, { "epoch": 0.14221810890020053, "grad_norm": 0.0, "learning_rate": 1.934893493837529e-05, "loss": 0.9955, "step": 461 }, { "epoch": 0.14252660805182785, "grad_norm": 0.0, "learning_rate": 1.9345382574149473e-05, "loss": 1.0374, "step": 462 }, { "epoch": 0.14283510720345519, "grad_norm": 0.0, "learning_rate": 1.9341820872909184e-05, "loss": 0.9431, "step": 463 }, { "epoch": 0.14314360635508253, "grad_norm": 0.0, "learning_rate": 1.933824983821293e-05, "loss": 0.951, "step": 464 }, { "epoch": 0.14345210550670986, "grad_norm": 0.0, "learning_rate": 1.933466947362855e-05, "loss": 0.8325, "step": 465 }, { "epoch": 0.14376060465833718, "grad_norm": 0.0, "learning_rate": 1.9331079782733204e-05, "loss": 0.859, "step": 466 }, { "epoch": 0.14406910380996452, "grad_norm": 0.0, "learning_rate": 1.9327480769113366e-05, "loss": 0.8799, "step": 467 }, { "epoch": 0.14437760296159186, "grad_norm": 0.0, "learning_rate": 1.9323872436364825e-05, "loss": 0.8794, "step": 468 }, { "epoch": 0.1446861021132192, "grad_norm": 0.0, "learning_rate": 1.932025478809269e-05, "loss": 0.928, "step": 469 }, { "epoch": 0.14499460126484653, "grad_norm": 0.0, "learning_rate": 1.9316627827911366e-05, "loss": 0.895, "step": 470 }, { "epoch": 0.14530310041647385, "grad_norm": 0.0, "learning_rate": 1.9312991559444565e-05, "loss": 0.9044, "step": 471 }, { "epoch": 0.14561159956810119, "grad_norm": 0.0, "learning_rate": 1.9309345986325298e-05, "loss": 0.975, "step": 472 }, { "epoch": 0.14592009871972852, "grad_norm": 0.0, "learning_rate": 1.9305691112195876e-05, "loss": 0.9109, "step": 473 }, { "epoch": 0.14622859787135586, "grad_norm": 0.0, "learning_rate": 1.9302026940707903e-05, "loss": 0.8694, "step": 474 }, { "epoch": 0.14653709702298318, "grad_norm": 0.0, "learning_rate": 1.9298353475522263e-05, "loss": 0.8612, "step": 475 }, { "epoch": 0.14684559617461052, "grad_norm": 0.0, "learning_rate": 1.929467072030914e-05, "loss": 0.9448, "step": 476 }, { "epoch": 0.14715409532623785, "grad_norm": 0.0, "learning_rate": 1.9290978678747984e-05, "loss": 0.9417, "step": 477 }, { "epoch": 0.1474625944778652, "grad_norm": 0.0, "learning_rate": 1.9287277354527535e-05, "loss": 0.8778, "step": 478 }, { "epoch": 0.1477710936294925, "grad_norm": 0.0, "learning_rate": 1.92835667513458e-05, "loss": 0.9273, "step": 479 }, { "epoch": 0.14807959278111985, "grad_norm": 0.0, "learning_rate": 1.927984687291006e-05, "loss": 0.9406, "step": 480 }, { "epoch": 0.14838809193274718, "grad_norm": 0.0, "learning_rate": 1.9276117722936867e-05, "loss": 0.9262, "step": 481 }, { "epoch": 0.14869659108437452, "grad_norm": 0.0, "learning_rate": 1.9272379305152026e-05, "loss": 1.0137, "step": 482 }, { "epoch": 0.14900509023600186, "grad_norm": 0.0, "learning_rate": 1.926863162329061e-05, "loss": 0.8827, "step": 483 }, { "epoch": 0.14931358938762918, "grad_norm": 0.0, "learning_rate": 1.9264874681096948e-05, "loss": 0.8382, "step": 484 }, { "epoch": 0.14962208853925651, "grad_norm": 0.0, "learning_rate": 1.9261108482324612e-05, "loss": 0.8825, "step": 485 }, { "epoch": 0.14993058769088385, "grad_norm": 0.0, "learning_rate": 1.9257333030736433e-05, "loss": 0.8738, "step": 486 }, { "epoch": 0.1502390868425112, "grad_norm": 0.0, "learning_rate": 1.925354833010448e-05, "loss": 0.9005, "step": 487 }, { "epoch": 0.1505475859941385, "grad_norm": 0.0, "learning_rate": 1.9249754384210066e-05, "loss": 0.9739, "step": 488 }, { "epoch": 0.15085608514576584, "grad_norm": 0.0, "learning_rate": 1.9245951196843736e-05, "loss": 0.9109, "step": 489 }, { "epoch": 0.15116458429739318, "grad_norm": 0.0, "learning_rate": 1.924213877180528e-05, "loss": 0.9328, "step": 490 }, { "epoch": 0.15147308344902052, "grad_norm": 0.0, "learning_rate": 1.92383171129037e-05, "loss": 0.9959, "step": 491 }, { "epoch": 0.15178158260064784, "grad_norm": 0.0, "learning_rate": 1.9234486223957238e-05, "loss": 0.8328, "step": 492 }, { "epoch": 0.15209008175227517, "grad_norm": 0.0, "learning_rate": 1.9230646108793353e-05, "loss": 1.26, "step": 493 }, { "epoch": 0.15239858090390251, "grad_norm": 0.0, "learning_rate": 1.922679677124872e-05, "loss": 0.9531, "step": 494 }, { "epoch": 0.15270708005552985, "grad_norm": 0.0, "learning_rate": 1.9222938215169227e-05, "loss": 0.8436, "step": 495 }, { "epoch": 0.1530155792071572, "grad_norm": 0.0, "learning_rate": 1.921907044440998e-05, "loss": 0.909, "step": 496 }, { "epoch": 0.1533240783587845, "grad_norm": 0.0, "learning_rate": 1.9215193462835285e-05, "loss": 1.0067, "step": 497 }, { "epoch": 0.15363257751041184, "grad_norm": 0.0, "learning_rate": 1.921130727431865e-05, "loss": 0.8809, "step": 498 }, { "epoch": 0.15394107666203918, "grad_norm": 0.0, "learning_rate": 1.9207411882742784e-05, "loss": 1.0474, "step": 499 }, { "epoch": 0.15424957581366652, "grad_norm": 0.0, "learning_rate": 1.9203507291999598e-05, "loss": 0.9858, "step": 500 }, { "epoch": 0.15455807496529383, "grad_norm": 0.0, "learning_rate": 1.9199593505990173e-05, "loss": 0.9519, "step": 501 }, { "epoch": 0.15486657411692117, "grad_norm": 0.0, "learning_rate": 1.91956705286248e-05, "loss": 0.9109, "step": 502 }, { "epoch": 0.1551750732685485, "grad_norm": 0.0, "learning_rate": 1.9191738363822943e-05, "loss": 0.9116, "step": 503 }, { "epoch": 0.15548357242017585, "grad_norm": 0.0, "learning_rate": 1.9187797015513244e-05, "loss": 0.9334, "step": 504 }, { "epoch": 0.15579207157180316, "grad_norm": 0.0, "learning_rate": 1.9183846487633524e-05, "loss": 0.9153, "step": 505 }, { "epoch": 0.1561005707234305, "grad_norm": 0.0, "learning_rate": 1.9179886784130767e-05, "loss": 0.9827, "step": 506 }, { "epoch": 0.15640906987505784, "grad_norm": 0.0, "learning_rate": 1.917591790896114e-05, "loss": 0.9264, "step": 507 }, { "epoch": 0.15671756902668518, "grad_norm": 0.0, "learning_rate": 1.917193986608996e-05, "loss": 0.8661, "step": 508 }, { "epoch": 0.15702606817831252, "grad_norm": 0.0, "learning_rate": 1.9167952659491703e-05, "loss": 1.0055, "step": 509 }, { "epoch": 0.15733456732993983, "grad_norm": 0.0, "learning_rate": 1.9163956293150017e-05, "loss": 0.984, "step": 510 }, { "epoch": 0.15764306648156717, "grad_norm": 0.0, "learning_rate": 1.915995077105768e-05, "loss": 1.028, "step": 511 }, { "epoch": 0.1579515656331945, "grad_norm": 0.0, "learning_rate": 1.915593609721663e-05, "loss": 0.9713, "step": 512 }, { "epoch": 0.15826006478482185, "grad_norm": 0.0, "learning_rate": 1.9151912275637946e-05, "loss": 0.941, "step": 513 }, { "epoch": 0.15856856393644916, "grad_norm": 0.0, "learning_rate": 1.914787931034185e-05, "loss": 0.9633, "step": 514 }, { "epoch": 0.1588770630880765, "grad_norm": 0.0, "learning_rate": 1.914383720535769e-05, "loss": 0.9754, "step": 515 }, { "epoch": 0.15918556223970384, "grad_norm": 0.0, "learning_rate": 1.9139785964723955e-05, "loss": 0.9942, "step": 516 }, { "epoch": 0.15949406139133118, "grad_norm": 0.0, "learning_rate": 1.913572559248826e-05, "loss": 1.3288, "step": 517 }, { "epoch": 0.1598025605429585, "grad_norm": 0.0, "learning_rate": 1.9131656092707337e-05, "loss": 0.8349, "step": 518 }, { "epoch": 0.16011105969458583, "grad_norm": 0.0, "learning_rate": 1.9127577469447045e-05, "loss": 1.0034, "step": 519 }, { "epoch": 0.16041955884621317, "grad_norm": 0.0, "learning_rate": 1.912348972678235e-05, "loss": 1.005, "step": 520 }, { "epoch": 0.1607280579978405, "grad_norm": 0.0, "learning_rate": 1.9119392868797347e-05, "loss": 0.8971, "step": 521 }, { "epoch": 0.16103655714946785, "grad_norm": 0.0, "learning_rate": 1.9115286899585214e-05, "loss": 0.9835, "step": 522 }, { "epoch": 0.16134505630109516, "grad_norm": 0.0, "learning_rate": 1.9111171823248243e-05, "loss": 0.9274, "step": 523 }, { "epoch": 0.1616535554527225, "grad_norm": 0.0, "learning_rate": 1.9107047643897835e-05, "loss": 0.8566, "step": 524 }, { "epoch": 0.16196205460434984, "grad_norm": 0.0, "learning_rate": 1.9102914365654465e-05, "loss": 0.933, "step": 525 }, { "epoch": 0.16227055375597718, "grad_norm": 0.0, "learning_rate": 1.909877199264772e-05, "loss": 0.9228, "step": 526 }, { "epoch": 0.1625790529076045, "grad_norm": 0.0, "learning_rate": 1.909462052901626e-05, "loss": 0.9014, "step": 527 }, { "epoch": 0.16288755205923183, "grad_norm": 0.0, "learning_rate": 1.909045997890783e-05, "loss": 1.0034, "step": 528 }, { "epoch": 0.16319605121085917, "grad_norm": 0.0, "learning_rate": 1.9086290346479254e-05, "loss": 0.906, "step": 529 }, { "epoch": 0.1635045503624865, "grad_norm": 0.0, "learning_rate": 1.9082111635896438e-05, "loss": 0.9278, "step": 530 }, { "epoch": 0.16381304951411382, "grad_norm": 0.0, "learning_rate": 1.9077923851334342e-05, "loss": 0.9236, "step": 531 }, { "epoch": 0.16412154866574116, "grad_norm": 0.0, "learning_rate": 1.9073726996977004e-05, "loss": 0.9511, "step": 532 }, { "epoch": 0.1644300478173685, "grad_norm": 0.0, "learning_rate": 1.906952107701752e-05, "loss": 0.7983, "step": 533 }, { "epoch": 0.16473854696899584, "grad_norm": 0.0, "learning_rate": 1.9065306095658048e-05, "loss": 0.9158, "step": 534 }, { "epoch": 0.16504704612062318, "grad_norm": 0.0, "learning_rate": 1.9061082057109787e-05, "loss": 0.9543, "step": 535 }, { "epoch": 0.1653555452722505, "grad_norm": 0.0, "learning_rate": 1.9056848965593e-05, "loss": 0.9502, "step": 536 }, { "epoch": 0.16566404442387783, "grad_norm": 0.0, "learning_rate": 1.905260682533699e-05, "loss": 0.9167, "step": 537 }, { "epoch": 0.16597254357550517, "grad_norm": 0.0, "learning_rate": 1.9048355640580087e-05, "loss": 1.0018, "step": 538 }, { "epoch": 0.1662810427271325, "grad_norm": 0.0, "learning_rate": 1.904409541556968e-05, "loss": 0.8541, "step": 539 }, { "epoch": 0.16658954187875982, "grad_norm": 0.0, "learning_rate": 1.903982615456218e-05, "loss": 0.8704, "step": 540 }, { "epoch": 0.16689804103038716, "grad_norm": 0.0, "learning_rate": 1.9035547861823016e-05, "loss": 0.9562, "step": 541 }, { "epoch": 0.1672065401820145, "grad_norm": 0.0, "learning_rate": 1.903126054162666e-05, "loss": 0.9997, "step": 542 }, { "epoch": 0.16751503933364184, "grad_norm": 0.0, "learning_rate": 1.9026964198256583e-05, "loss": 0.9693, "step": 543 }, { "epoch": 0.16782353848526915, "grad_norm": 0.0, "learning_rate": 1.902265883600529e-05, "loss": 0.8984, "step": 544 }, { "epoch": 0.1681320376368965, "grad_norm": 0.0, "learning_rate": 1.9018344459174285e-05, "loss": 0.8469, "step": 545 }, { "epoch": 0.16844053678852383, "grad_norm": 0.0, "learning_rate": 1.901402107207408e-05, "loss": 0.8924, "step": 546 }, { "epoch": 0.16874903594015117, "grad_norm": 0.0, "learning_rate": 1.900968867902419e-05, "loss": 0.9235, "step": 547 }, { "epoch": 0.1690575350917785, "grad_norm": 0.0, "learning_rate": 1.9005347284353136e-05, "loss": 1.2873, "step": 548 }, { "epoch": 0.16936603424340582, "grad_norm": 0.0, "learning_rate": 1.9000996892398418e-05, "loss": 0.9419, "step": 549 }, { "epoch": 0.16967453339503316, "grad_norm": 0.0, "learning_rate": 1.8996637507506538e-05, "loss": 0.8735, "step": 550 }, { "epoch": 0.1699830325466605, "grad_norm": 0.0, "learning_rate": 1.899226913403297e-05, "loss": 0.8714, "step": 551 }, { "epoch": 0.17029153169828784, "grad_norm": 0.0, "learning_rate": 1.898789177634218e-05, "loss": 0.9299, "step": 552 }, { "epoch": 0.17060003084991515, "grad_norm": 0.0, "learning_rate": 1.898350543880761e-05, "loss": 0.8985, "step": 553 }, { "epoch": 0.1709085300015425, "grad_norm": 0.0, "learning_rate": 1.897911012581166e-05, "loss": 0.936, "step": 554 }, { "epoch": 0.17121702915316983, "grad_norm": 0.0, "learning_rate": 1.8974705841745717e-05, "loss": 0.9537, "step": 555 }, { "epoch": 0.17152552830479717, "grad_norm": 0.0, "learning_rate": 1.897029259101012e-05, "loss": 1.2198, "step": 556 }, { "epoch": 0.17183402745642448, "grad_norm": 0.0, "learning_rate": 1.896587037801416e-05, "loss": 0.9152, "step": 557 }, { "epoch": 0.17214252660805182, "grad_norm": 0.0, "learning_rate": 1.89614392071761e-05, "loss": 0.9322, "step": 558 }, { "epoch": 0.17245102575967916, "grad_norm": 0.0, "learning_rate": 1.895699908292314e-05, "loss": 0.9327, "step": 559 }, { "epoch": 0.1727595249113065, "grad_norm": 0.0, "learning_rate": 1.8952550009691424e-05, "loss": 0.9372, "step": 560 }, { "epoch": 0.17306802406293384, "grad_norm": 0.0, "learning_rate": 1.894809199192605e-05, "loss": 0.9337, "step": 561 }, { "epoch": 0.17337652321456115, "grad_norm": 0.0, "learning_rate": 1.8943625034081032e-05, "loss": 0.9088, "step": 562 }, { "epoch": 0.1736850223661885, "grad_norm": 0.0, "learning_rate": 1.8939149140619338e-05, "loss": 0.9155, "step": 563 }, { "epoch": 0.17399352151781583, "grad_norm": 0.0, "learning_rate": 1.8934664316012853e-05, "loss": 0.9342, "step": 564 }, { "epoch": 0.17430202066944317, "grad_norm": 0.0, "learning_rate": 1.8930170564742377e-05, "loss": 0.9431, "step": 565 }, { "epoch": 0.17461051982107048, "grad_norm": 0.0, "learning_rate": 1.8925667891297646e-05, "loss": 0.843, "step": 566 }, { "epoch": 0.17491901897269782, "grad_norm": 0.0, "learning_rate": 1.8921156300177303e-05, "loss": 0.881, "step": 567 }, { "epoch": 0.17522751812432516, "grad_norm": 0.0, "learning_rate": 1.8916635795888895e-05, "loss": 0.9073, "step": 568 }, { "epoch": 0.1755360172759525, "grad_norm": 0.0, "learning_rate": 1.8912106382948875e-05, "loss": 0.9315, "step": 569 }, { "epoch": 0.1758445164275798, "grad_norm": 0.0, "learning_rate": 1.890756806588261e-05, "loss": 0.9232, "step": 570 }, { "epoch": 0.17615301557920715, "grad_norm": 0.0, "learning_rate": 1.8903020849224343e-05, "loss": 0.8926, "step": 571 }, { "epoch": 0.1764615147308345, "grad_norm": 0.0, "learning_rate": 1.8898464737517225e-05, "loss": 0.8986, "step": 572 }, { "epoch": 0.17677001388246183, "grad_norm": 0.0, "learning_rate": 1.8893899735313285e-05, "loss": 0.9223, "step": 573 }, { "epoch": 0.17707851303408917, "grad_norm": 0.0, "learning_rate": 1.8889325847173444e-05, "loss": 0.8557, "step": 574 }, { "epoch": 0.17738701218571648, "grad_norm": 0.0, "learning_rate": 1.8884743077667487e-05, "loss": 0.924, "step": 575 }, { "epoch": 0.17769551133734382, "grad_norm": 0.0, "learning_rate": 1.8880151431374082e-05, "loss": 0.9458, "step": 576 }, { "epoch": 0.17800401048897116, "grad_norm": 0.0, "learning_rate": 1.8875550912880766e-05, "loss": 0.9072, "step": 577 }, { "epoch": 0.1783125096405985, "grad_norm": 0.0, "learning_rate": 1.8870941526783933e-05, "loss": 0.895, "step": 578 }, { "epoch": 0.1786210087922258, "grad_norm": 0.0, "learning_rate": 1.886632327768885e-05, "loss": 0.9086, "step": 579 }, { "epoch": 0.17892950794385315, "grad_norm": 0.0, "learning_rate": 1.8861696170209626e-05, "loss": 0.8276, "step": 580 }, { "epoch": 0.1792380070954805, "grad_norm": 0.0, "learning_rate": 1.8857060208969225e-05, "loss": 0.9947, "step": 581 }, { "epoch": 0.17954650624710783, "grad_norm": 0.0, "learning_rate": 1.8852415398599455e-05, "loss": 0.9668, "step": 582 }, { "epoch": 0.17985500539873514, "grad_norm": 0.0, "learning_rate": 1.8847761743740972e-05, "loss": 0.9431, "step": 583 }, { "epoch": 0.18016350455036248, "grad_norm": 0.0, "learning_rate": 1.8843099249043258e-05, "loss": 0.9453, "step": 584 }, { "epoch": 0.18047200370198982, "grad_norm": 0.0, "learning_rate": 1.8838427919164643e-05, "loss": 0.9127, "step": 585 }, { "epoch": 0.18078050285361716, "grad_norm": 0.0, "learning_rate": 1.8833747758772264e-05, "loss": 0.9754, "step": 586 }, { "epoch": 0.1810890020052445, "grad_norm": 0.0, "learning_rate": 1.8829058772542092e-05, "loss": 0.9359, "step": 587 }, { "epoch": 0.1813975011568718, "grad_norm": 0.0, "learning_rate": 1.882436096515892e-05, "loss": 0.9143, "step": 588 }, { "epoch": 0.18170600030849915, "grad_norm": 0.0, "learning_rate": 1.8819654341316344e-05, "loss": 0.9026, "step": 589 }, { "epoch": 0.1820144994601265, "grad_norm": 0.0, "learning_rate": 1.8814938905716778e-05, "loss": 0.9464, "step": 590 }, { "epoch": 0.18232299861175383, "grad_norm": 0.0, "learning_rate": 1.8810214663071428e-05, "loss": 0.9162, "step": 591 }, { "epoch": 0.18263149776338114, "grad_norm": 0.0, "learning_rate": 1.8805481618100314e-05, "loss": 1.0065, "step": 592 }, { "epoch": 0.18293999691500848, "grad_norm": 0.0, "learning_rate": 1.8800739775532238e-05, "loss": 0.92, "step": 593 }, { "epoch": 0.18324849606663582, "grad_norm": 0.0, "learning_rate": 1.8795989140104797e-05, "loss": 0.936, "step": 594 }, { "epoch": 0.18355699521826316, "grad_norm": 0.0, "learning_rate": 1.8791229716564374e-05, "loss": 0.9698, "step": 595 }, { "epoch": 0.18386549436989047, "grad_norm": 0.0, "learning_rate": 1.8786461509666135e-05, "loss": 0.9489, "step": 596 }, { "epoch": 0.1841739935215178, "grad_norm": 0.0, "learning_rate": 1.8781684524174006e-05, "loss": 0.9536, "step": 597 }, { "epoch": 0.18448249267314515, "grad_norm": 0.0, "learning_rate": 1.8776898764860707e-05, "loss": 0.9249, "step": 598 }, { "epoch": 0.1847909918247725, "grad_norm": 0.0, "learning_rate": 1.8772104236507703e-05, "loss": 0.9112, "step": 599 }, { "epoch": 0.18509949097639983, "grad_norm": 0.0, "learning_rate": 1.8767300943905238e-05, "loss": 0.9114, "step": 600 }, { "epoch": 0.18540799012802714, "grad_norm": 0.0, "learning_rate": 1.8762488891852296e-05, "loss": 0.9409, "step": 601 }, { "epoch": 0.18571648927965448, "grad_norm": 0.0, "learning_rate": 1.8757668085156628e-05, "loss": 0.9692, "step": 602 }, { "epoch": 0.18602498843128182, "grad_norm": 0.0, "learning_rate": 1.875283852863471e-05, "loss": 0.8082, "step": 603 }, { "epoch": 0.18633348758290916, "grad_norm": 0.0, "learning_rate": 1.874800022711179e-05, "loss": 1.0027, "step": 604 }, { "epoch": 0.18664198673453647, "grad_norm": 0.0, "learning_rate": 1.8743153185421826e-05, "loss": 0.9285, "step": 605 }, { "epoch": 0.1869504858861638, "grad_norm": 0.0, "learning_rate": 1.8738297408407525e-05, "loss": 0.867, "step": 606 }, { "epoch": 0.18725898503779115, "grad_norm": 0.0, "learning_rate": 1.8733432900920316e-05, "loss": 0.9368, "step": 607 }, { "epoch": 0.1875674841894185, "grad_norm": 0.0, "learning_rate": 1.872855966782035e-05, "loss": 1.0111, "step": 608 }, { "epoch": 0.1878759833410458, "grad_norm": 0.0, "learning_rate": 1.8723677713976494e-05, "loss": 0.9438, "step": 609 }, { "epoch": 0.18818448249267314, "grad_norm": 0.0, "learning_rate": 1.8718787044266334e-05, "loss": 0.9616, "step": 610 }, { "epoch": 0.18849298164430048, "grad_norm": 0.0, "learning_rate": 1.8713887663576156e-05, "loss": 1.1784, "step": 611 }, { "epoch": 0.18880148079592782, "grad_norm": 0.0, "learning_rate": 1.8708979576800962e-05, "loss": 0.9202, "step": 612 }, { "epoch": 0.18910997994755516, "grad_norm": 0.0, "learning_rate": 1.8704062788844435e-05, "loss": 0.9447, "step": 613 }, { "epoch": 0.18941847909918247, "grad_norm": 0.0, "learning_rate": 1.8699137304618963e-05, "loss": 0.8955, "step": 614 }, { "epoch": 0.1897269782508098, "grad_norm": 0.0, "learning_rate": 1.869420312904562e-05, "loss": 0.9599, "step": 615 }, { "epoch": 0.19003547740243715, "grad_norm": 0.0, "learning_rate": 1.868926026705417e-05, "loss": 0.8844, "step": 616 }, { "epoch": 0.1903439765540645, "grad_norm": 0.0, "learning_rate": 1.8684308723583037e-05, "loss": 0.8832, "step": 617 }, { "epoch": 0.1906524757056918, "grad_norm": 0.0, "learning_rate": 1.8679348503579336e-05, "loss": 0.9653, "step": 618 }, { "epoch": 0.19096097485731914, "grad_norm": 0.0, "learning_rate": 1.867437961199885e-05, "loss": 0.8365, "step": 619 }, { "epoch": 0.19126947400894648, "grad_norm": 0.0, "learning_rate": 1.8669402053806016e-05, "loss": 0.8715, "step": 620 }, { "epoch": 0.19157797316057382, "grad_norm": 0.0, "learning_rate": 1.8664415833973937e-05, "loss": 0.9752, "step": 621 }, { "epoch": 0.19188647231220113, "grad_norm": 0.0, "learning_rate": 1.8659420957484367e-05, "loss": 0.9336, "step": 622 }, { "epoch": 0.19219497146382847, "grad_norm": 0.0, "learning_rate": 1.865441742932771e-05, "loss": 0.9283, "step": 623 }, { "epoch": 0.1925034706154558, "grad_norm": 0.0, "learning_rate": 1.8649405254503018e-05, "loss": 0.879, "step": 624 }, { "epoch": 0.19281196976708315, "grad_norm": 0.0, "learning_rate": 1.864438443801797e-05, "loss": 1.036, "step": 625 }, { "epoch": 0.1931204689187105, "grad_norm": 0.0, "learning_rate": 1.863935498488889e-05, "loss": 1.0019, "step": 626 }, { "epoch": 0.1934289680703378, "grad_norm": 0.0, "learning_rate": 1.8634316900140733e-05, "loss": 0.9301, "step": 627 }, { "epoch": 0.19373746722196514, "grad_norm": 0.0, "learning_rate": 1.862927018880707e-05, "loss": 1.1868, "step": 628 }, { "epoch": 0.19404596637359248, "grad_norm": 0.0, "learning_rate": 1.862421485593009e-05, "loss": 1.0311, "step": 629 }, { "epoch": 0.19435446552521982, "grad_norm": 0.0, "learning_rate": 1.86191509065606e-05, "loss": 0.959, "step": 630 }, { "epoch": 0.19466296467684713, "grad_norm": 0.0, "learning_rate": 1.8614078345758014e-05, "loss": 0.8862, "step": 631 }, { "epoch": 0.19497146382847447, "grad_norm": 0.0, "learning_rate": 1.8608997178590354e-05, "loss": 0.8945, "step": 632 }, { "epoch": 0.1952799629801018, "grad_norm": 0.0, "learning_rate": 1.8603907410134233e-05, "loss": 0.889, "step": 633 }, { "epoch": 0.19558846213172915, "grad_norm": 0.0, "learning_rate": 1.8598809045474868e-05, "loss": 0.8235, "step": 634 }, { "epoch": 0.19589696128335646, "grad_norm": 0.0, "learning_rate": 1.859370208970605e-05, "loss": 0.8782, "step": 635 }, { "epoch": 0.1962054604349838, "grad_norm": 0.0, "learning_rate": 1.8588586547930176e-05, "loss": 0.9183, "step": 636 }, { "epoch": 0.19651395958661114, "grad_norm": 0.0, "learning_rate": 1.8583462425258192e-05, "loss": 0.866, "step": 637 }, { "epoch": 0.19682245873823848, "grad_norm": 0.0, "learning_rate": 1.857832972680964e-05, "loss": 0.9157, "step": 638 }, { "epoch": 0.19713095788986582, "grad_norm": 0.0, "learning_rate": 1.8573188457712624e-05, "loss": 0.8942, "step": 639 }, { "epoch": 0.19743945704149313, "grad_norm": 0.0, "learning_rate": 1.8568038623103802e-05, "loss": 0.95, "step": 640 }, { "epoch": 0.19774795619312047, "grad_norm": 0.0, "learning_rate": 1.8562880228128402e-05, "loss": 0.9938, "step": 641 }, { "epoch": 0.1980564553447478, "grad_norm": 0.0, "learning_rate": 1.8557713277940203e-05, "loss": 0.8951, "step": 642 }, { "epoch": 0.19836495449637515, "grad_norm": 0.0, "learning_rate": 1.8552537777701525e-05, "loss": 0.8841, "step": 643 }, { "epoch": 0.19867345364800246, "grad_norm": 0.0, "learning_rate": 1.8547353732583234e-05, "loss": 0.923, "step": 644 }, { "epoch": 0.1989819527996298, "grad_norm": 0.0, "learning_rate": 1.8542161147764735e-05, "loss": 0.8871, "step": 645 }, { "epoch": 0.19929045195125714, "grad_norm": 0.0, "learning_rate": 1.853696002843396e-05, "loss": 0.806, "step": 646 }, { "epoch": 0.19959895110288448, "grad_norm": 0.0, "learning_rate": 1.8531750379787375e-05, "loss": 0.8171, "step": 647 }, { "epoch": 0.1999074502545118, "grad_norm": 0.0, "learning_rate": 1.852653220702996e-05, "loss": 0.8989, "step": 648 }, { "epoch": 0.20021594940613913, "grad_norm": 0.0, "learning_rate": 1.8521305515375217e-05, "loss": 0.8877, "step": 649 }, { "epoch": 0.20052444855776647, "grad_norm": 0.0, "learning_rate": 1.8516070310045157e-05, "loss": 0.9185, "step": 650 }, { "epoch": 0.2008329477093938, "grad_norm": 0.0, "learning_rate": 1.85108265962703e-05, "loss": 0.883, "step": 651 }, { "epoch": 0.20114144686102114, "grad_norm": 0.0, "learning_rate": 1.850557437928966e-05, "loss": 0.8563, "step": 652 }, { "epoch": 0.20144994601264846, "grad_norm": 0.0, "learning_rate": 1.8500313664350758e-05, "loss": 0.9672, "step": 653 }, { "epoch": 0.2017584451642758, "grad_norm": 0.0, "learning_rate": 1.849504445670959e-05, "loss": 0.9432, "step": 654 }, { "epoch": 0.20206694431590314, "grad_norm": 0.0, "learning_rate": 1.848976676163065e-05, "loss": 0.9075, "step": 655 }, { "epoch": 0.20237544346753047, "grad_norm": 0.0, "learning_rate": 1.8484480584386907e-05, "loss": 0.9218, "step": 656 }, { "epoch": 0.2026839426191578, "grad_norm": 0.0, "learning_rate": 1.847918593025981e-05, "loss": 0.849, "step": 657 }, { "epoch": 0.20299244177078513, "grad_norm": 0.0, "learning_rate": 1.847388280453926e-05, "loss": 0.8514, "step": 658 }, { "epoch": 0.20330094092241247, "grad_norm": 0.0, "learning_rate": 1.846857121252364e-05, "loss": 0.8227, "step": 659 }, { "epoch": 0.2036094400740398, "grad_norm": 0.0, "learning_rate": 1.8463251159519793e-05, "loss": 0.8503, "step": 660 }, { "epoch": 0.20391793922566712, "grad_norm": 0.0, "learning_rate": 1.8457922650842998e-05, "loss": 0.9469, "step": 661 }, { "epoch": 0.20422643837729446, "grad_norm": 0.0, "learning_rate": 1.8452585691817e-05, "loss": 0.8618, "step": 662 }, { "epoch": 0.2045349375289218, "grad_norm": 0.0, "learning_rate": 1.8447240287773973e-05, "loss": 0.9251, "step": 663 }, { "epoch": 0.20484343668054913, "grad_norm": 0.0, "learning_rate": 1.8441886444054534e-05, "loss": 1.0748, "step": 664 }, { "epoch": 0.20515193583217647, "grad_norm": 0.0, "learning_rate": 1.8436524166007742e-05, "loss": 0.9532, "step": 665 }, { "epoch": 0.20546043498380379, "grad_norm": 0.0, "learning_rate": 1.8431153458991066e-05, "loss": 0.9112, "step": 666 }, { "epoch": 0.20576893413543113, "grad_norm": 0.0, "learning_rate": 1.8425774328370402e-05, "loss": 0.9206, "step": 667 }, { "epoch": 0.20607743328705846, "grad_norm": 0.0, "learning_rate": 1.8420386779520073e-05, "loss": 0.8469, "step": 668 }, { "epoch": 0.2063859324386858, "grad_norm": 0.0, "learning_rate": 1.8414990817822794e-05, "loss": 0.8662, "step": 669 }, { "epoch": 0.20669443159031312, "grad_norm": 0.0, "learning_rate": 1.84095864486697e-05, "loss": 0.9517, "step": 670 }, { "epoch": 0.20700293074194046, "grad_norm": 0.0, "learning_rate": 1.840417367746032e-05, "loss": 1.2155, "step": 671 }, { "epoch": 0.2073114298935678, "grad_norm": 0.0, "learning_rate": 1.839875250960258e-05, "loss": 0.882, "step": 672 }, { "epoch": 0.20761992904519513, "grad_norm": 0.0, "learning_rate": 1.8393322950512793e-05, "loss": 0.9063, "step": 673 }, { "epoch": 0.20792842819682245, "grad_norm": 0.0, "learning_rate": 1.8387885005615652e-05, "loss": 0.9307, "step": 674 }, { "epoch": 0.20823692734844979, "grad_norm": 0.0, "learning_rate": 1.838243868034424e-05, "loss": 0.9967, "step": 675 }, { "epoch": 0.20854542650007712, "grad_norm": 0.0, "learning_rate": 1.837698398014e-05, "loss": 0.885, "step": 676 }, { "epoch": 0.20885392565170446, "grad_norm": 0.0, "learning_rate": 1.837152091045275e-05, "loss": 0.923, "step": 677 }, { "epoch": 0.2091624248033318, "grad_norm": 0.0, "learning_rate": 1.8366049476740667e-05, "loss": 0.8932, "step": 678 }, { "epoch": 0.20947092395495912, "grad_norm": 0.0, "learning_rate": 1.8360569684470286e-05, "loss": 0.8664, "step": 679 }, { "epoch": 0.20977942310658645, "grad_norm": 0.0, "learning_rate": 1.8355081539116492e-05, "loss": 0.9405, "step": 680 }, { "epoch": 0.2100879222582138, "grad_norm": 0.0, "learning_rate": 1.8349585046162517e-05, "loss": 0.9038, "step": 681 }, { "epoch": 0.21039642140984113, "grad_norm": 0.0, "learning_rate": 1.8344080211099934e-05, "loss": 0.986, "step": 682 }, { "epoch": 0.21070492056146845, "grad_norm": 0.0, "learning_rate": 1.8338567039428642e-05, "loss": 0.871, "step": 683 }, { "epoch": 0.21101341971309578, "grad_norm": 0.0, "learning_rate": 1.8333045536656882e-05, "loss": 0.9415, "step": 684 }, { "epoch": 0.21132191886472312, "grad_norm": 0.0, "learning_rate": 1.8327515708301217e-05, "loss": 0.8756, "step": 685 }, { "epoch": 0.21163041801635046, "grad_norm": 0.0, "learning_rate": 1.8321977559886514e-05, "loss": 1.0333, "step": 686 }, { "epoch": 0.21193891716797778, "grad_norm": 0.0, "learning_rate": 1.8316431096945965e-05, "loss": 0.892, "step": 687 }, { "epoch": 0.21224741631960511, "grad_norm": 0.0, "learning_rate": 1.831087632502107e-05, "loss": 0.9391, "step": 688 }, { "epoch": 0.21255591547123245, "grad_norm": 0.0, "learning_rate": 1.8305313249661628e-05, "loss": 1.0055, "step": 689 }, { "epoch": 0.2128644146228598, "grad_norm": 0.0, "learning_rate": 1.8299741876425725e-05, "loss": 0.9493, "step": 690 }, { "epoch": 0.21317291377448713, "grad_norm": 0.0, "learning_rate": 1.8294162210879753e-05, "loss": 0.9286, "step": 691 }, { "epoch": 0.21348141292611444, "grad_norm": 0.0, "learning_rate": 1.8288574258598378e-05, "loss": 0.8777, "step": 692 }, { "epoch": 0.21378991207774178, "grad_norm": 0.0, "learning_rate": 1.8282978025164553e-05, "loss": 0.9229, "step": 693 }, { "epoch": 0.21409841122936912, "grad_norm": 0.0, "learning_rate": 1.8277373516169493e-05, "loss": 0.907, "step": 694 }, { "epoch": 0.21440691038099646, "grad_norm": 0.0, "learning_rate": 1.8271760737212697e-05, "loss": 0.9317, "step": 695 }, { "epoch": 0.21471540953262377, "grad_norm": 0.0, "learning_rate": 1.8266139693901914e-05, "loss": 0.865, "step": 696 }, { "epoch": 0.2150239086842511, "grad_norm": 0.0, "learning_rate": 1.826051039185315e-05, "loss": 0.9932, "step": 697 }, { "epoch": 0.21533240783587845, "grad_norm": 0.0, "learning_rate": 1.8254872836690672e-05, "loss": 0.8622, "step": 698 }, { "epoch": 0.2156409069875058, "grad_norm": 0.0, "learning_rate": 1.8249227034046986e-05, "loss": 0.9023, "step": 699 }, { "epoch": 0.2159494061391331, "grad_norm": 0.0, "learning_rate": 1.824357298956284e-05, "loss": 0.8664, "step": 700 }, { "epoch": 0.21625790529076044, "grad_norm": 0.0, "learning_rate": 1.823791070888721e-05, "loss": 0.9108, "step": 701 }, { "epoch": 0.21656640444238778, "grad_norm": 0.0, "learning_rate": 1.8232240197677318e-05, "loss": 0.9412, "step": 702 }, { "epoch": 0.21687490359401512, "grad_norm": 0.0, "learning_rate": 1.8226561461598583e-05, "loss": 0.9271, "step": 703 }, { "epoch": 0.21718340274564246, "grad_norm": 0.0, "learning_rate": 1.8220874506324667e-05, "loss": 0.9417, "step": 704 }, { "epoch": 0.21749190189726977, "grad_norm": 0.0, "learning_rate": 1.8215179337537433e-05, "loss": 0.9904, "step": 705 }, { "epoch": 0.2178004010488971, "grad_norm": 0.0, "learning_rate": 1.8209475960926946e-05, "loss": 0.8971, "step": 706 }, { "epoch": 0.21810890020052445, "grad_norm": 0.0, "learning_rate": 1.8203764382191476e-05, "loss": 0.9891, "step": 707 }, { "epoch": 0.2184173993521518, "grad_norm": 0.0, "learning_rate": 1.8198044607037486e-05, "loss": 0.8612, "step": 708 }, { "epoch": 0.2187258985037791, "grad_norm": 0.0, "learning_rate": 1.8192316641179634e-05, "loss": 0.865, "step": 709 }, { "epoch": 0.21903439765540644, "grad_norm": 0.0, "learning_rate": 1.8186580490340754e-05, "loss": 0.9252, "step": 710 }, { "epoch": 0.21934289680703378, "grad_norm": 0.0, "learning_rate": 1.8180836160251863e-05, "loss": 0.9649, "step": 711 }, { "epoch": 0.21965139595866112, "grad_norm": 0.0, "learning_rate": 1.8175083656652143e-05, "loss": 0.7586, "step": 712 }, { "epoch": 0.21995989511028843, "grad_norm": 0.0, "learning_rate": 1.816932298528895e-05, "loss": 0.8505, "step": 713 }, { "epoch": 0.22026839426191577, "grad_norm": 0.0, "learning_rate": 1.8163554151917796e-05, "loss": 0.8742, "step": 714 }, { "epoch": 0.2205768934135431, "grad_norm": 0.0, "learning_rate": 1.815777716230235e-05, "loss": 0.8806, "step": 715 }, { "epoch": 0.22088539256517045, "grad_norm": 0.0, "learning_rate": 1.8151992022214425e-05, "loss": 0.8527, "step": 716 }, { "epoch": 0.2211938917167978, "grad_norm": 0.0, "learning_rate": 1.8146198737433993e-05, "loss": 0.9671, "step": 717 }, { "epoch": 0.2215023908684251, "grad_norm": 0.0, "learning_rate": 1.8140397313749134e-05, "loss": 0.9928, "step": 718 }, { "epoch": 0.22181089002005244, "grad_norm": 0.0, "learning_rate": 1.8134587756956084e-05, "loss": 1.0005, "step": 719 }, { "epoch": 0.22211938917167978, "grad_norm": 0.0, "learning_rate": 1.8128770072859202e-05, "loss": 0.8197, "step": 720 }, { "epoch": 0.22242788832330712, "grad_norm": 0.0, "learning_rate": 1.812294426727096e-05, "loss": 0.8596, "step": 721 }, { "epoch": 0.22273638747493443, "grad_norm": 0.0, "learning_rate": 1.8117110346011946e-05, "loss": 0.947, "step": 722 }, { "epoch": 0.22304488662656177, "grad_norm": 0.0, "learning_rate": 1.8111268314910857e-05, "loss": 0.9231, "step": 723 }, { "epoch": 0.2233533857781891, "grad_norm": 0.0, "learning_rate": 1.81054181798045e-05, "loss": 0.9239, "step": 724 }, { "epoch": 0.22366188492981645, "grad_norm": 0.0, "learning_rate": 1.809955994653776e-05, "loss": 0.9616, "step": 725 }, { "epoch": 0.22397038408144376, "grad_norm": 0.0, "learning_rate": 1.809369362096363e-05, "loss": 0.8898, "step": 726 }, { "epoch": 0.2242788832330711, "grad_norm": 0.0, "learning_rate": 1.8087819208943186e-05, "loss": 0.8848, "step": 727 }, { "epoch": 0.22458738238469844, "grad_norm": 0.0, "learning_rate": 1.8081936716345574e-05, "loss": 0.8048, "step": 728 }, { "epoch": 0.22489588153632578, "grad_norm": 0.0, "learning_rate": 1.8076046149048024e-05, "loss": 0.8379, "step": 729 }, { "epoch": 0.22520438068795312, "grad_norm": 0.0, "learning_rate": 1.8070147512935828e-05, "loss": 0.8821, "step": 730 }, { "epoch": 0.22551287983958043, "grad_norm": 0.0, "learning_rate": 1.806424081390234e-05, "loss": 0.8625, "step": 731 }, { "epoch": 0.22582137899120777, "grad_norm": 0.0, "learning_rate": 1.8058326057848966e-05, "loss": 0.9501, "step": 732 }, { "epoch": 0.2261298781428351, "grad_norm": 0.0, "learning_rate": 1.8052403250685172e-05, "loss": 0.8423, "step": 733 }, { "epoch": 0.22643837729446245, "grad_norm": 0.0, "learning_rate": 1.804647239832846e-05, "loss": 0.884, "step": 734 }, { "epoch": 0.22674687644608976, "grad_norm": 0.0, "learning_rate": 1.804053350670437e-05, "loss": 0.9397, "step": 735 }, { "epoch": 0.2270553755977171, "grad_norm": 0.0, "learning_rate": 1.8034586581746474e-05, "loss": 0.9681, "step": 736 }, { "epoch": 0.22736387474934444, "grad_norm": 0.0, "learning_rate": 1.8028631629396377e-05, "loss": 0.9377, "step": 737 }, { "epoch": 0.22767237390097178, "grad_norm": 0.0, "learning_rate": 1.8022668655603696e-05, "loss": 0.9687, "step": 738 }, { "epoch": 0.2279808730525991, "grad_norm": 0.0, "learning_rate": 1.8016697666326066e-05, "loss": 0.9338, "step": 739 }, { "epoch": 0.22828937220422643, "grad_norm": 0.0, "learning_rate": 1.801071866752913e-05, "loss": 0.8938, "step": 740 }, { "epoch": 0.22859787135585377, "grad_norm": 0.0, "learning_rate": 1.8004731665186532e-05, "loss": 0.8316, "step": 741 }, { "epoch": 0.2289063705074811, "grad_norm": 0.0, "learning_rate": 1.7998736665279914e-05, "loss": 0.9167, "step": 742 }, { "epoch": 0.22921486965910845, "grad_norm": 0.0, "learning_rate": 1.799273367379891e-05, "loss": 0.9561, "step": 743 }, { "epoch": 0.22952336881073576, "grad_norm": 0.0, "learning_rate": 1.7986722696741132e-05, "loss": 0.9086, "step": 744 }, { "epoch": 0.2298318679623631, "grad_norm": 0.0, "learning_rate": 1.798070374011218e-05, "loss": 0.8398, "step": 745 }, { "epoch": 0.23014036711399044, "grad_norm": 0.0, "learning_rate": 1.7974676809925613e-05, "loss": 0.8583, "step": 746 }, { "epoch": 0.23044886626561778, "grad_norm": 0.0, "learning_rate": 1.7968641912202973e-05, "loss": 0.9205, "step": 747 }, { "epoch": 0.2307573654172451, "grad_norm": 0.0, "learning_rate": 1.7962599052973746e-05, "loss": 0.9376, "step": 748 }, { "epoch": 0.23106586456887243, "grad_norm": 0.0, "learning_rate": 1.7956548238275387e-05, "loss": 0.9171, "step": 749 }, { "epoch": 0.23137436372049977, "grad_norm": 0.0, "learning_rate": 1.7950489474153293e-05, "loss": 0.8935, "step": 750 }, { "epoch": 0.2316828628721271, "grad_norm": 0.0, "learning_rate": 1.7944422766660797e-05, "loss": 1.0081, "step": 751 }, { "epoch": 0.23199136202375442, "grad_norm": 0.0, "learning_rate": 1.7938348121859183e-05, "loss": 0.8754, "step": 752 }, { "epoch": 0.23229986117538176, "grad_norm": 0.0, "learning_rate": 1.7932265545817645e-05, "loss": 0.8547, "step": 753 }, { "epoch": 0.2326083603270091, "grad_norm": 0.0, "learning_rate": 1.792617504461332e-05, "loss": 0.8784, "step": 754 }, { "epoch": 0.23291685947863644, "grad_norm": 0.0, "learning_rate": 1.7920076624331254e-05, "loss": 0.9178, "step": 755 }, { "epoch": 0.23322535863026378, "grad_norm": 0.0, "learning_rate": 1.791397029106441e-05, "loss": 0.8957, "step": 756 }, { "epoch": 0.2335338577818911, "grad_norm": 0.0, "learning_rate": 1.7907856050913644e-05, "loss": 1.1664, "step": 757 }, { "epoch": 0.23384235693351843, "grad_norm": 0.0, "learning_rate": 1.7901733909987735e-05, "loss": 0.8931, "step": 758 }, { "epoch": 0.23415085608514577, "grad_norm": 0.0, "learning_rate": 1.7895603874403326e-05, "loss": 0.9065, "step": 759 }, { "epoch": 0.2344593552367731, "grad_norm": 0.0, "learning_rate": 1.788946595028498e-05, "loss": 0.8369, "step": 760 }, { "epoch": 0.23476785438840042, "grad_norm": 0.0, "learning_rate": 1.7883320143765113e-05, "loss": 0.9097, "step": 761 }, { "epoch": 0.23507635354002776, "grad_norm": 0.0, "learning_rate": 1.787716646098403e-05, "loss": 0.9211, "step": 762 }, { "epoch": 0.2353848526916551, "grad_norm": 0.0, "learning_rate": 1.787100490808991e-05, "loss": 0.9252, "step": 763 }, { "epoch": 0.23569335184328244, "grad_norm": 0.0, "learning_rate": 1.7864835491238785e-05, "loss": 0.8593, "step": 764 }, { "epoch": 0.23600185099490975, "grad_norm": 0.0, "learning_rate": 1.7858658216594545e-05, "loss": 0.9216, "step": 765 }, { "epoch": 0.2363103501465371, "grad_norm": 0.0, "learning_rate": 1.7852473090328937e-05, "loss": 0.8814, "step": 766 }, { "epoch": 0.23661884929816443, "grad_norm": 0.0, "learning_rate": 1.7846280118621547e-05, "loss": 0.9103, "step": 767 }, { "epoch": 0.23692734844979177, "grad_norm": 0.0, "learning_rate": 1.7840079307659803e-05, "loss": 0.8907, "step": 768 }, { "epoch": 0.2372358476014191, "grad_norm": 0.0, "learning_rate": 1.783387066363896e-05, "loss": 0.8361, "step": 769 }, { "epoch": 0.23754434675304642, "grad_norm": 0.0, "learning_rate": 1.782765419276211e-05, "loss": 0.8302, "step": 770 }, { "epoch": 0.23785284590467376, "grad_norm": 0.0, "learning_rate": 1.782142990124015e-05, "loss": 0.9117, "step": 771 }, { "epoch": 0.2381613450563011, "grad_norm": 0.0, "learning_rate": 1.78151977952918e-05, "loss": 0.952, "step": 772 }, { "epoch": 0.23846984420792844, "grad_norm": 0.0, "learning_rate": 1.7808957881143588e-05, "loss": 0.8969, "step": 773 }, { "epoch": 0.23877834335955575, "grad_norm": 0.0, "learning_rate": 1.780271016502984e-05, "loss": 0.8482, "step": 774 }, { "epoch": 0.2390868425111831, "grad_norm": 0.0, "learning_rate": 1.7796454653192675e-05, "loss": 0.7801, "step": 775 }, { "epoch": 0.23939534166281043, "grad_norm": 0.0, "learning_rate": 1.7790191351882006e-05, "loss": 0.9085, "step": 776 }, { "epoch": 0.23970384081443777, "grad_norm": 0.0, "learning_rate": 1.7783920267355527e-05, "loss": 0.8567, "step": 777 }, { "epoch": 0.24001233996606508, "grad_norm": 0.0, "learning_rate": 1.7777641405878706e-05, "loss": 0.8105, "step": 778 }, { "epoch": 0.24032083911769242, "grad_norm": 0.0, "learning_rate": 1.777135477372478e-05, "loss": 0.8481, "step": 779 }, { "epoch": 0.24062933826931976, "grad_norm": 0.0, "learning_rate": 1.7765060377174753e-05, "loss": 0.8734, "step": 780 }, { "epoch": 0.2409378374209471, "grad_norm": 0.0, "learning_rate": 1.7758758222517387e-05, "loss": 0.8233, "step": 781 }, { "epoch": 0.24124633657257444, "grad_norm": 0.0, "learning_rate": 1.7752448316049197e-05, "loss": 0.8362, "step": 782 }, { "epoch": 0.24155483572420175, "grad_norm": 0.0, "learning_rate": 1.774613066407443e-05, "loss": 0.9261, "step": 783 }, { "epoch": 0.2418633348758291, "grad_norm": 0.0, "learning_rate": 1.7739805272905087e-05, "loss": 0.9551, "step": 784 }, { "epoch": 0.24217183402745643, "grad_norm": 0.0, "learning_rate": 1.7733472148860893e-05, "loss": 0.8842, "step": 785 }, { "epoch": 0.24248033317908377, "grad_norm": 0.0, "learning_rate": 1.7727131298269306e-05, "loss": 0.9109, "step": 786 }, { "epoch": 0.24278883233071108, "grad_norm": 0.0, "learning_rate": 1.7720782727465494e-05, "loss": 0.845, "step": 787 }, { "epoch": 0.24309733148233842, "grad_norm": 0.0, "learning_rate": 1.7714426442792344e-05, "loss": 0.9728, "step": 788 }, { "epoch": 0.24340583063396576, "grad_norm": 0.0, "learning_rate": 1.770806245060045e-05, "loss": 0.9432, "step": 789 }, { "epoch": 0.2437143297855931, "grad_norm": 0.0, "learning_rate": 1.770169075724811e-05, "loss": 0.7779, "step": 790 }, { "epoch": 0.2440228289372204, "grad_norm": 0.0, "learning_rate": 1.76953113691013e-05, "loss": 0.7957, "step": 791 }, { "epoch": 0.24433132808884775, "grad_norm": 0.0, "learning_rate": 1.7688924292533706e-05, "loss": 0.8393, "step": 792 }, { "epoch": 0.2446398272404751, "grad_norm": 0.0, "learning_rate": 1.768252953392668e-05, "loss": 0.8052, "step": 793 }, { "epoch": 0.24494832639210243, "grad_norm": 0.0, "learning_rate": 1.7676127099669265e-05, "loss": 0.7727, "step": 794 }, { "epoch": 0.24525682554372977, "grad_norm": 0.0, "learning_rate": 1.7669716996158148e-05, "loss": 0.8535, "step": 795 }, { "epoch": 0.24556532469535708, "grad_norm": 0.0, "learning_rate": 1.76632992297977e-05, "loss": 0.8624, "step": 796 }, { "epoch": 0.24587382384698442, "grad_norm": 0.0, "learning_rate": 1.765687380699994e-05, "loss": 0.8997, "step": 797 }, { "epoch": 0.24618232299861176, "grad_norm": 0.0, "learning_rate": 1.765044073418454e-05, "loss": 0.8142, "step": 798 }, { "epoch": 0.2464908221502391, "grad_norm": 0.0, "learning_rate": 1.7644000017778807e-05, "loss": 0.9157, "step": 799 }, { "epoch": 0.2467993213018664, "grad_norm": 0.0, "learning_rate": 1.7637551664217695e-05, "loss": 0.8581, "step": 800 }, { "epoch": 0.24710782045349375, "grad_norm": 0.0, "learning_rate": 1.763109567994378e-05, "loss": 0.9645, "step": 801 }, { "epoch": 0.2474163196051211, "grad_norm": 0.0, "learning_rate": 1.7624632071407268e-05, "loss": 0.8924, "step": 802 }, { "epoch": 0.24772481875674843, "grad_norm": 0.0, "learning_rate": 1.7618160845065978e-05, "loss": 0.919, "step": 803 }, { "epoch": 0.24803331790837574, "grad_norm": 0.0, "learning_rate": 1.7611682007385345e-05, "loss": 0.889, "step": 804 }, { "epoch": 0.24834181706000308, "grad_norm": 0.0, "learning_rate": 1.76051955648384e-05, "loss": 0.9173, "step": 805 }, { "epoch": 0.24865031621163042, "grad_norm": 0.0, "learning_rate": 1.7598701523905783e-05, "loss": 0.9212, "step": 806 }, { "epoch": 0.24895881536325776, "grad_norm": 0.0, "learning_rate": 1.7592199891075714e-05, "loss": 0.7934, "step": 807 }, { "epoch": 0.2492673145148851, "grad_norm": 0.0, "learning_rate": 1.758569067284401e-05, "loss": 0.8689, "step": 808 }, { "epoch": 0.2495758136665124, "grad_norm": 0.0, "learning_rate": 1.7579173875714058e-05, "loss": 0.8654, "step": 809 }, { "epoch": 0.24988431281813975, "grad_norm": 0.0, "learning_rate": 1.757264950619682e-05, "loss": 0.8837, "step": 810 }, { "epoch": 0.25019281196976706, "grad_norm": 0.0, "learning_rate": 1.7566117570810822e-05, "loss": 0.8707, "step": 811 }, { "epoch": 0.2505013111213944, "grad_norm": 0.0, "learning_rate": 1.7559578076082156e-05, "loss": 0.9307, "step": 812 }, { "epoch": 0.25080981027302174, "grad_norm": 0.0, "learning_rate": 1.7553031028544452e-05, "loss": 0.9386, "step": 813 }, { "epoch": 0.2511183094246491, "grad_norm": 0.0, "learning_rate": 1.7546476434738904e-05, "loss": 0.8065, "step": 814 }, { "epoch": 0.2514268085762764, "grad_norm": 0.0, "learning_rate": 1.7539914301214233e-05, "loss": 0.8347, "step": 815 }, { "epoch": 0.25173530772790376, "grad_norm": 0.0, "learning_rate": 1.7533344634526693e-05, "loss": 0.8738, "step": 816 }, { "epoch": 0.2520438068795311, "grad_norm": 0.0, "learning_rate": 1.7526767441240075e-05, "loss": 0.8786, "step": 817 }, { "epoch": 0.25235230603115844, "grad_norm": 0.0, "learning_rate": 1.7520182727925678e-05, "loss": 0.8297, "step": 818 }, { "epoch": 0.2526608051827858, "grad_norm": 0.0, "learning_rate": 1.751359050116232e-05, "loss": 0.9052, "step": 819 }, { "epoch": 0.25296930433441306, "grad_norm": 0.0, "learning_rate": 1.7506990767536326e-05, "loss": 0.8696, "step": 820 }, { "epoch": 0.2532778034860404, "grad_norm": 0.0, "learning_rate": 1.750038353364152e-05, "loss": 0.8608, "step": 821 }, { "epoch": 0.25358630263766774, "grad_norm": 0.0, "learning_rate": 1.749376880607922e-05, "loss": 0.8843, "step": 822 }, { "epoch": 0.2538948017892951, "grad_norm": 0.0, "learning_rate": 1.748714659145823e-05, "loss": 0.9389, "step": 823 }, { "epoch": 0.2542033009409224, "grad_norm": 0.0, "learning_rate": 1.7480516896394833e-05, "loss": 0.9542, "step": 824 }, { "epoch": 0.25451180009254976, "grad_norm": 0.0, "learning_rate": 1.747387972751279e-05, "loss": 0.9504, "step": 825 }, { "epoch": 0.2548202992441771, "grad_norm": 0.0, "learning_rate": 1.7467235091443326e-05, "loss": 0.934, "step": 826 }, { "epoch": 0.25512879839580443, "grad_norm": 0.0, "learning_rate": 1.7460582994825127e-05, "loss": 0.8076, "step": 827 }, { "epoch": 0.2554372975474317, "grad_norm": 0.0, "learning_rate": 1.7453923444304334e-05, "loss": 0.879, "step": 828 }, { "epoch": 0.25574579669905906, "grad_norm": 0.0, "learning_rate": 1.7447256446534534e-05, "loss": 0.7934, "step": 829 }, { "epoch": 0.2560542958506864, "grad_norm": 0.0, "learning_rate": 1.7440582008176756e-05, "loss": 0.8131, "step": 830 }, { "epoch": 0.25636279500231374, "grad_norm": 0.0, "learning_rate": 1.743390013589946e-05, "loss": 0.8817, "step": 831 }, { "epoch": 0.2566712941539411, "grad_norm": 0.0, "learning_rate": 1.7427210836378535e-05, "loss": 0.8438, "step": 832 }, { "epoch": 0.2569797933055684, "grad_norm": 0.0, "learning_rate": 1.7420514116297294e-05, "loss": 0.8695, "step": 833 }, { "epoch": 0.25728829245719576, "grad_norm": 0.0, "learning_rate": 1.7413809982346458e-05, "loss": 0.8446, "step": 834 }, { "epoch": 0.2575967916088231, "grad_norm": 0.0, "learning_rate": 1.7407098441224154e-05, "loss": 0.8197, "step": 835 }, { "epoch": 0.25790529076045043, "grad_norm": 0.0, "learning_rate": 1.7400379499635926e-05, "loss": 0.8765, "step": 836 }, { "epoch": 0.2582137899120777, "grad_norm": 0.0, "learning_rate": 1.7393653164294685e-05, "loss": 0.8708, "step": 837 }, { "epoch": 0.25852228906370506, "grad_norm": 0.0, "learning_rate": 1.7386919441920748e-05, "loss": 0.7986, "step": 838 }, { "epoch": 0.2588307882153324, "grad_norm": 0.0, "learning_rate": 1.738017833924181e-05, "loss": 0.8454, "step": 839 }, { "epoch": 0.25913928736695974, "grad_norm": 0.0, "learning_rate": 1.737342986299294e-05, "loss": 0.8292, "step": 840 }, { "epoch": 0.2594477865185871, "grad_norm": 0.0, "learning_rate": 1.7366674019916567e-05, "loss": 0.8716, "step": 841 }, { "epoch": 0.2597562856702144, "grad_norm": 0.0, "learning_rate": 1.7359910816762487e-05, "loss": 0.7871, "step": 842 }, { "epoch": 0.26006478482184175, "grad_norm": 0.0, "learning_rate": 1.7353140260287845e-05, "loss": 0.9423, "step": 843 }, { "epoch": 0.2603732839734691, "grad_norm": 0.0, "learning_rate": 1.7346362357257135e-05, "loss": 0.8139, "step": 844 }, { "epoch": 0.26068178312509643, "grad_norm": 0.0, "learning_rate": 1.7339577114442194e-05, "loss": 0.8117, "step": 845 }, { "epoch": 0.2609902822767237, "grad_norm": 0.0, "learning_rate": 1.7332784538622184e-05, "loss": 0.9124, "step": 846 }, { "epoch": 0.26129878142835106, "grad_norm": 0.0, "learning_rate": 1.7325984636583606e-05, "loss": 0.8608, "step": 847 }, { "epoch": 0.2616072805799784, "grad_norm": 0.0, "learning_rate": 1.731917741512027e-05, "loss": 0.8616, "step": 848 }, { "epoch": 0.26191577973160574, "grad_norm": 0.0, "learning_rate": 1.7312362881033293e-05, "loss": 0.8625, "step": 849 }, { "epoch": 0.2622242788832331, "grad_norm": 0.0, "learning_rate": 1.730554104113112e-05, "loss": 0.8, "step": 850 }, { "epoch": 0.2625327780348604, "grad_norm": 0.0, "learning_rate": 1.7298711902229478e-05, "loss": 0.7432, "step": 851 }, { "epoch": 0.26284127718648775, "grad_norm": 0.0, "learning_rate": 1.7291875471151392e-05, "loss": 0.93, "step": 852 }, { "epoch": 0.2631497763381151, "grad_norm": 0.0, "learning_rate": 1.728503175472717e-05, "loss": 0.8965, "step": 853 }, { "epoch": 0.2634582754897424, "grad_norm": 0.0, "learning_rate": 1.7278180759794397e-05, "loss": 0.8166, "step": 854 }, { "epoch": 0.2637667746413697, "grad_norm": 0.0, "learning_rate": 1.727132249319794e-05, "loss": 0.8628, "step": 855 }, { "epoch": 0.26407527379299706, "grad_norm": 0.0, "learning_rate": 1.726445696178992e-05, "loss": 0.8244, "step": 856 }, { "epoch": 0.2643837729446244, "grad_norm": 0.0, "learning_rate": 1.7257584172429723e-05, "loss": 0.7755, "step": 857 }, { "epoch": 0.26469227209625174, "grad_norm": 0.0, "learning_rate": 1.7250704131983984e-05, "loss": 0.7426, "step": 858 }, { "epoch": 0.2650007712478791, "grad_norm": 0.0, "learning_rate": 1.724381684732658e-05, "loss": 0.889, "step": 859 }, { "epoch": 0.2653092703995064, "grad_norm": 0.0, "learning_rate": 1.723692232533863e-05, "loss": 0.9441, "step": 860 }, { "epoch": 0.26561776955113375, "grad_norm": 0.0, "learning_rate": 1.723002057290849e-05, "loss": 0.8384, "step": 861 }, { "epoch": 0.2659262687027611, "grad_norm": 0.0, "learning_rate": 1.7223111596931722e-05, "loss": 0.8027, "step": 862 }, { "epoch": 0.2662347678543884, "grad_norm": 0.0, "learning_rate": 1.721619540431112e-05, "loss": 0.8804, "step": 863 }, { "epoch": 0.2665432670060157, "grad_norm": 0.0, "learning_rate": 1.720927200195668e-05, "loss": 0.8112, "step": 864 }, { "epoch": 0.26685176615764306, "grad_norm": 0.0, "learning_rate": 1.7202341396785613e-05, "loss": 0.9347, "step": 865 }, { "epoch": 0.2671602653092704, "grad_norm": 0.0, "learning_rate": 1.719540359572231e-05, "loss": 0.8728, "step": 866 }, { "epoch": 0.26746876446089773, "grad_norm": 0.0, "learning_rate": 1.7188458605698365e-05, "loss": 0.8349, "step": 867 }, { "epoch": 0.2677772636125251, "grad_norm": 0.0, "learning_rate": 1.7181506433652545e-05, "loss": 0.7284, "step": 868 }, { "epoch": 0.2680857627641524, "grad_norm": 0.0, "learning_rate": 1.71745470865308e-05, "loss": 0.8875, "step": 869 }, { "epoch": 0.26839426191577975, "grad_norm": 0.0, "learning_rate": 1.7167580571286247e-05, "loss": 0.8623, "step": 870 }, { "epoch": 0.2687027610674071, "grad_norm": 0.0, "learning_rate": 1.716060689487916e-05, "loss": 0.817, "step": 871 }, { "epoch": 0.2690112602190344, "grad_norm": 0.0, "learning_rate": 1.7153626064276972e-05, "loss": 0.8666, "step": 872 }, { "epoch": 0.2693197593706617, "grad_norm": 0.0, "learning_rate": 1.7146638086454264e-05, "loss": 0.8331, "step": 873 }, { "epoch": 0.26962825852228905, "grad_norm": 0.0, "learning_rate": 1.7139642968392754e-05, "loss": 0.8649, "step": 874 }, { "epoch": 0.2699367576739164, "grad_norm": 0.0, "learning_rate": 1.71326407170813e-05, "loss": 0.9272, "step": 875 }, { "epoch": 0.27024525682554373, "grad_norm": 0.0, "learning_rate": 1.712563133951588e-05, "loss": 0.8894, "step": 876 }, { "epoch": 0.2705537559771711, "grad_norm": 0.0, "learning_rate": 1.7118614842699595e-05, "loss": 0.8173, "step": 877 }, { "epoch": 0.2708622551287984, "grad_norm": 0.0, "learning_rate": 1.711159123364266e-05, "loss": 0.8226, "step": 878 }, { "epoch": 0.27117075428042575, "grad_norm": 0.0, "learning_rate": 1.7104560519362398e-05, "loss": 0.8828, "step": 879 }, { "epoch": 0.27147925343205304, "grad_norm": 0.0, "learning_rate": 1.7097522706883225e-05, "loss": 0.7943, "step": 880 }, { "epoch": 0.2717877525836804, "grad_norm": 0.0, "learning_rate": 1.709047780323665e-05, "loss": 0.8355, "step": 881 }, { "epoch": 0.2720962517353077, "grad_norm": 0.0, "learning_rate": 1.7083425815461273e-05, "loss": 0.8576, "step": 882 }, { "epoch": 0.27240475088693505, "grad_norm": 0.0, "learning_rate": 1.707636675060276e-05, "loss": 0.8617, "step": 883 }, { "epoch": 0.2727132500385624, "grad_norm": 0.0, "learning_rate": 1.7069300615713866e-05, "loss": 0.7715, "step": 884 }, { "epoch": 0.27302174919018973, "grad_norm": 0.0, "learning_rate": 1.7062227417854388e-05, "loss": 0.8574, "step": 885 }, { "epoch": 0.2733302483418171, "grad_norm": 0.0, "learning_rate": 1.7055147164091197e-05, "loss": 0.8169, "step": 886 }, { "epoch": 0.2736387474934444, "grad_norm": 0.0, "learning_rate": 1.7048059861498205e-05, "loss": 0.8913, "step": 887 }, { "epoch": 0.27394724664507175, "grad_norm": 0.0, "learning_rate": 1.7040965517156365e-05, "loss": 0.898, "step": 888 }, { "epoch": 0.27425574579669904, "grad_norm": 0.0, "learning_rate": 1.703386413815367e-05, "loss": 0.8039, "step": 889 }, { "epoch": 0.2745642449483264, "grad_norm": 0.0, "learning_rate": 1.7026755731585146e-05, "loss": 0.8275, "step": 890 }, { "epoch": 0.2748727440999537, "grad_norm": 0.0, "learning_rate": 1.7019640304552832e-05, "loss": 0.8026, "step": 891 }, { "epoch": 0.27518124325158105, "grad_norm": 0.0, "learning_rate": 1.7012517864165778e-05, "loss": 0.9627, "step": 892 }, { "epoch": 0.2754897424032084, "grad_norm": 0.0, "learning_rate": 1.7005388417540055e-05, "loss": 0.7905, "step": 893 }, { "epoch": 0.27579824155483573, "grad_norm": 0.0, "learning_rate": 1.6998251971798717e-05, "loss": 0.8794, "step": 894 }, { "epoch": 0.27610674070646307, "grad_norm": 0.0, "learning_rate": 1.699110853407183e-05, "loss": 0.8351, "step": 895 }, { "epoch": 0.2764152398580904, "grad_norm": 0.0, "learning_rate": 1.6983958111496428e-05, "loss": 0.8739, "step": 896 }, { "epoch": 0.27672373900971775, "grad_norm": 0.0, "learning_rate": 1.6976800711216527e-05, "loss": 0.9094, "step": 897 }, { "epoch": 0.27703223816134503, "grad_norm": 0.0, "learning_rate": 1.6969636340383134e-05, "loss": 0.8181, "step": 898 }, { "epoch": 0.2773407373129724, "grad_norm": 0.0, "learning_rate": 1.6962465006154186e-05, "loss": 0.8547, "step": 899 }, { "epoch": 0.2776492364645997, "grad_norm": 0.0, "learning_rate": 1.695528671569461e-05, "loss": 0.8662, "step": 900 }, { "epoch": 0.27795773561622705, "grad_norm": 0.0, "learning_rate": 1.694810147617626e-05, "loss": 0.8293, "step": 901 }, { "epoch": 0.2782662347678544, "grad_norm": 0.0, "learning_rate": 1.6940909294777945e-05, "loss": 0.8625, "step": 902 }, { "epoch": 0.27857473391948173, "grad_norm": 0.0, "learning_rate": 1.6933710178685406e-05, "loss": 0.8169, "step": 903 }, { "epoch": 0.27888323307110907, "grad_norm": 0.0, "learning_rate": 1.6926504135091315e-05, "loss": 0.7649, "step": 904 }, { "epoch": 0.2791917322227364, "grad_norm": 0.0, "learning_rate": 1.691929117119526e-05, "loss": 0.8727, "step": 905 }, { "epoch": 0.2795002313743637, "grad_norm": 0.0, "learning_rate": 1.6912071294203746e-05, "loss": 0.8505, "step": 906 }, { "epoch": 0.27980873052599103, "grad_norm": 0.0, "learning_rate": 1.690484451133019e-05, "loss": 0.817, "step": 907 }, { "epoch": 0.2801172296776184, "grad_norm": 0.0, "learning_rate": 1.6897610829794898e-05, "loss": 0.8791, "step": 908 }, { "epoch": 0.2804257288292457, "grad_norm": 0.0, "learning_rate": 1.6890370256825077e-05, "loss": 0.7989, "step": 909 }, { "epoch": 0.28073422798087305, "grad_norm": 0.0, "learning_rate": 1.6883122799654814e-05, "loss": 0.8845, "step": 910 }, { "epoch": 0.2810427271325004, "grad_norm": 0.0, "learning_rate": 1.6875868465525084e-05, "loss": 0.8552, "step": 911 }, { "epoch": 0.28135122628412773, "grad_norm": 0.0, "learning_rate": 1.6868607261683716e-05, "loss": 0.9005, "step": 912 }, { "epoch": 0.28165972543575507, "grad_norm": 0.0, "learning_rate": 1.686133919538542e-05, "loss": 0.8005, "step": 913 }, { "epoch": 0.2819682245873824, "grad_norm": 0.0, "learning_rate": 1.685406427389175e-05, "loss": 0.8123, "step": 914 }, { "epoch": 0.2822767237390097, "grad_norm": 0.0, "learning_rate": 1.6846782504471112e-05, "loss": 0.8071, "step": 915 }, { "epoch": 0.28258522289063703, "grad_norm": 0.0, "learning_rate": 1.6839493894398753e-05, "loss": 0.8606, "step": 916 }, { "epoch": 0.2828937220422644, "grad_norm": 0.0, "learning_rate": 1.6832198450956766e-05, "loss": 0.8498, "step": 917 }, { "epoch": 0.2832022211938917, "grad_norm": 0.0, "learning_rate": 1.6824896181434055e-05, "loss": 0.8226, "step": 918 }, { "epoch": 0.28351072034551905, "grad_norm": 0.0, "learning_rate": 1.6817587093126354e-05, "loss": 0.9171, "step": 919 }, { "epoch": 0.2838192194971464, "grad_norm": 0.0, "learning_rate": 1.6810271193336203e-05, "loss": 0.7303, "step": 920 }, { "epoch": 0.28412771864877373, "grad_norm": 0.0, "learning_rate": 1.6802948489372956e-05, "loss": 0.8212, "step": 921 }, { "epoch": 0.28443621780040107, "grad_norm": 0.0, "learning_rate": 1.6795618988552754e-05, "loss": 0.7725, "step": 922 }, { "epoch": 0.2847447169520284, "grad_norm": 0.0, "learning_rate": 1.6788282698198536e-05, "loss": 0.8705, "step": 923 }, { "epoch": 0.2850532161036557, "grad_norm": 0.0, "learning_rate": 1.678093962564003e-05, "loss": 0.7047, "step": 924 }, { "epoch": 0.28536171525528303, "grad_norm": 0.0, "learning_rate": 1.6773589778213724e-05, "loss": 0.7995, "step": 925 }, { "epoch": 0.28567021440691037, "grad_norm": 0.0, "learning_rate": 1.6766233163262893e-05, "loss": 0.8124, "step": 926 }, { "epoch": 0.2859787135585377, "grad_norm": 0.0, "learning_rate": 1.675886978813756e-05, "loss": 0.9003, "step": 927 }, { "epoch": 0.28628721271016505, "grad_norm": 0.0, "learning_rate": 1.6751499660194502e-05, "loss": 0.8927, "step": 928 }, { "epoch": 0.2865957118617924, "grad_norm": 0.0, "learning_rate": 1.6744122786797254e-05, "loss": 0.8025, "step": 929 }, { "epoch": 0.28690421101341973, "grad_norm": 0.0, "learning_rate": 1.6736739175316086e-05, "loss": 0.9125, "step": 930 }, { "epoch": 0.28721271016504707, "grad_norm": 0.0, "learning_rate": 1.672934883312799e-05, "loss": 0.8202, "step": 931 }, { "epoch": 0.28752120931667435, "grad_norm": 0.0, "learning_rate": 1.6721951767616696e-05, "loss": 0.8578, "step": 932 }, { "epoch": 0.2878297084683017, "grad_norm": 0.0, "learning_rate": 1.671454798617265e-05, "loss": 0.8437, "step": 933 }, { "epoch": 0.28813820761992903, "grad_norm": 0.0, "learning_rate": 1.6707137496192994e-05, "loss": 0.8265, "step": 934 }, { "epoch": 0.28844670677155637, "grad_norm": 0.0, "learning_rate": 1.669972030508159e-05, "loss": 0.8087, "step": 935 }, { "epoch": 0.2887552059231837, "grad_norm": 0.0, "learning_rate": 1.6692296420248985e-05, "loss": 0.7741, "step": 936 }, { "epoch": 0.28906370507481105, "grad_norm": 0.0, "learning_rate": 1.6684865849112414e-05, "loss": 0.8904, "step": 937 }, { "epoch": 0.2893722042264384, "grad_norm": 0.0, "learning_rate": 1.6677428599095796e-05, "loss": 0.867, "step": 938 }, { "epoch": 0.28968070337806573, "grad_norm": 0.0, "learning_rate": 1.666998467762973e-05, "loss": 0.765, "step": 939 }, { "epoch": 0.28998920252969307, "grad_norm": 0.0, "learning_rate": 1.6662534092151457e-05, "loss": 0.7802, "step": 940 }, { "epoch": 0.29029770168132035, "grad_norm": 0.0, "learning_rate": 1.6655076850104902e-05, "loss": 0.8506, "step": 941 }, { "epoch": 0.2906062008329477, "grad_norm": 0.0, "learning_rate": 1.6647612958940622e-05, "loss": 0.8015, "step": 942 }, { "epoch": 0.29091469998457503, "grad_norm": 0.0, "learning_rate": 1.6640142426115833e-05, "loss": 0.8601, "step": 943 }, { "epoch": 0.29122319913620237, "grad_norm": 0.0, "learning_rate": 1.663266525909437e-05, "loss": 0.8014, "step": 944 }, { "epoch": 0.2915316982878297, "grad_norm": 0.0, "learning_rate": 1.6625181465346717e-05, "loss": 0.7378, "step": 945 }, { "epoch": 0.29184019743945705, "grad_norm": 0.0, "learning_rate": 1.6617691052349954e-05, "loss": 0.9138, "step": 946 }, { "epoch": 0.2921486965910844, "grad_norm": 0.0, "learning_rate": 1.661019402758779e-05, "loss": 0.8472, "step": 947 }, { "epoch": 0.29245719574271173, "grad_norm": 0.0, "learning_rate": 1.6602690398550542e-05, "loss": 0.8478, "step": 948 }, { "epoch": 0.29276569489433907, "grad_norm": 0.0, "learning_rate": 1.6595180172735116e-05, "loss": 0.8009, "step": 949 }, { "epoch": 0.29307419404596635, "grad_norm": 0.0, "learning_rate": 1.658766335764501e-05, "loss": 0.8826, "step": 950 }, { "epoch": 0.2933826931975937, "grad_norm": 0.0, "learning_rate": 1.6580139960790316e-05, "loss": 0.89, "step": 951 }, { "epoch": 0.29369119234922103, "grad_norm": 0.0, "learning_rate": 1.6572609989687687e-05, "loss": 0.8615, "step": 952 }, { "epoch": 0.29399969150084837, "grad_norm": 0.0, "learning_rate": 1.6565073451860355e-05, "loss": 0.8855, "step": 953 }, { "epoch": 0.2943081906524757, "grad_norm": 0.0, "learning_rate": 1.6557530354838108e-05, "loss": 0.824, "step": 954 }, { "epoch": 0.29461668980410305, "grad_norm": 0.0, "learning_rate": 1.6549980706157295e-05, "loss": 0.8551, "step": 955 }, { "epoch": 0.2949251889557304, "grad_norm": 0.0, "learning_rate": 1.6542424513360793e-05, "loss": 0.8421, "step": 956 }, { "epoch": 0.29523368810735773, "grad_norm": 0.0, "learning_rate": 1.653486178399804e-05, "loss": 0.8077, "step": 957 }, { "epoch": 0.295542187258985, "grad_norm": 0.0, "learning_rate": 1.6527292525624986e-05, "loss": 0.8392, "step": 958 }, { "epoch": 0.29585068641061235, "grad_norm": 0.0, "learning_rate": 1.6519716745804112e-05, "loss": 0.7634, "step": 959 }, { "epoch": 0.2961591855622397, "grad_norm": 0.0, "learning_rate": 1.651213445210442e-05, "loss": 0.7943, "step": 960 }, { "epoch": 0.29646768471386703, "grad_norm": 0.0, "learning_rate": 1.650454565210141e-05, "loss": 0.848, "step": 961 }, { "epoch": 0.29677618386549437, "grad_norm": 0.0, "learning_rate": 1.649695035337709e-05, "loss": 0.8384, "step": 962 }, { "epoch": 0.2970846830171217, "grad_norm": 0.0, "learning_rate": 1.648934856351995e-05, "loss": 0.8023, "step": 963 }, { "epoch": 0.29739318216874905, "grad_norm": 0.0, "learning_rate": 1.648174029012498e-05, "loss": 0.872, "step": 964 }, { "epoch": 0.2977016813203764, "grad_norm": 0.0, "learning_rate": 1.647412554079364e-05, "loss": 0.7895, "step": 965 }, { "epoch": 0.2980101804720037, "grad_norm": 0.0, "learning_rate": 1.6466504323133857e-05, "loss": 0.8942, "step": 966 }, { "epoch": 0.298318679623631, "grad_norm": 0.0, "learning_rate": 1.6458876644760033e-05, "loss": 0.877, "step": 967 }, { "epoch": 0.29862717877525835, "grad_norm": 0.0, "learning_rate": 1.6451242513293005e-05, "loss": 0.7891, "step": 968 }, { "epoch": 0.2989356779268857, "grad_norm": 0.0, "learning_rate": 1.644360193636008e-05, "loss": 0.863, "step": 969 }, { "epoch": 0.29924417707851303, "grad_norm": 0.0, "learning_rate": 1.6435954921594985e-05, "loss": 0.8448, "step": 970 }, { "epoch": 0.29955267623014037, "grad_norm": 0.0, "learning_rate": 1.642830147663789e-05, "loss": 0.8145, "step": 971 }, { "epoch": 0.2998611753817677, "grad_norm": 0.0, "learning_rate": 1.6420641609135388e-05, "loss": 0.8186, "step": 972 }, { "epoch": 0.30016967453339505, "grad_norm": 0.0, "learning_rate": 1.6412975326740485e-05, "loss": 0.7535, "step": 973 }, { "epoch": 0.3004781736850224, "grad_norm": 0.0, "learning_rate": 1.6405302637112598e-05, "loss": 0.8412, "step": 974 }, { "epoch": 0.3007866728366497, "grad_norm": 0.0, "learning_rate": 1.6397623547917553e-05, "loss": 0.7729, "step": 975 }, { "epoch": 0.301095171988277, "grad_norm": 0.0, "learning_rate": 1.6389938066827556e-05, "loss": 0.7915, "step": 976 }, { "epoch": 0.30140367113990435, "grad_norm": 0.0, "learning_rate": 1.6382246201521213e-05, "loss": 0.7591, "step": 977 }, { "epoch": 0.3017121702915317, "grad_norm": 0.0, "learning_rate": 1.6374547959683497e-05, "loss": 0.8292, "step": 978 }, { "epoch": 0.30202066944315903, "grad_norm": 0.0, "learning_rate": 1.6366843349005755e-05, "loss": 0.7889, "step": 979 }, { "epoch": 0.30232916859478637, "grad_norm": 0.0, "learning_rate": 1.63591323771857e-05, "loss": 0.7864, "step": 980 }, { "epoch": 0.3026376677464137, "grad_norm": 0.0, "learning_rate": 1.6351415051927407e-05, "loss": 0.8059, "step": 981 }, { "epoch": 0.30294616689804105, "grad_norm": 0.0, "learning_rate": 1.634369138094128e-05, "loss": 0.8261, "step": 982 }, { "epoch": 0.3032546660496684, "grad_norm": 0.0, "learning_rate": 1.6335961371944084e-05, "loss": 0.8465, "step": 983 }, { "epoch": 0.30356316520129567, "grad_norm": 0.0, "learning_rate": 1.6328225032658892e-05, "loss": 0.8349, "step": 984 }, { "epoch": 0.303871664352923, "grad_norm": 0.0, "learning_rate": 1.6320482370815132e-05, "loss": 0.9298, "step": 985 }, { "epoch": 0.30418016350455035, "grad_norm": 0.0, "learning_rate": 1.6312733394148524e-05, "loss": 0.7644, "step": 986 }, { "epoch": 0.3044886626561777, "grad_norm": 0.0, "learning_rate": 1.6304978110401106e-05, "loss": 0.8733, "step": 987 }, { "epoch": 0.30479716180780503, "grad_norm": 0.0, "learning_rate": 1.6297216527321223e-05, "loss": 0.7945, "step": 988 }, { "epoch": 0.30510566095943237, "grad_norm": 0.0, "learning_rate": 1.62894486526635e-05, "loss": 0.8094, "step": 989 }, { "epoch": 0.3054141601110597, "grad_norm": 0.0, "learning_rate": 1.6281674494188863e-05, "loss": 0.8753, "step": 990 }, { "epoch": 0.30572265926268705, "grad_norm": 0.0, "learning_rate": 1.6273894059664507e-05, "loss": 0.7734, "step": 991 }, { "epoch": 0.3060311584143144, "grad_norm": 0.0, "learning_rate": 1.62661073568639e-05, "loss": 0.8767, "step": 992 }, { "epoch": 0.30633965756594167, "grad_norm": 0.0, "learning_rate": 1.625831439356677e-05, "loss": 0.843, "step": 993 }, { "epoch": 0.306648156717569, "grad_norm": 0.0, "learning_rate": 1.6250515177559106e-05, "loss": 0.8475, "step": 994 }, { "epoch": 0.30695665586919635, "grad_norm": 0.0, "learning_rate": 1.6242709716633137e-05, "loss": 0.806, "step": 995 }, { "epoch": 0.3072651550208237, "grad_norm": 0.0, "learning_rate": 1.6234898018587336e-05, "loss": 0.8225, "step": 996 }, { "epoch": 0.307573654172451, "grad_norm": 0.0, "learning_rate": 1.622708009122641e-05, "loss": 0.8084, "step": 997 }, { "epoch": 0.30788215332407837, "grad_norm": 0.0, "learning_rate": 1.621925594236128e-05, "loss": 0.8576, "step": 998 }, { "epoch": 0.3081906524757057, "grad_norm": 0.0, "learning_rate": 1.621142557980909e-05, "loss": 0.917, "step": 999 }, { "epoch": 0.30849915162733305, "grad_norm": 0.0, "learning_rate": 1.6203589011393198e-05, "loss": 0.8328, "step": 1000 }, { "epoch": 0.3088076507789604, "grad_norm": 0.0, "learning_rate": 1.6195746244943142e-05, "loss": 1.1333, "step": 1001 }, { "epoch": 0.30911614993058767, "grad_norm": 0.0, "learning_rate": 1.618789728829468e-05, "loss": 0.8886, "step": 1002 }, { "epoch": 0.309424649082215, "grad_norm": 0.0, "learning_rate": 1.618004214928973e-05, "loss": 0.7299, "step": 1003 }, { "epoch": 0.30973314823384235, "grad_norm": 0.0, "learning_rate": 1.6172180835776404e-05, "loss": 0.8005, "step": 1004 }, { "epoch": 0.3100416473854697, "grad_norm": 0.0, "learning_rate": 1.6164313355608974e-05, "loss": 0.7836, "step": 1005 }, { "epoch": 0.310350146537097, "grad_norm": 0.0, "learning_rate": 1.6156439716647875e-05, "loss": 0.8636, "step": 1006 }, { "epoch": 0.31065864568872437, "grad_norm": 0.0, "learning_rate": 1.6148559926759694e-05, "loss": 0.8338, "step": 1007 }, { "epoch": 0.3109671448403517, "grad_norm": 0.0, "learning_rate": 1.614067399381717e-05, "loss": 0.8371, "step": 1008 }, { "epoch": 0.31127564399197905, "grad_norm": 0.0, "learning_rate": 1.6132781925699168e-05, "loss": 1.1121, "step": 1009 }, { "epoch": 0.31158414314360633, "grad_norm": 0.0, "learning_rate": 1.6124883730290695e-05, "loss": 0.8098, "step": 1010 }, { "epoch": 0.31189264229523367, "grad_norm": 0.0, "learning_rate": 1.6116979415482875e-05, "loss": 0.9123, "step": 1011 }, { "epoch": 0.312201141446861, "grad_norm": 0.0, "learning_rate": 1.6109068989172937e-05, "loss": 0.8123, "step": 1012 }, { "epoch": 0.31250964059848835, "grad_norm": 0.0, "learning_rate": 1.610115245926423e-05, "loss": 0.8503, "step": 1013 }, { "epoch": 0.3128181397501157, "grad_norm": 0.0, "learning_rate": 1.60932298336662e-05, "loss": 0.7881, "step": 1014 }, { "epoch": 0.313126638901743, "grad_norm": 0.0, "learning_rate": 1.608530112029437e-05, "loss": 0.786, "step": 1015 }, { "epoch": 0.31343513805337037, "grad_norm": 0.0, "learning_rate": 1.6077366327070354e-05, "loss": 0.8752, "step": 1016 }, { "epoch": 0.3137436372049977, "grad_norm": 0.0, "learning_rate": 1.606942546192185e-05, "loss": 0.8288, "step": 1017 }, { "epoch": 0.31405213635662504, "grad_norm": 0.0, "learning_rate": 1.60614785327826e-05, "loss": 0.8726, "step": 1018 }, { "epoch": 0.31436063550825233, "grad_norm": 0.0, "learning_rate": 1.6053525547592424e-05, "loss": 0.7704, "step": 1019 }, { "epoch": 0.31466913465987967, "grad_norm": 0.0, "learning_rate": 1.6045566514297184e-05, "loss": 0.8248, "step": 1020 }, { "epoch": 0.314977633811507, "grad_norm": 0.0, "learning_rate": 1.603760144084879e-05, "loss": 0.8371, "step": 1021 }, { "epoch": 0.31528613296313435, "grad_norm": 0.0, "learning_rate": 1.602963033520518e-05, "loss": 0.829, "step": 1022 }, { "epoch": 0.3155946321147617, "grad_norm": 0.0, "learning_rate": 1.602165320533032e-05, "loss": 0.7444, "step": 1023 }, { "epoch": 0.315903131266389, "grad_norm": 0.0, "learning_rate": 1.6013670059194203e-05, "loss": 0.7885, "step": 1024 }, { "epoch": 0.31621163041801637, "grad_norm": 0.0, "learning_rate": 1.6005680904772822e-05, "loss": 0.7537, "step": 1025 }, { "epoch": 0.3165201295696437, "grad_norm": 0.0, "learning_rate": 1.5997685750048183e-05, "loss": 0.8151, "step": 1026 }, { "epoch": 0.31682862872127104, "grad_norm": 0.0, "learning_rate": 1.5989684603008274e-05, "loss": 0.7634, "step": 1027 }, { "epoch": 0.31713712787289833, "grad_norm": 0.0, "learning_rate": 1.5981677471647085e-05, "loss": 0.802, "step": 1028 }, { "epoch": 0.31744562702452567, "grad_norm": 0.0, "learning_rate": 1.5973664363964573e-05, "loss": 0.811, "step": 1029 }, { "epoch": 0.317754126176153, "grad_norm": 0.0, "learning_rate": 1.5965645287966674e-05, "loss": 0.8126, "step": 1030 }, { "epoch": 0.31806262532778035, "grad_norm": 0.0, "learning_rate": 1.5957620251665272e-05, "loss": 0.8259, "step": 1031 }, { "epoch": 0.3183711244794077, "grad_norm": 0.0, "learning_rate": 1.594958926307824e-05, "loss": 0.7898, "step": 1032 }, { "epoch": 0.318679623631035, "grad_norm": 0.0, "learning_rate": 1.5941552330229352e-05, "loss": 0.8517, "step": 1033 }, { "epoch": 0.31898812278266236, "grad_norm": 0.0, "learning_rate": 1.593350946114836e-05, "loss": 0.7696, "step": 1034 }, { "epoch": 0.3192966219342897, "grad_norm": 0.0, "learning_rate": 1.592546066387092e-05, "loss": 0.7539, "step": 1035 }, { "epoch": 0.319605121085917, "grad_norm": 0.0, "learning_rate": 1.5917405946438635e-05, "loss": 0.9824, "step": 1036 }, { "epoch": 0.3199136202375443, "grad_norm": 0.0, "learning_rate": 1.5909345316899e-05, "loss": 0.862, "step": 1037 }, { "epoch": 0.32022211938917167, "grad_norm": 0.0, "learning_rate": 1.590127878330543e-05, "loss": 0.8534, "step": 1038 }, { "epoch": 0.320530618540799, "grad_norm": 0.0, "learning_rate": 1.5893206353717234e-05, "loss": 0.8594, "step": 1039 }, { "epoch": 0.32083911769242635, "grad_norm": 0.0, "learning_rate": 1.5885128036199615e-05, "loss": 0.8474, "step": 1040 }, { "epoch": 0.3211476168440537, "grad_norm": 0.0, "learning_rate": 1.587704383882366e-05, "loss": 0.7864, "step": 1041 }, { "epoch": 0.321456115995681, "grad_norm": 0.0, "learning_rate": 1.586895376966632e-05, "loss": 0.9179, "step": 1042 }, { "epoch": 0.32176461514730836, "grad_norm": 0.0, "learning_rate": 1.5860857836810427e-05, "loss": 0.7946, "step": 1043 }, { "epoch": 0.3220731142989357, "grad_norm": 0.0, "learning_rate": 1.585275604834466e-05, "loss": 0.8053, "step": 1044 }, { "epoch": 0.322381613450563, "grad_norm": 0.0, "learning_rate": 1.584464841236356e-05, "loss": 0.8583, "step": 1045 }, { "epoch": 0.3226901126021903, "grad_norm": 0.0, "learning_rate": 1.5836534936967493e-05, "loss": 0.796, "step": 1046 }, { "epoch": 0.32299861175381767, "grad_norm": 0.0, "learning_rate": 1.5828415630262678e-05, "loss": 0.864, "step": 1047 }, { "epoch": 0.323307110905445, "grad_norm": 0.0, "learning_rate": 1.5820290500361147e-05, "loss": 0.791, "step": 1048 }, { "epoch": 0.32361561005707234, "grad_norm": 0.0, "learning_rate": 1.5812159555380752e-05, "loss": 0.8613, "step": 1049 }, { "epoch": 0.3239241092086997, "grad_norm": 0.0, "learning_rate": 1.5804022803445164e-05, "loss": 0.9215, "step": 1050 }, { "epoch": 0.324232608360327, "grad_norm": 0.0, "learning_rate": 1.5795880252683848e-05, "loss": 0.8645, "step": 1051 }, { "epoch": 0.32454110751195436, "grad_norm": 0.0, "learning_rate": 1.5787731911232057e-05, "loss": 0.8505, "step": 1052 }, { "epoch": 0.3248496066635817, "grad_norm": 0.0, "learning_rate": 1.5779577787230843e-05, "loss": 0.8863, "step": 1053 }, { "epoch": 0.325158105815209, "grad_norm": 0.0, "learning_rate": 1.5771417888827026e-05, "loss": 0.7677, "step": 1054 }, { "epoch": 0.3254666049668363, "grad_norm": 0.0, "learning_rate": 1.5763252224173196e-05, "loss": 0.7598, "step": 1055 }, { "epoch": 0.32577510411846367, "grad_norm": 0.0, "learning_rate": 1.575508080142771e-05, "loss": 0.8464, "step": 1056 }, { "epoch": 0.326083603270091, "grad_norm": 0.0, "learning_rate": 1.5746903628754672e-05, "loss": 0.7798, "step": 1057 }, { "epoch": 0.32639210242171834, "grad_norm": 0.0, "learning_rate": 1.5738720714323935e-05, "loss": 0.8526, "step": 1058 }, { "epoch": 0.3267006015733457, "grad_norm": 0.0, "learning_rate": 1.573053206631108e-05, "loss": 0.8373, "step": 1059 }, { "epoch": 0.327009100724973, "grad_norm": 0.0, "learning_rate": 1.5722337692897428e-05, "loss": 0.7411, "step": 1060 }, { "epoch": 0.32731759987660036, "grad_norm": 0.0, "learning_rate": 1.571413760227001e-05, "loss": 0.7517, "step": 1061 }, { "epoch": 0.32762609902822765, "grad_norm": 0.0, "learning_rate": 1.5705931802621583e-05, "loss": 0.7368, "step": 1062 }, { "epoch": 0.327934598179855, "grad_norm": 0.0, "learning_rate": 1.569772030215059e-05, "loss": 0.8144, "step": 1063 }, { "epoch": 0.3282430973314823, "grad_norm": 0.0, "learning_rate": 1.5689503109061185e-05, "loss": 0.8972, "step": 1064 }, { "epoch": 0.32855159648310966, "grad_norm": 0.0, "learning_rate": 1.5681280231563196e-05, "loss": 0.8487, "step": 1065 }, { "epoch": 0.328860095634737, "grad_norm": 0.0, "learning_rate": 1.5673051677872143e-05, "loss": 0.8097, "step": 1066 }, { "epoch": 0.32916859478636434, "grad_norm": 0.0, "learning_rate": 1.566481745620921e-05, "loss": 0.8631, "step": 1067 }, { "epoch": 0.3294770939379917, "grad_norm": 0.0, "learning_rate": 1.565657757480125e-05, "loss": 0.7832, "step": 1068 }, { "epoch": 0.329785593089619, "grad_norm": 0.0, "learning_rate": 1.564833204188076e-05, "loss": 0.8752, "step": 1069 }, { "epoch": 0.33009409224124636, "grad_norm": 0.0, "learning_rate": 1.5640080865685888e-05, "loss": 0.8046, "step": 1070 }, { "epoch": 0.33040259139287365, "grad_norm": 0.0, "learning_rate": 1.563182405446043e-05, "loss": 0.8132, "step": 1071 }, { "epoch": 0.330711090544501, "grad_norm": 0.0, "learning_rate": 1.5623561616453798e-05, "loss": 0.8263, "step": 1072 }, { "epoch": 0.3310195896961283, "grad_norm": 0.0, "learning_rate": 1.5615293559921037e-05, "loss": 0.8533, "step": 1073 }, { "epoch": 0.33132808884775566, "grad_norm": 0.0, "learning_rate": 1.5607019893122792e-05, "loss": 0.7371, "step": 1074 }, { "epoch": 0.331636587999383, "grad_norm": 0.0, "learning_rate": 1.5598740624325325e-05, "loss": 0.9294, "step": 1075 }, { "epoch": 0.33194508715101034, "grad_norm": 0.0, "learning_rate": 1.5590455761800494e-05, "loss": 0.7979, "step": 1076 }, { "epoch": 0.3322535863026377, "grad_norm": 0.0, "learning_rate": 1.558216531382574e-05, "loss": 0.8072, "step": 1077 }, { "epoch": 0.332562085454265, "grad_norm": 0.0, "learning_rate": 1.5573869288684087e-05, "loss": 0.8374, "step": 1078 }, { "epoch": 0.33287058460589236, "grad_norm": 0.0, "learning_rate": 1.556556769466414e-05, "loss": 0.7954, "step": 1079 }, { "epoch": 0.33317908375751965, "grad_norm": 0.0, "learning_rate": 1.5557260540060047e-05, "loss": 0.7761, "step": 1080 }, { "epoch": 0.333487582909147, "grad_norm": 0.0, "learning_rate": 1.554894783317153e-05, "loss": 0.8671, "step": 1081 }, { "epoch": 0.3337960820607743, "grad_norm": 0.0, "learning_rate": 1.554062958230385e-05, "loss": 0.8552, "step": 1082 }, { "epoch": 0.33410458121240166, "grad_norm": 0.0, "learning_rate": 1.5532305795767817e-05, "loss": 0.844, "step": 1083 }, { "epoch": 0.334413080364029, "grad_norm": 0.0, "learning_rate": 1.5523976481879754e-05, "loss": 0.7294, "step": 1084 }, { "epoch": 0.33472157951565634, "grad_norm": 0.0, "learning_rate": 1.5515641648961526e-05, "loss": 0.8419, "step": 1085 }, { "epoch": 0.3350300786672837, "grad_norm": 0.0, "learning_rate": 1.5507301305340496e-05, "loss": 0.8752, "step": 1086 }, { "epoch": 0.335338577818911, "grad_norm": 0.0, "learning_rate": 1.549895545934954e-05, "loss": 0.7934, "step": 1087 }, { "epoch": 0.3356470769705383, "grad_norm": 0.0, "learning_rate": 1.549060411932704e-05, "loss": 0.928, "step": 1088 }, { "epoch": 0.33595557612216564, "grad_norm": 0.0, "learning_rate": 1.5482247293616843e-05, "loss": 0.859, "step": 1089 }, { "epoch": 0.336264075273793, "grad_norm": 0.0, "learning_rate": 1.5473884990568298e-05, "loss": 0.9184, "step": 1090 }, { "epoch": 0.3365725744254203, "grad_norm": 0.0, "learning_rate": 1.5465517218536228e-05, "loss": 0.7502, "step": 1091 }, { "epoch": 0.33688107357704766, "grad_norm": 0.0, "learning_rate": 1.5457143985880905e-05, "loss": 0.7997, "step": 1092 }, { "epoch": 0.337189572728675, "grad_norm": 0.0, "learning_rate": 1.5448765300968066e-05, "loss": 0.7976, "step": 1093 }, { "epoch": 0.33749807188030234, "grad_norm": 0.0, "learning_rate": 1.544038117216889e-05, "loss": 0.7606, "step": 1094 }, { "epoch": 0.3378065710319297, "grad_norm": 0.0, "learning_rate": 1.5431991607859997e-05, "loss": 0.782, "step": 1095 }, { "epoch": 0.338115070183557, "grad_norm": 0.0, "learning_rate": 1.542359661642345e-05, "loss": 0.7834, "step": 1096 }, { "epoch": 0.3384235693351843, "grad_norm": 0.0, "learning_rate": 1.5415196206246712e-05, "loss": 0.8193, "step": 1097 }, { "epoch": 0.33873206848681164, "grad_norm": 0.0, "learning_rate": 1.5406790385722676e-05, "loss": 0.8953, "step": 1098 }, { "epoch": 0.339040567638439, "grad_norm": 0.0, "learning_rate": 1.5398379163249636e-05, "loss": 0.7241, "step": 1099 }, { "epoch": 0.3393490667900663, "grad_norm": 0.0, "learning_rate": 1.5389962547231286e-05, "loss": 0.8359, "step": 1100 }, { "epoch": 0.33965756594169366, "grad_norm": 0.0, "learning_rate": 1.5381540546076694e-05, "loss": 0.7672, "step": 1101 }, { "epoch": 0.339966065093321, "grad_norm": 0.0, "learning_rate": 1.5373113168200332e-05, "loss": 0.7012, "step": 1102 }, { "epoch": 0.34027456424494834, "grad_norm": 0.0, "learning_rate": 1.536468042202203e-05, "loss": 0.7484, "step": 1103 }, { "epoch": 0.3405830633965757, "grad_norm": 0.0, "learning_rate": 1.5356242315966974e-05, "loss": 0.8494, "step": 1104 }, { "epoch": 0.340891562548203, "grad_norm": 0.0, "learning_rate": 1.5347798858465727e-05, "loss": 0.8927, "step": 1105 }, { "epoch": 0.3412000616998303, "grad_norm": 0.0, "learning_rate": 1.5339350057954178e-05, "loss": 0.8585, "step": 1106 }, { "epoch": 0.34150856085145764, "grad_norm": 0.0, "learning_rate": 1.5330895922873562e-05, "loss": 0.8511, "step": 1107 }, { "epoch": 0.341817060003085, "grad_norm": 0.0, "learning_rate": 1.5322436461670445e-05, "loss": 0.8373, "step": 1108 }, { "epoch": 0.3421255591547123, "grad_norm": 0.0, "learning_rate": 1.531397168279672e-05, "loss": 0.7488, "step": 1109 }, { "epoch": 0.34243405830633966, "grad_norm": 0.0, "learning_rate": 1.5305501594709578e-05, "loss": 0.78, "step": 1110 }, { "epoch": 0.342742557457967, "grad_norm": 0.0, "learning_rate": 1.5297026205871528e-05, "loss": 0.8525, "step": 1111 }, { "epoch": 0.34305105660959434, "grad_norm": 0.0, "learning_rate": 1.5288545524750366e-05, "loss": 0.7766, "step": 1112 }, { "epoch": 0.3433595557612217, "grad_norm": 0.0, "learning_rate": 1.5280059559819177e-05, "loss": 0.7915, "step": 1113 }, { "epoch": 0.34366805491284896, "grad_norm": 0.0, "learning_rate": 1.5271568319556336e-05, "loss": 0.8904, "step": 1114 }, { "epoch": 0.3439765540644763, "grad_norm": 0.0, "learning_rate": 1.5263071812445475e-05, "loss": 0.7938, "step": 1115 }, { "epoch": 0.34428505321610364, "grad_norm": 0.0, "learning_rate": 1.525457004697549e-05, "loss": 0.7644, "step": 1116 }, { "epoch": 0.344593552367731, "grad_norm": 0.0, "learning_rate": 1.524606303164054e-05, "loss": 0.829, "step": 1117 }, { "epoch": 0.3449020515193583, "grad_norm": 0.0, "learning_rate": 1.5237550774940018e-05, "loss": 0.8274, "step": 1118 }, { "epoch": 0.34521055067098566, "grad_norm": 0.0, "learning_rate": 1.522903328537856e-05, "loss": 0.7864, "step": 1119 }, { "epoch": 0.345519049822613, "grad_norm": 0.0, "learning_rate": 1.522051057146603e-05, "loss": 0.7428, "step": 1120 }, { "epoch": 0.34582754897424034, "grad_norm": 0.0, "learning_rate": 1.5211982641717509e-05, "loss": 0.7813, "step": 1121 }, { "epoch": 0.3461360481258677, "grad_norm": 0.0, "learning_rate": 1.5203449504653294e-05, "loss": 0.8042, "step": 1122 }, { "epoch": 0.34644454727749496, "grad_norm": 0.0, "learning_rate": 1.5194911168798876e-05, "loss": 0.8232, "step": 1123 }, { "epoch": 0.3467530464291223, "grad_norm": 0.0, "learning_rate": 1.5186367642684952e-05, "loss": 0.8271, "step": 1124 }, { "epoch": 0.34706154558074964, "grad_norm": 0.0, "learning_rate": 1.517781893484739e-05, "loss": 0.8243, "step": 1125 }, { "epoch": 0.347370044732377, "grad_norm": 0.0, "learning_rate": 1.5169265053827246e-05, "loss": 0.7355, "step": 1126 }, { "epoch": 0.3476785438840043, "grad_norm": 0.0, "learning_rate": 1.5160706008170744e-05, "loss": 0.7534, "step": 1127 }, { "epoch": 0.34798704303563166, "grad_norm": 0.0, "learning_rate": 1.5152141806429268e-05, "loss": 0.7316, "step": 1128 }, { "epoch": 0.348295542187259, "grad_norm": 0.0, "learning_rate": 1.5143572457159344e-05, "loss": 0.8143, "step": 1129 }, { "epoch": 0.34860404133888634, "grad_norm": 0.0, "learning_rate": 1.5134997968922655e-05, "loss": 0.7644, "step": 1130 }, { "epoch": 0.3489125404905137, "grad_norm": 0.0, "learning_rate": 1.5126418350286005e-05, "loss": 0.8303, "step": 1131 }, { "epoch": 0.34922103964214096, "grad_norm": 0.0, "learning_rate": 1.5117833609821333e-05, "loss": 0.7803, "step": 1132 }, { "epoch": 0.3495295387937683, "grad_norm": 0.0, "learning_rate": 1.5109243756105692e-05, "loss": 0.8298, "step": 1133 }, { "epoch": 0.34983803794539564, "grad_norm": 0.0, "learning_rate": 1.510064879772125e-05, "loss": 0.8462, "step": 1134 }, { "epoch": 0.350146537097023, "grad_norm": 0.0, "learning_rate": 1.5092048743255258e-05, "loss": 0.8431, "step": 1135 }, { "epoch": 0.3504550362486503, "grad_norm": 0.0, "learning_rate": 1.5083443601300078e-05, "loss": 0.8221, "step": 1136 }, { "epoch": 0.35076353540027766, "grad_norm": 0.0, "learning_rate": 1.5074833380453146e-05, "loss": 0.8576, "step": 1137 }, { "epoch": 0.351072034551905, "grad_norm": 0.0, "learning_rate": 1.5066218089316972e-05, "loss": 0.8344, "step": 1138 }, { "epoch": 0.35138053370353234, "grad_norm": 0.0, "learning_rate": 1.505759773649913e-05, "loss": 0.8229, "step": 1139 }, { "epoch": 0.3516890328551596, "grad_norm": 0.0, "learning_rate": 1.5048972330612256e-05, "loss": 0.7867, "step": 1140 }, { "epoch": 0.35199753200678696, "grad_norm": 0.0, "learning_rate": 1.5040341880274038e-05, "loss": 0.8067, "step": 1141 }, { "epoch": 0.3523060311584143, "grad_norm": 0.0, "learning_rate": 1.5031706394107188e-05, "loss": 0.8575, "step": 1142 }, { "epoch": 0.35261453031004164, "grad_norm": 0.0, "learning_rate": 1.502306588073947e-05, "loss": 0.8686, "step": 1143 }, { "epoch": 0.352923029461669, "grad_norm": 0.0, "learning_rate": 1.5014420348803649e-05, "loss": 0.7929, "step": 1144 }, { "epoch": 0.3532315286132963, "grad_norm": 0.0, "learning_rate": 1.5005769806937523e-05, "loss": 0.6688, "step": 1145 }, { "epoch": 0.35354002776492366, "grad_norm": 0.0, "learning_rate": 1.4997114263783887e-05, "loss": 0.8364, "step": 1146 }, { "epoch": 0.353848526916551, "grad_norm": 0.0, "learning_rate": 1.4988453727990537e-05, "loss": 0.7549, "step": 1147 }, { "epoch": 0.35415702606817834, "grad_norm": 0.0, "learning_rate": 1.4979788208210249e-05, "loss": 0.8211, "step": 1148 }, { "epoch": 0.3544655252198056, "grad_norm": 0.0, "learning_rate": 1.4971117713100785e-05, "loss": 0.8714, "step": 1149 }, { "epoch": 0.35477402437143296, "grad_norm": 0.0, "learning_rate": 1.4962442251324876e-05, "loss": 0.8186, "step": 1150 }, { "epoch": 0.3550825235230603, "grad_norm": 0.0, "learning_rate": 1.4953761831550212e-05, "loss": 0.7695, "step": 1151 }, { "epoch": 0.35539102267468764, "grad_norm": 0.0, "learning_rate": 1.4945076462449448e-05, "loss": 0.8127, "step": 1152 }, { "epoch": 0.355699521826315, "grad_norm": 0.0, "learning_rate": 1.493638615270017e-05, "loss": 0.7623, "step": 1153 }, { "epoch": 0.3560080209779423, "grad_norm": 0.0, "learning_rate": 1.4927690910984911e-05, "loss": 0.7749, "step": 1154 }, { "epoch": 0.35631652012956966, "grad_norm": 0.0, "learning_rate": 1.4918990745991122e-05, "loss": 0.8062, "step": 1155 }, { "epoch": 0.356625019281197, "grad_norm": 0.0, "learning_rate": 1.491028566641118e-05, "loss": 0.8323, "step": 1156 }, { "epoch": 0.35693351843282434, "grad_norm": 0.0, "learning_rate": 1.4901575680942368e-05, "loss": 0.791, "step": 1157 }, { "epoch": 0.3572420175844516, "grad_norm": 0.0, "learning_rate": 1.4892860798286875e-05, "loss": 0.8861, "step": 1158 }, { "epoch": 0.35755051673607896, "grad_norm": 0.0, "learning_rate": 1.4884141027151778e-05, "loss": 0.8189, "step": 1159 }, { "epoch": 0.3578590158877063, "grad_norm": 0.0, "learning_rate": 1.487541637624904e-05, "loss": 1.1021, "step": 1160 }, { "epoch": 0.35816751503933364, "grad_norm": 0.0, "learning_rate": 1.4866686854295502e-05, "loss": 0.8152, "step": 1161 }, { "epoch": 0.358476014190961, "grad_norm": 0.0, "learning_rate": 1.4857952470012871e-05, "loss": 0.7575, "step": 1162 }, { "epoch": 0.3587845133425883, "grad_norm": 0.0, "learning_rate": 1.4849213232127701e-05, "loss": 0.681, "step": 1163 }, { "epoch": 0.35909301249421566, "grad_norm": 0.0, "learning_rate": 1.4840469149371414e-05, "loss": 0.7734, "step": 1164 }, { "epoch": 0.359401511645843, "grad_norm": 0.0, "learning_rate": 1.483172023048026e-05, "loss": 0.7589, "step": 1165 }, { "epoch": 0.3597100107974703, "grad_norm": 0.0, "learning_rate": 1.4822966484195323e-05, "loss": 0.8426, "step": 1166 }, { "epoch": 0.3600185099490976, "grad_norm": 0.0, "learning_rate": 1.4814207919262513e-05, "loss": 0.7434, "step": 1167 }, { "epoch": 0.36032700910072496, "grad_norm": 0.0, "learning_rate": 1.4805444544432547e-05, "loss": 0.7557, "step": 1168 }, { "epoch": 0.3606355082523523, "grad_norm": 0.0, "learning_rate": 1.4796676368460963e-05, "loss": 0.7985, "step": 1169 }, { "epoch": 0.36094400740397964, "grad_norm": 0.0, "learning_rate": 1.4787903400108074e-05, "loss": 0.7233, "step": 1170 }, { "epoch": 0.361252506555607, "grad_norm": 0.0, "learning_rate": 1.4779125648139002e-05, "loss": 0.774, "step": 1171 }, { "epoch": 0.3615610057072343, "grad_norm": 0.0, "learning_rate": 1.4770343121323633e-05, "loss": 0.783, "step": 1172 }, { "epoch": 0.36186950485886166, "grad_norm": 0.0, "learning_rate": 1.4761555828436635e-05, "loss": 0.7645, "step": 1173 }, { "epoch": 0.362178004010489, "grad_norm": 0.0, "learning_rate": 1.4752763778257427e-05, "loss": 0.8663, "step": 1174 }, { "epoch": 0.3624865031621163, "grad_norm": 0.0, "learning_rate": 1.474396697957019e-05, "loss": 0.7965, "step": 1175 }, { "epoch": 0.3627950023137436, "grad_norm": 0.0, "learning_rate": 1.4735165441163846e-05, "loss": 0.8533, "step": 1176 }, { "epoch": 0.36310350146537096, "grad_norm": 0.0, "learning_rate": 1.472635917183205e-05, "loss": 0.8312, "step": 1177 }, { "epoch": 0.3634120006169983, "grad_norm": 0.0, "learning_rate": 1.4717548180373187e-05, "loss": 0.8175, "step": 1178 }, { "epoch": 0.36372049976862564, "grad_norm": 0.0, "learning_rate": 1.4708732475590361e-05, "loss": 0.9286, "step": 1179 }, { "epoch": 0.364028998920253, "grad_norm": 0.0, "learning_rate": 1.4699912066291383e-05, "loss": 0.6973, "step": 1180 }, { "epoch": 0.3643374980718803, "grad_norm": 0.0, "learning_rate": 1.4691086961288758e-05, "loss": 0.7972, "step": 1181 }, { "epoch": 0.36464599722350766, "grad_norm": 0.0, "learning_rate": 1.4682257169399697e-05, "loss": 0.7783, "step": 1182 }, { "epoch": 0.364954496375135, "grad_norm": 0.0, "learning_rate": 1.4673422699446078e-05, "loss": 0.9437, "step": 1183 }, { "epoch": 0.3652629955267623, "grad_norm": 0.0, "learning_rate": 1.4664583560254465e-05, "loss": 0.8215, "step": 1184 }, { "epoch": 0.3655714946783896, "grad_norm": 0.0, "learning_rate": 1.4655739760656082e-05, "loss": 0.7997, "step": 1185 }, { "epoch": 0.36587999383001696, "grad_norm": 0.0, "learning_rate": 1.464689130948681e-05, "loss": 0.7802, "step": 1186 }, { "epoch": 0.3661884929816443, "grad_norm": 0.0, "learning_rate": 1.4638038215587176e-05, "loss": 0.8049, "step": 1187 }, { "epoch": 0.36649699213327164, "grad_norm": 0.0, "learning_rate": 1.4629180487802348e-05, "loss": 0.8285, "step": 1188 }, { "epoch": 0.366805491284899, "grad_norm": 0.0, "learning_rate": 1.4620318134982114e-05, "loss": 0.743, "step": 1189 }, { "epoch": 0.3671139904365263, "grad_norm": 0.0, "learning_rate": 1.4611451165980905e-05, "loss": 0.7363, "step": 1190 }, { "epoch": 0.36742248958815366, "grad_norm": 0.0, "learning_rate": 1.4602579589657742e-05, "loss": 0.8358, "step": 1191 }, { "epoch": 0.36773098873978094, "grad_norm": 0.0, "learning_rate": 1.4593703414876262e-05, "loss": 0.7217, "step": 1192 }, { "epoch": 0.3680394878914083, "grad_norm": 0.0, "learning_rate": 1.4584822650504685e-05, "loss": 0.7513, "step": 1193 }, { "epoch": 0.3683479870430356, "grad_norm": 0.0, "learning_rate": 1.4575937305415829e-05, "loss": 0.8975, "step": 1194 }, { "epoch": 0.36865648619466296, "grad_norm": 0.0, "learning_rate": 1.4567047388487077e-05, "loss": 0.8446, "step": 1195 }, { "epoch": 0.3689649853462903, "grad_norm": 0.0, "learning_rate": 1.4558152908600394e-05, "loss": 0.7981, "step": 1196 }, { "epoch": 0.36927348449791764, "grad_norm": 0.0, "learning_rate": 1.4549253874642289e-05, "loss": 0.749, "step": 1197 }, { "epoch": 0.369581983649545, "grad_norm": 0.0, "learning_rate": 1.4540350295503834e-05, "loss": 0.8389, "step": 1198 }, { "epoch": 0.3698904828011723, "grad_norm": 0.0, "learning_rate": 1.4531442180080625e-05, "loss": 0.8098, "step": 1199 }, { "epoch": 0.37019898195279966, "grad_norm": 0.0, "learning_rate": 1.4522529537272813e-05, "loss": 0.84, "step": 1200 }, { "epoch": 0.37050748110442694, "grad_norm": 0.0, "learning_rate": 1.451361237598505e-05, "loss": 0.7923, "step": 1201 }, { "epoch": 0.3708159802560543, "grad_norm": 0.0, "learning_rate": 1.4504690705126519e-05, "loss": 0.7732, "step": 1202 }, { "epoch": 0.3711244794076816, "grad_norm": 0.0, "learning_rate": 1.4495764533610902e-05, "loss": 0.8072, "step": 1203 }, { "epoch": 0.37143297855930896, "grad_norm": 0.0, "learning_rate": 1.4486833870356374e-05, "loss": 0.7858, "step": 1204 }, { "epoch": 0.3717414777109363, "grad_norm": 0.0, "learning_rate": 1.4477898724285603e-05, "loss": 0.7929, "step": 1205 }, { "epoch": 0.37204997686256364, "grad_norm": 0.0, "learning_rate": 1.4468959104325737e-05, "loss": 0.7335, "step": 1206 }, { "epoch": 0.372358476014191, "grad_norm": 0.0, "learning_rate": 1.446001501940839e-05, "loss": 0.9012, "step": 1207 }, { "epoch": 0.3726669751658183, "grad_norm": 0.0, "learning_rate": 1.4451066478469633e-05, "loss": 0.8734, "step": 1208 }, { "epoch": 0.37297547431744565, "grad_norm": 0.0, "learning_rate": 1.4442113490450002e-05, "loss": 0.8447, "step": 1209 }, { "epoch": 0.37328397346907294, "grad_norm": 0.0, "learning_rate": 1.4433156064294465e-05, "loss": 0.7566, "step": 1210 }, { "epoch": 0.3735924726207003, "grad_norm": 0.0, "learning_rate": 1.4424194208952427e-05, "loss": 0.8284, "step": 1211 }, { "epoch": 0.3739009717723276, "grad_norm": 0.0, "learning_rate": 1.4415227933377715e-05, "loss": 0.8023, "step": 1212 }, { "epoch": 0.37420947092395496, "grad_norm": 0.0, "learning_rate": 1.4406257246528584e-05, "loss": 0.8064, "step": 1213 }, { "epoch": 0.3745179700755823, "grad_norm": 0.0, "learning_rate": 1.4397282157367682e-05, "loss": 0.7575, "step": 1214 }, { "epoch": 0.37482646922720964, "grad_norm": 0.0, "learning_rate": 1.4388302674862065e-05, "loss": 0.8027, "step": 1215 }, { "epoch": 0.375134968378837, "grad_norm": 0.0, "learning_rate": 1.4379318807983172e-05, "loss": 0.8194, "step": 1216 }, { "epoch": 0.3754434675304643, "grad_norm": 0.0, "learning_rate": 1.4370330565706826e-05, "loss": 0.8225, "step": 1217 }, { "epoch": 0.3757519666820916, "grad_norm": 0.0, "learning_rate": 1.4361337957013227e-05, "loss": 0.7896, "step": 1218 }, { "epoch": 0.37606046583371894, "grad_norm": 0.0, "learning_rate": 1.4352340990886924e-05, "loss": 0.7724, "step": 1219 }, { "epoch": 0.3763689649853463, "grad_norm": 0.0, "learning_rate": 1.434333967631683e-05, "loss": 0.7446, "step": 1220 }, { "epoch": 0.3766774641369736, "grad_norm": 0.0, "learning_rate": 1.4334334022296196e-05, "loss": 0.8344, "step": 1221 }, { "epoch": 0.37698596328860096, "grad_norm": 0.0, "learning_rate": 1.432532403782262e-05, "loss": 0.7863, "step": 1222 }, { "epoch": 0.3772944624402283, "grad_norm": 0.0, "learning_rate": 1.431630973189801e-05, "loss": 0.8061, "step": 1223 }, { "epoch": 0.37760296159185563, "grad_norm": 0.0, "learning_rate": 1.430729111352861e-05, "loss": 0.7785, "step": 1224 }, { "epoch": 0.377911460743483, "grad_norm": 0.0, "learning_rate": 1.4298268191724951e-05, "loss": 0.8627, "step": 1225 }, { "epoch": 0.3782199598951103, "grad_norm": 0.0, "learning_rate": 1.4289240975501885e-05, "loss": 0.714, "step": 1226 }, { "epoch": 0.3785284590467376, "grad_norm": 0.0, "learning_rate": 1.4280209473878541e-05, "loss": 0.8796, "step": 1227 }, { "epoch": 0.37883695819836494, "grad_norm": 0.0, "learning_rate": 1.4271173695878335e-05, "loss": 0.8085, "step": 1228 }, { "epoch": 0.3791454573499923, "grad_norm": 0.0, "learning_rate": 1.4262133650528951e-05, "loss": 0.7804, "step": 1229 }, { "epoch": 0.3794539565016196, "grad_norm": 0.0, "learning_rate": 1.4253089346862346e-05, "loss": 0.8191, "step": 1230 }, { "epoch": 0.37976245565324696, "grad_norm": 0.0, "learning_rate": 1.4244040793914717e-05, "loss": 0.7658, "step": 1231 }, { "epoch": 0.3800709548048743, "grad_norm": 0.0, "learning_rate": 1.423498800072652e-05, "loss": 0.7949, "step": 1232 }, { "epoch": 0.38037945395650163, "grad_norm": 0.0, "learning_rate": 1.422593097634244e-05, "loss": 0.8312, "step": 1233 }, { "epoch": 0.380687953108129, "grad_norm": 0.0, "learning_rate": 1.4216869729811393e-05, "loss": 0.7824, "step": 1234 }, { "epoch": 0.3809964522597563, "grad_norm": 0.0, "learning_rate": 1.4207804270186514e-05, "loss": 0.8182, "step": 1235 }, { "epoch": 0.3813049514113836, "grad_norm": 0.0, "learning_rate": 1.419873460652514e-05, "loss": 0.8145, "step": 1236 }, { "epoch": 0.38161345056301094, "grad_norm": 0.0, "learning_rate": 1.4189660747888816e-05, "loss": 0.7986, "step": 1237 }, { "epoch": 0.3819219497146383, "grad_norm": 0.0, "learning_rate": 1.4180582703343276e-05, "loss": 0.7957, "step": 1238 }, { "epoch": 0.3822304488662656, "grad_norm": 0.0, "learning_rate": 1.4171500481958433e-05, "loss": 0.8269, "step": 1239 }, { "epoch": 0.38253894801789295, "grad_norm": 0.0, "learning_rate": 1.416241409280838e-05, "loss": 0.7489, "step": 1240 }, { "epoch": 0.3828474471695203, "grad_norm": 0.0, "learning_rate": 1.4153323544971371e-05, "loss": 0.7288, "step": 1241 }, { "epoch": 0.38315594632114763, "grad_norm": 0.0, "learning_rate": 1.4144228847529809e-05, "loss": 0.9038, "step": 1242 }, { "epoch": 0.383464445472775, "grad_norm": 0.0, "learning_rate": 1.4135130009570251e-05, "loss": 0.7882, "step": 1243 }, { "epoch": 0.38377294462440226, "grad_norm": 0.0, "learning_rate": 1.4126027040183382e-05, "loss": 0.7397, "step": 1244 }, { "epoch": 0.3840814437760296, "grad_norm": 0.0, "learning_rate": 1.411691994846403e-05, "loss": 0.7556, "step": 1245 }, { "epoch": 0.38438994292765694, "grad_norm": 0.0, "learning_rate": 1.4107808743511124e-05, "loss": 0.7622, "step": 1246 }, { "epoch": 0.3846984420792843, "grad_norm": 0.0, "learning_rate": 1.4098693434427715e-05, "loss": 0.9192, "step": 1247 }, { "epoch": 0.3850069412309116, "grad_norm": 0.0, "learning_rate": 1.4089574030320947e-05, "loss": 0.7547, "step": 1248 }, { "epoch": 0.38531544038253895, "grad_norm": 0.0, "learning_rate": 1.4080450540302061e-05, "loss": 0.8098, "step": 1249 }, { "epoch": 0.3856239395341663, "grad_norm": 0.0, "learning_rate": 1.4071322973486376e-05, "loss": 0.7672, "step": 1250 }, { "epoch": 0.38593243868579363, "grad_norm": 0.0, "learning_rate": 1.4062191338993288e-05, "loss": 0.7344, "step": 1251 }, { "epoch": 0.386240937837421, "grad_norm": 0.0, "learning_rate": 1.405305564594625e-05, "loss": 0.7607, "step": 1252 }, { "epoch": 0.38654943698904826, "grad_norm": 0.0, "learning_rate": 1.4043915903472777e-05, "loss": 0.7472, "step": 1253 }, { "epoch": 0.3868579361406756, "grad_norm": 0.0, "learning_rate": 1.4034772120704431e-05, "loss": 0.9083, "step": 1254 }, { "epoch": 0.38716643529230294, "grad_norm": 0.0, "learning_rate": 1.4025624306776806e-05, "loss": 0.8289, "step": 1255 }, { "epoch": 0.3874749344439303, "grad_norm": 0.0, "learning_rate": 1.401647247082952e-05, "loss": 0.7318, "step": 1256 }, { "epoch": 0.3877834335955576, "grad_norm": 0.0, "learning_rate": 1.400731662200622e-05, "loss": 0.8786, "step": 1257 }, { "epoch": 0.38809193274718495, "grad_norm": 0.0, "learning_rate": 1.3998156769454549e-05, "loss": 0.8683, "step": 1258 }, { "epoch": 0.3884004318988123, "grad_norm": 0.0, "learning_rate": 1.3988992922326166e-05, "loss": 1.088, "step": 1259 }, { "epoch": 0.38870893105043963, "grad_norm": 0.0, "learning_rate": 1.3979825089776708e-05, "loss": 0.8816, "step": 1260 }, { "epoch": 0.38901743020206697, "grad_norm": 0.0, "learning_rate": 1.39706532809658e-05, "loss": 0.8478, "step": 1261 }, { "epoch": 0.38932592935369426, "grad_norm": 0.0, "learning_rate": 1.396147750505704e-05, "loss": 0.7809, "step": 1262 }, { "epoch": 0.3896344285053216, "grad_norm": 0.0, "learning_rate": 1.395229777121798e-05, "loss": 0.7805, "step": 1263 }, { "epoch": 0.38994292765694893, "grad_norm": 0.0, "learning_rate": 1.394311408862014e-05, "loss": 0.8007, "step": 1264 }, { "epoch": 0.3902514268085763, "grad_norm": 0.0, "learning_rate": 1.3933926466438979e-05, "loss": 0.7991, "step": 1265 }, { "epoch": 0.3905599259602036, "grad_norm": 0.0, "learning_rate": 1.3924734913853893e-05, "loss": 0.7925, "step": 1266 }, { "epoch": 0.39086842511183095, "grad_norm": 0.0, "learning_rate": 1.3915539440048203e-05, "loss": 0.8606, "step": 1267 }, { "epoch": 0.3911769242634583, "grad_norm": 0.0, "learning_rate": 1.3906340054209149e-05, "loss": 0.693, "step": 1268 }, { "epoch": 0.39148542341508563, "grad_norm": 0.0, "learning_rate": 1.3897136765527879e-05, "loss": 0.8273, "step": 1269 }, { "epoch": 0.3917939225667129, "grad_norm": 0.0, "learning_rate": 1.388792958319944e-05, "loss": 0.7817, "step": 1270 }, { "epoch": 0.39210242171834025, "grad_norm": 0.0, "learning_rate": 1.3878718516422773e-05, "loss": 0.7842, "step": 1271 }, { "epoch": 0.3924109208699676, "grad_norm": 0.0, "learning_rate": 1.3869503574400694e-05, "loss": 0.7567, "step": 1272 }, { "epoch": 0.39271942002159493, "grad_norm": 0.0, "learning_rate": 1.3860284766339896e-05, "loss": 0.7805, "step": 1273 }, { "epoch": 0.3930279191732223, "grad_norm": 0.0, "learning_rate": 1.385106210145093e-05, "loss": 0.7966, "step": 1274 }, { "epoch": 0.3933364183248496, "grad_norm": 0.0, "learning_rate": 1.3841835588948208e-05, "loss": 0.8702, "step": 1275 }, { "epoch": 0.39364491747647695, "grad_norm": 0.0, "learning_rate": 1.3832605238049972e-05, "loss": 0.8001, "step": 1276 }, { "epoch": 0.3939534166281043, "grad_norm": 0.0, "learning_rate": 1.3823371057978312e-05, "loss": 0.8095, "step": 1277 }, { "epoch": 0.39426191577973163, "grad_norm": 0.0, "learning_rate": 1.3814133057959143e-05, "loss": 0.8103, "step": 1278 }, { "epoch": 0.3945704149313589, "grad_norm": 0.0, "learning_rate": 1.380489124722219e-05, "loss": 0.7678, "step": 1279 }, { "epoch": 0.39487891408298625, "grad_norm": 0.0, "learning_rate": 1.3795645635000987e-05, "loss": 0.8294, "step": 1280 }, { "epoch": 0.3951874132346136, "grad_norm": 0.0, "learning_rate": 1.3786396230532871e-05, "loss": 0.7952, "step": 1281 }, { "epoch": 0.39549591238624093, "grad_norm": 0.0, "learning_rate": 1.377714304305896e-05, "loss": 0.7974, "step": 1282 }, { "epoch": 0.3958044115378683, "grad_norm": 0.0, "learning_rate": 1.3767886081824158e-05, "loss": 0.7424, "step": 1283 }, { "epoch": 0.3961129106894956, "grad_norm": 0.0, "learning_rate": 1.375862535607714e-05, "loss": 0.7725, "step": 1284 }, { "epoch": 0.39642140984112295, "grad_norm": 0.0, "learning_rate": 1.3749360875070337e-05, "loss": 0.7731, "step": 1285 }, { "epoch": 0.3967299089927503, "grad_norm": 0.0, "learning_rate": 1.3740092648059933e-05, "loss": 0.7925, "step": 1286 }, { "epoch": 0.39703840814437763, "grad_norm": 0.0, "learning_rate": 1.3730820684305857e-05, "loss": 0.7795, "step": 1287 }, { "epoch": 0.3973469072960049, "grad_norm": 0.0, "learning_rate": 1.3721544993071774e-05, "loss": 0.7334, "step": 1288 }, { "epoch": 0.39765540644763225, "grad_norm": 0.0, "learning_rate": 1.3712265583625059e-05, "loss": 0.7917, "step": 1289 }, { "epoch": 0.3979639055992596, "grad_norm": 0.0, "learning_rate": 1.3702982465236827e-05, "loss": 0.7484, "step": 1290 }, { "epoch": 0.39827240475088693, "grad_norm": 0.0, "learning_rate": 1.3693695647181873e-05, "loss": 0.8103, "step": 1291 }, { "epoch": 0.39858090390251427, "grad_norm": 0.0, "learning_rate": 1.3684405138738705e-05, "loss": 0.759, "step": 1292 }, { "epoch": 0.3988894030541416, "grad_norm": 0.0, "learning_rate": 1.367511094918951e-05, "loss": 0.8789, "step": 1293 }, { "epoch": 0.39919790220576895, "grad_norm": 0.0, "learning_rate": 1.3665813087820157e-05, "loss": 0.8449, "step": 1294 }, { "epoch": 0.3995064013573963, "grad_norm": 0.0, "learning_rate": 1.3656511563920178e-05, "loss": 0.7016, "step": 1295 }, { "epoch": 0.3998149005090236, "grad_norm": 0.0, "learning_rate": 1.3647206386782774e-05, "loss": 0.7889, "step": 1296 }, { "epoch": 0.4001233996606509, "grad_norm": 0.0, "learning_rate": 1.3637897565704785e-05, "loss": 0.7689, "step": 1297 }, { "epoch": 0.40043189881227825, "grad_norm": 0.0, "learning_rate": 1.3628585109986699e-05, "loss": 0.7998, "step": 1298 }, { "epoch": 0.4007403979639056, "grad_norm": 0.0, "learning_rate": 1.3619269028932633e-05, "loss": 0.763, "step": 1299 }, { "epoch": 0.40104889711553293, "grad_norm": 0.0, "learning_rate": 1.3609949331850323e-05, "loss": 0.8125, "step": 1300 }, { "epoch": 0.40135739626716027, "grad_norm": 0.0, "learning_rate": 1.3600626028051127e-05, "loss": 0.7986, "step": 1301 }, { "epoch": 0.4016658954187876, "grad_norm": 0.0, "learning_rate": 1.3591299126849994e-05, "loss": 1.0926, "step": 1302 }, { "epoch": 0.40197439457041495, "grad_norm": 0.0, "learning_rate": 1.3581968637565481e-05, "loss": 0.8111, "step": 1303 }, { "epoch": 0.4022828937220423, "grad_norm": 0.0, "learning_rate": 1.3572634569519718e-05, "loss": 0.8507, "step": 1304 }, { "epoch": 0.4025913928736696, "grad_norm": 0.0, "learning_rate": 1.3563296932038416e-05, "loss": 0.7634, "step": 1305 }, { "epoch": 0.4028998920252969, "grad_norm": 0.0, "learning_rate": 1.355395573445085e-05, "loss": 0.8496, "step": 1306 }, { "epoch": 0.40320839117692425, "grad_norm": 0.0, "learning_rate": 1.3544610986089853e-05, "loss": 0.808, "step": 1307 }, { "epoch": 0.4035168903285516, "grad_norm": 0.0, "learning_rate": 1.3535262696291805e-05, "loss": 0.7791, "step": 1308 }, { "epoch": 0.40382538948017893, "grad_norm": 0.0, "learning_rate": 1.352591087439663e-05, "loss": 0.8124, "step": 1309 }, { "epoch": 0.40413388863180627, "grad_norm": 0.0, "learning_rate": 1.3516555529747772e-05, "loss": 0.7973, "step": 1310 }, { "epoch": 0.4044423877834336, "grad_norm": 0.0, "learning_rate": 1.3507196671692202e-05, "loss": 0.8126, "step": 1311 }, { "epoch": 0.40475088693506095, "grad_norm": 0.0, "learning_rate": 1.3497834309580396e-05, "loss": 0.78, "step": 1312 }, { "epoch": 0.4050593860866883, "grad_norm": 0.0, "learning_rate": 1.3488468452766328e-05, "loss": 0.9037, "step": 1313 }, { "epoch": 0.4053678852383156, "grad_norm": 0.0, "learning_rate": 1.3479099110607475e-05, "loss": 0.7797, "step": 1314 }, { "epoch": 0.4056763843899429, "grad_norm": 0.0, "learning_rate": 1.3469726292464788e-05, "loss": 0.7638, "step": 1315 }, { "epoch": 0.40598488354157025, "grad_norm": 0.0, "learning_rate": 1.3460350007702691e-05, "loss": 0.7942, "step": 1316 }, { "epoch": 0.4062933826931976, "grad_norm": 0.0, "learning_rate": 1.3450970265689074e-05, "loss": 0.8901, "step": 1317 }, { "epoch": 0.40660188184482493, "grad_norm": 0.0, "learning_rate": 1.3441587075795281e-05, "loss": 0.7406, "step": 1318 }, { "epoch": 0.40691038099645227, "grad_norm": 0.0, "learning_rate": 1.34322004473961e-05, "loss": 0.7988, "step": 1319 }, { "epoch": 0.4072188801480796, "grad_norm": 0.0, "learning_rate": 1.3422810389869749e-05, "loss": 0.7412, "step": 1320 }, { "epoch": 0.40752737929970695, "grad_norm": 0.0, "learning_rate": 1.3413416912597885e-05, "loss": 0.8367, "step": 1321 }, { "epoch": 0.40783587845133423, "grad_norm": 0.0, "learning_rate": 1.340402002496557e-05, "loss": 0.7749, "step": 1322 }, { "epoch": 0.40814437760296157, "grad_norm": 0.0, "learning_rate": 1.3394619736361278e-05, "loss": 0.7756, "step": 1323 }, { "epoch": 0.4084528767545889, "grad_norm": 0.0, "learning_rate": 1.3385216056176882e-05, "loss": 0.7485, "step": 1324 }, { "epoch": 0.40876137590621625, "grad_norm": 0.0, "learning_rate": 1.337580899380764e-05, "loss": 0.732, "step": 1325 }, { "epoch": 0.4090698750578436, "grad_norm": 0.0, "learning_rate": 1.3366398558652192e-05, "loss": 0.77, "step": 1326 }, { "epoch": 0.40937837420947093, "grad_norm": 0.0, "learning_rate": 1.3356984760112543e-05, "loss": 0.7426, "step": 1327 }, { "epoch": 0.40968687336109827, "grad_norm": 0.0, "learning_rate": 1.3347567607594069e-05, "loss": 0.7481, "step": 1328 }, { "epoch": 0.4099953725127256, "grad_norm": 0.0, "learning_rate": 1.3338147110505486e-05, "loss": 0.8337, "step": 1329 }, { "epoch": 0.41030387166435295, "grad_norm": 0.0, "learning_rate": 1.3328723278258857e-05, "loss": 0.8113, "step": 1330 }, { "epoch": 0.41061237081598023, "grad_norm": 0.0, "learning_rate": 1.3319296120269573e-05, "loss": 0.8169, "step": 1331 }, { "epoch": 0.41092086996760757, "grad_norm": 0.0, "learning_rate": 1.3309865645956355e-05, "loss": 0.8443, "step": 1332 }, { "epoch": 0.4112293691192349, "grad_norm": 0.0, "learning_rate": 1.3300431864741229e-05, "loss": 0.7986, "step": 1333 }, { "epoch": 0.41153786827086225, "grad_norm": 0.0, "learning_rate": 1.3290994786049532e-05, "loss": 0.6888, "step": 1334 }, { "epoch": 0.4118463674224896, "grad_norm": 0.0, "learning_rate": 1.3281554419309892e-05, "loss": 0.7991, "step": 1335 }, { "epoch": 0.41215486657411693, "grad_norm": 0.0, "learning_rate": 1.3272110773954221e-05, "loss": 0.7479, "step": 1336 }, { "epoch": 0.41246336572574427, "grad_norm": 0.0, "learning_rate": 1.326266385941771e-05, "loss": 0.6592, "step": 1337 }, { "epoch": 0.4127718648773716, "grad_norm": 0.0, "learning_rate": 1.3253213685138814e-05, "loss": 0.7235, "step": 1338 }, { "epoch": 0.41308036402899895, "grad_norm": 0.0, "learning_rate": 1.3243760260559245e-05, "loss": 0.8486, "step": 1339 }, { "epoch": 0.41338886318062623, "grad_norm": 0.0, "learning_rate": 1.323430359512396e-05, "loss": 0.7809, "step": 1340 }, { "epoch": 0.41369736233225357, "grad_norm": 0.0, "learning_rate": 1.3224843698281165e-05, "loss": 0.7706, "step": 1341 }, { "epoch": 0.4140058614838809, "grad_norm": 0.0, "learning_rate": 1.3215380579482277e-05, "loss": 0.7855, "step": 1342 }, { "epoch": 0.41431436063550825, "grad_norm": 0.0, "learning_rate": 1.3205914248181948e-05, "loss": 0.8558, "step": 1343 }, { "epoch": 0.4146228597871356, "grad_norm": 0.0, "learning_rate": 1.3196444713838028e-05, "loss": 0.7852, "step": 1344 }, { "epoch": 0.41493135893876293, "grad_norm": 0.0, "learning_rate": 1.3186971985911575e-05, "loss": 0.7275, "step": 1345 }, { "epoch": 0.41523985809039027, "grad_norm": 0.0, "learning_rate": 1.317749607386683e-05, "loss": 0.8248, "step": 1346 }, { "epoch": 0.4155483572420176, "grad_norm": 0.0, "learning_rate": 1.316801698717123e-05, "loss": 0.8147, "step": 1347 }, { "epoch": 0.4158568563936449, "grad_norm": 0.0, "learning_rate": 1.3158534735295365e-05, "loss": 0.8669, "step": 1348 }, { "epoch": 0.41616535554527223, "grad_norm": 0.0, "learning_rate": 1.3149049327712996e-05, "loss": 0.7874, "step": 1349 }, { "epoch": 0.41647385469689957, "grad_norm": 0.0, "learning_rate": 1.3139560773901045e-05, "loss": 0.7368, "step": 1350 }, { "epoch": 0.4167823538485269, "grad_norm": 0.0, "learning_rate": 1.3130069083339563e-05, "loss": 0.7996, "step": 1351 }, { "epoch": 0.41709085300015425, "grad_norm": 0.0, "learning_rate": 1.3120574265511744e-05, "loss": 0.7961, "step": 1352 }, { "epoch": 0.4173993521517816, "grad_norm": 0.0, "learning_rate": 1.3111076329903898e-05, "loss": 0.7844, "step": 1353 }, { "epoch": 0.41770785130340893, "grad_norm": 0.0, "learning_rate": 1.3101575286005467e-05, "loss": 0.7793, "step": 1354 }, { "epoch": 0.41801635045503627, "grad_norm": 0.0, "learning_rate": 1.309207114330898e-05, "loss": 0.7862, "step": 1355 }, { "epoch": 0.4183248496066636, "grad_norm": 0.0, "learning_rate": 1.3082563911310069e-05, "loss": 0.8153, "step": 1356 }, { "epoch": 0.4186333487582909, "grad_norm": 0.0, "learning_rate": 1.3073053599507455e-05, "loss": 0.752, "step": 1357 }, { "epoch": 0.41894184790991823, "grad_norm": 0.0, "learning_rate": 1.3063540217402934e-05, "loss": 0.8408, "step": 1358 }, { "epoch": 0.41925034706154557, "grad_norm": 0.0, "learning_rate": 1.305402377450137e-05, "loss": 0.8508, "step": 1359 }, { "epoch": 0.4195588462131729, "grad_norm": 0.0, "learning_rate": 1.3044504280310683e-05, "loss": 0.6705, "step": 1360 }, { "epoch": 0.41986734536480025, "grad_norm": 0.0, "learning_rate": 1.3034981744341843e-05, "loss": 0.8153, "step": 1361 }, { "epoch": 0.4201758445164276, "grad_norm": 0.0, "learning_rate": 1.3025456176108864e-05, "loss": 0.8187, "step": 1362 }, { "epoch": 0.4204843436680549, "grad_norm": 0.0, "learning_rate": 1.301592758512878e-05, "loss": 0.8127, "step": 1363 }, { "epoch": 0.42079284281968227, "grad_norm": 0.0, "learning_rate": 1.3006395980921649e-05, "loss": 0.6688, "step": 1364 }, { "epoch": 0.4211013419713096, "grad_norm": 0.0, "learning_rate": 1.2996861373010543e-05, "loss": 0.8386, "step": 1365 }, { "epoch": 0.4214098411229369, "grad_norm": 0.0, "learning_rate": 1.2987323770921531e-05, "loss": 0.8533, "step": 1366 }, { "epoch": 0.42171834027456423, "grad_norm": 0.0, "learning_rate": 1.2977783184183679e-05, "loss": 0.8437, "step": 1367 }, { "epoch": 0.42202683942619157, "grad_norm": 0.0, "learning_rate": 1.2968239622329027e-05, "loss": 0.8493, "step": 1368 }, { "epoch": 0.4223353385778189, "grad_norm": 0.0, "learning_rate": 1.2958693094892589e-05, "loss": 0.7388, "step": 1369 }, { "epoch": 0.42264383772944625, "grad_norm": 0.0, "learning_rate": 1.294914361141235e-05, "loss": 0.7706, "step": 1370 }, { "epoch": 0.4229523368810736, "grad_norm": 0.0, "learning_rate": 1.2939591181429235e-05, "loss": 0.7611, "step": 1371 }, { "epoch": 0.4232608360327009, "grad_norm": 0.0, "learning_rate": 1.2930035814487128e-05, "loss": 0.784, "step": 1372 }, { "epoch": 0.42356933518432827, "grad_norm": 0.0, "learning_rate": 1.292047752013284e-05, "loss": 0.7931, "step": 1373 }, { "epoch": 0.42387783433595555, "grad_norm": 0.0, "learning_rate": 1.29109163079161e-05, "loss": 0.7797, "step": 1374 }, { "epoch": 0.4241863334875829, "grad_norm": 0.0, "learning_rate": 1.2901352187389562e-05, "loss": 0.7803, "step": 1375 }, { "epoch": 0.42449483263921023, "grad_norm": 0.0, "learning_rate": 1.2891785168108784e-05, "loss": 0.7076, "step": 1376 }, { "epoch": 0.42480333179083757, "grad_norm": 0.0, "learning_rate": 1.2882215259632217e-05, "loss": 0.7885, "step": 1377 }, { "epoch": 0.4251118309424649, "grad_norm": 0.0, "learning_rate": 1.28726424715212e-05, "loss": 0.7596, "step": 1378 }, { "epoch": 0.42542033009409225, "grad_norm": 0.0, "learning_rate": 1.2863066813339953e-05, "loss": 0.773, "step": 1379 }, { "epoch": 0.4257288292457196, "grad_norm": 0.0, "learning_rate": 1.2853488294655556e-05, "loss": 0.8012, "step": 1380 }, { "epoch": 0.4260373283973469, "grad_norm": 0.0, "learning_rate": 1.2843906925037955e-05, "loss": 0.766, "step": 1381 }, { "epoch": 0.42634582754897427, "grad_norm": 0.0, "learning_rate": 1.2834322714059932e-05, "loss": 0.7433, "step": 1382 }, { "epoch": 0.42665432670060155, "grad_norm": 0.0, "learning_rate": 1.2824735671297121e-05, "loss": 0.7938, "step": 1383 }, { "epoch": 0.4269628258522289, "grad_norm": 0.0, "learning_rate": 1.2815145806327986e-05, "loss": 0.7867, "step": 1384 }, { "epoch": 0.42727132500385623, "grad_norm": 0.0, "learning_rate": 1.2805553128733797e-05, "loss": 0.697, "step": 1385 }, { "epoch": 0.42757982415548357, "grad_norm": 0.0, "learning_rate": 1.2795957648098645e-05, "loss": 0.7269, "step": 1386 }, { "epoch": 0.4278883233071109, "grad_norm": 0.0, "learning_rate": 1.2786359374009418e-05, "loss": 0.7957, "step": 1387 }, { "epoch": 0.42819682245873825, "grad_norm": 0.0, "learning_rate": 1.2776758316055797e-05, "loss": 0.7505, "step": 1388 }, { "epoch": 0.4285053216103656, "grad_norm": 0.0, "learning_rate": 1.276715448383024e-05, "loss": 0.7421, "step": 1389 }, { "epoch": 0.4288138207619929, "grad_norm": 0.0, "learning_rate": 1.2757547886927982e-05, "loss": 0.766, "step": 1390 }, { "epoch": 0.42912231991362026, "grad_norm": 0.0, "learning_rate": 1.2747938534947015e-05, "loss": 0.7551, "step": 1391 }, { "epoch": 0.42943081906524755, "grad_norm": 0.0, "learning_rate": 1.2738326437488093e-05, "loss": 0.7754, "step": 1392 }, { "epoch": 0.4297393182168749, "grad_norm": 0.0, "learning_rate": 1.2728711604154702e-05, "loss": 0.7794, "step": 1393 }, { "epoch": 0.4300478173685022, "grad_norm": 0.0, "learning_rate": 1.2719094044553066e-05, "loss": 0.7675, "step": 1394 }, { "epoch": 0.43035631652012957, "grad_norm": 0.0, "learning_rate": 1.270947376829213e-05, "loss": 0.7542, "step": 1395 }, { "epoch": 0.4306648156717569, "grad_norm": 0.0, "learning_rate": 1.269985078498356e-05, "loss": 0.7695, "step": 1396 }, { "epoch": 0.43097331482338425, "grad_norm": 0.0, "learning_rate": 1.2690225104241722e-05, "loss": 0.7702, "step": 1397 }, { "epoch": 0.4312818139750116, "grad_norm": 0.0, "learning_rate": 1.2680596735683679e-05, "loss": 0.7786, "step": 1398 }, { "epoch": 0.4315903131266389, "grad_norm": 0.0, "learning_rate": 1.2670965688929176e-05, "loss": 0.7759, "step": 1399 }, { "epoch": 0.4318988122782662, "grad_norm": 0.0, "learning_rate": 1.2661331973600635e-05, "loss": 0.7005, "step": 1400 }, { "epoch": 0.43220731142989355, "grad_norm": 0.0, "learning_rate": 1.2651695599323147e-05, "loss": 0.7682, "step": 1401 }, { "epoch": 0.4325158105815209, "grad_norm": 0.0, "learning_rate": 1.2642056575724457e-05, "loss": 0.7238, "step": 1402 }, { "epoch": 0.4328243097331482, "grad_norm": 0.0, "learning_rate": 1.2632414912434961e-05, "loss": 0.7869, "step": 1403 }, { "epoch": 0.43313280888477557, "grad_norm": 0.0, "learning_rate": 1.262277061908768e-05, "loss": 0.852, "step": 1404 }, { "epoch": 0.4334413080364029, "grad_norm": 0.0, "learning_rate": 1.2613123705318281e-05, "loss": 0.7744, "step": 1405 }, { "epoch": 0.43374980718803025, "grad_norm": 0.0, "learning_rate": 1.2603474180765035e-05, "loss": 0.7881, "step": 1406 }, { "epoch": 0.4340583063396576, "grad_norm": 0.0, "learning_rate": 1.2593822055068825e-05, "loss": 0.7329, "step": 1407 }, { "epoch": 0.4343668054912849, "grad_norm": 0.0, "learning_rate": 1.2584167337873137e-05, "loss": 0.8911, "step": 1408 }, { "epoch": 0.4346753046429122, "grad_norm": 0.0, "learning_rate": 1.2574510038824042e-05, "loss": 0.742, "step": 1409 }, { "epoch": 0.43498380379453955, "grad_norm": 0.0, "learning_rate": 1.256485016757019e-05, "loss": 0.791, "step": 1410 }, { "epoch": 0.4352923029461669, "grad_norm": 0.0, "learning_rate": 1.2555187733762808e-05, "loss": 0.7505, "step": 1411 }, { "epoch": 0.4356008020977942, "grad_norm": 0.0, "learning_rate": 1.2545522747055669e-05, "loss": 0.7848, "step": 1412 }, { "epoch": 0.43590930124942157, "grad_norm": 0.0, "learning_rate": 1.2535855217105114e-05, "loss": 0.6734, "step": 1413 }, { "epoch": 0.4362178004010489, "grad_norm": 0.0, "learning_rate": 1.252618515357001e-05, "loss": 0.7401, "step": 1414 }, { "epoch": 0.43652629955267624, "grad_norm": 0.0, "learning_rate": 1.2516512566111766e-05, "loss": 0.7617, "step": 1415 }, { "epoch": 0.4368347987043036, "grad_norm": 0.0, "learning_rate": 1.2506837464394307e-05, "loss": 0.7688, "step": 1416 }, { "epoch": 0.4371432978559309, "grad_norm": 0.0, "learning_rate": 1.2497159858084072e-05, "loss": 0.7622, "step": 1417 }, { "epoch": 0.4374517970075582, "grad_norm": 0.0, "learning_rate": 1.2487479756850001e-05, "loss": 0.6837, "step": 1418 }, { "epoch": 0.43776029615918555, "grad_norm": 0.0, "learning_rate": 1.2477797170363524e-05, "loss": 0.8013, "step": 1419 }, { "epoch": 0.4380687953108129, "grad_norm": 0.0, "learning_rate": 1.2468112108298563e-05, "loss": 0.8371, "step": 1420 }, { "epoch": 0.4383772944624402, "grad_norm": 0.0, "learning_rate": 1.2458424580331505e-05, "loss": 0.6998, "step": 1421 }, { "epoch": 0.43868579361406757, "grad_norm": 0.0, "learning_rate": 1.2448734596141205e-05, "loss": 0.7428, "step": 1422 }, { "epoch": 0.4389942927656949, "grad_norm": 0.0, "learning_rate": 1.243904216540897e-05, "loss": 0.749, "step": 1423 }, { "epoch": 0.43930279191732224, "grad_norm": 0.0, "learning_rate": 1.2429347297818551e-05, "loss": 0.8241, "step": 1424 }, { "epoch": 0.4396112910689496, "grad_norm": 0.0, "learning_rate": 1.2419650003056134e-05, "loss": 1.087, "step": 1425 }, { "epoch": 0.43991979022057687, "grad_norm": 0.0, "learning_rate": 1.2409950290810332e-05, "loss": 0.786, "step": 1426 }, { "epoch": 0.4402282893722042, "grad_norm": 0.0, "learning_rate": 1.240024817077217e-05, "loss": 0.8391, "step": 1427 }, { "epoch": 0.44053678852383155, "grad_norm": 0.0, "learning_rate": 1.2390543652635088e-05, "loss": 0.8163, "step": 1428 }, { "epoch": 0.4408452876754589, "grad_norm": 0.0, "learning_rate": 1.2380836746094903e-05, "loss": 0.7677, "step": 1429 }, { "epoch": 0.4411537868270862, "grad_norm": 0.0, "learning_rate": 1.2371127460849837e-05, "loss": 0.7921, "step": 1430 }, { "epoch": 0.44146228597871356, "grad_norm": 0.0, "learning_rate": 1.236141580660048e-05, "loss": 0.8214, "step": 1431 }, { "epoch": 0.4417707851303409, "grad_norm": 0.0, "learning_rate": 1.2351701793049786e-05, "loss": 0.7893, "step": 1432 }, { "epoch": 0.44207928428196824, "grad_norm": 0.0, "learning_rate": 1.2341985429903075e-05, "loss": 0.7722, "step": 1433 }, { "epoch": 0.4423877834335956, "grad_norm": 0.0, "learning_rate": 1.2332266726868013e-05, "loss": 0.7685, "step": 1434 }, { "epoch": 0.44269628258522287, "grad_norm": 0.0, "learning_rate": 1.2322545693654595e-05, "loss": 0.8296, "step": 1435 }, { "epoch": 0.4430047817368502, "grad_norm": 0.0, "learning_rate": 1.2312822339975147e-05, "loss": 0.7838, "step": 1436 }, { "epoch": 0.44331328088847755, "grad_norm": 0.0, "learning_rate": 1.2303096675544325e-05, "loss": 0.8509, "step": 1437 }, { "epoch": 0.4436217800401049, "grad_norm": 0.0, "learning_rate": 1.229336871007908e-05, "loss": 0.7628, "step": 1438 }, { "epoch": 0.4439302791917322, "grad_norm": 0.0, "learning_rate": 1.2283638453298665e-05, "loss": 0.7456, "step": 1439 }, { "epoch": 0.44423877834335956, "grad_norm": 0.0, "learning_rate": 1.2273905914924627e-05, "loss": 0.8279, "step": 1440 }, { "epoch": 0.4445472774949869, "grad_norm": 0.0, "learning_rate": 1.2264171104680791e-05, "loss": 0.742, "step": 1441 }, { "epoch": 0.44485577664661424, "grad_norm": 0.0, "learning_rate": 1.2254434032293245e-05, "loss": 0.7001, "step": 1442 }, { "epoch": 0.4451642757982416, "grad_norm": 0.0, "learning_rate": 1.2244694707490348e-05, "loss": 0.7747, "step": 1443 }, { "epoch": 0.44547277494986887, "grad_norm": 0.0, "learning_rate": 1.2234953140002698e-05, "loss": 0.8176, "step": 1444 }, { "epoch": 0.4457812741014962, "grad_norm": 0.0, "learning_rate": 1.2225209339563144e-05, "loss": 0.8175, "step": 1445 }, { "epoch": 0.44608977325312354, "grad_norm": 0.0, "learning_rate": 1.2215463315906764e-05, "loss": 0.8301, "step": 1446 }, { "epoch": 0.4463982724047509, "grad_norm": 0.0, "learning_rate": 1.220571507877085e-05, "loss": 0.7825, "step": 1447 }, { "epoch": 0.4467067715563782, "grad_norm": 0.0, "learning_rate": 1.2195964637894913e-05, "loss": 0.8353, "step": 1448 }, { "epoch": 0.44701527070800556, "grad_norm": 0.0, "learning_rate": 1.2186212003020658e-05, "loss": 0.8001, "step": 1449 }, { "epoch": 0.4473237698596329, "grad_norm": 0.0, "learning_rate": 1.2176457183891996e-05, "loss": 0.7869, "step": 1450 }, { "epoch": 0.44763226901126024, "grad_norm": 0.0, "learning_rate": 1.2166700190255006e-05, "loss": 0.768, "step": 1451 }, { "epoch": 0.4479407681628875, "grad_norm": 0.0, "learning_rate": 1.2156941031857943e-05, "loss": 0.774, "step": 1452 }, { "epoch": 0.44824926731451487, "grad_norm": 0.0, "learning_rate": 1.214717971845123e-05, "loss": 0.7134, "step": 1453 }, { "epoch": 0.4485577664661422, "grad_norm": 0.0, "learning_rate": 1.2137416259787441e-05, "loss": 0.8372, "step": 1454 }, { "epoch": 0.44886626561776954, "grad_norm": 0.0, "learning_rate": 1.212765066562129e-05, "loss": 0.7524, "step": 1455 }, { "epoch": 0.4491747647693969, "grad_norm": 0.0, "learning_rate": 1.2117882945709626e-05, "loss": 0.6967, "step": 1456 }, { "epoch": 0.4494832639210242, "grad_norm": 0.0, "learning_rate": 1.2108113109811426e-05, "loss": 0.7692, "step": 1457 }, { "epoch": 0.44979176307265156, "grad_norm": 0.0, "learning_rate": 1.2098341167687773e-05, "loss": 1.0787, "step": 1458 }, { "epoch": 0.4501002622242789, "grad_norm": 0.0, "learning_rate": 1.2088567129101861e-05, "loss": 0.7832, "step": 1459 }, { "epoch": 0.45040876137590624, "grad_norm": 0.0, "learning_rate": 1.2078791003818981e-05, "loss": 0.7352, "step": 1460 }, { "epoch": 0.4507172605275335, "grad_norm": 0.0, "learning_rate": 1.20690128016065e-05, "loss": 0.7652, "step": 1461 }, { "epoch": 0.45102575967916086, "grad_norm": 0.0, "learning_rate": 1.2059232532233864e-05, "loss": 0.7968, "step": 1462 }, { "epoch": 0.4513342588307882, "grad_norm": 0.0, "learning_rate": 1.2049450205472585e-05, "loss": 0.7473, "step": 1463 }, { "epoch": 0.45164275798241554, "grad_norm": 0.0, "learning_rate": 1.203966583109623e-05, "loss": 0.7823, "step": 1464 }, { "epoch": 0.4519512571340429, "grad_norm": 0.0, "learning_rate": 1.2029879418880419e-05, "loss": 0.7657, "step": 1465 }, { "epoch": 0.4522597562856702, "grad_norm": 0.0, "learning_rate": 1.2020090978602795e-05, "loss": 0.7575, "step": 1466 }, { "epoch": 0.45256825543729756, "grad_norm": 0.0, "learning_rate": 1.2010300520043035e-05, "loss": 0.8245, "step": 1467 }, { "epoch": 0.4528767545889249, "grad_norm": 0.0, "learning_rate": 1.2000508052982827e-05, "loss": 0.7677, "step": 1468 }, { "epoch": 0.45318525374055224, "grad_norm": 0.0, "learning_rate": 1.1990713587205876e-05, "loss": 0.8229, "step": 1469 }, { "epoch": 0.4534937528921795, "grad_norm": 0.0, "learning_rate": 1.1980917132497871e-05, "loss": 0.7153, "step": 1470 }, { "epoch": 0.45380225204380686, "grad_norm": 0.0, "learning_rate": 1.1971118698646502e-05, "loss": 0.7822, "step": 1471 }, { "epoch": 0.4541107511954342, "grad_norm": 0.0, "learning_rate": 1.1961318295441424e-05, "loss": 0.7923, "step": 1472 }, { "epoch": 0.45441925034706154, "grad_norm": 0.0, "learning_rate": 1.1951515932674267e-05, "loss": 0.8233, "step": 1473 }, { "epoch": 0.4547277494986889, "grad_norm": 0.0, "learning_rate": 1.1941711620138614e-05, "loss": 0.7519, "step": 1474 }, { "epoch": 0.4550362486503162, "grad_norm": 0.0, "learning_rate": 1.1931905367630004e-05, "loss": 0.6626, "step": 1475 }, { "epoch": 0.45534474780194356, "grad_norm": 0.0, "learning_rate": 1.1922097184945897e-05, "loss": 0.8072, "step": 1476 }, { "epoch": 0.4556532469535709, "grad_norm": 0.0, "learning_rate": 1.1912287081885708e-05, "loss": 0.6687, "step": 1477 }, { "epoch": 0.4559617461051982, "grad_norm": 0.0, "learning_rate": 1.1902475068250747e-05, "loss": 0.8199, "step": 1478 }, { "epoch": 0.4562702452568255, "grad_norm": 0.0, "learning_rate": 1.1892661153844243e-05, "loss": 0.7257, "step": 1479 }, { "epoch": 0.45657874440845286, "grad_norm": 0.0, "learning_rate": 1.1882845348471324e-05, "loss": 0.6936, "step": 1480 }, { "epoch": 0.4568872435600802, "grad_norm": 0.0, "learning_rate": 1.1873027661939003e-05, "loss": 0.6945, "step": 1481 }, { "epoch": 0.45719574271170754, "grad_norm": 0.0, "learning_rate": 1.1863208104056179e-05, "loss": 0.7709, "step": 1482 }, { "epoch": 0.4575042418633349, "grad_norm": 0.0, "learning_rate": 1.1853386684633616e-05, "loss": 0.748, "step": 1483 }, { "epoch": 0.4578127410149622, "grad_norm": 0.0, "learning_rate": 1.1843563413483941e-05, "loss": 0.7041, "step": 1484 }, { "epoch": 0.45812124016658956, "grad_norm": 0.0, "learning_rate": 1.1833738300421625e-05, "loss": 1.0664, "step": 1485 }, { "epoch": 0.4584297393182169, "grad_norm": 0.0, "learning_rate": 1.1823911355262986e-05, "loss": 0.8065, "step": 1486 }, { "epoch": 0.4587382384698442, "grad_norm": 0.0, "learning_rate": 1.181408258782617e-05, "loss": 0.8089, "step": 1487 }, { "epoch": 0.4590467376214715, "grad_norm": 0.0, "learning_rate": 1.1804252007931142e-05, "loss": 0.7305, "step": 1488 }, { "epoch": 0.45935523677309886, "grad_norm": 0.0, "learning_rate": 1.1794419625399682e-05, "loss": 0.6922, "step": 1489 }, { "epoch": 0.4596637359247262, "grad_norm": 0.0, "learning_rate": 1.1784585450055367e-05, "loss": 0.6915, "step": 1490 }, { "epoch": 0.45997223507635354, "grad_norm": 0.0, "learning_rate": 1.1774749491723565e-05, "loss": 0.7147, "step": 1491 }, { "epoch": 0.4602807342279809, "grad_norm": 0.0, "learning_rate": 1.176491176023143e-05, "loss": 0.7419, "step": 1492 }, { "epoch": 0.4605892333796082, "grad_norm": 0.0, "learning_rate": 1.1755072265407881e-05, "loss": 0.8442, "step": 1493 }, { "epoch": 0.46089773253123556, "grad_norm": 0.0, "learning_rate": 1.1745231017083603e-05, "loss": 0.748, "step": 1494 }, { "epoch": 0.4612062316828629, "grad_norm": 0.0, "learning_rate": 1.1735388025091032e-05, "loss": 0.8019, "step": 1495 }, { "epoch": 0.4615147308344902, "grad_norm": 0.0, "learning_rate": 1.1725543299264352e-05, "loss": 0.7985, "step": 1496 }, { "epoch": 0.4618232299861175, "grad_norm": 0.0, "learning_rate": 1.1715696849439465e-05, "loss": 0.8069, "step": 1497 }, { "epoch": 0.46213172913774486, "grad_norm": 0.0, "learning_rate": 1.1705848685454011e-05, "loss": 0.8735, "step": 1498 }, { "epoch": 0.4624402282893722, "grad_norm": 0.0, "learning_rate": 1.1695998817147334e-05, "loss": 0.8315, "step": 1499 }, { "epoch": 0.46274872744099954, "grad_norm": 0.0, "learning_rate": 1.168614725436048e-05, "loss": 0.7874, "step": 1500 }, { "epoch": 0.4630572265926269, "grad_norm": 0.0, "learning_rate": 1.167629400693619e-05, "loss": 0.7339, "step": 1501 }, { "epoch": 0.4633657257442542, "grad_norm": 0.0, "learning_rate": 1.1666439084718893e-05, "loss": 0.8196, "step": 1502 }, { "epoch": 0.46367422489588156, "grad_norm": 0.0, "learning_rate": 1.1656582497554682e-05, "loss": 0.8023, "step": 1503 }, { "epoch": 0.46398272404750884, "grad_norm": 0.0, "learning_rate": 1.1646724255291321e-05, "loss": 0.7816, "step": 1504 }, { "epoch": 0.4642912231991362, "grad_norm": 0.0, "learning_rate": 1.1636864367778226e-05, "loss": 0.7904, "step": 1505 }, { "epoch": 0.4645997223507635, "grad_norm": 0.0, "learning_rate": 1.1627002844866455e-05, "loss": 0.8138, "step": 1506 }, { "epoch": 0.46490822150239086, "grad_norm": 0.0, "learning_rate": 1.1617139696408696e-05, "loss": 0.7566, "step": 1507 }, { "epoch": 0.4652167206540182, "grad_norm": 0.0, "learning_rate": 1.1607274932259273e-05, "loss": 0.8639, "step": 1508 }, { "epoch": 0.46552521980564554, "grad_norm": 0.0, "learning_rate": 1.1597408562274113e-05, "loss": 0.7517, "step": 1509 }, { "epoch": 0.4658337189572729, "grad_norm": 0.0, "learning_rate": 1.1587540596310751e-05, "loss": 0.8349, "step": 1510 }, { "epoch": 0.4661422181089002, "grad_norm": 0.0, "learning_rate": 1.157767104422832e-05, "loss": 0.7613, "step": 1511 }, { "epoch": 0.46645071726052756, "grad_norm": 0.0, "learning_rate": 1.1567799915887532e-05, "loss": 0.7602, "step": 1512 }, { "epoch": 0.46675921641215484, "grad_norm": 0.0, "learning_rate": 1.1557927221150674e-05, "loss": 0.7969, "step": 1513 }, { "epoch": 0.4670677155637822, "grad_norm": 0.0, "learning_rate": 1.1548052969881604e-05, "loss": 0.6685, "step": 1514 }, { "epoch": 0.4673762147154095, "grad_norm": 0.0, "learning_rate": 1.1538177171945733e-05, "loss": 0.7429, "step": 1515 }, { "epoch": 0.46768471386703686, "grad_norm": 0.0, "learning_rate": 1.1528299837210015e-05, "loss": 0.7313, "step": 1516 }, { "epoch": 0.4679932130186642, "grad_norm": 0.0, "learning_rate": 1.1518420975542937e-05, "loss": 0.7563, "step": 1517 }, { "epoch": 0.46830171217029154, "grad_norm": 0.0, "learning_rate": 1.150854059681452e-05, "loss": 0.7186, "step": 1518 }, { "epoch": 0.4686102113219189, "grad_norm": 0.0, "learning_rate": 1.149865871089629e-05, "loss": 0.8129, "step": 1519 }, { "epoch": 0.4689187104735462, "grad_norm": 0.0, "learning_rate": 1.1488775327661288e-05, "loss": 0.7723, "step": 1520 }, { "epoch": 0.46922720962517356, "grad_norm": 0.0, "learning_rate": 1.1478890456984044e-05, "loss": 0.7515, "step": 1521 }, { "epoch": 0.46953570877680084, "grad_norm": 0.0, "learning_rate": 1.1469004108740584e-05, "loss": 0.7922, "step": 1522 }, { "epoch": 0.4698442079284282, "grad_norm": 0.0, "learning_rate": 1.14591162928084e-05, "loss": 0.7006, "step": 1523 }, { "epoch": 0.4701527070800555, "grad_norm": 0.0, "learning_rate": 1.1449227019066452e-05, "loss": 0.7337, "step": 1524 }, { "epoch": 0.47046120623168286, "grad_norm": 0.0, "learning_rate": 1.1439336297395161e-05, "loss": 0.756, "step": 1525 }, { "epoch": 0.4707697053833102, "grad_norm": 0.0, "learning_rate": 1.1429444137676391e-05, "loss": 0.7558, "step": 1526 }, { "epoch": 0.47107820453493754, "grad_norm": 0.0, "learning_rate": 1.1419550549793443e-05, "loss": 0.7017, "step": 1527 }, { "epoch": 0.4713867036865649, "grad_norm": 0.0, "learning_rate": 1.1409655543631054e-05, "loss": 0.6988, "step": 1528 }, { "epoch": 0.4716952028381922, "grad_norm": 0.0, "learning_rate": 1.1399759129075358e-05, "loss": 0.8331, "step": 1529 }, { "epoch": 0.4720037019898195, "grad_norm": 0.0, "learning_rate": 1.1389861316013914e-05, "loss": 0.7593, "step": 1530 }, { "epoch": 0.47231220114144684, "grad_norm": 0.0, "learning_rate": 1.1379962114335676e-05, "loss": 0.8505, "step": 1531 }, { "epoch": 0.4726207002930742, "grad_norm": 0.0, "learning_rate": 1.1370061533930974e-05, "loss": 0.7795, "step": 1532 }, { "epoch": 0.4729291994447015, "grad_norm": 0.0, "learning_rate": 1.1360159584691529e-05, "loss": 0.7112, "step": 1533 }, { "epoch": 0.47323769859632886, "grad_norm": 0.0, "learning_rate": 1.1350256276510421e-05, "loss": 0.7215, "step": 1534 }, { "epoch": 0.4735461977479562, "grad_norm": 0.0, "learning_rate": 1.1340351619282091e-05, "loss": 0.7342, "step": 1535 }, { "epoch": 0.47385469689958354, "grad_norm": 0.0, "learning_rate": 1.1330445622902326e-05, "loss": 0.7108, "step": 1536 }, { "epoch": 0.4741631960512109, "grad_norm": 0.0, "learning_rate": 1.1320538297268258e-05, "loss": 0.8285, "step": 1537 }, { "epoch": 0.4744716952028382, "grad_norm": 0.0, "learning_rate": 1.1310629652278334e-05, "loss": 0.7805, "step": 1538 }, { "epoch": 0.4747801943544655, "grad_norm": 0.0, "learning_rate": 1.1300719697832329e-05, "loss": 0.724, "step": 1539 }, { "epoch": 0.47508869350609284, "grad_norm": 0.0, "learning_rate": 1.1290808443831324e-05, "loss": 0.7394, "step": 1540 }, { "epoch": 0.4753971926577202, "grad_norm": 0.0, "learning_rate": 1.1280895900177704e-05, "loss": 0.7535, "step": 1541 }, { "epoch": 0.4757056918093475, "grad_norm": 0.0, "learning_rate": 1.1270982076775126e-05, "loss": 0.7777, "step": 1542 }, { "epoch": 0.47601419096097486, "grad_norm": 0.0, "learning_rate": 1.1261066983528544e-05, "loss": 0.7488, "step": 1543 }, { "epoch": 0.4763226901126022, "grad_norm": 0.0, "learning_rate": 1.125115063034417e-05, "loss": 0.7654, "step": 1544 }, { "epoch": 0.47663118926422954, "grad_norm": 0.0, "learning_rate": 1.124123302712948e-05, "loss": 0.7223, "step": 1545 }, { "epoch": 0.4769396884158569, "grad_norm": 0.0, "learning_rate": 1.1231314183793192e-05, "loss": 0.7698, "step": 1546 }, { "epoch": 0.4772481875674842, "grad_norm": 0.0, "learning_rate": 1.1221394110245271e-05, "loss": 0.7299, "step": 1547 }, { "epoch": 0.4775566867191115, "grad_norm": 0.0, "learning_rate": 1.1211472816396912e-05, "loss": 0.7969, "step": 1548 }, { "epoch": 0.47786518587073884, "grad_norm": 0.0, "learning_rate": 1.120155031216052e-05, "loss": 0.7025, "step": 1549 }, { "epoch": 0.4781736850223662, "grad_norm": 0.0, "learning_rate": 1.1191626607449713e-05, "loss": 0.7603, "step": 1550 }, { "epoch": 0.4784821841739935, "grad_norm": 0.0, "learning_rate": 1.1181701712179311e-05, "loss": 0.7384, "step": 1551 }, { "epoch": 0.47879068332562086, "grad_norm": 0.0, "learning_rate": 1.1171775636265326e-05, "loss": 0.7992, "step": 1552 }, { "epoch": 0.4790991824772482, "grad_norm": 0.0, "learning_rate": 1.1161848389624942e-05, "loss": 0.7818, "step": 1553 }, { "epoch": 0.47940768162887554, "grad_norm": 0.0, "learning_rate": 1.115191998217652e-05, "loss": 0.7237, "step": 1554 }, { "epoch": 0.4797161807805029, "grad_norm": 0.0, "learning_rate": 1.1141990423839573e-05, "loss": 0.7212, "step": 1555 }, { "epoch": 0.48002467993213016, "grad_norm": 0.0, "learning_rate": 1.1132059724534772e-05, "loss": 0.7296, "step": 1556 }, { "epoch": 0.4803331790837575, "grad_norm": 0.0, "learning_rate": 1.112212789418392e-05, "loss": 0.7456, "step": 1557 }, { "epoch": 0.48064167823538484, "grad_norm": 0.0, "learning_rate": 1.1112194942709956e-05, "loss": 0.7829, "step": 1558 }, { "epoch": 0.4809501773870122, "grad_norm": 0.0, "learning_rate": 1.1102260880036937e-05, "loss": 0.7913, "step": 1559 }, { "epoch": 0.4812586765386395, "grad_norm": 0.0, "learning_rate": 1.1092325716090035e-05, "loss": 0.7316, "step": 1560 }, { "epoch": 0.48156717569026686, "grad_norm": 0.0, "learning_rate": 1.108238946079551e-05, "loss": 0.8268, "step": 1561 }, { "epoch": 0.4818756748418942, "grad_norm": 0.0, "learning_rate": 1.1072452124080728e-05, "loss": 0.7784, "step": 1562 }, { "epoch": 0.48218417399352154, "grad_norm": 0.0, "learning_rate": 1.1062513715874119e-05, "loss": 0.8224, "step": 1563 }, { "epoch": 0.4824926731451489, "grad_norm": 0.0, "learning_rate": 1.1052574246105196e-05, "loss": 0.7781, "step": 1564 }, { "epoch": 0.48280117229677616, "grad_norm": 0.0, "learning_rate": 1.1042633724704534e-05, "loss": 0.6717, "step": 1565 }, { "epoch": 0.4831096714484035, "grad_norm": 0.0, "learning_rate": 1.1032692161603746e-05, "loss": 0.8659, "step": 1566 }, { "epoch": 0.48341817060003084, "grad_norm": 0.0, "learning_rate": 1.1022749566735497e-05, "loss": 0.7775, "step": 1567 }, { "epoch": 0.4837266697516582, "grad_norm": 0.0, "learning_rate": 1.1012805950033476e-05, "loss": 0.7473, "step": 1568 }, { "epoch": 0.4840351689032855, "grad_norm": 0.0, "learning_rate": 1.1002861321432401e-05, "loss": 0.7479, "step": 1569 }, { "epoch": 0.48434366805491286, "grad_norm": 0.0, "learning_rate": 1.0992915690867989e-05, "loss": 0.721, "step": 1570 }, { "epoch": 0.4846521672065402, "grad_norm": 0.0, "learning_rate": 1.0982969068276971e-05, "loss": 0.7837, "step": 1571 }, { "epoch": 0.48496066635816754, "grad_norm": 0.0, "learning_rate": 1.097302146359706e-05, "loss": 0.7825, "step": 1572 }, { "epoch": 0.4852691655097949, "grad_norm": 0.0, "learning_rate": 1.0963072886766956e-05, "loss": 0.7759, "step": 1573 }, { "epoch": 0.48557766466142216, "grad_norm": 0.0, "learning_rate": 1.0953123347726325e-05, "loss": 0.7175, "step": 1574 }, { "epoch": 0.4858861638130495, "grad_norm": 0.0, "learning_rate": 1.0943172856415798e-05, "loss": 0.7636, "step": 1575 }, { "epoch": 0.48619466296467684, "grad_norm": 0.0, "learning_rate": 1.0933221422776953e-05, "loss": 0.7212, "step": 1576 }, { "epoch": 0.4865031621163042, "grad_norm": 0.0, "learning_rate": 1.092326905675232e-05, "loss": 0.7146, "step": 1577 }, { "epoch": 0.4868116612679315, "grad_norm": 0.0, "learning_rate": 1.0913315768285347e-05, "loss": 0.7574, "step": 1578 }, { "epoch": 0.48712016041955886, "grad_norm": 0.0, "learning_rate": 1.0903361567320411e-05, "loss": 0.7246, "step": 1579 }, { "epoch": 0.4874286595711862, "grad_norm": 0.0, "learning_rate": 1.0893406463802805e-05, "loss": 0.785, "step": 1580 }, { "epoch": 0.48773715872281354, "grad_norm": 0.0, "learning_rate": 1.0883450467678711e-05, "loss": 0.7738, "step": 1581 }, { "epoch": 0.4880456578744408, "grad_norm": 0.0, "learning_rate": 1.0873493588895216e-05, "loss": 0.732, "step": 1582 }, { "epoch": 0.48835415702606816, "grad_norm": 0.0, "learning_rate": 1.0863535837400281e-05, "loss": 0.7488, "step": 1583 }, { "epoch": 0.4886626561776955, "grad_norm": 0.0, "learning_rate": 1.0853577223142742e-05, "loss": 0.8508, "step": 1584 }, { "epoch": 0.48897115532932284, "grad_norm": 0.0, "learning_rate": 1.0843617756072294e-05, "loss": 0.8112, "step": 1585 }, { "epoch": 0.4892796544809502, "grad_norm": 0.0, "learning_rate": 1.0833657446139487e-05, "loss": 0.7818, "step": 1586 }, { "epoch": 0.4895881536325775, "grad_norm": 0.0, "learning_rate": 1.0823696303295712e-05, "loss": 0.7754, "step": 1587 }, { "epoch": 0.48989665278420486, "grad_norm": 0.0, "learning_rate": 1.0813734337493194e-05, "loss": 0.7389, "step": 1588 }, { "epoch": 0.4902051519358322, "grad_norm": 0.0, "learning_rate": 1.0803771558684977e-05, "loss": 0.6946, "step": 1589 }, { "epoch": 0.49051365108745953, "grad_norm": 0.0, "learning_rate": 1.079380797682492e-05, "loss": 0.7306, "step": 1590 }, { "epoch": 0.4908221502390868, "grad_norm": 0.0, "learning_rate": 1.0783843601867681e-05, "loss": 0.681, "step": 1591 }, { "epoch": 0.49113064939071416, "grad_norm": 0.0, "learning_rate": 1.0773878443768716e-05, "loss": 0.7666, "step": 1592 }, { "epoch": 0.4914391485423415, "grad_norm": 0.0, "learning_rate": 1.0763912512484257e-05, "loss": 0.807, "step": 1593 }, { "epoch": 0.49174764769396884, "grad_norm": 0.0, "learning_rate": 1.0753945817971311e-05, "loss": 0.8633, "step": 1594 }, { "epoch": 0.4920561468455962, "grad_norm": 0.0, "learning_rate": 1.0743978370187651e-05, "loss": 0.8063, "step": 1595 }, { "epoch": 0.4923646459972235, "grad_norm": 0.0, "learning_rate": 1.0734010179091798e-05, "loss": 0.7307, "step": 1596 }, { "epoch": 0.49267314514885086, "grad_norm": 0.0, "learning_rate": 1.072404125464302e-05, "loss": 0.747, "step": 1597 }, { "epoch": 0.4929816443004782, "grad_norm": 0.0, "learning_rate": 1.0714071606801314e-05, "loss": 0.7696, "step": 1598 }, { "epoch": 0.49329014345210553, "grad_norm": 0.0, "learning_rate": 1.0704101245527396e-05, "loss": 0.774, "step": 1599 }, { "epoch": 0.4935986426037328, "grad_norm": 0.0, "learning_rate": 1.0694130180782705e-05, "loss": 0.7023, "step": 1600 }, { "epoch": 0.49390714175536016, "grad_norm": 0.0, "learning_rate": 1.0684158422529374e-05, "loss": 0.7351, "step": 1601 }, { "epoch": 0.4942156409069875, "grad_norm": 0.0, "learning_rate": 1.0674185980730234e-05, "loss": 0.7549, "step": 1602 }, { "epoch": 0.49452414005861484, "grad_norm": 0.0, "learning_rate": 1.0664212865348798e-05, "loss": 0.7447, "step": 1603 }, { "epoch": 0.4948326392102422, "grad_norm": 0.0, "learning_rate": 1.065423908634925e-05, "loss": 0.7676, "step": 1604 }, { "epoch": 0.4951411383618695, "grad_norm": 0.0, "learning_rate": 1.064426465369644e-05, "loss": 0.7771, "step": 1605 }, { "epoch": 0.49544963751349685, "grad_norm": 0.0, "learning_rate": 1.0634289577355864e-05, "loss": 0.7675, "step": 1606 }, { "epoch": 0.4957581366651242, "grad_norm": 0.0, "learning_rate": 1.062431386729367e-05, "loss": 0.7554, "step": 1607 }, { "epoch": 0.4960666358167515, "grad_norm": 0.0, "learning_rate": 1.0614337533476635e-05, "loss": 0.8138, "step": 1608 }, { "epoch": 0.4963751349683788, "grad_norm": 0.0, "learning_rate": 1.0604360585872165e-05, "loss": 0.7168, "step": 1609 }, { "epoch": 0.49668363412000616, "grad_norm": 0.0, "learning_rate": 1.0594383034448266e-05, "loss": 0.7353, "step": 1610 }, { "epoch": 0.4969921332716335, "grad_norm": 0.0, "learning_rate": 1.058440488917356e-05, "loss": 0.7012, "step": 1611 }, { "epoch": 0.49730063242326084, "grad_norm": 0.0, "learning_rate": 1.0574426160017257e-05, "loss": 0.8099, "step": 1612 }, { "epoch": 0.4976091315748882, "grad_norm": 0.0, "learning_rate": 1.0564446856949146e-05, "loss": 0.827, "step": 1613 }, { "epoch": 0.4979176307265155, "grad_norm": 0.0, "learning_rate": 1.0554466989939602e-05, "loss": 0.855, "step": 1614 }, { "epoch": 0.49822612987814285, "grad_norm": 0.0, "learning_rate": 1.0544486568959551e-05, "loss": 0.7281, "step": 1615 }, { "epoch": 0.4985346290297702, "grad_norm": 0.0, "learning_rate": 1.0534505603980481e-05, "loss": 0.7121, "step": 1616 }, { "epoch": 0.4988431281813975, "grad_norm": 0.0, "learning_rate": 1.0524524104974414e-05, "loss": 0.7575, "step": 1617 }, { "epoch": 0.4991516273330248, "grad_norm": 0.0, "learning_rate": 1.0514542081913916e-05, "loss": 0.8202, "step": 1618 }, { "epoch": 0.49946012648465216, "grad_norm": 0.0, "learning_rate": 1.0504559544772071e-05, "loss": 0.858, "step": 1619 }, { "epoch": 0.4997686256362795, "grad_norm": 0.0, "learning_rate": 1.0494576503522475e-05, "loss": 0.7307, "step": 1620 }, { "epoch": 0.5000771247879068, "grad_norm": 0.0, "learning_rate": 1.0484592968139234e-05, "loss": 0.7756, "step": 1621 }, { "epoch": 0.5003856239395341, "grad_norm": 0.0, "learning_rate": 1.0474608948596943e-05, "loss": 0.7073, "step": 1622 }, { "epoch": 0.5006941230911615, "grad_norm": 0.0, "learning_rate": 1.0464624454870678e-05, "loss": 0.7282, "step": 1623 }, { "epoch": 0.5010026222427888, "grad_norm": 0.0, "learning_rate": 1.0454639496935997e-05, "loss": 0.8389, "step": 1624 }, { "epoch": 0.5013111213944161, "grad_norm": 0.0, "learning_rate": 1.0444654084768915e-05, "loss": 0.7465, "step": 1625 }, { "epoch": 0.5016196205460435, "grad_norm": 0.0, "learning_rate": 1.0434668228345901e-05, "loss": 0.8251, "step": 1626 }, { "epoch": 0.5019281196976708, "grad_norm": 0.0, "learning_rate": 1.0424681937643872e-05, "loss": 0.8191, "step": 1627 }, { "epoch": 0.5022366188492982, "grad_norm": 0.0, "learning_rate": 1.041469522264018e-05, "loss": 0.8236, "step": 1628 }, { "epoch": 0.5025451180009255, "grad_norm": 0.0, "learning_rate": 1.0404708093312593e-05, "loss": 0.7842, "step": 1629 }, { "epoch": 0.5028536171525528, "grad_norm": 0.0, "learning_rate": 1.0394720559639295e-05, "loss": 0.8182, "step": 1630 }, { "epoch": 0.5031621163041802, "grad_norm": 0.0, "learning_rate": 1.0384732631598886e-05, "loss": 0.7417, "step": 1631 }, { "epoch": 0.5034706154558075, "grad_norm": 0.0, "learning_rate": 1.0374744319170342e-05, "loss": 0.7373, "step": 1632 }, { "epoch": 0.5037791146074349, "grad_norm": 0.0, "learning_rate": 1.0364755632333034e-05, "loss": 0.7286, "step": 1633 }, { "epoch": 0.5040876137590622, "grad_norm": 0.0, "learning_rate": 1.0354766581066706e-05, "loss": 0.8905, "step": 1634 }, { "epoch": 0.5043961129106895, "grad_norm": 0.0, "learning_rate": 1.0344777175351467e-05, "loss": 0.7573, "step": 1635 }, { "epoch": 0.5047046120623169, "grad_norm": 0.0, "learning_rate": 1.0334787425167772e-05, "loss": 0.7438, "step": 1636 }, { "epoch": 0.5050131112139442, "grad_norm": 0.0, "learning_rate": 1.0324797340496432e-05, "loss": 0.7304, "step": 1637 }, { "epoch": 0.5053216103655715, "grad_norm": 0.0, "learning_rate": 1.031480693131858e-05, "loss": 0.6828, "step": 1638 }, { "epoch": 0.5056301095171988, "grad_norm": 0.0, "learning_rate": 1.0304816207615682e-05, "loss": 0.762, "step": 1639 }, { "epoch": 0.5059386086688261, "grad_norm": 0.0, "learning_rate": 1.0294825179369518e-05, "loss": 0.6984, "step": 1640 }, { "epoch": 0.5062471078204535, "grad_norm": 0.0, "learning_rate": 1.0284833856562173e-05, "loss": 0.677, "step": 1641 }, { "epoch": 0.5065556069720808, "grad_norm": 0.0, "learning_rate": 1.0274842249176016e-05, "loss": 0.7709, "step": 1642 }, { "epoch": 0.5068641061237081, "grad_norm": 0.0, "learning_rate": 1.0264850367193713e-05, "loss": 0.748, "step": 1643 }, { "epoch": 0.5071726052753355, "grad_norm": 0.0, "learning_rate": 1.0254858220598194e-05, "loss": 0.671, "step": 1644 }, { "epoch": 0.5074811044269628, "grad_norm": 0.0, "learning_rate": 1.024486581937266e-05, "loss": 0.7853, "step": 1645 }, { "epoch": 0.5077896035785902, "grad_norm": 0.0, "learning_rate": 1.0234873173500566e-05, "loss": 0.7042, "step": 1646 }, { "epoch": 0.5080981027302175, "grad_norm": 0.0, "learning_rate": 1.0224880292965611e-05, "loss": 0.6942, "step": 1647 }, { "epoch": 0.5084066018818448, "grad_norm": 0.0, "learning_rate": 1.021488718775173e-05, "loss": 0.7299, "step": 1648 }, { "epoch": 0.5087151010334722, "grad_norm": 0.0, "learning_rate": 1.0204893867843073e-05, "loss": 0.7374, "step": 1649 }, { "epoch": 0.5090236001850995, "grad_norm": 0.0, "learning_rate": 1.019490034322402e-05, "loss": 0.7705, "step": 1650 }, { "epoch": 0.5093320993367269, "grad_norm": 0.0, "learning_rate": 1.0184906623879137e-05, "loss": 0.7399, "step": 1651 }, { "epoch": 0.5096405984883542, "grad_norm": 0.0, "learning_rate": 1.0174912719793202e-05, "loss": 0.7825, "step": 1652 }, { "epoch": 0.5099490976399815, "grad_norm": 0.0, "learning_rate": 1.0164918640951169e-05, "loss": 0.7848, "step": 1653 }, { "epoch": 0.5102575967916089, "grad_norm": 0.0, "learning_rate": 1.0154924397338169e-05, "loss": 0.7622, "step": 1654 }, { "epoch": 0.5105660959432362, "grad_norm": 0.0, "learning_rate": 1.0144929998939491e-05, "loss": 0.8247, "step": 1655 }, { "epoch": 0.5108745950948634, "grad_norm": 0.0, "learning_rate": 1.0134935455740593e-05, "loss": 0.7686, "step": 1656 }, { "epoch": 0.5111830942464908, "grad_norm": 0.0, "learning_rate": 1.0124940777727061e-05, "loss": 0.7389, "step": 1657 }, { "epoch": 0.5114915933981181, "grad_norm": 0.0, "learning_rate": 1.0114945974884627e-05, "loss": 0.7494, "step": 1658 }, { "epoch": 0.5118000925497455, "grad_norm": 0.0, "learning_rate": 1.0104951057199144e-05, "loss": 0.82, "step": 1659 }, { "epoch": 0.5121085917013728, "grad_norm": 0.0, "learning_rate": 1.0094956034656582e-05, "loss": 0.749, "step": 1660 }, { "epoch": 0.5124170908530001, "grad_norm": 0.0, "learning_rate": 1.0084960917243008e-05, "loss": 0.6975, "step": 1661 }, { "epoch": 0.5127255900046275, "grad_norm": 0.0, "learning_rate": 1.0074965714944593e-05, "loss": 0.8068, "step": 1662 }, { "epoch": 0.5130340891562548, "grad_norm": 0.0, "learning_rate": 1.0064970437747592e-05, "loss": 0.6657, "step": 1663 }, { "epoch": 0.5133425883078822, "grad_norm": 0.0, "learning_rate": 1.0054975095638327e-05, "loss": 0.7297, "step": 1664 }, { "epoch": 0.5136510874595095, "grad_norm": 0.0, "learning_rate": 1.0044979698603195e-05, "loss": 0.6419, "step": 1665 }, { "epoch": 0.5139595866111368, "grad_norm": 0.0, "learning_rate": 1.0034984256628637e-05, "loss": 0.8095, "step": 1666 }, { "epoch": 0.5142680857627642, "grad_norm": 0.0, "learning_rate": 1.002498877970115e-05, "loss": 0.7918, "step": 1667 }, { "epoch": 0.5145765849143915, "grad_norm": 0.0, "learning_rate": 1.001499327780726e-05, "loss": 0.7891, "step": 1668 }, { "epoch": 0.5148850840660188, "grad_norm": 0.0, "learning_rate": 1.0004997760933517e-05, "loss": 0.7117, "step": 1669 }, { "epoch": 0.5151935832176462, "grad_norm": 0.0, "learning_rate": 9.995002239066486e-06, "loss": 0.7741, "step": 1670 }, { "epoch": 0.5155020823692735, "grad_norm": 0.0, "learning_rate": 9.985006722192742e-06, "loss": 0.8163, "step": 1671 }, { "epoch": 0.5158105815209009, "grad_norm": 0.0, "learning_rate": 9.975011220298853e-06, "loss": 0.7816, "step": 1672 }, { "epoch": 0.5161190806725281, "grad_norm": 0.0, "learning_rate": 9.965015743371368e-06, "loss": 0.7656, "step": 1673 }, { "epoch": 0.5164275798241554, "grad_norm": 0.0, "learning_rate": 9.95502030139681e-06, "loss": 0.8098, "step": 1674 }, { "epoch": 0.5167360789757828, "grad_norm": 0.0, "learning_rate": 9.945024904361675e-06, "loss": 0.7805, "step": 1675 }, { "epoch": 0.5170445781274101, "grad_norm": 0.0, "learning_rate": 9.935029562252413e-06, "loss": 0.7974, "step": 1676 }, { "epoch": 0.5173530772790375, "grad_norm": 0.0, "learning_rate": 9.92503428505541e-06, "loss": 0.7451, "step": 1677 }, { "epoch": 0.5176615764306648, "grad_norm": 0.0, "learning_rate": 9.915039082756995e-06, "loss": 0.7494, "step": 1678 }, { "epoch": 0.5179700755822921, "grad_norm": 0.0, "learning_rate": 9.905043965343421e-06, "loss": 0.7199, "step": 1679 }, { "epoch": 0.5182785747339195, "grad_norm": 0.0, "learning_rate": 9.895048942800856e-06, "loss": 0.7528, "step": 1680 }, { "epoch": 0.5185870738855468, "grad_norm": 0.0, "learning_rate": 9.885054025115378e-06, "loss": 0.7486, "step": 1681 }, { "epoch": 0.5188955730371742, "grad_norm": 0.0, "learning_rate": 9.875059222272942e-06, "loss": 0.6966, "step": 1682 }, { "epoch": 0.5192040721888015, "grad_norm": 0.0, "learning_rate": 9.865064544259409e-06, "loss": 0.6967, "step": 1683 }, { "epoch": 0.5195125713404288, "grad_norm": 0.0, "learning_rate": 9.855070001060508e-06, "loss": 0.8254, "step": 1684 }, { "epoch": 0.5198210704920562, "grad_norm": 0.0, "learning_rate": 9.845075602661836e-06, "loss": 0.7459, "step": 1685 }, { "epoch": 0.5201295696436835, "grad_norm": 0.0, "learning_rate": 9.835081359048835e-06, "loss": 0.7506, "step": 1686 }, { "epoch": 0.5204380687953108, "grad_norm": 0.0, "learning_rate": 9.825087280206801e-06, "loss": 0.8133, "step": 1687 }, { "epoch": 0.5207465679469382, "grad_norm": 0.0, "learning_rate": 9.815093376120866e-06, "loss": 0.7707, "step": 1688 }, { "epoch": 0.5210550670985655, "grad_norm": 0.0, "learning_rate": 9.805099656775987e-06, "loss": 0.7421, "step": 1689 }, { "epoch": 0.5213635662501929, "grad_norm": 0.0, "learning_rate": 9.79510613215693e-06, "loss": 0.6809, "step": 1690 }, { "epoch": 0.5216720654018201, "grad_norm": 0.0, "learning_rate": 9.785112812248274e-06, "loss": 0.7263, "step": 1691 }, { "epoch": 0.5219805645534474, "grad_norm": 0.0, "learning_rate": 9.775119707034389e-06, "loss": 0.7779, "step": 1692 }, { "epoch": 0.5222890637050748, "grad_norm": 0.0, "learning_rate": 9.765126826499437e-06, "loss": 0.7037, "step": 1693 }, { "epoch": 0.5225975628567021, "grad_norm": 0.0, "learning_rate": 9.755134180627342e-06, "loss": 0.78, "step": 1694 }, { "epoch": 0.5229060620083295, "grad_norm": 0.0, "learning_rate": 9.74514177940181e-06, "loss": 0.7053, "step": 1695 }, { "epoch": 0.5232145611599568, "grad_norm": 0.0, "learning_rate": 9.73514963280629e-06, "loss": 0.851, "step": 1696 }, { "epoch": 0.5235230603115841, "grad_norm": 0.0, "learning_rate": 9.725157750823984e-06, "loss": 0.7866, "step": 1697 }, { "epoch": 0.5238315594632115, "grad_norm": 0.0, "learning_rate": 9.715166143437832e-06, "loss": 0.7903, "step": 1698 }, { "epoch": 0.5241400586148388, "grad_norm": 0.0, "learning_rate": 9.705174820630483e-06, "loss": 0.7754, "step": 1699 }, { "epoch": 0.5244485577664662, "grad_norm": 0.0, "learning_rate": 9.69518379238432e-06, "loss": 0.7465, "step": 1700 }, { "epoch": 0.5247570569180935, "grad_norm": 0.0, "learning_rate": 9.685193068681423e-06, "loss": 0.7097, "step": 1701 }, { "epoch": 0.5250655560697208, "grad_norm": 0.0, "learning_rate": 9.675202659503575e-06, "loss": 0.737, "step": 1702 }, { "epoch": 0.5253740552213482, "grad_norm": 0.0, "learning_rate": 9.665212574832233e-06, "loss": 0.7015, "step": 1703 }, { "epoch": 0.5256825543729755, "grad_norm": 0.0, "learning_rate": 9.655222824648537e-06, "loss": 0.7699, "step": 1704 }, { "epoch": 0.5259910535246028, "grad_norm": 0.0, "learning_rate": 9.645233418933296e-06, "loss": 0.6963, "step": 1705 }, { "epoch": 0.5262995526762302, "grad_norm": 0.0, "learning_rate": 9.63524436766697e-06, "loss": 0.7432, "step": 1706 }, { "epoch": 0.5266080518278575, "grad_norm": 0.0, "learning_rate": 9.625255680829661e-06, "loss": 0.7409, "step": 1707 }, { "epoch": 0.5269165509794848, "grad_norm": 0.0, "learning_rate": 9.615267368401118e-06, "loss": 0.7743, "step": 1708 }, { "epoch": 0.5272250501311121, "grad_norm": 0.0, "learning_rate": 9.605279440360705e-06, "loss": 0.8109, "step": 1709 }, { "epoch": 0.5275335492827394, "grad_norm": 0.0, "learning_rate": 9.595291906687414e-06, "loss": 0.7718, "step": 1710 }, { "epoch": 0.5278420484343668, "grad_norm": 0.0, "learning_rate": 9.585304777359826e-06, "loss": 0.7101, "step": 1711 }, { "epoch": 0.5281505475859941, "grad_norm": 0.0, "learning_rate": 9.57531806235613e-06, "loss": 0.8157, "step": 1712 }, { "epoch": 0.5284590467376215, "grad_norm": 0.0, "learning_rate": 9.565331771654102e-06, "loss": 0.7767, "step": 1713 }, { "epoch": 0.5287675458892488, "grad_norm": 0.0, "learning_rate": 9.55534591523109e-06, "loss": 0.7739, "step": 1714 }, { "epoch": 0.5290760450408761, "grad_norm": 0.0, "learning_rate": 9.545360503064007e-06, "loss": 0.7932, "step": 1715 }, { "epoch": 0.5293845441925035, "grad_norm": 0.0, "learning_rate": 9.535375545129326e-06, "loss": 0.7008, "step": 1716 }, { "epoch": 0.5296930433441308, "grad_norm": 0.0, "learning_rate": 9.52539105140306e-06, "loss": 0.7103, "step": 1717 }, { "epoch": 0.5300015424957581, "grad_norm": 0.0, "learning_rate": 9.515407031860768e-06, "loss": 0.7694, "step": 1718 }, { "epoch": 0.5303100416473855, "grad_norm": 0.0, "learning_rate": 9.505423496477527e-06, "loss": 0.6776, "step": 1719 }, { "epoch": 0.5306185407990128, "grad_norm": 0.0, "learning_rate": 9.495440455227932e-06, "loss": 0.7044, "step": 1720 }, { "epoch": 0.5309270399506402, "grad_norm": 0.0, "learning_rate": 9.485457918086086e-06, "loss": 0.7518, "step": 1721 }, { "epoch": 0.5312355391022675, "grad_norm": 0.0, "learning_rate": 9.475475895025586e-06, "loss": 0.7754, "step": 1722 }, { "epoch": 0.5315440382538948, "grad_norm": 0.0, "learning_rate": 9.465494396019524e-06, "loss": 0.6894, "step": 1723 }, { "epoch": 0.5318525374055222, "grad_norm": 0.0, "learning_rate": 9.455513431040452e-06, "loss": 0.7407, "step": 1724 }, { "epoch": 0.5321610365571494, "grad_norm": 0.0, "learning_rate": 9.445533010060402e-06, "loss": 0.7462, "step": 1725 }, { "epoch": 0.5324695357087768, "grad_norm": 0.0, "learning_rate": 9.435553143050856e-06, "loss": 0.7176, "step": 1726 }, { "epoch": 0.5327780348604041, "grad_norm": 0.0, "learning_rate": 9.42557383998275e-06, "loss": 0.7529, "step": 1727 }, { "epoch": 0.5330865340120314, "grad_norm": 0.0, "learning_rate": 9.415595110826444e-06, "loss": 0.7301, "step": 1728 }, { "epoch": 0.5333950331636588, "grad_norm": 0.0, "learning_rate": 9.405616965551738e-06, "loss": 0.7651, "step": 1729 }, { "epoch": 0.5337035323152861, "grad_norm": 0.0, "learning_rate": 9.395639414127837e-06, "loss": 0.6992, "step": 1730 }, { "epoch": 0.5340120314669135, "grad_norm": 0.0, "learning_rate": 9.385662466523364e-06, "loss": 0.7285, "step": 1731 }, { "epoch": 0.5343205306185408, "grad_norm": 0.0, "learning_rate": 9.375686132706333e-06, "loss": 0.8753, "step": 1732 }, { "epoch": 0.5346290297701681, "grad_norm": 0.0, "learning_rate": 9.36571042264414e-06, "loss": 0.767, "step": 1733 }, { "epoch": 0.5349375289217955, "grad_norm": 0.0, "learning_rate": 9.355735346303566e-06, "loss": 0.7476, "step": 1734 }, { "epoch": 0.5352460280734228, "grad_norm": 0.0, "learning_rate": 9.345760913650752e-06, "loss": 0.8124, "step": 1735 }, { "epoch": 0.5355545272250501, "grad_norm": 0.0, "learning_rate": 9.335787134651207e-06, "loss": 0.8571, "step": 1736 }, { "epoch": 0.5358630263766775, "grad_norm": 0.0, "learning_rate": 9.32581401926977e-06, "loss": 0.743, "step": 1737 }, { "epoch": 0.5361715255283048, "grad_norm": 0.0, "learning_rate": 9.31584157747063e-06, "loss": 0.763, "step": 1738 }, { "epoch": 0.5364800246799322, "grad_norm": 0.0, "learning_rate": 9.305869819217299e-06, "loss": 0.7776, "step": 1739 }, { "epoch": 0.5367885238315595, "grad_norm": 0.0, "learning_rate": 9.295898754472607e-06, "loss": 0.7812, "step": 1740 }, { "epoch": 0.5370970229831868, "grad_norm": 0.0, "learning_rate": 9.285928393198691e-06, "loss": 0.8133, "step": 1741 }, { "epoch": 0.5374055221348142, "grad_norm": 0.0, "learning_rate": 9.275958745356982e-06, "loss": 0.7735, "step": 1742 }, { "epoch": 0.5377140212864414, "grad_norm": 0.0, "learning_rate": 9.2659898209082e-06, "loss": 0.7431, "step": 1743 }, { "epoch": 0.5380225204380688, "grad_norm": 0.0, "learning_rate": 9.256021629812349e-06, "loss": 0.7729, "step": 1744 }, { "epoch": 0.5383310195896961, "grad_norm": 0.0, "learning_rate": 9.24605418202869e-06, "loss": 0.6907, "step": 1745 }, { "epoch": 0.5386395187413234, "grad_norm": 0.0, "learning_rate": 9.236087487515746e-06, "loss": 0.7184, "step": 1746 }, { "epoch": 0.5389480178929508, "grad_norm": 0.0, "learning_rate": 9.226121556231287e-06, "loss": 0.6686, "step": 1747 }, { "epoch": 0.5392565170445781, "grad_norm": 0.0, "learning_rate": 9.21615639813232e-06, "loss": 0.7839, "step": 1748 }, { "epoch": 0.5395650161962054, "grad_norm": 0.0, "learning_rate": 9.206192023175086e-06, "loss": 0.776, "step": 1749 }, { "epoch": 0.5398735153478328, "grad_norm": 0.0, "learning_rate": 9.196228441315028e-06, "loss": 0.7371, "step": 1750 }, { "epoch": 0.5401820144994601, "grad_norm": 0.0, "learning_rate": 9.18626566250681e-06, "loss": 0.6791, "step": 1751 }, { "epoch": 0.5404905136510875, "grad_norm": 0.0, "learning_rate": 9.176303696704288e-06, "loss": 0.7811, "step": 1752 }, { "epoch": 0.5407990128027148, "grad_norm": 0.0, "learning_rate": 9.166342553860518e-06, "loss": 0.7032, "step": 1753 }, { "epoch": 0.5411075119543421, "grad_norm": 0.0, "learning_rate": 9.15638224392771e-06, "loss": 0.7617, "step": 1754 }, { "epoch": 0.5414160111059695, "grad_norm": 0.0, "learning_rate": 9.146422776857262e-06, "loss": 0.6893, "step": 1755 }, { "epoch": 0.5417245102575968, "grad_norm": 0.0, "learning_rate": 9.13646416259972e-06, "loss": 0.7536, "step": 1756 }, { "epoch": 0.5420330094092242, "grad_norm": 0.0, "learning_rate": 9.126506411104786e-06, "loss": 0.756, "step": 1757 }, { "epoch": 0.5423415085608515, "grad_norm": 0.0, "learning_rate": 9.11654953232129e-06, "loss": 0.81, "step": 1758 }, { "epoch": 0.5426500077124788, "grad_norm": 0.0, "learning_rate": 9.106593536197198e-06, "loss": 0.7235, "step": 1759 }, { "epoch": 0.5429585068641061, "grad_norm": 0.0, "learning_rate": 9.09663843267959e-06, "loss": 1.0373, "step": 1760 }, { "epoch": 0.5432670060157334, "grad_norm": 0.0, "learning_rate": 9.086684231714655e-06, "loss": 0.751, "step": 1761 }, { "epoch": 0.5435755051673608, "grad_norm": 0.0, "learning_rate": 9.076730943247687e-06, "loss": 0.7119, "step": 1762 }, { "epoch": 0.5438840043189881, "grad_norm": 0.0, "learning_rate": 9.06677857722305e-06, "loss": 0.7603, "step": 1763 }, { "epoch": 0.5441925034706154, "grad_norm": 0.0, "learning_rate": 9.056827143584206e-06, "loss": 0.6543, "step": 1764 }, { "epoch": 0.5445010026222428, "grad_norm": 0.0, "learning_rate": 9.046876652273677e-06, "loss": 0.7651, "step": 1765 }, { "epoch": 0.5448095017738701, "grad_norm": 0.0, "learning_rate": 9.036927113233049e-06, "loss": 0.8314, "step": 1766 }, { "epoch": 0.5451180009254974, "grad_norm": 0.0, "learning_rate": 9.026978536402943e-06, "loss": 0.7353, "step": 1767 }, { "epoch": 0.5454265000771248, "grad_norm": 0.0, "learning_rate": 9.01703093172303e-06, "loss": 0.7053, "step": 1768 }, { "epoch": 0.5457349992287521, "grad_norm": 0.0, "learning_rate": 9.007084309132013e-06, "loss": 0.7845, "step": 1769 }, { "epoch": 0.5460434983803795, "grad_norm": 0.0, "learning_rate": 8.997138678567602e-06, "loss": 0.778, "step": 1770 }, { "epoch": 0.5463519975320068, "grad_norm": 0.0, "learning_rate": 8.987194049966526e-06, "loss": 0.7123, "step": 1771 }, { "epoch": 0.5466604966836341, "grad_norm": 0.0, "learning_rate": 8.977250433264504e-06, "loss": 0.7844, "step": 1772 }, { "epoch": 0.5469689958352615, "grad_norm": 0.0, "learning_rate": 8.967307838396256e-06, "loss": 0.7202, "step": 1773 }, { "epoch": 0.5472774949868888, "grad_norm": 0.0, "learning_rate": 8.957366275295471e-06, "loss": 0.7484, "step": 1774 }, { "epoch": 0.5475859941385162, "grad_norm": 0.0, "learning_rate": 8.947425753894805e-06, "loss": 0.77, "step": 1775 }, { "epoch": 0.5478944932901435, "grad_norm": 0.0, "learning_rate": 8.937486284125883e-06, "loss": 0.8058, "step": 1776 }, { "epoch": 0.5482029924417707, "grad_norm": 0.0, "learning_rate": 8.927547875919275e-06, "loss": 0.8029, "step": 1777 }, { "epoch": 0.5485114915933981, "grad_norm": 0.0, "learning_rate": 8.91761053920449e-06, "loss": 0.7281, "step": 1778 }, { "epoch": 0.5488199907450254, "grad_norm": 0.0, "learning_rate": 8.90767428390997e-06, "loss": 1.0318, "step": 1779 }, { "epoch": 0.5491284898966527, "grad_norm": 0.0, "learning_rate": 8.897739119963065e-06, "loss": 0.8065, "step": 1780 }, { "epoch": 0.5494369890482801, "grad_norm": 0.0, "learning_rate": 8.887805057290048e-06, "loss": 0.7575, "step": 1781 }, { "epoch": 0.5497454881999074, "grad_norm": 0.0, "learning_rate": 8.877872105816082e-06, "loss": 0.7086, "step": 1782 }, { "epoch": 0.5500539873515348, "grad_norm": 0.0, "learning_rate": 8.867940275465233e-06, "loss": 0.7722, "step": 1783 }, { "epoch": 0.5503624865031621, "grad_norm": 0.0, "learning_rate": 8.85800957616043e-06, "loss": 0.7618, "step": 1784 }, { "epoch": 0.5506709856547894, "grad_norm": 0.0, "learning_rate": 8.848080017823482e-06, "loss": 0.7556, "step": 1785 }, { "epoch": 0.5509794848064168, "grad_norm": 0.0, "learning_rate": 8.83815161037506e-06, "loss": 0.8066, "step": 1786 }, { "epoch": 0.5512879839580441, "grad_norm": 0.0, "learning_rate": 8.828224363734679e-06, "loss": 0.791, "step": 1787 }, { "epoch": 0.5515964831096715, "grad_norm": 0.0, "learning_rate": 8.81829828782069e-06, "loss": 0.778, "step": 1788 }, { "epoch": 0.5519049822612988, "grad_norm": 0.0, "learning_rate": 8.80837339255029e-06, "loss": 0.676, "step": 1789 }, { "epoch": 0.5522134814129261, "grad_norm": 0.0, "learning_rate": 8.798449687839482e-06, "loss": 0.7408, "step": 1790 }, { "epoch": 0.5525219805645535, "grad_norm": 0.0, "learning_rate": 8.788527183603093e-06, "loss": 0.7563, "step": 1791 }, { "epoch": 0.5528304797161808, "grad_norm": 0.0, "learning_rate": 8.77860588975473e-06, "loss": 0.7402, "step": 1792 }, { "epoch": 0.5531389788678082, "grad_norm": 0.0, "learning_rate": 8.768685816206811e-06, "loss": 0.7686, "step": 1793 }, { "epoch": 0.5534474780194355, "grad_norm": 0.0, "learning_rate": 8.758766972870525e-06, "loss": 0.7298, "step": 1794 }, { "epoch": 0.5537559771710627, "grad_norm": 0.0, "learning_rate": 8.748849369655833e-06, "loss": 0.7227, "step": 1795 }, { "epoch": 0.5540644763226901, "grad_norm": 0.0, "learning_rate": 8.738933016471461e-06, "loss": 0.802, "step": 1796 }, { "epoch": 0.5543729754743174, "grad_norm": 0.0, "learning_rate": 8.729017923224878e-06, "loss": 0.7057, "step": 1797 }, { "epoch": 0.5546814746259447, "grad_norm": 0.0, "learning_rate": 8.7191040998223e-06, "loss": 0.708, "step": 1798 }, { "epoch": 0.5549899737775721, "grad_norm": 0.0, "learning_rate": 8.709191556168675e-06, "loss": 0.6959, "step": 1799 }, { "epoch": 0.5552984729291994, "grad_norm": 0.0, "learning_rate": 8.699280302167674e-06, "loss": 0.7177, "step": 1800 }, { "epoch": 0.5556069720808268, "grad_norm": 0.0, "learning_rate": 8.689370347721668e-06, "loss": 0.731, "step": 1801 }, { "epoch": 0.5559154712324541, "grad_norm": 0.0, "learning_rate": 8.679461702731746e-06, "loss": 0.8371, "step": 1802 }, { "epoch": 0.5562239703840814, "grad_norm": 0.0, "learning_rate": 8.669554377097674e-06, "loss": 0.8826, "step": 1803 }, { "epoch": 0.5565324695357088, "grad_norm": 0.0, "learning_rate": 8.659648380717914e-06, "loss": 0.6772, "step": 1804 }, { "epoch": 0.5568409686873361, "grad_norm": 0.0, "learning_rate": 8.649743723489582e-06, "loss": 0.6543, "step": 1805 }, { "epoch": 0.5571494678389635, "grad_norm": 0.0, "learning_rate": 8.639840415308475e-06, "loss": 0.7435, "step": 1806 }, { "epoch": 0.5574579669905908, "grad_norm": 0.0, "learning_rate": 8.629938466069028e-06, "loss": 0.6827, "step": 1807 }, { "epoch": 0.5577664661422181, "grad_norm": 0.0, "learning_rate": 8.62003788566433e-06, "loss": 0.7311, "step": 1808 }, { "epoch": 0.5580749652938455, "grad_norm": 0.0, "learning_rate": 8.610138683986088e-06, "loss": 0.762, "step": 1809 }, { "epoch": 0.5583834644454728, "grad_norm": 0.0, "learning_rate": 8.600240870924645e-06, "loss": 0.7338, "step": 1810 }, { "epoch": 0.5586919635971002, "grad_norm": 0.0, "learning_rate": 8.59034445636895e-06, "loss": 0.7335, "step": 1811 }, { "epoch": 0.5590004627487274, "grad_norm": 0.0, "learning_rate": 8.580449450206555e-06, "loss": 0.765, "step": 1812 }, { "epoch": 0.5593089619003547, "grad_norm": 0.0, "learning_rate": 8.570555862323612e-06, "loss": 0.7169, "step": 1813 }, { "epoch": 0.5596174610519821, "grad_norm": 0.0, "learning_rate": 8.560663702604844e-06, "loss": 0.7687, "step": 1814 }, { "epoch": 0.5599259602036094, "grad_norm": 0.0, "learning_rate": 8.55077298093355e-06, "loss": 0.6824, "step": 1815 }, { "epoch": 0.5602344593552367, "grad_norm": 0.0, "learning_rate": 8.540883707191602e-06, "loss": 0.7119, "step": 1816 }, { "epoch": 0.5605429585068641, "grad_norm": 0.0, "learning_rate": 8.53099589125942e-06, "loss": 0.7964, "step": 1817 }, { "epoch": 0.5608514576584914, "grad_norm": 0.0, "learning_rate": 8.521109543015958e-06, "loss": 0.7385, "step": 1818 }, { "epoch": 0.5611599568101188, "grad_norm": 0.0, "learning_rate": 8.511224672338715e-06, "loss": 0.7068, "step": 1819 }, { "epoch": 0.5614684559617461, "grad_norm": 0.0, "learning_rate": 8.501341289103712e-06, "loss": 1.0249, "step": 1820 }, { "epoch": 0.5617769551133734, "grad_norm": 0.0, "learning_rate": 8.491459403185485e-06, "loss": 0.7633, "step": 1821 }, { "epoch": 0.5620854542650008, "grad_norm": 0.0, "learning_rate": 8.481579024457066e-06, "loss": 0.6486, "step": 1822 }, { "epoch": 0.5623939534166281, "grad_norm": 0.0, "learning_rate": 8.471700162789989e-06, "loss": 0.7773, "step": 1823 }, { "epoch": 0.5627024525682555, "grad_norm": 0.0, "learning_rate": 8.461822828054269e-06, "loss": 0.7, "step": 1824 }, { "epoch": 0.5630109517198828, "grad_norm": 0.0, "learning_rate": 8.451947030118397e-06, "loss": 0.7306, "step": 1825 }, { "epoch": 0.5633194508715101, "grad_norm": 0.0, "learning_rate": 8.442072778849328e-06, "loss": 0.7438, "step": 1826 }, { "epoch": 0.5636279500231375, "grad_norm": 0.0, "learning_rate": 8.432200084112473e-06, "loss": 0.6776, "step": 1827 }, { "epoch": 0.5639364491747648, "grad_norm": 0.0, "learning_rate": 8.422328955771683e-06, "loss": 0.7962, "step": 1828 }, { "epoch": 0.564244948326392, "grad_norm": 0.0, "learning_rate": 8.412459403689249e-06, "loss": 0.7924, "step": 1829 }, { "epoch": 0.5645534474780194, "grad_norm": 0.0, "learning_rate": 8.40259143772589e-06, "loss": 0.7442, "step": 1830 }, { "epoch": 0.5648619466296467, "grad_norm": 0.0, "learning_rate": 8.39272506774073e-06, "loss": 0.6769, "step": 1831 }, { "epoch": 0.5651704457812741, "grad_norm": 0.0, "learning_rate": 8.382860303591306e-06, "loss": 0.7485, "step": 1832 }, { "epoch": 0.5654789449329014, "grad_norm": 0.0, "learning_rate": 8.372997155133548e-06, "loss": 0.7878, "step": 1833 }, { "epoch": 0.5657874440845287, "grad_norm": 0.0, "learning_rate": 8.363135632221777e-06, "loss": 0.8033, "step": 1834 }, { "epoch": 0.5660959432361561, "grad_norm": 0.0, "learning_rate": 8.35327574470868e-06, "loss": 0.6677, "step": 1835 }, { "epoch": 0.5664044423877834, "grad_norm": 0.0, "learning_rate": 8.34341750244532e-06, "loss": 0.7917, "step": 1836 }, { "epoch": 0.5667129415394108, "grad_norm": 0.0, "learning_rate": 8.333560915281109e-06, "loss": 0.7731, "step": 1837 }, { "epoch": 0.5670214406910381, "grad_norm": 0.0, "learning_rate": 8.323705993063813e-06, "loss": 0.7557, "step": 1838 }, { "epoch": 0.5673299398426654, "grad_norm": 0.0, "learning_rate": 8.313852745639523e-06, "loss": 0.761, "step": 1839 }, { "epoch": 0.5676384389942928, "grad_norm": 0.0, "learning_rate": 8.304001182852668e-06, "loss": 0.7892, "step": 1840 }, { "epoch": 0.5679469381459201, "grad_norm": 0.0, "learning_rate": 8.294151314545988e-06, "loss": 0.7484, "step": 1841 }, { "epoch": 0.5682554372975475, "grad_norm": 0.0, "learning_rate": 8.284303150560538e-06, "loss": 0.7573, "step": 1842 }, { "epoch": 0.5685639364491748, "grad_norm": 0.0, "learning_rate": 8.274456700735653e-06, "loss": 0.6972, "step": 1843 }, { "epoch": 0.5688724356008021, "grad_norm": 0.0, "learning_rate": 8.26461197490897e-06, "loss": 0.7, "step": 1844 }, { "epoch": 0.5691809347524295, "grad_norm": 0.0, "learning_rate": 8.2547689829164e-06, "loss": 0.7058, "step": 1845 }, { "epoch": 0.5694894339040568, "grad_norm": 0.0, "learning_rate": 8.24492773459212e-06, "loss": 0.7191, "step": 1846 }, { "epoch": 0.569797933055684, "grad_norm": 0.0, "learning_rate": 8.235088239768577e-06, "loss": 0.7849, "step": 1847 }, { "epoch": 0.5701064322073114, "grad_norm": 0.0, "learning_rate": 8.225250508276439e-06, "loss": 0.7633, "step": 1848 }, { "epoch": 0.5704149313589387, "grad_norm": 0.0, "learning_rate": 8.215414549944636e-06, "loss": 0.6931, "step": 1849 }, { "epoch": 0.5707234305105661, "grad_norm": 0.0, "learning_rate": 8.20558037460032e-06, "loss": 0.6946, "step": 1850 }, { "epoch": 0.5710319296621934, "grad_norm": 0.0, "learning_rate": 8.19574799206886e-06, "loss": 0.6747, "step": 1851 }, { "epoch": 0.5713404288138207, "grad_norm": 0.0, "learning_rate": 8.185917412173832e-06, "loss": 0.7037, "step": 1852 }, { "epoch": 0.5716489279654481, "grad_norm": 0.0, "learning_rate": 8.176088644737015e-06, "loss": 0.7733, "step": 1853 }, { "epoch": 0.5719574271170754, "grad_norm": 0.0, "learning_rate": 8.166261699578375e-06, "loss": 0.7072, "step": 1854 }, { "epoch": 0.5722659262687028, "grad_norm": 0.0, "learning_rate": 8.156436586516064e-06, "loss": 0.7545, "step": 1855 }, { "epoch": 0.5725744254203301, "grad_norm": 0.0, "learning_rate": 8.146613315366387e-06, "loss": 0.7408, "step": 1856 }, { "epoch": 0.5728829245719574, "grad_norm": 0.0, "learning_rate": 8.136791895943825e-06, "loss": 0.7409, "step": 1857 }, { "epoch": 0.5731914237235848, "grad_norm": 0.0, "learning_rate": 8.126972338060997e-06, "loss": 0.7324, "step": 1858 }, { "epoch": 0.5734999228752121, "grad_norm": 0.0, "learning_rate": 8.117154651528676e-06, "loss": 0.8158, "step": 1859 }, { "epoch": 0.5738084220268395, "grad_norm": 0.0, "learning_rate": 8.107338846155762e-06, "loss": 0.7496, "step": 1860 }, { "epoch": 0.5741169211784668, "grad_norm": 0.0, "learning_rate": 8.097524931749256e-06, "loss": 0.7526, "step": 1861 }, { "epoch": 0.5744254203300941, "grad_norm": 0.0, "learning_rate": 8.087712918114294e-06, "loss": 0.7048, "step": 1862 }, { "epoch": 0.5747339194817215, "grad_norm": 0.0, "learning_rate": 8.077902815054102e-06, "loss": 0.7347, "step": 1863 }, { "epoch": 0.5750424186333487, "grad_norm": 0.0, "learning_rate": 8.06809463237e-06, "loss": 1.0299, "step": 1864 }, { "epoch": 0.575350917784976, "grad_norm": 0.0, "learning_rate": 8.058288379861387e-06, "loss": 0.7647, "step": 1865 }, { "epoch": 0.5756594169366034, "grad_norm": 0.0, "learning_rate": 8.048484067325735e-06, "loss": 0.7311, "step": 1866 }, { "epoch": 0.5759679160882307, "grad_norm": 0.0, "learning_rate": 8.038681704558578e-06, "loss": 0.6969, "step": 1867 }, { "epoch": 0.5762764152398581, "grad_norm": 0.0, "learning_rate": 8.028881301353503e-06, "loss": 0.7247, "step": 1868 }, { "epoch": 0.5765849143914854, "grad_norm": 0.0, "learning_rate": 8.019082867502132e-06, "loss": 0.713, "step": 1869 }, { "epoch": 0.5768934135431127, "grad_norm": 0.0, "learning_rate": 8.009286412794126e-06, "loss": 0.7307, "step": 1870 }, { "epoch": 0.5772019126947401, "grad_norm": 0.0, "learning_rate": 7.999491947017174e-06, "loss": 0.6996, "step": 1871 }, { "epoch": 0.5775104118463674, "grad_norm": 0.0, "learning_rate": 7.989699479956972e-06, "loss": 0.7088, "step": 1872 }, { "epoch": 0.5778189109979948, "grad_norm": 0.0, "learning_rate": 7.97990902139721e-06, "loss": 0.7401, "step": 1873 }, { "epoch": 0.5781274101496221, "grad_norm": 0.0, "learning_rate": 7.970120581119584e-06, "loss": 0.7832, "step": 1874 }, { "epoch": 0.5784359093012494, "grad_norm": 0.0, "learning_rate": 7.960334168903769e-06, "loss": 0.6926, "step": 1875 }, { "epoch": 0.5787444084528768, "grad_norm": 0.0, "learning_rate": 7.950549794527418e-06, "loss": 0.7896, "step": 1876 }, { "epoch": 0.5790529076045041, "grad_norm": 0.0, "learning_rate": 7.940767467766142e-06, "loss": 0.7093, "step": 1877 }, { "epoch": 0.5793614067561315, "grad_norm": 0.0, "learning_rate": 7.930987198393506e-06, "loss": 0.7789, "step": 1878 }, { "epoch": 0.5796699059077588, "grad_norm": 0.0, "learning_rate": 7.921208996181022e-06, "loss": 0.7411, "step": 1879 }, { "epoch": 0.5799784050593861, "grad_norm": 0.0, "learning_rate": 7.911432870898139e-06, "loss": 0.7569, "step": 1880 }, { "epoch": 0.5802869042110134, "grad_norm": 0.0, "learning_rate": 7.901658832312234e-06, "loss": 0.6853, "step": 1881 }, { "epoch": 0.5805954033626407, "grad_norm": 0.0, "learning_rate": 7.891886890188578e-06, "loss": 0.8369, "step": 1882 }, { "epoch": 0.580903902514268, "grad_norm": 0.0, "learning_rate": 7.882117054290375e-06, "loss": 0.804, "step": 1883 }, { "epoch": 0.5812124016658954, "grad_norm": 0.0, "learning_rate": 7.872349334378712e-06, "loss": 0.7166, "step": 1884 }, { "epoch": 0.5815209008175227, "grad_norm": 0.0, "learning_rate": 7.862583740212564e-06, "loss": 0.8346, "step": 1885 }, { "epoch": 0.5818293999691501, "grad_norm": 0.0, "learning_rate": 7.852820281548773e-06, "loss": 0.6754, "step": 1886 }, { "epoch": 0.5821378991207774, "grad_norm": 0.0, "learning_rate": 7.84305896814206e-06, "loss": 0.727, "step": 1887 }, { "epoch": 0.5824463982724047, "grad_norm": 0.0, "learning_rate": 7.833299809744997e-06, "loss": 0.7691, "step": 1888 }, { "epoch": 0.5827548974240321, "grad_norm": 0.0, "learning_rate": 7.823542816108007e-06, "loss": 0.7847, "step": 1889 }, { "epoch": 0.5830633965756594, "grad_norm": 0.0, "learning_rate": 7.813787996979343e-06, "loss": 0.7657, "step": 1890 }, { "epoch": 0.5833718957272868, "grad_norm": 0.0, "learning_rate": 7.804035362105092e-06, "loss": 0.7125, "step": 1891 }, { "epoch": 0.5836803948789141, "grad_norm": 0.0, "learning_rate": 7.794284921229151e-06, "loss": 0.7207, "step": 1892 }, { "epoch": 0.5839888940305414, "grad_norm": 0.0, "learning_rate": 7.784536684093237e-06, "loss": 0.7243, "step": 1893 }, { "epoch": 0.5842973931821688, "grad_norm": 0.0, "learning_rate": 7.774790660436857e-06, "loss": 0.6716, "step": 1894 }, { "epoch": 0.5846058923337961, "grad_norm": 0.0, "learning_rate": 7.765046859997303e-06, "loss": 0.7218, "step": 1895 }, { "epoch": 0.5849143914854235, "grad_norm": 0.0, "learning_rate": 7.755305292509656e-06, "loss": 0.7652, "step": 1896 }, { "epoch": 0.5852228906370508, "grad_norm": 0.0, "learning_rate": 7.745565967706757e-06, "loss": 0.7331, "step": 1897 }, { "epoch": 0.5855313897886781, "grad_norm": 0.0, "learning_rate": 7.735828895319215e-06, "loss": 0.7257, "step": 1898 }, { "epoch": 0.5858398889403054, "grad_norm": 0.0, "learning_rate": 7.726094085075377e-06, "loss": 0.7037, "step": 1899 }, { "epoch": 0.5861483880919327, "grad_norm": 0.0, "learning_rate": 7.716361546701337e-06, "loss": 0.7186, "step": 1900 }, { "epoch": 0.58645688724356, "grad_norm": 0.0, "learning_rate": 7.706631289920923e-06, "loss": 0.7586, "step": 1901 }, { "epoch": 0.5867653863951874, "grad_norm": 0.0, "learning_rate": 7.696903324455678e-06, "loss": 0.7501, "step": 1902 }, { "epoch": 0.5870738855468147, "grad_norm": 0.0, "learning_rate": 7.687177660024854e-06, "loss": 0.7695, "step": 1903 }, { "epoch": 0.5873823846984421, "grad_norm": 0.0, "learning_rate": 7.677454306345408e-06, "loss": 0.7259, "step": 1904 }, { "epoch": 0.5876908838500694, "grad_norm": 0.0, "learning_rate": 7.667733273131989e-06, "loss": 0.7603, "step": 1905 }, { "epoch": 0.5879993830016967, "grad_norm": 0.0, "learning_rate": 7.658014570096926e-06, "loss": 0.6914, "step": 1906 }, { "epoch": 0.5883078821533241, "grad_norm": 0.0, "learning_rate": 7.648298206950216e-06, "loss": 0.7278, "step": 1907 }, { "epoch": 0.5886163813049514, "grad_norm": 0.0, "learning_rate": 7.638584193399524e-06, "loss": 0.7888, "step": 1908 }, { "epoch": 0.5889248804565788, "grad_norm": 0.0, "learning_rate": 7.628872539150165e-06, "loss": 0.7208, "step": 1909 }, { "epoch": 0.5892333796082061, "grad_norm": 0.0, "learning_rate": 7.619163253905097e-06, "loss": 0.6926, "step": 1910 }, { "epoch": 0.5895418787598334, "grad_norm": 0.0, "learning_rate": 7.609456347364919e-06, "loss": 0.7166, "step": 1911 }, { "epoch": 0.5898503779114608, "grad_norm": 0.0, "learning_rate": 7.599751829227832e-06, "loss": 0.7261, "step": 1912 }, { "epoch": 0.5901588770630881, "grad_norm": 0.0, "learning_rate": 7.590049709189671e-06, "loss": 0.6931, "step": 1913 }, { "epoch": 0.5904673762147155, "grad_norm": 0.0, "learning_rate": 7.580349996943868e-06, "loss": 0.6828, "step": 1914 }, { "epoch": 0.5907758753663428, "grad_norm": 0.0, "learning_rate": 7.570652702181454e-06, "loss": 0.727, "step": 1915 }, { "epoch": 0.59108437451797, "grad_norm": 0.0, "learning_rate": 7.560957834591034e-06, "loss": 0.7789, "step": 1916 }, { "epoch": 0.5913928736695974, "grad_norm": 0.0, "learning_rate": 7.551265403858797e-06, "loss": 0.7021, "step": 1917 }, { "epoch": 0.5917013728212247, "grad_norm": 0.0, "learning_rate": 7.541575419668497e-06, "loss": 0.6739, "step": 1918 }, { "epoch": 0.592009871972852, "grad_norm": 0.0, "learning_rate": 7.531887891701441e-06, "loss": 0.8115, "step": 1919 }, { "epoch": 0.5923183711244794, "grad_norm": 0.0, "learning_rate": 7.522202829636478e-06, "loss": 0.7618, "step": 1920 }, { "epoch": 0.5926268702761067, "grad_norm": 0.0, "learning_rate": 7.512520243150003e-06, "loss": 0.7356, "step": 1921 }, { "epoch": 0.5929353694277341, "grad_norm": 0.0, "learning_rate": 7.50284014191593e-06, "loss": 0.7187, "step": 1922 }, { "epoch": 0.5932438685793614, "grad_norm": 0.0, "learning_rate": 7.493162535605698e-06, "loss": 0.7121, "step": 1923 }, { "epoch": 0.5935523677309887, "grad_norm": 0.0, "learning_rate": 7.483487433888238e-06, "loss": 0.7162, "step": 1924 }, { "epoch": 0.5938608668826161, "grad_norm": 0.0, "learning_rate": 7.473814846429993e-06, "loss": 0.6884, "step": 1925 }, { "epoch": 0.5941693660342434, "grad_norm": 0.0, "learning_rate": 7.46414478289489e-06, "loss": 0.7447, "step": 1926 }, { "epoch": 0.5944778651858708, "grad_norm": 0.0, "learning_rate": 7.4544772529443295e-06, "loss": 0.7128, "step": 1927 }, { "epoch": 0.5947863643374981, "grad_norm": 0.0, "learning_rate": 7.444812266237198e-06, "loss": 0.665, "step": 1928 }, { "epoch": 0.5950948634891254, "grad_norm": 0.0, "learning_rate": 7.435149832429812e-06, "loss": 0.7802, "step": 1929 }, { "epoch": 0.5954033626407528, "grad_norm": 0.0, "learning_rate": 7.4254899611759616e-06, "loss": 1.0484, "step": 1930 }, { "epoch": 0.5957118617923801, "grad_norm": 0.0, "learning_rate": 7.415832662126865e-06, "loss": 0.7477, "step": 1931 }, { "epoch": 0.5960203609440075, "grad_norm": 0.0, "learning_rate": 7.406177944931179e-06, "loss": 0.6942, "step": 1932 }, { "epoch": 0.5963288600956347, "grad_norm": 0.0, "learning_rate": 7.396525819234969e-06, "loss": 0.7013, "step": 1933 }, { "epoch": 0.596637359247262, "grad_norm": 0.0, "learning_rate": 7.386876294681722e-06, "loss": 0.7359, "step": 1934 }, { "epoch": 0.5969458583988894, "grad_norm": 0.0, "learning_rate": 7.377229380912321e-06, "loss": 0.7121, "step": 1935 }, { "epoch": 0.5972543575505167, "grad_norm": 0.0, "learning_rate": 7.367585087565046e-06, "loss": 0.7173, "step": 1936 }, { "epoch": 0.597562856702144, "grad_norm": 0.0, "learning_rate": 7.357943424275547e-06, "loss": 0.7273, "step": 1937 }, { "epoch": 0.5978713558537714, "grad_norm": 0.0, "learning_rate": 7.348304400676856e-06, "loss": 0.7294, "step": 1938 }, { "epoch": 0.5981798550053987, "grad_norm": 0.0, "learning_rate": 7.338668026399365e-06, "loss": 0.7416, "step": 1939 }, { "epoch": 0.5984883541570261, "grad_norm": 0.0, "learning_rate": 7.329034311070828e-06, "loss": 0.8225, "step": 1940 }, { "epoch": 0.5987968533086534, "grad_norm": 0.0, "learning_rate": 7.319403264316325e-06, "loss": 0.7122, "step": 1941 }, { "epoch": 0.5991053524602807, "grad_norm": 0.0, "learning_rate": 7.30977489575828e-06, "loss": 0.6605, "step": 1942 }, { "epoch": 0.5994138516119081, "grad_norm": 0.0, "learning_rate": 7.300149215016442e-06, "loss": 0.7746, "step": 1943 }, { "epoch": 0.5997223507635354, "grad_norm": 0.0, "learning_rate": 7.290526231707873e-06, "loss": 0.6528, "step": 1944 }, { "epoch": 0.6000308499151628, "grad_norm": 0.0, "learning_rate": 7.28090595544694e-06, "loss": 0.7022, "step": 1945 }, { "epoch": 0.6003393490667901, "grad_norm": 0.0, "learning_rate": 7.271288395845302e-06, "loss": 0.6744, "step": 1946 }, { "epoch": 0.6006478482184174, "grad_norm": 0.0, "learning_rate": 7.2616735625119085e-06, "loss": 0.6828, "step": 1947 }, { "epoch": 0.6009563473700448, "grad_norm": 0.0, "learning_rate": 7.252061465052984e-06, "loss": 0.7555, "step": 1948 }, { "epoch": 0.6012648465216721, "grad_norm": 0.0, "learning_rate": 7.242452113072022e-06, "loss": 0.7715, "step": 1949 }, { "epoch": 0.6015733456732995, "grad_norm": 0.0, "learning_rate": 7.232845516169764e-06, "loss": 0.7449, "step": 1950 }, { "epoch": 0.6018818448249267, "grad_norm": 0.0, "learning_rate": 7.223241683944204e-06, "loss": 0.7889, "step": 1951 }, { "epoch": 0.602190343976554, "grad_norm": 0.0, "learning_rate": 7.213640625990582e-06, "loss": 0.6666, "step": 1952 }, { "epoch": 0.6024988431281814, "grad_norm": 0.0, "learning_rate": 7.204042351901359e-06, "loss": 1.0587, "step": 1953 }, { "epoch": 0.6028073422798087, "grad_norm": 0.0, "learning_rate": 7.194446871266206e-06, "loss": 0.6755, "step": 1954 }, { "epoch": 0.603115841431436, "grad_norm": 0.0, "learning_rate": 7.184854193672017e-06, "loss": 0.7586, "step": 1955 }, { "epoch": 0.6034243405830634, "grad_norm": 0.0, "learning_rate": 7.175264328702878e-06, "loss": 0.7476, "step": 1956 }, { "epoch": 0.6037328397346907, "grad_norm": 0.0, "learning_rate": 7.165677285940071e-06, "loss": 0.6872, "step": 1957 }, { "epoch": 0.6040413388863181, "grad_norm": 0.0, "learning_rate": 7.156093074962052e-06, "loss": 0.7498, "step": 1958 }, { "epoch": 0.6043498380379454, "grad_norm": 0.0, "learning_rate": 7.1465117053444465e-06, "loss": 0.7997, "step": 1959 }, { "epoch": 0.6046583371895727, "grad_norm": 0.0, "learning_rate": 7.136933186660049e-06, "loss": 0.6762, "step": 1960 }, { "epoch": 0.6049668363412001, "grad_norm": 0.0, "learning_rate": 7.1273575284788e-06, "loss": 0.8435, "step": 1961 }, { "epoch": 0.6052753354928274, "grad_norm": 0.0, "learning_rate": 7.117784740367788e-06, "loss": 0.8126, "step": 1962 }, { "epoch": 0.6055838346444548, "grad_norm": 0.0, "learning_rate": 7.108214831891219e-06, "loss": 0.6898, "step": 1963 }, { "epoch": 0.6058923337960821, "grad_norm": 0.0, "learning_rate": 7.09864781261044e-06, "loss": 0.7271, "step": 1964 }, { "epoch": 0.6062008329477094, "grad_norm": 0.0, "learning_rate": 7.089083692083902e-06, "loss": 0.7512, "step": 1965 }, { "epoch": 0.6065093320993368, "grad_norm": 0.0, "learning_rate": 7.0795224798671666e-06, "loss": 0.8055, "step": 1966 }, { "epoch": 0.6068178312509641, "grad_norm": 0.0, "learning_rate": 7.069964185512874e-06, "loss": 0.6799, "step": 1967 }, { "epoch": 0.6071263304025913, "grad_norm": 0.0, "learning_rate": 7.060408818570768e-06, "loss": 0.7722, "step": 1968 }, { "epoch": 0.6074348295542187, "grad_norm": 0.0, "learning_rate": 7.050856388587655e-06, "loss": 0.7811, "step": 1969 }, { "epoch": 0.607743328705846, "grad_norm": 0.0, "learning_rate": 7.0413069051074146e-06, "loss": 0.6801, "step": 1970 }, { "epoch": 0.6080518278574734, "grad_norm": 0.0, "learning_rate": 7.031760377670978e-06, "loss": 0.7812, "step": 1971 }, { "epoch": 0.6083603270091007, "grad_norm": 0.0, "learning_rate": 7.022216815816323e-06, "loss": 0.7256, "step": 1972 }, { "epoch": 0.608668826160728, "grad_norm": 0.0, "learning_rate": 7.012676229078469e-06, "loss": 0.7172, "step": 1973 }, { "epoch": 0.6089773253123554, "grad_norm": 0.0, "learning_rate": 7.003138626989457e-06, "loss": 1.0376, "step": 1974 }, { "epoch": 0.6092858244639827, "grad_norm": 0.0, "learning_rate": 6.993604019078354e-06, "loss": 0.6692, "step": 1975 }, { "epoch": 0.6095943236156101, "grad_norm": 0.0, "learning_rate": 6.984072414871223e-06, "loss": 0.7331, "step": 1976 }, { "epoch": 0.6099028227672374, "grad_norm": 0.0, "learning_rate": 6.974543823891138e-06, "loss": 0.7152, "step": 1977 }, { "epoch": 0.6102113219188647, "grad_norm": 0.0, "learning_rate": 6.965018255658156e-06, "loss": 0.7579, "step": 1978 }, { "epoch": 0.6105198210704921, "grad_norm": 0.0, "learning_rate": 6.955495719689321e-06, "loss": 0.6, "step": 1979 }, { "epoch": 0.6108283202221194, "grad_norm": 0.0, "learning_rate": 6.9459762254986344e-06, "loss": 0.8325, "step": 1980 }, { "epoch": 0.6111368193737468, "grad_norm": 0.0, "learning_rate": 6.936459782597069e-06, "loss": 0.7452, "step": 1981 }, { "epoch": 0.6114453185253741, "grad_norm": 0.0, "learning_rate": 6.9269464004925466e-06, "loss": 0.6906, "step": 1982 }, { "epoch": 0.6117538176770014, "grad_norm": 0.0, "learning_rate": 6.917436088689935e-06, "loss": 0.7581, "step": 1983 }, { "epoch": 0.6120623168286288, "grad_norm": 0.0, "learning_rate": 6.907928856691024e-06, "loss": 0.6741, "step": 1984 }, { "epoch": 0.612370815980256, "grad_norm": 0.0, "learning_rate": 6.898424713994536e-06, "loss": 0.6596, "step": 1985 }, { "epoch": 0.6126793151318833, "grad_norm": 0.0, "learning_rate": 6.888923670096102e-06, "loss": 0.748, "step": 1986 }, { "epoch": 0.6129878142835107, "grad_norm": 0.0, "learning_rate": 6.879425734488261e-06, "loss": 0.7023, "step": 1987 }, { "epoch": 0.613296313435138, "grad_norm": 0.0, "learning_rate": 6.86993091666044e-06, "loss": 0.7277, "step": 1988 }, { "epoch": 0.6136048125867654, "grad_norm": 0.0, "learning_rate": 6.860439226098956e-06, "loss": 0.8299, "step": 1989 }, { "epoch": 0.6139133117383927, "grad_norm": 0.0, "learning_rate": 6.850950672287003e-06, "loss": 0.8051, "step": 1990 }, { "epoch": 0.61422181089002, "grad_norm": 0.0, "learning_rate": 6.841465264704636e-06, "loss": 0.8194, "step": 1991 }, { "epoch": 0.6145303100416474, "grad_norm": 0.0, "learning_rate": 6.831983012828775e-06, "loss": 0.7889, "step": 1992 }, { "epoch": 0.6148388091932747, "grad_norm": 0.0, "learning_rate": 6.82250392613317e-06, "loss": 0.7253, "step": 1993 }, { "epoch": 0.615147308344902, "grad_norm": 0.0, "learning_rate": 6.8130280140884286e-06, "loss": 0.7615, "step": 1994 }, { "epoch": 0.6154558074965294, "grad_norm": 0.0, "learning_rate": 6.803555286161973e-06, "loss": 0.6758, "step": 1995 }, { "epoch": 0.6157643066481567, "grad_norm": 0.0, "learning_rate": 6.7940857518180555e-06, "loss": 0.8349, "step": 1996 }, { "epoch": 0.6160728057997841, "grad_norm": 0.0, "learning_rate": 6.784619420517724e-06, "loss": 0.7385, "step": 1997 }, { "epoch": 0.6163813049514114, "grad_norm": 0.0, "learning_rate": 6.775156301718837e-06, "loss": 0.6718, "step": 1998 }, { "epoch": 0.6166898041030388, "grad_norm": 0.0, "learning_rate": 6.765696404876039e-06, "loss": 0.7217, "step": 1999 }, { "epoch": 0.6169983032546661, "grad_norm": 0.0, "learning_rate": 6.756239739440758e-06, "loss": 0.8079, "step": 2000 }, { "epoch": 0.6173068024062934, "grad_norm": 0.0, "learning_rate": 6.746786314861189e-06, "loss": 0.703, "step": 2001 }, { "epoch": 0.6176153015579208, "grad_norm": 0.0, "learning_rate": 6.737336140582291e-06, "loss": 0.7275, "step": 2002 }, { "epoch": 0.617923800709548, "grad_norm": 0.0, "learning_rate": 6.72788922604578e-06, "loss": 0.7693, "step": 2003 }, { "epoch": 0.6182322998611753, "grad_norm": 0.0, "learning_rate": 6.718445580690113e-06, "loss": 0.7452, "step": 2004 }, { "epoch": 0.6185407990128027, "grad_norm": 0.0, "learning_rate": 6.709005213950472e-06, "loss": 0.7144, "step": 2005 }, { "epoch": 0.61884929816443, "grad_norm": 0.0, "learning_rate": 6.699568135258774e-06, "loss": 0.6836, "step": 2006 }, { "epoch": 0.6191577973160574, "grad_norm": 0.0, "learning_rate": 6.690134354043649e-06, "loss": 0.7265, "step": 2007 }, { "epoch": 0.6194662964676847, "grad_norm": 0.0, "learning_rate": 6.68070387973043e-06, "loss": 0.6829, "step": 2008 }, { "epoch": 0.619774795619312, "grad_norm": 0.0, "learning_rate": 6.671276721741149e-06, "loss": 0.7093, "step": 2009 }, { "epoch": 0.6200832947709394, "grad_norm": 0.0, "learning_rate": 6.6618528894945175e-06, "loss": 0.7282, "step": 2010 }, { "epoch": 0.6203917939225667, "grad_norm": 0.0, "learning_rate": 6.652432392405934e-06, "loss": 0.7671, "step": 2011 }, { "epoch": 0.620700293074194, "grad_norm": 0.0, "learning_rate": 6.643015239887458e-06, "loss": 0.7393, "step": 2012 }, { "epoch": 0.6210087922258214, "grad_norm": 0.0, "learning_rate": 6.633601441347812e-06, "loss": 0.6363, "step": 2013 }, { "epoch": 0.6213172913774487, "grad_norm": 0.0, "learning_rate": 6.624191006192363e-06, "loss": 0.7655, "step": 2014 }, { "epoch": 0.6216257905290761, "grad_norm": 0.0, "learning_rate": 6.61478394382312e-06, "loss": 0.6773, "step": 2015 }, { "epoch": 0.6219342896807034, "grad_norm": 0.0, "learning_rate": 6.605380263638722e-06, "loss": 0.6704, "step": 2016 }, { "epoch": 0.6222427888323308, "grad_norm": 0.0, "learning_rate": 6.595979975034434e-06, "loss": 0.7343, "step": 2017 }, { "epoch": 0.6225512879839581, "grad_norm": 0.0, "learning_rate": 6.586583087402119e-06, "loss": 0.7653, "step": 2018 }, { "epoch": 0.6228597871355854, "grad_norm": 0.0, "learning_rate": 6.577189610130254e-06, "loss": 0.7775, "step": 2019 }, { "epoch": 0.6231682862872127, "grad_norm": 0.0, "learning_rate": 6.567799552603904e-06, "loss": 0.7751, "step": 2020 }, { "epoch": 0.62347678543884, "grad_norm": 0.0, "learning_rate": 6.558412924204722e-06, "loss": 0.676, "step": 2021 }, { "epoch": 0.6237852845904673, "grad_norm": 0.0, "learning_rate": 6.549029734310928e-06, "loss": 0.6836, "step": 2022 }, { "epoch": 0.6240937837420947, "grad_norm": 0.0, "learning_rate": 6.539649992297311e-06, "loss": 0.7058, "step": 2023 }, { "epoch": 0.624402282893722, "grad_norm": 0.0, "learning_rate": 6.530273707535214e-06, "loss": 0.724, "step": 2024 }, { "epoch": 0.6247107820453494, "grad_norm": 0.0, "learning_rate": 6.520900889392525e-06, "loss": 0.7247, "step": 2025 }, { "epoch": 0.6250192811969767, "grad_norm": 0.0, "learning_rate": 6.511531547233674e-06, "loss": 0.761, "step": 2026 }, { "epoch": 0.625327780348604, "grad_norm": 0.0, "learning_rate": 6.502165690419608e-06, "loss": 0.801, "step": 2027 }, { "epoch": 0.6256362795002314, "grad_norm": 0.0, "learning_rate": 6.492803328307799e-06, "loss": 0.726, "step": 2028 }, { "epoch": 0.6259447786518587, "grad_norm": 0.0, "learning_rate": 6.483444470252227e-06, "loss": 0.7648, "step": 2029 }, { "epoch": 0.626253277803486, "grad_norm": 0.0, "learning_rate": 6.4740891256033736e-06, "loss": 0.7044, "step": 2030 }, { "epoch": 0.6265617769551134, "grad_norm": 0.0, "learning_rate": 6.464737303708197e-06, "loss": 0.7878, "step": 2031 }, { "epoch": 0.6268702761067407, "grad_norm": 0.0, "learning_rate": 6.455389013910151e-06, "loss": 0.6916, "step": 2032 }, { "epoch": 0.6271787752583681, "grad_norm": 0.0, "learning_rate": 6.4460442655491515e-06, "loss": 0.6691, "step": 2033 }, { "epoch": 0.6274872744099954, "grad_norm": 0.0, "learning_rate": 6.436703067961589e-06, "loss": 0.6556, "step": 2034 }, { "epoch": 0.6277957735616227, "grad_norm": 0.0, "learning_rate": 6.4273654304802844e-06, "loss": 0.7585, "step": 2035 }, { "epoch": 0.6281042727132501, "grad_norm": 0.0, "learning_rate": 6.4180313624345205e-06, "loss": 0.7429, "step": 2036 }, { "epoch": 0.6284127718648773, "grad_norm": 0.0, "learning_rate": 6.408700873150005e-06, "loss": 0.6842, "step": 2037 }, { "epoch": 0.6287212710165047, "grad_norm": 0.0, "learning_rate": 6.399373971948877e-06, "loss": 0.7534, "step": 2038 }, { "epoch": 0.629029770168132, "grad_norm": 0.0, "learning_rate": 6.3900506681496786e-06, "loss": 0.7038, "step": 2039 }, { "epoch": 0.6293382693197593, "grad_norm": 0.0, "learning_rate": 6.38073097106737e-06, "loss": 0.6684, "step": 2040 }, { "epoch": 0.6296467684713867, "grad_norm": 0.0, "learning_rate": 6.371414890013304e-06, "loss": 0.6384, "step": 2041 }, { "epoch": 0.629955267623014, "grad_norm": 0.0, "learning_rate": 6.362102434295216e-06, "loss": 0.816, "step": 2042 }, { "epoch": 0.6302637667746414, "grad_norm": 0.0, "learning_rate": 6.352793613217232e-06, "loss": 0.6687, "step": 2043 }, { "epoch": 0.6305722659262687, "grad_norm": 0.0, "learning_rate": 6.3434884360798255e-06, "loss": 0.7027, "step": 2044 }, { "epoch": 0.630880765077896, "grad_norm": 0.0, "learning_rate": 6.334186912179845e-06, "loss": 0.7615, "step": 2045 }, { "epoch": 0.6311892642295234, "grad_norm": 0.0, "learning_rate": 6.3248890508104895e-06, "loss": 0.6306, "step": 2046 }, { "epoch": 0.6314977633811507, "grad_norm": 0.0, "learning_rate": 6.315594861261299e-06, "loss": 0.8275, "step": 2047 }, { "epoch": 0.631806262532778, "grad_norm": 0.0, "learning_rate": 6.3063043528181286e-06, "loss": 0.7133, "step": 2048 }, { "epoch": 0.6321147616844054, "grad_norm": 0.0, "learning_rate": 6.297017534763175e-06, "loss": 0.7637, "step": 2049 }, { "epoch": 0.6324232608360327, "grad_norm": 0.0, "learning_rate": 6.28773441637494e-06, "loss": 1.0219, "step": 2050 }, { "epoch": 0.6327317599876601, "grad_norm": 0.0, "learning_rate": 6.278455006928233e-06, "loss": 0.6952, "step": 2051 }, { "epoch": 0.6330402591392874, "grad_norm": 0.0, "learning_rate": 6.269179315694145e-06, "loss": 0.6483, "step": 2052 }, { "epoch": 0.6333487582909147, "grad_norm": 0.0, "learning_rate": 6.259907351940069e-06, "loss": 0.614, "step": 2053 }, { "epoch": 0.6336572574425421, "grad_norm": 0.0, "learning_rate": 6.250639124929665e-06, "loss": 0.7949, "step": 2054 }, { "epoch": 0.6339657565941693, "grad_norm": 0.0, "learning_rate": 6.241374643922864e-06, "loss": 0.6852, "step": 2055 }, { "epoch": 0.6342742557457967, "grad_norm": 0.0, "learning_rate": 6.232113918175845e-06, "loss": 0.7307, "step": 2056 }, { "epoch": 0.634582754897424, "grad_norm": 0.0, "learning_rate": 6.222856956941041e-06, "loss": 0.7534, "step": 2057 }, { "epoch": 0.6348912540490513, "grad_norm": 0.0, "learning_rate": 6.213603769467132e-06, "loss": 0.6979, "step": 2058 }, { "epoch": 0.6351997532006787, "grad_norm": 0.0, "learning_rate": 6.204354364999014e-06, "loss": 0.7602, "step": 2059 }, { "epoch": 0.635508252352306, "grad_norm": 0.0, "learning_rate": 6.195108752777814e-06, "loss": 0.7576, "step": 2060 }, { "epoch": 0.6358167515039334, "grad_norm": 0.0, "learning_rate": 6.185866942040861e-06, "loss": 0.7792, "step": 2061 }, { "epoch": 0.6361252506555607, "grad_norm": 0.0, "learning_rate": 6.17662894202169e-06, "loss": 0.7114, "step": 2062 }, { "epoch": 0.636433749807188, "grad_norm": 0.0, "learning_rate": 6.167394761950032e-06, "loss": 0.8525, "step": 2063 }, { "epoch": 0.6367422489588154, "grad_norm": 0.0, "learning_rate": 6.158164411051799e-06, "loss": 0.6502, "step": 2064 }, { "epoch": 0.6370507481104427, "grad_norm": 0.0, "learning_rate": 6.148937898549072e-06, "loss": 0.7319, "step": 2065 }, { "epoch": 0.63735924726207, "grad_norm": 0.0, "learning_rate": 6.139715233660106e-06, "loss": 0.7326, "step": 2066 }, { "epoch": 0.6376677464136974, "grad_norm": 0.0, "learning_rate": 6.130496425599308e-06, "loss": 0.7331, "step": 2067 }, { "epoch": 0.6379762455653247, "grad_norm": 0.0, "learning_rate": 6.121281483577233e-06, "loss": 0.7268, "step": 2068 }, { "epoch": 0.6382847447169521, "grad_norm": 0.0, "learning_rate": 6.112070416800562e-06, "loss": 0.7058, "step": 2069 }, { "epoch": 0.6385932438685794, "grad_norm": 0.0, "learning_rate": 6.102863234472124e-06, "loss": 0.6867, "step": 2070 }, { "epoch": 0.6389017430202067, "grad_norm": 0.0, "learning_rate": 6.093659945790853e-06, "loss": 0.7267, "step": 2071 }, { "epoch": 0.639210242171834, "grad_norm": 0.0, "learning_rate": 6.084460559951802e-06, "loss": 0.6444, "step": 2072 }, { "epoch": 0.6395187413234613, "grad_norm": 0.0, "learning_rate": 6.075265086146111e-06, "loss": 1.0321, "step": 2073 }, { "epoch": 0.6398272404750887, "grad_norm": 0.0, "learning_rate": 6.066073533561024e-06, "loss": 0.6793, "step": 2074 }, { "epoch": 0.640135739626716, "grad_norm": 0.0, "learning_rate": 6.056885911379863e-06, "loss": 0.6712, "step": 2075 }, { "epoch": 0.6404442387783433, "grad_norm": 0.0, "learning_rate": 6.047702228782023e-06, "loss": 0.7343, "step": 2076 }, { "epoch": 0.6407527379299707, "grad_norm": 0.0, "learning_rate": 6.0385224949429666e-06, "loss": 0.6725, "step": 2077 }, { "epoch": 0.641061237081598, "grad_norm": 0.0, "learning_rate": 6.029346719034203e-06, "loss": 0.6364, "step": 2078 }, { "epoch": 0.6413697362332254, "grad_norm": 0.0, "learning_rate": 6.020174910223293e-06, "loss": 0.7213, "step": 2079 }, { "epoch": 0.6416782353848527, "grad_norm": 0.0, "learning_rate": 6.011007077673835e-06, "loss": 0.6837, "step": 2080 }, { "epoch": 0.64198673453648, "grad_norm": 0.0, "learning_rate": 6.001843230545452e-06, "loss": 0.669, "step": 2081 }, { "epoch": 0.6422952336881074, "grad_norm": 0.0, "learning_rate": 5.992683377993784e-06, "loss": 0.6777, "step": 2082 }, { "epoch": 0.6426037328397347, "grad_norm": 0.0, "learning_rate": 5.983527529170481e-06, "loss": 0.7283, "step": 2083 }, { "epoch": 0.642912231991362, "grad_norm": 0.0, "learning_rate": 5.9743756932231955e-06, "loss": 0.7848, "step": 2084 }, { "epoch": 0.6432207311429894, "grad_norm": 0.0, "learning_rate": 5.965227879295572e-06, "loss": 0.7034, "step": 2085 }, { "epoch": 0.6435292302946167, "grad_norm": 0.0, "learning_rate": 5.956084096527224e-06, "loss": 0.7339, "step": 2086 }, { "epoch": 0.6438377294462441, "grad_norm": 0.0, "learning_rate": 5.946944354053753e-06, "loss": 0.6728, "step": 2087 }, { "epoch": 0.6441462285978714, "grad_norm": 0.0, "learning_rate": 5.937808661006715e-06, "loss": 0.7308, "step": 2088 }, { "epoch": 0.6444547277494986, "grad_norm": 0.0, "learning_rate": 5.928677026513627e-06, "loss": 0.6657, "step": 2089 }, { "epoch": 0.644763226901126, "grad_norm": 0.0, "learning_rate": 5.919549459697942e-06, "loss": 0.7471, "step": 2090 }, { "epoch": 0.6450717260527533, "grad_norm": 0.0, "learning_rate": 5.910425969679056e-06, "loss": 0.7049, "step": 2091 }, { "epoch": 0.6453802252043807, "grad_norm": 0.0, "learning_rate": 5.901306565572288e-06, "loss": 0.6928, "step": 2092 }, { "epoch": 0.645688724356008, "grad_norm": 0.0, "learning_rate": 5.8921912564888775e-06, "loss": 0.758, "step": 2093 }, { "epoch": 0.6459972235076353, "grad_norm": 0.0, "learning_rate": 5.883080051535974e-06, "loss": 0.6874, "step": 2094 }, { "epoch": 0.6463057226592627, "grad_norm": 0.0, "learning_rate": 5.873972959816619e-06, "loss": 0.7153, "step": 2095 }, { "epoch": 0.64661422181089, "grad_norm": 0.0, "learning_rate": 5.864869990429753e-06, "loss": 0.6867, "step": 2096 }, { "epoch": 0.6469227209625174, "grad_norm": 0.0, "learning_rate": 5.855771152470193e-06, "loss": 0.6989, "step": 2097 }, { "epoch": 0.6472312201141447, "grad_norm": 0.0, "learning_rate": 5.846676455028635e-06, "loss": 0.7185, "step": 2098 }, { "epoch": 0.647539719265772, "grad_norm": 0.0, "learning_rate": 5.83758590719162e-06, "loss": 0.7486, "step": 2099 }, { "epoch": 0.6478482184173994, "grad_norm": 0.0, "learning_rate": 5.8284995180415685e-06, "loss": 0.6952, "step": 2100 }, { "epoch": 0.6481567175690267, "grad_norm": 0.0, "learning_rate": 5.819417296656724e-06, "loss": 0.7227, "step": 2101 }, { "epoch": 0.648465216720654, "grad_norm": 0.0, "learning_rate": 5.81033925211119e-06, "loss": 0.7285, "step": 2102 }, { "epoch": 0.6487737158722814, "grad_norm": 0.0, "learning_rate": 5.8012653934748644e-06, "loss": 0.7128, "step": 2103 }, { "epoch": 0.6490822150239087, "grad_norm": 0.0, "learning_rate": 5.7921957298134865e-06, "loss": 0.6555, "step": 2104 }, { "epoch": 0.6493907141755361, "grad_norm": 0.0, "learning_rate": 5.783130270188607e-06, "loss": 0.7154, "step": 2105 }, { "epoch": 0.6496992133271634, "grad_norm": 0.0, "learning_rate": 5.774069023657558e-06, "loss": 0.7147, "step": 2106 }, { "epoch": 0.6500077124787906, "grad_norm": 0.0, "learning_rate": 5.765011999273484e-06, "loss": 0.7412, "step": 2107 }, { "epoch": 0.650316211630418, "grad_norm": 0.0, "learning_rate": 5.755959206085285e-06, "loss": 0.7176, "step": 2108 }, { "epoch": 0.6506247107820453, "grad_norm": 0.0, "learning_rate": 5.746910653137659e-06, "loss": 0.6684, "step": 2109 }, { "epoch": 0.6509332099336727, "grad_norm": 0.0, "learning_rate": 5.73786634947105e-06, "loss": 0.717, "step": 2110 }, { "epoch": 0.6512417090853, "grad_norm": 0.0, "learning_rate": 5.7288263041216685e-06, "loss": 0.7727, "step": 2111 }, { "epoch": 0.6515502082369273, "grad_norm": 0.0, "learning_rate": 5.719790526121462e-06, "loss": 0.7078, "step": 2112 }, { "epoch": 0.6518587073885547, "grad_norm": 0.0, "learning_rate": 5.7107590244981156e-06, "loss": 0.7385, "step": 2113 }, { "epoch": 0.652167206540182, "grad_norm": 0.0, "learning_rate": 5.70173180827505e-06, "loss": 0.7426, "step": 2114 }, { "epoch": 0.6524757056918093, "grad_norm": 0.0, "learning_rate": 5.692708886471395e-06, "loss": 0.6916, "step": 2115 }, { "epoch": 0.6527842048434367, "grad_norm": 0.0, "learning_rate": 5.683690268101989e-06, "loss": 0.6724, "step": 2116 }, { "epoch": 0.653092703995064, "grad_norm": 0.0, "learning_rate": 5.674675962177383e-06, "loss": 0.6995, "step": 2117 }, { "epoch": 0.6534012031466914, "grad_norm": 0.0, "learning_rate": 5.665665977703803e-06, "loss": 0.6714, "step": 2118 }, { "epoch": 0.6537097022983187, "grad_norm": 0.0, "learning_rate": 5.656660323683177e-06, "loss": 0.6629, "step": 2119 }, { "epoch": 0.654018201449946, "grad_norm": 0.0, "learning_rate": 5.647659009113079e-06, "loss": 0.6691, "step": 2120 }, { "epoch": 0.6543267006015734, "grad_norm": 0.0, "learning_rate": 5.638662042986777e-06, "loss": 0.6801, "step": 2121 }, { "epoch": 0.6546351997532007, "grad_norm": 0.0, "learning_rate": 5.629669434293172e-06, "loss": 0.746, "step": 2122 }, { "epoch": 0.6549436989048281, "grad_norm": 0.0, "learning_rate": 5.62068119201683e-06, "loss": 0.7055, "step": 2123 }, { "epoch": 0.6552521980564553, "grad_norm": 0.0, "learning_rate": 5.611697325137939e-06, "loss": 0.7346, "step": 2124 }, { "epoch": 0.6555606972080826, "grad_norm": 0.0, "learning_rate": 5.602717842632319e-06, "loss": 0.6536, "step": 2125 }, { "epoch": 0.65586919635971, "grad_norm": 0.0, "learning_rate": 5.5937427534714195e-06, "loss": 0.6763, "step": 2126 }, { "epoch": 0.6561776955113373, "grad_norm": 0.0, "learning_rate": 5.584772066622284e-06, "loss": 0.7353, "step": 2127 }, { "epoch": 0.6564861946629647, "grad_norm": 0.0, "learning_rate": 5.575805791047577e-06, "loss": 0.7712, "step": 2128 }, { "epoch": 0.656794693814592, "grad_norm": 0.0, "learning_rate": 5.566843935705539e-06, "loss": 0.754, "step": 2129 }, { "epoch": 0.6571031929662193, "grad_norm": 0.0, "learning_rate": 5.557886509549998e-06, "loss": 0.7469, "step": 2130 }, { "epoch": 0.6574116921178467, "grad_norm": 0.0, "learning_rate": 5.5489335215303674e-06, "loss": 0.6798, "step": 2131 }, { "epoch": 0.657720191269474, "grad_norm": 0.0, "learning_rate": 5.539984980591615e-06, "loss": 0.6893, "step": 2132 }, { "epoch": 0.6580286904211013, "grad_norm": 0.0, "learning_rate": 5.531040895674267e-06, "loss": 0.7365, "step": 2133 }, { "epoch": 0.6583371895727287, "grad_norm": 0.0, "learning_rate": 5.5221012757143974e-06, "loss": 0.6681, "step": 2134 }, { "epoch": 0.658645688724356, "grad_norm": 0.0, "learning_rate": 5.51316612964363e-06, "loss": 0.7298, "step": 2135 }, { "epoch": 0.6589541878759834, "grad_norm": 0.0, "learning_rate": 5.504235466389103e-06, "loss": 0.6818, "step": 2136 }, { "epoch": 0.6592626870276107, "grad_norm": 0.0, "learning_rate": 5.495309294873483e-06, "loss": 0.7411, "step": 2137 }, { "epoch": 0.659571186179238, "grad_norm": 0.0, "learning_rate": 5.486387624014952e-06, "loss": 0.734, "step": 2138 }, { "epoch": 0.6598796853308654, "grad_norm": 0.0, "learning_rate": 5.47747046272719e-06, "loss": 0.7049, "step": 2139 }, { "epoch": 0.6601881844824927, "grad_norm": 0.0, "learning_rate": 5.468557819919378e-06, "loss": 0.7069, "step": 2140 }, { "epoch": 0.66049668363412, "grad_norm": 0.0, "learning_rate": 5.4596497044961725e-06, "loss": 0.7255, "step": 2141 }, { "epoch": 0.6608051827857473, "grad_norm": 0.0, "learning_rate": 5.450746125357712e-06, "loss": 0.7469, "step": 2142 }, { "epoch": 0.6611136819373746, "grad_norm": 0.0, "learning_rate": 5.44184709139961e-06, "loss": 0.7603, "step": 2143 }, { "epoch": 0.661422181089002, "grad_norm": 0.0, "learning_rate": 5.432952611512923e-06, "loss": 0.8311, "step": 2144 }, { "epoch": 0.6617306802406293, "grad_norm": 0.0, "learning_rate": 5.424062694584179e-06, "loss": 0.6956, "step": 2145 }, { "epoch": 0.6620391793922566, "grad_norm": 0.0, "learning_rate": 5.41517734949532e-06, "loss": 0.7034, "step": 2146 }, { "epoch": 0.662347678543884, "grad_norm": 0.0, "learning_rate": 5.406296585123745e-06, "loss": 0.7404, "step": 2147 }, { "epoch": 0.6626561776955113, "grad_norm": 0.0, "learning_rate": 5.397420410342259e-06, "loss": 0.7452, "step": 2148 }, { "epoch": 0.6629646768471387, "grad_norm": 0.0, "learning_rate": 5.388548834019097e-06, "loss": 0.629, "step": 2149 }, { "epoch": 0.663273175998766, "grad_norm": 0.0, "learning_rate": 5.379681865017887e-06, "loss": 0.7724, "step": 2150 }, { "epoch": 0.6635816751503933, "grad_norm": 0.0, "learning_rate": 5.370819512197656e-06, "loss": 0.7365, "step": 2151 }, { "epoch": 0.6638901743020207, "grad_norm": 0.0, "learning_rate": 5.361961784412828e-06, "loss": 0.6681, "step": 2152 }, { "epoch": 0.664198673453648, "grad_norm": 0.0, "learning_rate": 5.353108690513193e-06, "loss": 0.7178, "step": 2153 }, { "epoch": 0.6645071726052754, "grad_norm": 0.0, "learning_rate": 5.344260239343919e-06, "loss": 0.7258, "step": 2154 }, { "epoch": 0.6648156717569027, "grad_norm": 0.0, "learning_rate": 5.335416439745538e-06, "loss": 0.7827, "step": 2155 }, { "epoch": 0.66512417090853, "grad_norm": 0.0, "learning_rate": 5.326577300553923e-06, "loss": 0.757, "step": 2156 }, { "epoch": 0.6654326700601574, "grad_norm": 0.0, "learning_rate": 5.317742830600306e-06, "loss": 0.824, "step": 2157 }, { "epoch": 0.6657411692117847, "grad_norm": 0.0, "learning_rate": 5.308913038711245e-06, "loss": 0.7818, "step": 2158 }, { "epoch": 0.666049668363412, "grad_norm": 0.0, "learning_rate": 5.300087933708624e-06, "loss": 0.7217, "step": 2159 }, { "epoch": 0.6663581675150393, "grad_norm": 0.0, "learning_rate": 5.29126752440964e-06, "loss": 0.7242, "step": 2160 }, { "epoch": 0.6666666666666666, "grad_norm": 0.0, "learning_rate": 5.282451819626815e-06, "loss": 0.7298, "step": 2161 }, { "epoch": 0.666975165818294, "grad_norm": 0.0, "learning_rate": 5.273640828167954e-06, "loss": 0.7062, "step": 2162 }, { "epoch": 0.6672836649699213, "grad_norm": 0.0, "learning_rate": 5.264834558836156e-06, "loss": 0.7027, "step": 2163 }, { "epoch": 0.6675921641215486, "grad_norm": 0.0, "learning_rate": 5.256033020429813e-06, "loss": 0.7512, "step": 2164 }, { "epoch": 0.667900663273176, "grad_norm": 0.0, "learning_rate": 5.247236221742575e-06, "loss": 0.7286, "step": 2165 }, { "epoch": 0.6682091624248033, "grad_norm": 0.0, "learning_rate": 5.238444171563368e-06, "loss": 0.7256, "step": 2166 }, { "epoch": 0.6685176615764307, "grad_norm": 0.0, "learning_rate": 5.22965687867637e-06, "loss": 0.744, "step": 2167 }, { "epoch": 0.668826160728058, "grad_norm": 0.0, "learning_rate": 5.220874351861001e-06, "loss": 0.7358, "step": 2168 }, { "epoch": 0.6691346598796853, "grad_norm": 0.0, "learning_rate": 5.212096599891927e-06, "loss": 0.7285, "step": 2169 }, { "epoch": 0.6694431590313127, "grad_norm": 0.0, "learning_rate": 5.203323631539042e-06, "loss": 0.6504, "step": 2170 }, { "epoch": 0.66975165818294, "grad_norm": 0.0, "learning_rate": 5.194555455567456e-06, "loss": 0.8199, "step": 2171 }, { "epoch": 0.6700601573345674, "grad_norm": 0.0, "learning_rate": 5.185792080737491e-06, "loss": 0.7196, "step": 2172 }, { "epoch": 0.6703686564861947, "grad_norm": 0.0, "learning_rate": 5.177033515804682e-06, "loss": 0.751, "step": 2173 }, { "epoch": 0.670677155637822, "grad_norm": 0.0, "learning_rate": 5.168279769519742e-06, "loss": 0.771, "step": 2174 }, { "epoch": 0.6709856547894494, "grad_norm": 0.0, "learning_rate": 5.159530850628589e-06, "loss": 0.7746, "step": 2175 }, { "epoch": 0.6712941539410766, "grad_norm": 0.0, "learning_rate": 5.150786767872302e-06, "loss": 0.7745, "step": 2176 }, { "epoch": 0.671602653092704, "grad_norm": 0.0, "learning_rate": 5.142047529987133e-06, "loss": 0.6888, "step": 2177 }, { "epoch": 0.6719111522443313, "grad_norm": 0.0, "learning_rate": 5.1333131457044995e-06, "loss": 0.7485, "step": 2178 }, { "epoch": 0.6722196513959586, "grad_norm": 0.0, "learning_rate": 5.124583623750963e-06, "loss": 0.7505, "step": 2179 }, { "epoch": 0.672528150547586, "grad_norm": 0.0, "learning_rate": 5.115858972848224e-06, "loss": 0.6436, "step": 2180 }, { "epoch": 0.6728366496992133, "grad_norm": 0.0, "learning_rate": 5.107139201713128e-06, "loss": 0.7271, "step": 2181 }, { "epoch": 0.6731451488508406, "grad_norm": 0.0, "learning_rate": 5.098424319057632e-06, "loss": 0.7306, "step": 2182 }, { "epoch": 0.673453648002468, "grad_norm": 0.0, "learning_rate": 5.089714333588827e-06, "loss": 1.0136, "step": 2183 }, { "epoch": 0.6737621471540953, "grad_norm": 0.0, "learning_rate": 5.081009254008882e-06, "loss": 0.7328, "step": 2184 }, { "epoch": 0.6740706463057227, "grad_norm": 0.0, "learning_rate": 5.072309089015092e-06, "loss": 0.7289, "step": 2185 }, { "epoch": 0.67437914545735, "grad_norm": 0.0, "learning_rate": 5.063613847299831e-06, "loss": 0.6871, "step": 2186 }, { "epoch": 0.6746876446089773, "grad_norm": 0.0, "learning_rate": 5.054923537550554e-06, "loss": 0.6143, "step": 2187 }, { "epoch": 0.6749961437606047, "grad_norm": 0.0, "learning_rate": 5.046238168449791e-06, "loss": 0.6871, "step": 2188 }, { "epoch": 0.675304642912232, "grad_norm": 0.0, "learning_rate": 5.037557748675128e-06, "loss": 0.6316, "step": 2189 }, { "epoch": 0.6756131420638594, "grad_norm": 0.0, "learning_rate": 5.028882286899219e-06, "loss": 0.7132, "step": 2190 }, { "epoch": 0.6759216412154867, "grad_norm": 0.0, "learning_rate": 5.020211791789753e-06, "loss": 0.6796, "step": 2191 }, { "epoch": 0.676230140367114, "grad_norm": 0.0, "learning_rate": 5.011546272009464e-06, "loss": 0.7367, "step": 2192 }, { "epoch": 0.6765386395187413, "grad_norm": 0.0, "learning_rate": 5.0028857362161144e-06, "loss": 0.7244, "step": 2193 }, { "epoch": 0.6768471386703686, "grad_norm": 0.0, "learning_rate": 4.994230193062477e-06, "loss": 0.6804, "step": 2194 }, { "epoch": 0.677155637821996, "grad_norm": 0.0, "learning_rate": 4.985579651196354e-06, "loss": 0.7504, "step": 2195 }, { "epoch": 0.6774641369736233, "grad_norm": 0.0, "learning_rate": 4.976934119260537e-06, "loss": 0.6989, "step": 2196 }, { "epoch": 0.6777726361252506, "grad_norm": 0.0, "learning_rate": 4.968293605892817e-06, "loss": 0.6624, "step": 2197 }, { "epoch": 0.678081135276878, "grad_norm": 0.0, "learning_rate": 4.959658119725965e-06, "loss": 0.7303, "step": 2198 }, { "epoch": 0.6783896344285053, "grad_norm": 0.0, "learning_rate": 4.951027669387741e-06, "loss": 0.7036, "step": 2199 }, { "epoch": 0.6786981335801326, "grad_norm": 0.0, "learning_rate": 4.942402263500874e-06, "loss": 0.7572, "step": 2200 }, { "epoch": 0.67900663273176, "grad_norm": 0.0, "learning_rate": 4.933781910683031e-06, "loss": 0.7067, "step": 2201 }, { "epoch": 0.6793151318833873, "grad_norm": 0.0, "learning_rate": 4.925166619546857e-06, "loss": 0.8102, "step": 2202 }, { "epoch": 0.6796236310350147, "grad_norm": 0.0, "learning_rate": 4.916556398699922e-06, "loss": 0.7812, "step": 2203 }, { "epoch": 0.679932130186642, "grad_norm": 0.0, "learning_rate": 4.907951256744744e-06, "loss": 0.7577, "step": 2204 }, { "epoch": 0.6802406293382693, "grad_norm": 0.0, "learning_rate": 4.899351202278756e-06, "loss": 0.7994, "step": 2205 }, { "epoch": 0.6805491284898967, "grad_norm": 0.0, "learning_rate": 4.890756243894308e-06, "loss": 0.698, "step": 2206 }, { "epoch": 0.680857627641524, "grad_norm": 0.0, "learning_rate": 4.88216639017867e-06, "loss": 0.7232, "step": 2207 }, { "epoch": 0.6811661267931514, "grad_norm": 0.0, "learning_rate": 4.873581649713996e-06, "loss": 0.6626, "step": 2208 }, { "epoch": 0.6814746259447787, "grad_norm": 0.0, "learning_rate": 4.865002031077353e-06, "loss": 0.774, "step": 2209 }, { "epoch": 0.681783125096406, "grad_norm": 0.0, "learning_rate": 4.856427542840658e-06, "loss": 0.7072, "step": 2210 }, { "epoch": 0.6820916242480333, "grad_norm": 0.0, "learning_rate": 4.847858193570733e-06, "loss": 0.7224, "step": 2211 }, { "epoch": 0.6824001233996606, "grad_norm": 0.0, "learning_rate": 4.839293991829256e-06, "loss": 0.733, "step": 2212 }, { "epoch": 0.682708622551288, "grad_norm": 0.0, "learning_rate": 4.830734946172756e-06, "loss": 0.6981, "step": 2213 }, { "epoch": 0.6830171217029153, "grad_norm": 0.0, "learning_rate": 4.8221810651526154e-06, "loss": 0.7701, "step": 2214 }, { "epoch": 0.6833256208545426, "grad_norm": 0.0, "learning_rate": 4.8136323573150525e-06, "loss": 0.7602, "step": 2215 }, { "epoch": 0.68363412000617, "grad_norm": 0.0, "learning_rate": 4.805088831201127e-06, "loss": 0.7599, "step": 2216 }, { "epoch": 0.6839426191577973, "grad_norm": 0.0, "learning_rate": 4.796550495346711e-06, "loss": 0.744, "step": 2217 }, { "epoch": 0.6842511183094246, "grad_norm": 0.0, "learning_rate": 4.788017358282492e-06, "loss": 0.7434, "step": 2218 }, { "epoch": 0.684559617461052, "grad_norm": 0.0, "learning_rate": 4.779489428533973e-06, "loss": 0.748, "step": 2219 }, { "epoch": 0.6848681166126793, "grad_norm": 0.0, "learning_rate": 4.770966714621441e-06, "loss": 0.7359, "step": 2220 }, { "epoch": 0.6851766157643067, "grad_norm": 0.0, "learning_rate": 4.762449225059985e-06, "loss": 0.7254, "step": 2221 }, { "epoch": 0.685485114915934, "grad_norm": 0.0, "learning_rate": 4.753936968359465e-06, "loss": 0.7747, "step": 2222 }, { "epoch": 0.6857936140675613, "grad_norm": 0.0, "learning_rate": 4.745429953024511e-06, "loss": 0.7263, "step": 2223 }, { "epoch": 0.6861021132191887, "grad_norm": 0.0, "learning_rate": 4.736928187554529e-06, "loss": 0.7798, "step": 2224 }, { "epoch": 0.686410612370816, "grad_norm": 0.0, "learning_rate": 4.728431680443663e-06, "loss": 0.7363, "step": 2225 }, { "epoch": 0.6867191115224434, "grad_norm": 0.0, "learning_rate": 4.719940440180827e-06, "loss": 0.8307, "step": 2226 }, { "epoch": 0.6870276106740707, "grad_norm": 0.0, "learning_rate": 4.711454475249638e-06, "loss": 0.7031, "step": 2227 }, { "epoch": 0.6873361098256979, "grad_norm": 0.0, "learning_rate": 4.702973794128477e-06, "loss": 0.7277, "step": 2228 }, { "epoch": 0.6876446089773253, "grad_norm": 0.0, "learning_rate": 4.694498405290423e-06, "loss": 0.6801, "step": 2229 }, { "epoch": 0.6879531081289526, "grad_norm": 0.0, "learning_rate": 4.686028317203283e-06, "loss": 0.6709, "step": 2230 }, { "epoch": 0.68826160728058, "grad_norm": 0.0, "learning_rate": 4.6775635383295555e-06, "loss": 0.8133, "step": 2231 }, { "epoch": 0.6885701064322073, "grad_norm": 0.0, "learning_rate": 4.669104077126439e-06, "loss": 0.6551, "step": 2232 }, { "epoch": 0.6888786055838346, "grad_norm": 0.0, "learning_rate": 4.660649942045826e-06, "loss": 0.7909, "step": 2233 }, { "epoch": 0.689187104735462, "grad_norm": 0.0, "learning_rate": 4.652201141534279e-06, "loss": 0.6711, "step": 2234 }, { "epoch": 0.6894956038870893, "grad_norm": 0.0, "learning_rate": 4.643757684033026e-06, "loss": 0.7137, "step": 2235 }, { "epoch": 0.6898041030387166, "grad_norm": 0.0, "learning_rate": 4.635319577977975e-06, "loss": 0.615, "step": 2236 }, { "epoch": 0.690112602190344, "grad_norm": 0.0, "learning_rate": 4.626886831799668e-06, "loss": 0.6953, "step": 2237 }, { "epoch": 0.6904211013419713, "grad_norm": 0.0, "learning_rate": 4.618459453923307e-06, "loss": 0.7033, "step": 2238 }, { "epoch": 0.6907296004935987, "grad_norm": 0.0, "learning_rate": 4.6100374527687195e-06, "loss": 0.7572, "step": 2239 }, { "epoch": 0.691038099645226, "grad_norm": 0.0, "learning_rate": 4.601620836750367e-06, "loss": 0.7321, "step": 2240 }, { "epoch": 0.6913465987968533, "grad_norm": 0.0, "learning_rate": 4.593209614277325e-06, "loss": 0.6992, "step": 2241 }, { "epoch": 0.6916550979484807, "grad_norm": 0.0, "learning_rate": 4.58480379375329e-06, "loss": 1.0177, "step": 2242 }, { "epoch": 0.691963597100108, "grad_norm": 0.0, "learning_rate": 4.576403383576555e-06, "loss": 0.6625, "step": 2243 }, { "epoch": 0.6922720962517354, "grad_norm": 0.0, "learning_rate": 4.568008392140003e-06, "loss": 0.7334, "step": 2244 }, { "epoch": 0.6925805954033626, "grad_norm": 0.0, "learning_rate": 4.559618827831116e-06, "loss": 0.6984, "step": 2245 }, { "epoch": 0.6928890945549899, "grad_norm": 0.0, "learning_rate": 4.551234699031938e-06, "loss": 0.6868, "step": 2246 }, { "epoch": 0.6931975937066173, "grad_norm": 0.0, "learning_rate": 4.542856014119098e-06, "loss": 0.7404, "step": 2247 }, { "epoch": 0.6935060928582446, "grad_norm": 0.0, "learning_rate": 4.534482781463775e-06, "loss": 0.8335, "step": 2248 }, { "epoch": 0.6938145920098719, "grad_norm": 0.0, "learning_rate": 4.5261150094317e-06, "loss": 0.7299, "step": 2249 }, { "epoch": 0.6941230911614993, "grad_norm": 0.0, "learning_rate": 4.517752706383159e-06, "loss": 0.6716, "step": 2250 }, { "epoch": 0.6944315903131266, "grad_norm": 0.0, "learning_rate": 4.509395880672967e-06, "loss": 0.6818, "step": 2251 }, { "epoch": 0.694740089464754, "grad_norm": 0.0, "learning_rate": 4.501044540650464e-06, "loss": 0.7939, "step": 2252 }, { "epoch": 0.6950485886163813, "grad_norm": 0.0, "learning_rate": 4.4926986946595065e-06, "loss": 0.7085, "step": 2253 }, { "epoch": 0.6953570877680086, "grad_norm": 0.0, "learning_rate": 4.484358351038478e-06, "loss": 0.8178, "step": 2254 }, { "epoch": 0.695665586919636, "grad_norm": 0.0, "learning_rate": 4.4760235181202465e-06, "loss": 0.7343, "step": 2255 }, { "epoch": 0.6959740860712633, "grad_norm": 0.0, "learning_rate": 4.467694204232187e-06, "loss": 0.7586, "step": 2256 }, { "epoch": 0.6962825852228907, "grad_norm": 0.0, "learning_rate": 4.459370417696152e-06, "loss": 0.8157, "step": 2257 }, { "epoch": 0.696591084374518, "grad_norm": 0.0, "learning_rate": 4.4510521668284736e-06, "loss": 0.6318, "step": 2258 }, { "epoch": 0.6968995835261453, "grad_norm": 0.0, "learning_rate": 4.4427394599399575e-06, "loss": 0.7224, "step": 2259 }, { "epoch": 0.6972080826777727, "grad_norm": 0.0, "learning_rate": 4.434432305335866e-06, "loss": 0.6759, "step": 2260 }, { "epoch": 0.6975165818294, "grad_norm": 0.0, "learning_rate": 4.426130711315913e-06, "loss": 0.7719, "step": 2261 }, { "epoch": 0.6978250809810274, "grad_norm": 0.0, "learning_rate": 4.417834686174263e-06, "loss": 0.6957, "step": 2262 }, { "epoch": 0.6981335801326546, "grad_norm": 0.0, "learning_rate": 4.4095442381995055e-06, "loss": 0.7719, "step": 2263 }, { "epoch": 0.6984420792842819, "grad_norm": 0.0, "learning_rate": 4.401259375674679e-06, "loss": 0.6877, "step": 2264 }, { "epoch": 0.6987505784359093, "grad_norm": 0.0, "learning_rate": 4.392980106877212e-06, "loss": 0.7441, "step": 2265 }, { "epoch": 0.6990590775875366, "grad_norm": 0.0, "learning_rate": 4.384706440078968e-06, "loss": 0.7662, "step": 2266 }, { "epoch": 0.6993675767391639, "grad_norm": 0.0, "learning_rate": 4.376438383546202e-06, "loss": 0.7548, "step": 2267 }, { "epoch": 0.6996760758907913, "grad_norm": 0.0, "learning_rate": 4.368175945539572e-06, "loss": 0.6793, "step": 2268 }, { "epoch": 0.6999845750424186, "grad_norm": 0.0, "learning_rate": 4.359919134314113e-06, "loss": 0.6686, "step": 2269 }, { "epoch": 0.700293074194046, "grad_norm": 0.0, "learning_rate": 4.351667958119242e-06, "loss": 0.6941, "step": 2270 }, { "epoch": 0.7006015733456733, "grad_norm": 0.0, "learning_rate": 4.343422425198753e-06, "loss": 0.7611, "step": 2271 }, { "epoch": 0.7009100724973006, "grad_norm": 0.0, "learning_rate": 4.335182543790788e-06, "loss": 0.8059, "step": 2272 }, { "epoch": 0.701218571648928, "grad_norm": 0.0, "learning_rate": 4.326948322127858e-06, "loss": 0.6992, "step": 2273 }, { "epoch": 0.7015270708005553, "grad_norm": 0.0, "learning_rate": 4.318719768436808e-06, "loss": 0.7047, "step": 2274 }, { "epoch": 0.7018355699521827, "grad_norm": 0.0, "learning_rate": 4.3104968909388174e-06, "loss": 0.7672, "step": 2275 }, { "epoch": 0.70214406910381, "grad_norm": 0.0, "learning_rate": 4.302279697849412e-06, "loss": 0.7012, "step": 2276 }, { "epoch": 0.7024525682554373, "grad_norm": 0.0, "learning_rate": 4.29406819737842e-06, "loss": 0.6696, "step": 2277 }, { "epoch": 0.7027610674070647, "grad_norm": 0.0, "learning_rate": 4.285862397729993e-06, "loss": 0.7496, "step": 2278 }, { "epoch": 0.703069566558692, "grad_norm": 0.0, "learning_rate": 4.277662307102574e-06, "loss": 0.7151, "step": 2279 }, { "epoch": 0.7033780657103192, "grad_norm": 0.0, "learning_rate": 4.26946793368892e-06, "loss": 0.6707, "step": 2280 }, { "epoch": 0.7036865648619466, "grad_norm": 0.0, "learning_rate": 4.261279285676071e-06, "loss": 0.6088, "step": 2281 }, { "epoch": 0.7039950640135739, "grad_norm": 0.0, "learning_rate": 4.253096371245329e-06, "loss": 0.6599, "step": 2282 }, { "epoch": 0.7043035631652013, "grad_norm": 0.0, "learning_rate": 4.244919198572293e-06, "loss": 0.7794, "step": 2283 }, { "epoch": 0.7046120623168286, "grad_norm": 0.0, "learning_rate": 4.236747775826804e-06, "loss": 0.6954, "step": 2284 }, { "epoch": 0.7049205614684559, "grad_norm": 0.0, "learning_rate": 4.228582111172977e-06, "loss": 0.6958, "step": 2285 }, { "epoch": 0.7052290606200833, "grad_norm": 0.0, "learning_rate": 4.220422212769161e-06, "loss": 0.6756, "step": 2286 }, { "epoch": 0.7055375597717106, "grad_norm": 0.0, "learning_rate": 4.212268088767944e-06, "loss": 0.9999, "step": 2287 }, { "epoch": 0.705846058923338, "grad_norm": 0.0, "learning_rate": 4.204119747316157e-06, "loss": 0.759, "step": 2288 }, { "epoch": 0.7061545580749653, "grad_norm": 0.0, "learning_rate": 4.195977196554835e-06, "loss": 0.7907, "step": 2289 }, { "epoch": 0.7064630572265926, "grad_norm": 0.0, "learning_rate": 4.187840444619251e-06, "loss": 0.7364, "step": 2290 }, { "epoch": 0.70677155637822, "grad_norm": 0.0, "learning_rate": 4.179709499638857e-06, "loss": 0.7315, "step": 2291 }, { "epoch": 0.7070800555298473, "grad_norm": 0.0, "learning_rate": 4.171584369737322e-06, "loss": 0.7142, "step": 2292 }, { "epoch": 0.7073885546814747, "grad_norm": 0.0, "learning_rate": 4.163465063032507e-06, "loss": 0.7166, "step": 2293 }, { "epoch": 0.707697053833102, "grad_norm": 0.0, "learning_rate": 4.1553515876364435e-06, "loss": 0.6211, "step": 2294 }, { "epoch": 0.7080055529847293, "grad_norm": 0.0, "learning_rate": 4.147243951655341e-06, "loss": 0.7314, "step": 2295 }, { "epoch": 0.7083140521363567, "grad_norm": 0.0, "learning_rate": 4.139142163189573e-06, "loss": 0.7196, "step": 2296 }, { "epoch": 0.7086225512879839, "grad_norm": 0.0, "learning_rate": 4.131046230333682e-06, "loss": 0.7261, "step": 2297 }, { "epoch": 0.7089310504396112, "grad_norm": 0.0, "learning_rate": 4.1229561611763445e-06, "loss": 0.7069, "step": 2298 }, { "epoch": 0.7092395495912386, "grad_norm": 0.0, "learning_rate": 4.114871963800385e-06, "loss": 0.6299, "step": 2299 }, { "epoch": 0.7095480487428659, "grad_norm": 0.0, "learning_rate": 4.106793646282769e-06, "loss": 0.7613, "step": 2300 }, { "epoch": 0.7098565478944933, "grad_norm": 0.0, "learning_rate": 4.098721216694572e-06, "loss": 0.7339, "step": 2301 }, { "epoch": 0.7101650470461206, "grad_norm": 0.0, "learning_rate": 4.090654683101007e-06, "loss": 0.7247, "step": 2302 }, { "epoch": 0.7104735461977479, "grad_norm": 0.0, "learning_rate": 4.082594053561369e-06, "loss": 0.664, "step": 2303 }, { "epoch": 0.7107820453493753, "grad_norm": 0.0, "learning_rate": 4.074539336129079e-06, "loss": 0.7243, "step": 2304 }, { "epoch": 0.7110905445010026, "grad_norm": 0.0, "learning_rate": 4.066490538851644e-06, "loss": 0.6925, "step": 2305 }, { "epoch": 0.71139904365263, "grad_norm": 0.0, "learning_rate": 4.0584476697706475e-06, "loss": 0.7386, "step": 2306 }, { "epoch": 0.7117075428042573, "grad_norm": 0.0, "learning_rate": 4.0504107369217686e-06, "loss": 0.6881, "step": 2307 }, { "epoch": 0.7120160419558846, "grad_norm": 0.0, "learning_rate": 4.042379748334727e-06, "loss": 0.719, "step": 2308 }, { "epoch": 0.712324541107512, "grad_norm": 0.0, "learning_rate": 4.034354712033332e-06, "loss": 0.7329, "step": 2309 }, { "epoch": 0.7126330402591393, "grad_norm": 0.0, "learning_rate": 4.026335636035429e-06, "loss": 0.7455, "step": 2310 }, { "epoch": 0.7129415394107667, "grad_norm": 0.0, "learning_rate": 4.018322528352917e-06, "loss": 0.7113, "step": 2311 }, { "epoch": 0.713250038562394, "grad_norm": 0.0, "learning_rate": 4.010315396991727e-06, "loss": 0.6837, "step": 2312 }, { "epoch": 0.7135585377140213, "grad_norm": 0.0, "learning_rate": 4.002314249951819e-06, "loss": 0.6868, "step": 2313 }, { "epoch": 0.7138670368656487, "grad_norm": 0.0, "learning_rate": 3.994319095227178e-06, "loss": 0.7261, "step": 2314 }, { "epoch": 0.7141755360172759, "grad_norm": 0.0, "learning_rate": 3.986329940805799e-06, "loss": 0.7157, "step": 2315 }, { "epoch": 0.7144840351689032, "grad_norm": 0.0, "learning_rate": 3.978346794669679e-06, "loss": 0.741, "step": 2316 }, { "epoch": 0.7147925343205306, "grad_norm": 0.0, "learning_rate": 3.970369664794823e-06, "loss": 0.7708, "step": 2317 }, { "epoch": 0.7151010334721579, "grad_norm": 0.0, "learning_rate": 3.9623985591512105e-06, "loss": 0.7344, "step": 2318 }, { "epoch": 0.7154095326237853, "grad_norm": 0.0, "learning_rate": 3.95443348570282e-06, "loss": 0.7342, "step": 2319 }, { "epoch": 0.7157180317754126, "grad_norm": 0.0, "learning_rate": 3.946474452407579e-06, "loss": 0.6732, "step": 2320 }, { "epoch": 0.7160265309270399, "grad_norm": 0.0, "learning_rate": 3.938521467217405e-06, "loss": 0.6827, "step": 2321 }, { "epoch": 0.7163350300786673, "grad_norm": 0.0, "learning_rate": 3.930574538078155e-06, "loss": 0.7727, "step": 2322 }, { "epoch": 0.7166435292302946, "grad_norm": 0.0, "learning_rate": 3.922633672929648e-06, "loss": 0.7735, "step": 2323 }, { "epoch": 0.716952028381922, "grad_norm": 0.0, "learning_rate": 3.914698879705635e-06, "loss": 0.7188, "step": 2324 }, { "epoch": 0.7172605275335493, "grad_norm": 0.0, "learning_rate": 3.906770166333802e-06, "loss": 0.644, "step": 2325 }, { "epoch": 0.7175690266851766, "grad_norm": 0.0, "learning_rate": 3.898847540735771e-06, "loss": 0.6054, "step": 2326 }, { "epoch": 0.717877525836804, "grad_norm": 0.0, "learning_rate": 3.890931010827062e-06, "loss": 0.6724, "step": 2327 }, { "epoch": 0.7181860249884313, "grad_norm": 0.0, "learning_rate": 3.883020584517129e-06, "loss": 0.8042, "step": 2328 }, { "epoch": 0.7184945241400587, "grad_norm": 0.0, "learning_rate": 3.875116269709307e-06, "loss": 0.7232, "step": 2329 }, { "epoch": 0.718803023291686, "grad_norm": 0.0, "learning_rate": 3.867218074300832e-06, "loss": 0.7611, "step": 2330 }, { "epoch": 0.7191115224433133, "grad_norm": 0.0, "learning_rate": 3.859326006182833e-06, "loss": 0.6313, "step": 2331 }, { "epoch": 0.7194200215949406, "grad_norm": 0.0, "learning_rate": 3.851440073240309e-06, "loss": 0.7193, "step": 2332 }, { "epoch": 0.7197285207465679, "grad_norm": 0.0, "learning_rate": 3.84356028335213e-06, "loss": 0.6549, "step": 2333 }, { "epoch": 0.7200370198981952, "grad_norm": 0.0, "learning_rate": 3.835686644391029e-06, "loss": 0.6637, "step": 2334 }, { "epoch": 0.7203455190498226, "grad_norm": 0.0, "learning_rate": 3.827819164223599e-06, "loss": 0.7928, "step": 2335 }, { "epoch": 0.7206540182014499, "grad_norm": 0.0, "learning_rate": 3.819957850710269e-06, "loss": 0.6682, "step": 2336 }, { "epoch": 0.7209625173530773, "grad_norm": 0.0, "learning_rate": 3.812102711705323e-06, "loss": 0.7203, "step": 2337 }, { "epoch": 0.7212710165047046, "grad_norm": 0.0, "learning_rate": 3.8042537550568603e-06, "loss": 0.6589, "step": 2338 }, { "epoch": 0.7215795156563319, "grad_norm": 0.0, "learning_rate": 3.7964109886068066e-06, "loss": 0.7143, "step": 2339 }, { "epoch": 0.7218880148079593, "grad_norm": 0.0, "learning_rate": 3.7885744201909115e-06, "loss": 0.7169, "step": 2340 }, { "epoch": 0.7221965139595866, "grad_norm": 0.0, "learning_rate": 3.7807440576387244e-06, "loss": 0.6901, "step": 2341 }, { "epoch": 0.722505013111214, "grad_norm": 0.0, "learning_rate": 3.7729199087735924e-06, "loss": 0.7984, "step": 2342 }, { "epoch": 0.7228135122628413, "grad_norm": 0.0, "learning_rate": 3.7651019814126656e-06, "loss": 0.693, "step": 2343 }, { "epoch": 0.7231220114144686, "grad_norm": 0.0, "learning_rate": 3.7572902833668635e-06, "loss": 0.7642, "step": 2344 }, { "epoch": 0.723430510566096, "grad_norm": 0.0, "learning_rate": 3.7494848224408998e-06, "loss": 0.6759, "step": 2345 }, { "epoch": 0.7237390097177233, "grad_norm": 0.0, "learning_rate": 3.741685606433233e-06, "loss": 0.7235, "step": 2346 }, { "epoch": 0.7240475088693507, "grad_norm": 0.0, "learning_rate": 3.7338926431361055e-06, "loss": 0.7229, "step": 2347 }, { "epoch": 0.724356008020978, "grad_norm": 0.0, "learning_rate": 3.726105940335495e-06, "loss": 0.7559, "step": 2348 }, { "epoch": 0.7246645071726052, "grad_norm": 0.0, "learning_rate": 3.71832550581114e-06, "loss": 0.7572, "step": 2349 }, { "epoch": 0.7249730063242326, "grad_norm": 0.0, "learning_rate": 3.710551347336504e-06, "loss": 0.7353, "step": 2350 }, { "epoch": 0.7252815054758599, "grad_norm": 0.0, "learning_rate": 3.7027834726787806e-06, "loss": 0.6815, "step": 2351 }, { "epoch": 0.7255900046274872, "grad_norm": 0.0, "learning_rate": 3.6950218895988966e-06, "loss": 0.7229, "step": 2352 }, { "epoch": 0.7258985037791146, "grad_norm": 0.0, "learning_rate": 3.6872666058514783e-06, "loss": 0.6987, "step": 2353 }, { "epoch": 0.7262070029307419, "grad_norm": 0.0, "learning_rate": 3.6795176291848713e-06, "loss": 0.7232, "step": 2354 }, { "epoch": 0.7265155020823693, "grad_norm": 0.0, "learning_rate": 3.6717749673411096e-06, "loss": 0.7647, "step": 2355 }, { "epoch": 0.7268240012339966, "grad_norm": 0.0, "learning_rate": 3.66403862805592e-06, "loss": 0.7256, "step": 2356 }, { "epoch": 0.7271325003856239, "grad_norm": 0.0, "learning_rate": 3.6563086190587215e-06, "loss": 0.6587, "step": 2357 }, { "epoch": 0.7274409995372513, "grad_norm": 0.0, "learning_rate": 3.6485849480725964e-06, "loss": 0.6631, "step": 2358 }, { "epoch": 0.7277494986888786, "grad_norm": 0.0, "learning_rate": 3.6408676228143013e-06, "loss": 0.7334, "step": 2359 }, { "epoch": 0.728057997840506, "grad_norm": 0.0, "learning_rate": 3.633156650994247e-06, "loss": 0.9905, "step": 2360 }, { "epoch": 0.7283664969921333, "grad_norm": 0.0, "learning_rate": 3.625452040316505e-06, "loss": 0.7015, "step": 2361 }, { "epoch": 0.7286749961437606, "grad_norm": 0.0, "learning_rate": 3.6177537984787924e-06, "loss": 0.731, "step": 2362 }, { "epoch": 0.728983495295388, "grad_norm": 0.0, "learning_rate": 3.610061933172445e-06, "loss": 0.7258, "step": 2363 }, { "epoch": 0.7292919944470153, "grad_norm": 0.0, "learning_rate": 3.602376452082451e-06, "loss": 0.7024, "step": 2364 }, { "epoch": 0.7296004935986427, "grad_norm": 0.0, "learning_rate": 3.5946973628874026e-06, "loss": 0.7763, "step": 2365 }, { "epoch": 0.72990899275027, "grad_norm": 0.0, "learning_rate": 3.587024673259519e-06, "loss": 0.7543, "step": 2366 }, { "epoch": 0.7302174919018972, "grad_norm": 0.0, "learning_rate": 3.579358390864618e-06, "loss": 0.9771, "step": 2367 }, { "epoch": 0.7305259910535246, "grad_norm": 0.0, "learning_rate": 3.571698523362113e-06, "loss": 0.7753, "step": 2368 }, { "epoch": 0.7308344902051519, "grad_norm": 0.0, "learning_rate": 3.5640450784050185e-06, "loss": 0.7656, "step": 2369 }, { "epoch": 0.7311429893567792, "grad_norm": 0.0, "learning_rate": 3.556398063639921e-06, "loss": 0.7309, "step": 2370 }, { "epoch": 0.7314514885084066, "grad_norm": 0.0, "learning_rate": 3.548757486706997e-06, "loss": 0.713, "step": 2371 }, { "epoch": 0.7317599876600339, "grad_norm": 0.0, "learning_rate": 3.5411233552399703e-06, "loss": 0.6742, "step": 2372 }, { "epoch": 0.7320684868116613, "grad_norm": 0.0, "learning_rate": 3.533495676866141e-06, "loss": 0.8157, "step": 2373 }, { "epoch": 0.7323769859632886, "grad_norm": 0.0, "learning_rate": 3.525874459206362e-06, "loss": 0.7107, "step": 2374 }, { "epoch": 0.7326854851149159, "grad_norm": 0.0, "learning_rate": 3.518259709875023e-06, "loss": 0.7892, "step": 2375 }, { "epoch": 0.7329939842665433, "grad_norm": 0.0, "learning_rate": 3.5106514364800548e-06, "loss": 0.6195, "step": 2376 }, { "epoch": 0.7333024834181706, "grad_norm": 0.0, "learning_rate": 3.503049646622915e-06, "loss": 0.7394, "step": 2377 }, { "epoch": 0.733610982569798, "grad_norm": 0.0, "learning_rate": 3.4954543478985926e-06, "loss": 0.7432, "step": 2378 }, { "epoch": 0.7339194817214253, "grad_norm": 0.0, "learning_rate": 3.4878655478955838e-06, "loss": 0.6289, "step": 2379 }, { "epoch": 0.7342279808730526, "grad_norm": 0.0, "learning_rate": 3.4802832541958886e-06, "loss": 0.7075, "step": 2380 }, { "epoch": 0.73453648002468, "grad_norm": 0.0, "learning_rate": 3.472707474375018e-06, "loss": 0.7032, "step": 2381 }, { "epoch": 0.7348449791763073, "grad_norm": 0.0, "learning_rate": 3.465138216001962e-06, "loss": 0.7202, "step": 2382 }, { "epoch": 0.7351534783279347, "grad_norm": 0.0, "learning_rate": 3.457575486639211e-06, "loss": 0.6907, "step": 2383 }, { "epoch": 0.7354619774795619, "grad_norm": 0.0, "learning_rate": 3.4500192938427093e-06, "loss": 0.5944, "step": 2384 }, { "epoch": 0.7357704766311892, "grad_norm": 0.0, "learning_rate": 3.44246964516189e-06, "loss": 0.7482, "step": 2385 }, { "epoch": 0.7360789757828166, "grad_norm": 0.0, "learning_rate": 3.4349265481396453e-06, "loss": 0.7875, "step": 2386 }, { "epoch": 0.7363874749344439, "grad_norm": 0.0, "learning_rate": 3.4273900103123125e-06, "loss": 0.7129, "step": 2387 }, { "epoch": 0.7366959740860712, "grad_norm": 0.0, "learning_rate": 3.4198600392096894e-06, "loss": 0.7024, "step": 2388 }, { "epoch": 0.7370044732376986, "grad_norm": 0.0, "learning_rate": 3.4123366423549907e-06, "loss": 0.7508, "step": 2389 }, { "epoch": 0.7373129723893259, "grad_norm": 0.0, "learning_rate": 3.404819827264888e-06, "loss": 0.7088, "step": 2390 }, { "epoch": 0.7376214715409533, "grad_norm": 0.0, "learning_rate": 3.397309601449459e-06, "loss": 0.766, "step": 2391 }, { "epoch": 0.7379299706925806, "grad_norm": 0.0, "learning_rate": 3.38980597241221e-06, "loss": 0.7314, "step": 2392 }, { "epoch": 0.7382384698442079, "grad_norm": 0.0, "learning_rate": 3.3823089476500492e-06, "loss": 0.7783, "step": 2393 }, { "epoch": 0.7385469689958353, "grad_norm": 0.0, "learning_rate": 3.3748185346532847e-06, "loss": 0.6801, "step": 2394 }, { "epoch": 0.7388554681474626, "grad_norm": 0.0, "learning_rate": 3.367334740905629e-06, "loss": 0.7453, "step": 2395 }, { "epoch": 0.73916396729909, "grad_norm": 0.0, "learning_rate": 3.3598575738841698e-06, "loss": 0.7184, "step": 2396 }, { "epoch": 0.7394724664507173, "grad_norm": 0.0, "learning_rate": 3.352387041059377e-06, "loss": 0.7691, "step": 2397 }, { "epoch": 0.7397809656023446, "grad_norm": 0.0, "learning_rate": 3.3449231498951016e-06, "loss": 0.7078, "step": 2398 }, { "epoch": 0.740089464753972, "grad_norm": 0.0, "learning_rate": 3.337465907848544e-06, "loss": 0.7818, "step": 2399 }, { "epoch": 0.7403979639055993, "grad_norm": 0.0, "learning_rate": 3.330015322370277e-06, "loss": 0.7287, "step": 2400 }, { "epoch": 0.7407064630572265, "grad_norm": 0.0, "learning_rate": 3.3225714009042045e-06, "loss": 0.6973, "step": 2401 }, { "epoch": 0.7410149622088539, "grad_norm": 0.0, "learning_rate": 3.31513415088759e-06, "loss": 0.6757, "step": 2402 }, { "epoch": 0.7413234613604812, "grad_norm": 0.0, "learning_rate": 3.307703579751018e-06, "loss": 0.661, "step": 2403 }, { "epoch": 0.7416319605121086, "grad_norm": 0.0, "learning_rate": 3.3002796949184135e-06, "loss": 0.8085, "step": 2404 }, { "epoch": 0.7419404596637359, "grad_norm": 0.0, "learning_rate": 3.2928625038070093e-06, "loss": 0.7602, "step": 2405 }, { "epoch": 0.7422489588153632, "grad_norm": 0.0, "learning_rate": 3.2854520138273526e-06, "loss": 0.7372, "step": 2406 }, { "epoch": 0.7425574579669906, "grad_norm": 0.0, "learning_rate": 3.278048232383305e-06, "loss": 0.6769, "step": 2407 }, { "epoch": 0.7428659571186179, "grad_norm": 0.0, "learning_rate": 3.27065116687201e-06, "loss": 0.6679, "step": 2408 }, { "epoch": 0.7431744562702453, "grad_norm": 0.0, "learning_rate": 3.263260824683917e-06, "loss": 0.7707, "step": 2409 }, { "epoch": 0.7434829554218726, "grad_norm": 0.0, "learning_rate": 3.255877213202748e-06, "loss": 0.6441, "step": 2410 }, { "epoch": 0.7437914545734999, "grad_norm": 0.0, "learning_rate": 3.248500339805499e-06, "loss": 0.7432, "step": 2411 }, { "epoch": 0.7440999537251273, "grad_norm": 0.0, "learning_rate": 3.2411302118624453e-06, "loss": 0.7155, "step": 2412 }, { "epoch": 0.7444084528767546, "grad_norm": 0.0, "learning_rate": 3.233766836737111e-06, "loss": 0.6777, "step": 2413 }, { "epoch": 0.744716952028382, "grad_norm": 0.0, "learning_rate": 3.2264102217862794e-06, "loss": 0.6547, "step": 2414 }, { "epoch": 0.7450254511800093, "grad_norm": 0.0, "learning_rate": 3.2190603743599723e-06, "loss": 0.724, "step": 2415 }, { "epoch": 0.7453339503316366, "grad_norm": 0.0, "learning_rate": 3.2117173018014656e-06, "loss": 0.7582, "step": 2416 }, { "epoch": 0.745642449483264, "grad_norm": 0.0, "learning_rate": 3.2043810114472508e-06, "loss": 0.7383, "step": 2417 }, { "epoch": 0.7459509486348913, "grad_norm": 0.0, "learning_rate": 3.1970515106270474e-06, "loss": 0.712, "step": 2418 }, { "epoch": 0.7462594477865185, "grad_norm": 0.0, "learning_rate": 3.1897288066638e-06, "loss": 0.7693, "step": 2419 }, { "epoch": 0.7465679469381459, "grad_norm": 0.0, "learning_rate": 3.1824129068736474e-06, "loss": 0.6618, "step": 2420 }, { "epoch": 0.7468764460897732, "grad_norm": 0.0, "learning_rate": 3.1751038185659467e-06, "loss": 0.7433, "step": 2421 }, { "epoch": 0.7471849452414006, "grad_norm": 0.0, "learning_rate": 3.1678015490432368e-06, "loss": 0.6502, "step": 2422 }, { "epoch": 0.7474934443930279, "grad_norm": 0.0, "learning_rate": 3.1605061056012465e-06, "loss": 0.7712, "step": 2423 }, { "epoch": 0.7478019435446552, "grad_norm": 0.0, "learning_rate": 3.153217495528893e-06, "loss": 0.6721, "step": 2424 }, { "epoch": 0.7481104426962826, "grad_norm": 0.0, "learning_rate": 3.1459357261082525e-06, "loss": 0.7167, "step": 2425 }, { "epoch": 0.7484189418479099, "grad_norm": 0.0, "learning_rate": 3.138660804614586e-06, "loss": 0.6691, "step": 2426 }, { "epoch": 0.7487274409995373, "grad_norm": 0.0, "learning_rate": 3.131392738316286e-06, "loss": 0.7755, "step": 2427 }, { "epoch": 0.7490359401511646, "grad_norm": 0.0, "learning_rate": 3.12413153447492e-06, "loss": 0.6478, "step": 2428 }, { "epoch": 0.7493444393027919, "grad_norm": 0.0, "learning_rate": 3.1168772003451855e-06, "loss": 0.7047, "step": 2429 }, { "epoch": 0.7496529384544193, "grad_norm": 0.0, "learning_rate": 3.1096297431749257e-06, "loss": 0.6735, "step": 2430 }, { "epoch": 0.7499614376060466, "grad_norm": 0.0, "learning_rate": 3.1023891702051057e-06, "loss": 1.0232, "step": 2431 }, { "epoch": 0.750269936757674, "grad_norm": 0.0, "learning_rate": 3.095155488669812e-06, "loss": 0.6308, "step": 2432 }, { "epoch": 0.7505784359093013, "grad_norm": 0.0, "learning_rate": 3.087928705796256e-06, "loss": 0.7367, "step": 2433 }, { "epoch": 0.7508869350609286, "grad_norm": 0.0, "learning_rate": 3.0807088288047447e-06, "loss": 0.6943, "step": 2434 }, { "epoch": 0.751195434212556, "grad_norm": 0.0, "learning_rate": 3.0734958649086878e-06, "loss": 0.6738, "step": 2435 }, { "epoch": 0.7515039333641832, "grad_norm": 0.0, "learning_rate": 3.0662898213145965e-06, "loss": 0.7519, "step": 2436 }, { "epoch": 0.7518124325158105, "grad_norm": 0.0, "learning_rate": 3.0590907052220566e-06, "loss": 0.6588, "step": 2437 }, { "epoch": 0.7521209316674379, "grad_norm": 0.0, "learning_rate": 3.051898523823743e-06, "loss": 0.6724, "step": 2438 }, { "epoch": 0.7524294308190652, "grad_norm": 0.0, "learning_rate": 3.044713284305395e-06, "loss": 0.7122, "step": 2439 }, { "epoch": 0.7527379299706926, "grad_norm": 0.0, "learning_rate": 3.037534993845818e-06, "loss": 0.6708, "step": 2440 }, { "epoch": 0.7530464291223199, "grad_norm": 0.0, "learning_rate": 3.0303636596168706e-06, "loss": 0.7263, "step": 2441 }, { "epoch": 0.7533549282739472, "grad_norm": 0.0, "learning_rate": 3.0231992887834725e-06, "loss": 0.7195, "step": 2442 }, { "epoch": 0.7536634274255746, "grad_norm": 0.0, "learning_rate": 3.016041888503578e-06, "loss": 0.7201, "step": 2443 }, { "epoch": 0.7539719265772019, "grad_norm": 0.0, "learning_rate": 3.008891465928172e-06, "loss": 0.7869, "step": 2444 }, { "epoch": 0.7542804257288293, "grad_norm": 0.0, "learning_rate": 3.0017480282012836e-06, "loss": 0.6756, "step": 2445 }, { "epoch": 0.7545889248804566, "grad_norm": 0.0, "learning_rate": 2.9946115824599465e-06, "loss": 0.7122, "step": 2446 }, { "epoch": 0.7548974240320839, "grad_norm": 0.0, "learning_rate": 2.987482135834223e-06, "loss": 0.6999, "step": 2447 }, { "epoch": 0.7552059231837113, "grad_norm": 0.0, "learning_rate": 2.9803596954471714e-06, "loss": 0.6999, "step": 2448 }, { "epoch": 0.7555144223353386, "grad_norm": 0.0, "learning_rate": 2.9732442684148533e-06, "loss": 0.7637, "step": 2449 }, { "epoch": 0.755822921486966, "grad_norm": 0.0, "learning_rate": 2.9661358618463297e-06, "loss": 0.724, "step": 2450 }, { "epoch": 0.7561314206385933, "grad_norm": 0.0, "learning_rate": 2.959034482843639e-06, "loss": 0.6431, "step": 2451 }, { "epoch": 0.7564399197902206, "grad_norm": 0.0, "learning_rate": 2.951940138501801e-06, "loss": 0.9853, "step": 2452 }, { "epoch": 0.7567484189418479, "grad_norm": 0.0, "learning_rate": 2.944852835908806e-06, "loss": 0.6493, "step": 2453 }, { "epoch": 0.7570569180934752, "grad_norm": 0.0, "learning_rate": 2.9377725821456138e-06, "loss": 0.6325, "step": 2454 }, { "epoch": 0.7573654172451025, "grad_norm": 0.0, "learning_rate": 2.9306993842861354e-06, "loss": 0.69, "step": 2455 }, { "epoch": 0.7576739163967299, "grad_norm": 0.0, "learning_rate": 2.9236332493972386e-06, "loss": 0.6958, "step": 2456 }, { "epoch": 0.7579824155483572, "grad_norm": 0.0, "learning_rate": 2.916574184538731e-06, "loss": 0.837, "step": 2457 }, { "epoch": 0.7582909146999846, "grad_norm": 0.0, "learning_rate": 2.9095221967633502e-06, "loss": 0.6747, "step": 2458 }, { "epoch": 0.7585994138516119, "grad_norm": 0.0, "learning_rate": 2.9024772931167777e-06, "loss": 0.6415, "step": 2459 }, { "epoch": 0.7589079130032392, "grad_norm": 0.0, "learning_rate": 2.8954394806376052e-06, "loss": 0.6934, "step": 2460 }, { "epoch": 0.7592164121548666, "grad_norm": 0.0, "learning_rate": 2.8884087663573403e-06, "loss": 0.6837, "step": 2461 }, { "epoch": 0.7595249113064939, "grad_norm": 0.0, "learning_rate": 2.881385157300408e-06, "loss": 0.7331, "step": 2462 }, { "epoch": 0.7598334104581213, "grad_norm": 0.0, "learning_rate": 2.8743686604841227e-06, "loss": 0.7135, "step": 2463 }, { "epoch": 0.7601419096097486, "grad_norm": 0.0, "learning_rate": 2.8673592829187057e-06, "loss": 0.765, "step": 2464 }, { "epoch": 0.7604504087613759, "grad_norm": 0.0, "learning_rate": 2.8603570316072484e-06, "loss": 0.7272, "step": 2465 }, { "epoch": 0.7607589079130033, "grad_norm": 0.0, "learning_rate": 2.8533619135457402e-06, "loss": 0.637, "step": 2466 }, { "epoch": 0.7610674070646306, "grad_norm": 0.0, "learning_rate": 2.846373935723029e-06, "loss": 0.7201, "step": 2467 }, { "epoch": 0.761375906216258, "grad_norm": 0.0, "learning_rate": 2.839393105120839e-06, "loss": 0.747, "step": 2468 }, { "epoch": 0.7616844053678853, "grad_norm": 0.0, "learning_rate": 2.8324194287137565e-06, "loss": 0.7197, "step": 2469 }, { "epoch": 0.7619929045195126, "grad_norm": 0.0, "learning_rate": 2.8254529134692e-06, "loss": 1.0135, "step": 2470 }, { "epoch": 0.7623014036711399, "grad_norm": 0.0, "learning_rate": 2.8184935663474567e-06, "loss": 0.7235, "step": 2471 }, { "epoch": 0.7626099028227672, "grad_norm": 0.0, "learning_rate": 2.8115413943016366e-06, "loss": 0.6625, "step": 2472 }, { "epoch": 0.7629184019743945, "grad_norm": 0.0, "learning_rate": 2.804596404277692e-06, "loss": 0.7001, "step": 2473 }, { "epoch": 0.7632269011260219, "grad_norm": 0.0, "learning_rate": 2.7976586032143914e-06, "loss": 0.6663, "step": 2474 }, { "epoch": 0.7635354002776492, "grad_norm": 0.0, "learning_rate": 2.7907279980433197e-06, "loss": 0.7304, "step": 2475 }, { "epoch": 0.7638438994292766, "grad_norm": 0.0, "learning_rate": 2.7838045956888836e-06, "loss": 0.7103, "step": 2476 }, { "epoch": 0.7641523985809039, "grad_norm": 0.0, "learning_rate": 2.776888403068282e-06, "loss": 0.6845, "step": 2477 }, { "epoch": 0.7644608977325312, "grad_norm": 0.0, "learning_rate": 2.769979427091515e-06, "loss": 0.6667, "step": 2478 }, { "epoch": 0.7647693968841586, "grad_norm": 0.0, "learning_rate": 2.7630776746613696e-06, "loss": 0.6478, "step": 2479 }, { "epoch": 0.7650778960357859, "grad_norm": 0.0, "learning_rate": 2.7561831526734207e-06, "loss": 0.7323, "step": 2480 }, { "epoch": 0.7653863951874132, "grad_norm": 0.0, "learning_rate": 2.749295868016022e-06, "loss": 0.6891, "step": 2481 }, { "epoch": 0.7656948943390406, "grad_norm": 0.0, "learning_rate": 2.7424158275702807e-06, "loss": 0.7104, "step": 2482 }, { "epoch": 0.7660033934906679, "grad_norm": 0.0, "learning_rate": 2.735543038210083e-06, "loss": 0.6838, "step": 2483 }, { "epoch": 0.7663118926422953, "grad_norm": 0.0, "learning_rate": 2.728677506802062e-06, "loss": 0.7601, "step": 2484 }, { "epoch": 0.7666203917939226, "grad_norm": 0.0, "learning_rate": 2.7218192402056052e-06, "loss": 0.6113, "step": 2485 }, { "epoch": 0.76692889094555, "grad_norm": 0.0, "learning_rate": 2.7149682452728354e-06, "loss": 0.6649, "step": 2486 }, { "epoch": 0.7672373900971773, "grad_norm": 0.0, "learning_rate": 2.70812452884861e-06, "loss": 0.7328, "step": 2487 }, { "epoch": 0.7675458892488045, "grad_norm": 0.0, "learning_rate": 2.7012880977705236e-06, "loss": 0.6515, "step": 2488 }, { "epoch": 0.7678543884004319, "grad_norm": 0.0, "learning_rate": 2.69445895886888e-06, "loss": 0.6507, "step": 2489 }, { "epoch": 0.7681628875520592, "grad_norm": 0.0, "learning_rate": 2.6876371189667107e-06, "loss": 1.0037, "step": 2490 }, { "epoch": 0.7684713867036865, "grad_norm": 0.0, "learning_rate": 2.6808225848797365e-06, "loss": 0.7197, "step": 2491 }, { "epoch": 0.7687798858553139, "grad_norm": 0.0, "learning_rate": 2.6740153634163955e-06, "loss": 0.6876, "step": 2492 }, { "epoch": 0.7690883850069412, "grad_norm": 0.0, "learning_rate": 2.6672154613778166e-06, "loss": 0.7228, "step": 2493 }, { "epoch": 0.7693968841585686, "grad_norm": 0.0, "learning_rate": 2.6604228855578096e-06, "loss": 0.6671, "step": 2494 }, { "epoch": 0.7697053833101959, "grad_norm": 0.0, "learning_rate": 2.653637642742869e-06, "loss": 0.7387, "step": 2495 }, { "epoch": 0.7700138824618232, "grad_norm": 0.0, "learning_rate": 2.6468597397121575e-06, "loss": 0.6826, "step": 2496 }, { "epoch": 0.7703223816134506, "grad_norm": 0.0, "learning_rate": 2.6400891832375163e-06, "loss": 0.7781, "step": 2497 }, { "epoch": 0.7706308807650779, "grad_norm": 0.0, "learning_rate": 2.6333259800834366e-06, "loss": 0.6769, "step": 2498 }, { "epoch": 0.7709393799167052, "grad_norm": 0.0, "learning_rate": 2.626570137007061e-06, "loss": 0.6654, "step": 2499 }, { "epoch": 0.7712478790683326, "grad_norm": 0.0, "learning_rate": 2.6198216607581907e-06, "loss": 0.6779, "step": 2500 }, { "epoch": 0.7715563782199599, "grad_norm": 0.0, "learning_rate": 2.613080558079253e-06, "loss": 0.6879, "step": 2501 }, { "epoch": 0.7718648773715873, "grad_norm": 0.0, "learning_rate": 2.606346835705319e-06, "loss": 0.7331, "step": 2502 }, { "epoch": 0.7721733765232146, "grad_norm": 0.0, "learning_rate": 2.5996205003640796e-06, "loss": 0.7353, "step": 2503 }, { "epoch": 0.772481875674842, "grad_norm": 0.0, "learning_rate": 2.5929015587758455e-06, "loss": 0.6572, "step": 2504 }, { "epoch": 0.7727903748264692, "grad_norm": 0.0, "learning_rate": 2.586190017653546e-06, "loss": 0.7388, "step": 2505 }, { "epoch": 0.7730988739780965, "grad_norm": 0.0, "learning_rate": 2.579485883702707e-06, "loss": 0.695, "step": 2506 }, { "epoch": 0.7734073731297239, "grad_norm": 0.0, "learning_rate": 2.5727891636214684e-06, "loss": 0.6227, "step": 2507 }, { "epoch": 0.7737158722813512, "grad_norm": 0.0, "learning_rate": 2.566099864100543e-06, "loss": 0.7622, "step": 2508 }, { "epoch": 0.7740243714329785, "grad_norm": 0.0, "learning_rate": 2.5594179918232476e-06, "loss": 0.7167, "step": 2509 }, { "epoch": 0.7743328705846059, "grad_norm": 0.0, "learning_rate": 2.5527435534654667e-06, "loss": 0.6815, "step": 2510 }, { "epoch": 0.7746413697362332, "grad_norm": 0.0, "learning_rate": 2.546076555695668e-06, "loss": 0.6534, "step": 2511 }, { "epoch": 0.7749498688878605, "grad_norm": 0.0, "learning_rate": 2.539417005174876e-06, "loss": 0.661, "step": 2512 }, { "epoch": 0.7752583680394879, "grad_norm": 0.0, "learning_rate": 2.5327649085566763e-06, "loss": 0.6576, "step": 2513 }, { "epoch": 0.7755668671911152, "grad_norm": 0.0, "learning_rate": 2.5261202724872126e-06, "loss": 0.7524, "step": 2514 }, { "epoch": 0.7758753663427426, "grad_norm": 0.0, "learning_rate": 2.5194831036051716e-06, "loss": 0.6885, "step": 2515 }, { "epoch": 0.7761838654943699, "grad_norm": 0.0, "learning_rate": 2.5128534085417734e-06, "loss": 0.6119, "step": 2516 }, { "epoch": 0.7764923646459972, "grad_norm": 0.0, "learning_rate": 2.506231193920783e-06, "loss": 0.6358, "step": 2517 }, { "epoch": 0.7768008637976246, "grad_norm": 0.0, "learning_rate": 2.49961646635848e-06, "loss": 0.684, "step": 2518 }, { "epoch": 0.7771093629492519, "grad_norm": 0.0, "learning_rate": 2.4930092324636744e-06, "loss": 0.7587, "step": 2519 }, { "epoch": 0.7774178621008793, "grad_norm": 0.0, "learning_rate": 2.486409498837683e-06, "loss": 0.7202, "step": 2520 }, { "epoch": 0.7777263612525066, "grad_norm": 0.0, "learning_rate": 2.4798172720743262e-06, "loss": 0.9812, "step": 2521 }, { "epoch": 0.7780348604041339, "grad_norm": 0.0, "learning_rate": 2.473232558759927e-06, "loss": 0.6815, "step": 2522 }, { "epoch": 0.7783433595557612, "grad_norm": 0.0, "learning_rate": 2.4666553654733095e-06, "loss": 0.7473, "step": 2523 }, { "epoch": 0.7786518587073885, "grad_norm": 0.0, "learning_rate": 2.4600856987857725e-06, "loss": 0.692, "step": 2524 }, { "epoch": 0.7789603578590159, "grad_norm": 0.0, "learning_rate": 2.4535235652610976e-06, "loss": 0.7427, "step": 2525 }, { "epoch": 0.7792688570106432, "grad_norm": 0.0, "learning_rate": 2.44696897145555e-06, "loss": 0.6775, "step": 2526 }, { "epoch": 0.7795773561622705, "grad_norm": 0.0, "learning_rate": 2.440421923917846e-06, "loss": 0.63, "step": 2527 }, { "epoch": 0.7798858553138979, "grad_norm": 0.0, "learning_rate": 2.4338824291891795e-06, "loss": 0.6496, "step": 2528 }, { "epoch": 0.7801943544655252, "grad_norm": 0.0, "learning_rate": 2.4273504938031835e-06, "loss": 0.7412, "step": 2529 }, { "epoch": 0.7805028536171525, "grad_norm": 0.0, "learning_rate": 2.420826124285943e-06, "loss": 0.797, "step": 2530 }, { "epoch": 0.7808113527687799, "grad_norm": 0.0, "learning_rate": 2.4143093271559925e-06, "loss": 0.7071, "step": 2531 }, { "epoch": 0.7811198519204072, "grad_norm": 0.0, "learning_rate": 2.4078001089242887e-06, "loss": 0.6181, "step": 2532 }, { "epoch": 0.7814283510720346, "grad_norm": 0.0, "learning_rate": 2.4012984760942224e-06, "loss": 0.6362, "step": 2533 }, { "epoch": 0.7817368502236619, "grad_norm": 0.0, "learning_rate": 2.394804435161603e-06, "loss": 0.6895, "step": 2534 }, { "epoch": 0.7820453493752892, "grad_norm": 0.0, "learning_rate": 2.3883179926146593e-06, "loss": 0.6591, "step": 2535 }, { "epoch": 0.7823538485269166, "grad_norm": 0.0, "learning_rate": 2.3818391549340225e-06, "loss": 0.7316, "step": 2536 }, { "epoch": 0.7826623476785439, "grad_norm": 0.0, "learning_rate": 2.3753679285927345e-06, "loss": 0.6605, "step": 2537 }, { "epoch": 0.7829708468301713, "grad_norm": 0.0, "learning_rate": 2.368904320056222e-06, "loss": 0.7423, "step": 2538 }, { "epoch": 0.7832793459817986, "grad_norm": 0.0, "learning_rate": 2.362448335782307e-06, "loss": 0.6575, "step": 2539 }, { "epoch": 0.7835878451334258, "grad_norm": 0.0, "learning_rate": 2.3559999822211943e-06, "loss": 0.7509, "step": 2540 }, { "epoch": 0.7838963442850532, "grad_norm": 0.0, "learning_rate": 2.349559265815463e-06, "loss": 0.6073, "step": 2541 }, { "epoch": 0.7842048434366805, "grad_norm": 0.0, "learning_rate": 2.3431261930000602e-06, "loss": 0.7227, "step": 2542 }, { "epoch": 0.7845133425883078, "grad_norm": 0.0, "learning_rate": 2.3367007702023016e-06, "loss": 0.6742, "step": 2543 }, { "epoch": 0.7848218417399352, "grad_norm": 0.0, "learning_rate": 2.330283003841853e-06, "loss": 0.8234, "step": 2544 }, { "epoch": 0.7851303408915625, "grad_norm": 0.0, "learning_rate": 2.3238729003307412e-06, "loss": 0.9959, "step": 2545 }, { "epoch": 0.7854388400431899, "grad_norm": 0.0, "learning_rate": 2.3174704660733192e-06, "loss": 0.7392, "step": 2546 }, { "epoch": 0.7857473391948172, "grad_norm": 0.0, "learning_rate": 2.311075707466297e-06, "loss": 0.7615, "step": 2547 }, { "epoch": 0.7860558383464445, "grad_norm": 0.0, "learning_rate": 2.3046886308987017e-06, "loss": 0.6454, "step": 2548 }, { "epoch": 0.7863643374980719, "grad_norm": 0.0, "learning_rate": 2.298309242751896e-06, "loss": 0.6645, "step": 2549 }, { "epoch": 0.7866728366496992, "grad_norm": 0.0, "learning_rate": 2.291937549399552e-06, "loss": 0.7703, "step": 2550 }, { "epoch": 0.7869813358013266, "grad_norm": 0.0, "learning_rate": 2.2855735572076564e-06, "loss": 0.6995, "step": 2551 }, { "epoch": 0.7872898349529539, "grad_norm": 0.0, "learning_rate": 2.2792172725345084e-06, "loss": 0.6252, "step": 2552 }, { "epoch": 0.7875983341045812, "grad_norm": 0.0, "learning_rate": 2.272868701730695e-06, "loss": 0.6929, "step": 2553 }, { "epoch": 0.7879068332562086, "grad_norm": 0.0, "learning_rate": 2.2665278511391075e-06, "loss": 0.6335, "step": 2554 }, { "epoch": 0.7882153324078359, "grad_norm": 0.0, "learning_rate": 2.2601947270949164e-06, "loss": 0.7216, "step": 2555 }, { "epoch": 0.7885238315594633, "grad_norm": 0.0, "learning_rate": 2.2538693359255724e-06, "loss": 0.7121, "step": 2556 }, { "epoch": 0.7888323307110905, "grad_norm": 0.0, "learning_rate": 2.2475516839508072e-06, "loss": 0.6249, "step": 2557 }, { "epoch": 0.7891408298627178, "grad_norm": 0.0, "learning_rate": 2.2412417774826144e-06, "loss": 0.6763, "step": 2558 }, { "epoch": 0.7894493290143452, "grad_norm": 0.0, "learning_rate": 2.2349396228252506e-06, "loss": 0.7082, "step": 2559 }, { "epoch": 0.7897578281659725, "grad_norm": 0.0, "learning_rate": 2.2286452262752223e-06, "loss": 0.7742, "step": 2560 }, { "epoch": 0.7900663273175998, "grad_norm": 0.0, "learning_rate": 2.2223585941212956e-06, "loss": 0.6681, "step": 2561 }, { "epoch": 0.7903748264692272, "grad_norm": 0.0, "learning_rate": 2.216079732644476e-06, "loss": 0.7311, "step": 2562 }, { "epoch": 0.7906833256208545, "grad_norm": 0.0, "learning_rate": 2.2098086481179947e-06, "loss": 0.6645, "step": 2563 }, { "epoch": 0.7909918247724819, "grad_norm": 0.0, "learning_rate": 2.2035453468073263e-06, "loss": 0.557, "step": 2564 }, { "epoch": 0.7913003239241092, "grad_norm": 0.0, "learning_rate": 2.197289834970161e-06, "loss": 0.6466, "step": 2565 }, { "epoch": 0.7916088230757365, "grad_norm": 0.0, "learning_rate": 2.1910421188564134e-06, "loss": 0.6967, "step": 2566 }, { "epoch": 0.7919173222273639, "grad_norm": 0.0, "learning_rate": 2.184802204708202e-06, "loss": 0.7423, "step": 2567 }, { "epoch": 0.7922258213789912, "grad_norm": 0.0, "learning_rate": 2.178570098759851e-06, "loss": 0.7138, "step": 2568 }, { "epoch": 0.7925343205306186, "grad_norm": 0.0, "learning_rate": 2.1723458072378924e-06, "loss": 0.6411, "step": 2569 }, { "epoch": 0.7928428196822459, "grad_norm": 0.0, "learning_rate": 2.166129336361038e-06, "loss": 0.8109, "step": 2570 }, { "epoch": 0.7931513188338732, "grad_norm": 0.0, "learning_rate": 2.159920692340202e-06, "loss": 0.7287, "step": 2571 }, { "epoch": 0.7934598179855006, "grad_norm": 0.0, "learning_rate": 2.1537198813784554e-06, "loss": 0.7112, "step": 2572 }, { "epoch": 0.7937683171371279, "grad_norm": 0.0, "learning_rate": 2.147526909671064e-06, "loss": 0.6677, "step": 2573 }, { "epoch": 0.7940768162887553, "grad_norm": 0.0, "learning_rate": 2.141341783405457e-06, "loss": 0.7404, "step": 2574 }, { "epoch": 0.7943853154403825, "grad_norm": 0.0, "learning_rate": 2.1351645087612195e-06, "loss": 0.7257, "step": 2575 }, { "epoch": 0.7946938145920098, "grad_norm": 0.0, "learning_rate": 2.1289950919100932e-06, "loss": 0.7316, "step": 2576 }, { "epoch": 0.7950023137436372, "grad_norm": 0.0, "learning_rate": 2.1228335390159704e-06, "loss": 0.7996, "step": 2577 }, { "epoch": 0.7953108128952645, "grad_norm": 0.0, "learning_rate": 2.116679856234892e-06, "loss": 0.7102, "step": 2578 }, { "epoch": 0.7956193120468918, "grad_norm": 0.0, "learning_rate": 2.1105340497150264e-06, "loss": 0.7584, "step": 2579 }, { "epoch": 0.7959278111985192, "grad_norm": 0.0, "learning_rate": 2.104396125596675e-06, "loss": 0.7088, "step": 2580 }, { "epoch": 0.7962363103501465, "grad_norm": 0.0, "learning_rate": 2.098266090012271e-06, "loss": 0.6948, "step": 2581 }, { "epoch": 0.7965448095017739, "grad_norm": 0.0, "learning_rate": 2.092143949086356e-06, "loss": 0.6375, "step": 2582 }, { "epoch": 0.7968533086534012, "grad_norm": 0.0, "learning_rate": 2.0860297089355943e-06, "loss": 0.7181, "step": 2583 }, { "epoch": 0.7971618078050285, "grad_norm": 0.0, "learning_rate": 2.0799233756687477e-06, "loss": 0.7211, "step": 2584 }, { "epoch": 0.7974703069566559, "grad_norm": 0.0, "learning_rate": 2.0738249553866807e-06, "loss": 0.6163, "step": 2585 }, { "epoch": 0.7977788061082832, "grad_norm": 0.0, "learning_rate": 2.0677344541823573e-06, "loss": 0.7215, "step": 2586 }, { "epoch": 0.7980873052599106, "grad_norm": 0.0, "learning_rate": 2.061651878140819e-06, "loss": 0.7583, "step": 2587 }, { "epoch": 0.7983958044115379, "grad_norm": 0.0, "learning_rate": 2.0555772333392055e-06, "loss": 0.7149, "step": 2588 }, { "epoch": 0.7987043035631652, "grad_norm": 0.0, "learning_rate": 2.0495105258467085e-06, "loss": 0.7232, "step": 2589 }, { "epoch": 0.7990128027147926, "grad_norm": 0.0, "learning_rate": 2.043451761724614e-06, "loss": 0.7209, "step": 2590 }, { "epoch": 0.7993213018664199, "grad_norm": 0.0, "learning_rate": 2.0374009470262545e-06, "loss": 0.7242, "step": 2591 }, { "epoch": 0.7996298010180471, "grad_norm": 0.0, "learning_rate": 2.031358087797032e-06, "loss": 0.7512, "step": 2592 }, { "epoch": 0.7999383001696745, "grad_norm": 0.0, "learning_rate": 2.0253231900743906e-06, "loss": 0.6766, "step": 2593 }, { "epoch": 0.8002467993213018, "grad_norm": 0.0, "learning_rate": 2.019296259887825e-06, "loss": 0.7268, "step": 2594 }, { "epoch": 0.8005552984729292, "grad_norm": 0.0, "learning_rate": 2.0132773032588717e-06, "loss": 0.7501, "step": 2595 }, { "epoch": 0.8008637976245565, "grad_norm": 0.0, "learning_rate": 2.007266326201095e-06, "loss": 0.7455, "step": 2596 }, { "epoch": 0.8011722967761838, "grad_norm": 0.0, "learning_rate": 2.0012633347200873e-06, "loss": 0.6826, "step": 2597 }, { "epoch": 0.8014807959278112, "grad_norm": 0.0, "learning_rate": 1.9952683348134704e-06, "loss": 0.6854, "step": 2598 }, { "epoch": 0.8017892950794385, "grad_norm": 0.0, "learning_rate": 1.9892813324708705e-06, "loss": 0.7561, "step": 2599 }, { "epoch": 0.8020977942310659, "grad_norm": 0.0, "learning_rate": 1.9833023336739354e-06, "loss": 0.654, "step": 2600 }, { "epoch": 0.8024062933826932, "grad_norm": 0.0, "learning_rate": 1.977331344396306e-06, "loss": 0.6279, "step": 2601 }, { "epoch": 0.8027147925343205, "grad_norm": 0.0, "learning_rate": 1.971368370603626e-06, "loss": 0.7695, "step": 2602 }, { "epoch": 0.8030232916859479, "grad_norm": 0.0, "learning_rate": 1.9654134182535267e-06, "loss": 0.7384, "step": 2603 }, { "epoch": 0.8033317908375752, "grad_norm": 0.0, "learning_rate": 1.959466493295633e-06, "loss": 0.6961, "step": 2604 }, { "epoch": 0.8036402899892026, "grad_norm": 0.0, "learning_rate": 1.953527601671543e-06, "loss": 0.7267, "step": 2605 }, { "epoch": 0.8039487891408299, "grad_norm": 0.0, "learning_rate": 1.9475967493148283e-06, "loss": 0.6406, "step": 2606 }, { "epoch": 0.8042572882924572, "grad_norm": 0.0, "learning_rate": 1.9416739421510354e-06, "loss": 0.674, "step": 2607 }, { "epoch": 0.8045657874440846, "grad_norm": 0.0, "learning_rate": 1.935759186097662e-06, "loss": 0.63, "step": 2608 }, { "epoch": 0.8048742865957118, "grad_norm": 0.0, "learning_rate": 1.9298524870641734e-06, "loss": 0.712, "step": 2609 }, { "epoch": 0.8051827857473391, "grad_norm": 0.0, "learning_rate": 1.9239538509519784e-06, "loss": 0.7013, "step": 2610 }, { "epoch": 0.8054912848989665, "grad_norm": 0.0, "learning_rate": 1.9180632836544268e-06, "loss": 0.7126, "step": 2611 }, { "epoch": 0.8057997840505938, "grad_norm": 0.0, "learning_rate": 1.912180791056817e-06, "loss": 0.6296, "step": 2612 }, { "epoch": 0.8061082832022212, "grad_norm": 0.0, "learning_rate": 1.9063063790363733e-06, "loss": 0.6604, "step": 2613 }, { "epoch": 0.8064167823538485, "grad_norm": 0.0, "learning_rate": 1.9004400534622458e-06, "loss": 0.7148, "step": 2614 }, { "epoch": 0.8067252815054758, "grad_norm": 0.0, "learning_rate": 1.8945818201955047e-06, "loss": 0.7608, "step": 2615 }, { "epoch": 0.8070337806571032, "grad_norm": 0.0, "learning_rate": 1.8887316850891446e-06, "loss": 0.6483, "step": 2616 }, { "epoch": 0.8073422798087305, "grad_norm": 0.0, "learning_rate": 1.8828896539880549e-06, "loss": 0.755, "step": 2617 }, { "epoch": 0.8076507789603579, "grad_norm": 0.0, "learning_rate": 1.8770557327290418e-06, "loss": 0.7258, "step": 2618 }, { "epoch": 0.8079592781119852, "grad_norm": 0.0, "learning_rate": 1.8712299271407998e-06, "loss": 0.597, "step": 2619 }, { "epoch": 0.8082677772636125, "grad_norm": 0.0, "learning_rate": 1.865412243043916e-06, "loss": 0.7922, "step": 2620 }, { "epoch": 0.8085762764152399, "grad_norm": 0.0, "learning_rate": 1.8596026862508697e-06, "loss": 0.7384, "step": 2621 }, { "epoch": 0.8088847755668672, "grad_norm": 0.0, "learning_rate": 1.853801262566014e-06, "loss": 0.6699, "step": 2622 }, { "epoch": 0.8091932747184946, "grad_norm": 0.0, "learning_rate": 1.848007977785573e-06, "loss": 0.6537, "step": 2623 }, { "epoch": 0.8095017738701219, "grad_norm": 0.0, "learning_rate": 1.8422228376976526e-06, "loss": 0.6377, "step": 2624 }, { "epoch": 0.8098102730217492, "grad_norm": 0.0, "learning_rate": 1.8364458480822045e-06, "loss": 0.7561, "step": 2625 }, { "epoch": 0.8101187721733766, "grad_norm": 0.0, "learning_rate": 1.8306770147110542e-06, "loss": 0.6598, "step": 2626 }, { "epoch": 0.8104272713250038, "grad_norm": 0.0, "learning_rate": 1.8249163433478601e-06, "loss": 0.665, "step": 2627 }, { "epoch": 0.8107357704766311, "grad_norm": 0.0, "learning_rate": 1.8191638397481415e-06, "loss": 0.7308, "step": 2628 }, { "epoch": 0.8110442696282585, "grad_norm": 0.0, "learning_rate": 1.8134195096592466e-06, "loss": 0.7206, "step": 2629 }, { "epoch": 0.8113527687798858, "grad_norm": 0.0, "learning_rate": 1.807683358820368e-06, "loss": 0.722, "step": 2630 }, { "epoch": 0.8116612679315132, "grad_norm": 0.0, "learning_rate": 1.8019553929625166e-06, "loss": 0.6939, "step": 2631 }, { "epoch": 0.8119697670831405, "grad_norm": 0.0, "learning_rate": 1.796235617808526e-06, "loss": 0.6726, "step": 2632 }, { "epoch": 0.8122782662347678, "grad_norm": 0.0, "learning_rate": 1.7905240390730571e-06, "loss": 0.6318, "step": 2633 }, { "epoch": 0.8125867653863952, "grad_norm": 0.0, "learning_rate": 1.7848206624625675e-06, "loss": 0.7716, "step": 2634 }, { "epoch": 0.8128952645380225, "grad_norm": 0.0, "learning_rate": 1.7791254936753323e-06, "loss": 0.6721, "step": 2635 }, { "epoch": 0.8132037636896499, "grad_norm": 0.0, "learning_rate": 1.7734385384014175e-06, "loss": 0.6664, "step": 2636 }, { "epoch": 0.8135122628412772, "grad_norm": 0.0, "learning_rate": 1.767759802322685e-06, "loss": 0.6266, "step": 2637 }, { "epoch": 0.8138207619929045, "grad_norm": 0.0, "learning_rate": 1.7620892911127897e-06, "loss": 0.6798, "step": 2638 }, { "epoch": 0.8141292611445319, "grad_norm": 0.0, "learning_rate": 1.7564270104371638e-06, "loss": 0.6637, "step": 2639 }, { "epoch": 0.8144377602961592, "grad_norm": 0.0, "learning_rate": 1.750772965953017e-06, "loss": 0.7297, "step": 2640 }, { "epoch": 0.8147462594477866, "grad_norm": 0.0, "learning_rate": 1.7451271633093292e-06, "loss": 0.6687, "step": 2641 }, { "epoch": 0.8150547585994139, "grad_norm": 0.0, "learning_rate": 1.7394896081468527e-06, "loss": 0.7067, "step": 2642 }, { "epoch": 0.8153632577510412, "grad_norm": 0.0, "learning_rate": 1.733860306098092e-06, "loss": 1.034, "step": 2643 }, { "epoch": 0.8156717569026685, "grad_norm": 0.0, "learning_rate": 1.7282392627873046e-06, "loss": 0.6971, "step": 2644 }, { "epoch": 0.8159802560542958, "grad_norm": 0.0, "learning_rate": 1.7226264838305074e-06, "loss": 0.7338, "step": 2645 }, { "epoch": 0.8162887552059231, "grad_norm": 0.0, "learning_rate": 1.7170219748354488e-06, "loss": 0.7389, "step": 2646 }, { "epoch": 0.8165972543575505, "grad_norm": 0.0, "learning_rate": 1.7114257414016223e-06, "loss": 0.7335, "step": 2647 }, { "epoch": 0.8169057535091778, "grad_norm": 0.0, "learning_rate": 1.7058377891202493e-06, "loss": 0.6609, "step": 2648 }, { "epoch": 0.8172142526608052, "grad_norm": 0.0, "learning_rate": 1.7002581235742766e-06, "loss": 0.6985, "step": 2649 }, { "epoch": 0.8175227518124325, "grad_norm": 0.0, "learning_rate": 1.6946867503383768e-06, "loss": 0.7653, "step": 2650 }, { "epoch": 0.8178312509640598, "grad_norm": 0.0, "learning_rate": 1.6891236749789297e-06, "loss": 0.7605, "step": 2651 }, { "epoch": 0.8181397501156872, "grad_norm": 0.0, "learning_rate": 1.6835689030540382e-06, "loss": 0.6845, "step": 2652 }, { "epoch": 0.8184482492673145, "grad_norm": 0.0, "learning_rate": 1.6780224401134903e-06, "loss": 0.687, "step": 2653 }, { "epoch": 0.8187567484189419, "grad_norm": 0.0, "learning_rate": 1.6724842916987882e-06, "loss": 0.6749, "step": 2654 }, { "epoch": 0.8190652475705692, "grad_norm": 0.0, "learning_rate": 1.6669544633431178e-06, "loss": 0.6974, "step": 2655 }, { "epoch": 0.8193737467221965, "grad_norm": 0.0, "learning_rate": 1.6614329605713597e-06, "loss": 0.6402, "step": 2656 }, { "epoch": 0.8196822458738239, "grad_norm": 0.0, "learning_rate": 1.655919788900071e-06, "loss": 0.6724, "step": 2657 }, { "epoch": 0.8199907450254512, "grad_norm": 0.0, "learning_rate": 1.6504149538374848e-06, "loss": 0.7253, "step": 2658 }, { "epoch": 0.8202992441770786, "grad_norm": 0.0, "learning_rate": 1.6449184608835112e-06, "loss": 0.7499, "step": 2659 }, { "epoch": 0.8206077433287059, "grad_norm": 0.0, "learning_rate": 1.6394303155297186e-06, "loss": 0.6714, "step": 2660 }, { "epoch": 0.8209162424803331, "grad_norm": 0.0, "learning_rate": 1.6339505232593356e-06, "loss": 0.6722, "step": 2661 }, { "epoch": 0.8212247416319605, "grad_norm": 0.0, "learning_rate": 1.6284790895472536e-06, "loss": 0.7015, "step": 2662 }, { "epoch": 0.8215332407835878, "grad_norm": 0.0, "learning_rate": 1.623016019860002e-06, "loss": 0.6303, "step": 2663 }, { "epoch": 0.8218417399352151, "grad_norm": 0.0, "learning_rate": 1.6175613196557648e-06, "loss": 0.7468, "step": 2664 }, { "epoch": 0.8221502390868425, "grad_norm": 0.0, "learning_rate": 1.6121149943843494e-06, "loss": 0.6882, "step": 2665 }, { "epoch": 0.8224587382384698, "grad_norm": 0.0, "learning_rate": 1.606677049487212e-06, "loss": 0.6778, "step": 2666 }, { "epoch": 0.8227672373900972, "grad_norm": 0.0, "learning_rate": 1.6012474903974218e-06, "loss": 0.6876, "step": 2667 }, { "epoch": 0.8230757365417245, "grad_norm": 0.0, "learning_rate": 1.5958263225396796e-06, "loss": 0.6676, "step": 2668 }, { "epoch": 0.8233842356933518, "grad_norm": 0.0, "learning_rate": 1.5904135513303032e-06, "loss": 0.6778, "step": 2669 }, { "epoch": 0.8236927348449792, "grad_norm": 0.0, "learning_rate": 1.5850091821772085e-06, "loss": 0.7142, "step": 2670 }, { "epoch": 0.8240012339966065, "grad_norm": 0.0, "learning_rate": 1.579613220479932e-06, "loss": 0.6722, "step": 2671 }, { "epoch": 0.8243097331482339, "grad_norm": 0.0, "learning_rate": 1.5742256716295978e-06, "loss": 0.657, "step": 2672 }, { "epoch": 0.8246182322998612, "grad_norm": 0.0, "learning_rate": 1.5688465410089371e-06, "loss": 0.7935, "step": 2673 }, { "epoch": 0.8249267314514885, "grad_norm": 0.0, "learning_rate": 1.5634758339922619e-06, "loss": 0.7298, "step": 2674 }, { "epoch": 0.8252352306031159, "grad_norm": 0.0, "learning_rate": 1.5581135559454653e-06, "loss": 0.6653, "step": 2675 }, { "epoch": 0.8255437297547432, "grad_norm": 0.0, "learning_rate": 1.5527597122260307e-06, "loss": 0.6033, "step": 2676 }, { "epoch": 0.8258522289063706, "grad_norm": 0.0, "learning_rate": 1.547414308183005e-06, "loss": 0.6975, "step": 2677 }, { "epoch": 0.8261607280579979, "grad_norm": 0.0, "learning_rate": 1.542077349157005e-06, "loss": 0.6166, "step": 2678 }, { "epoch": 0.8264692272096251, "grad_norm": 0.0, "learning_rate": 1.536748840480209e-06, "loss": 0.7719, "step": 2679 }, { "epoch": 0.8267777263612525, "grad_norm": 0.0, "learning_rate": 1.5314287874763578e-06, "loss": 0.7122, "step": 2680 }, { "epoch": 0.8270862255128798, "grad_norm": 0.0, "learning_rate": 1.5261171954607435e-06, "loss": 0.7992, "step": 2681 }, { "epoch": 0.8273947246645071, "grad_norm": 0.0, "learning_rate": 1.5208140697401942e-06, "loss": 0.7702, "step": 2682 }, { "epoch": 0.8277032238161345, "grad_norm": 0.0, "learning_rate": 1.5155194156130936e-06, "loss": 0.6515, "step": 2683 }, { "epoch": 0.8280117229677618, "grad_norm": 0.0, "learning_rate": 1.5102332383693496e-06, "loss": 0.6377, "step": 2684 }, { "epoch": 0.8283202221193892, "grad_norm": 0.0, "learning_rate": 1.5049555432904118e-06, "loss": 0.6163, "step": 2685 }, { "epoch": 0.8286287212710165, "grad_norm": 0.0, "learning_rate": 1.4996863356492464e-06, "loss": 0.6824, "step": 2686 }, { "epoch": 0.8289372204226438, "grad_norm": 0.0, "learning_rate": 1.4944256207103403e-06, "loss": 0.734, "step": 2687 }, { "epoch": 0.8292457195742712, "grad_norm": 0.0, "learning_rate": 1.4891734037297035e-06, "loss": 0.7307, "step": 2688 }, { "epoch": 0.8295542187258985, "grad_norm": 0.0, "learning_rate": 1.4839296899548438e-06, "loss": 0.6306, "step": 2689 }, { "epoch": 0.8298627178775259, "grad_norm": 0.0, "learning_rate": 1.4786944846247887e-06, "loss": 0.7072, "step": 2690 }, { "epoch": 0.8301712170291532, "grad_norm": 0.0, "learning_rate": 1.4734677929700435e-06, "loss": 0.7571, "step": 2691 }, { "epoch": 0.8304797161807805, "grad_norm": 0.0, "learning_rate": 1.4682496202126283e-06, "loss": 0.723, "step": 2692 }, { "epoch": 0.8307882153324079, "grad_norm": 0.0, "learning_rate": 1.4630399715660425e-06, "loss": 0.7182, "step": 2693 }, { "epoch": 0.8310967144840352, "grad_norm": 0.0, "learning_rate": 1.4578388522352682e-06, "loss": 0.623, "step": 2694 }, { "epoch": 0.8314052136356626, "grad_norm": 0.0, "learning_rate": 1.4526462674167685e-06, "loss": 0.6567, "step": 2695 }, { "epoch": 0.8317137127872898, "grad_norm": 0.0, "learning_rate": 1.447462222298477e-06, "loss": 0.662, "step": 2696 }, { "epoch": 0.8320222119389171, "grad_norm": 0.0, "learning_rate": 1.442286722059799e-06, "loss": 0.7251, "step": 2697 }, { "epoch": 0.8323307110905445, "grad_norm": 0.0, "learning_rate": 1.4371197718715967e-06, "loss": 0.6979, "step": 2698 }, { "epoch": 0.8326392102421718, "grad_norm": 0.0, "learning_rate": 1.4319613768962003e-06, "loss": 0.6503, "step": 2699 }, { "epoch": 0.8329477093937991, "grad_norm": 0.0, "learning_rate": 1.4268115422873807e-06, "loss": 0.7083, "step": 2700 }, { "epoch": 0.8332562085454265, "grad_norm": 0.0, "learning_rate": 1.4216702731903608e-06, "loss": 0.6817, "step": 2701 }, { "epoch": 0.8335647076970538, "grad_norm": 0.0, "learning_rate": 1.4165375747418097e-06, "loss": 0.7009, "step": 2702 }, { "epoch": 0.8338732068486812, "grad_norm": 0.0, "learning_rate": 1.4114134520698286e-06, "loss": 0.7503, "step": 2703 }, { "epoch": 0.8341817060003085, "grad_norm": 0.0, "learning_rate": 1.4062979102939478e-06, "loss": 0.6682, "step": 2704 }, { "epoch": 0.8344902051519358, "grad_norm": 0.0, "learning_rate": 1.4011909545251357e-06, "loss": 0.6859, "step": 2705 }, { "epoch": 0.8347987043035632, "grad_norm": 0.0, "learning_rate": 1.3960925898657674e-06, "loss": 0.7062, "step": 2706 }, { "epoch": 0.8351072034551905, "grad_norm": 0.0, "learning_rate": 1.3910028214096517e-06, "loss": 0.6987, "step": 2707 }, { "epoch": 0.8354157026068179, "grad_norm": 0.0, "learning_rate": 1.3859216542419907e-06, "loss": 0.6976, "step": 2708 }, { "epoch": 0.8357242017584452, "grad_norm": 0.0, "learning_rate": 1.3808490934394058e-06, "loss": 0.7817, "step": 2709 }, { "epoch": 0.8360327009100725, "grad_norm": 0.0, "learning_rate": 1.3757851440699132e-06, "loss": 0.6481, "step": 2710 }, { "epoch": 0.8363412000616999, "grad_norm": 0.0, "learning_rate": 1.3707298111929335e-06, "loss": 0.7614, "step": 2711 }, { "epoch": 0.8366496992133272, "grad_norm": 0.0, "learning_rate": 1.3656830998592674e-06, "loss": 0.6758, "step": 2712 }, { "epoch": 0.8369581983649544, "grad_norm": 0.0, "learning_rate": 1.3606450151111083e-06, "loss": 0.6121, "step": 2713 }, { "epoch": 0.8372666975165818, "grad_norm": 0.0, "learning_rate": 1.3556155619820322e-06, "loss": 0.6688, "step": 2714 }, { "epoch": 0.8375751966682091, "grad_norm": 0.0, "learning_rate": 1.3505947454969849e-06, "loss": 0.6738, "step": 2715 }, { "epoch": 0.8378836958198365, "grad_norm": 0.0, "learning_rate": 1.3455825706722913e-06, "loss": 0.6668, "step": 2716 }, { "epoch": 0.8381921949714638, "grad_norm": 0.0, "learning_rate": 1.340579042515636e-06, "loss": 0.7641, "step": 2717 }, { "epoch": 0.8385006941230911, "grad_norm": 0.0, "learning_rate": 1.3355841660260648e-06, "loss": 0.677, "step": 2718 }, { "epoch": 0.8388091932747185, "grad_norm": 0.0, "learning_rate": 1.3305979461939866e-06, "loss": 0.7054, "step": 2719 }, { "epoch": 0.8391176924263458, "grad_norm": 0.0, "learning_rate": 1.3256203880011531e-06, "loss": 0.7803, "step": 2720 }, { "epoch": 0.8394261915779732, "grad_norm": 0.0, "learning_rate": 1.3206514964206664e-06, "loss": 0.6283, "step": 2721 }, { "epoch": 0.8397346907296005, "grad_norm": 0.0, "learning_rate": 1.315691276416966e-06, "loss": 0.6933, "step": 2722 }, { "epoch": 0.8400431898812278, "grad_norm": 0.0, "learning_rate": 1.3107397329458348e-06, "loss": 0.6158, "step": 2723 }, { "epoch": 0.8403516890328552, "grad_norm": 0.0, "learning_rate": 1.3057968709543812e-06, "loss": 0.6628, "step": 2724 }, { "epoch": 0.8406601881844825, "grad_norm": 0.0, "learning_rate": 1.3008626953810388e-06, "loss": 0.7122, "step": 2725 }, { "epoch": 0.8409686873361099, "grad_norm": 0.0, "learning_rate": 1.295937211155568e-06, "loss": 0.7095, "step": 2726 }, { "epoch": 0.8412771864877372, "grad_norm": 0.0, "learning_rate": 1.2910204231990397e-06, "loss": 0.6882, "step": 2727 }, { "epoch": 0.8415856856393645, "grad_norm": 0.0, "learning_rate": 1.2861123364238438e-06, "loss": 0.7383, "step": 2728 }, { "epoch": 0.8418941847909919, "grad_norm": 0.0, "learning_rate": 1.2812129557336684e-06, "loss": 0.7447, "step": 2729 }, { "epoch": 0.8422026839426192, "grad_norm": 0.0, "learning_rate": 1.276322286023506e-06, "loss": 0.6829, "step": 2730 }, { "epoch": 0.8425111830942464, "grad_norm": 0.0, "learning_rate": 1.271440332179652e-06, "loss": 0.6144, "step": 2731 }, { "epoch": 0.8428196822458738, "grad_norm": 0.0, "learning_rate": 1.266567099079683e-06, "loss": 0.7344, "step": 2732 }, { "epoch": 0.8431281813975011, "grad_norm": 0.0, "learning_rate": 1.2617025915924764e-06, "loss": 0.6714, "step": 2733 }, { "epoch": 0.8434366805491285, "grad_norm": 0.0, "learning_rate": 1.2568468145781753e-06, "loss": 0.6946, "step": 2734 }, { "epoch": 0.8437451797007558, "grad_norm": 0.0, "learning_rate": 1.2519997728882138e-06, "loss": 0.7134, "step": 2735 }, { "epoch": 0.8440536788523831, "grad_norm": 0.0, "learning_rate": 1.2471614713652902e-06, "loss": 0.7373, "step": 2736 }, { "epoch": 0.8443621780040105, "grad_norm": 0.0, "learning_rate": 1.2423319148433777e-06, "loss": 0.7617, "step": 2737 }, { "epoch": 0.8446706771556378, "grad_norm": 0.0, "learning_rate": 1.2375111081477065e-06, "loss": 0.6986, "step": 2738 }, { "epoch": 0.8449791763072652, "grad_norm": 0.0, "learning_rate": 1.2326990560947627e-06, "loss": 0.6288, "step": 2739 }, { "epoch": 0.8452876754588925, "grad_norm": 0.0, "learning_rate": 1.2278957634922972e-06, "loss": 0.6866, "step": 2740 }, { "epoch": 0.8455961746105198, "grad_norm": 0.0, "learning_rate": 1.2231012351392958e-06, "loss": 0.664, "step": 2741 }, { "epoch": 0.8459046737621472, "grad_norm": 0.0, "learning_rate": 1.218315475825994e-06, "loss": 0.6362, "step": 2742 }, { "epoch": 0.8462131729137745, "grad_norm": 0.0, "learning_rate": 1.2135384903338699e-06, "loss": 0.7224, "step": 2743 }, { "epoch": 0.8465216720654019, "grad_norm": 0.0, "learning_rate": 1.2087702834356264e-06, "loss": 0.6184, "step": 2744 }, { "epoch": 0.8468301712170292, "grad_norm": 0.0, "learning_rate": 1.2040108598952072e-06, "loss": 0.7239, "step": 2745 }, { "epoch": 0.8471386703686565, "grad_norm": 0.0, "learning_rate": 1.1992602244677653e-06, "loss": 0.7085, "step": 2746 }, { "epoch": 0.8474471695202839, "grad_norm": 0.0, "learning_rate": 1.194518381899691e-06, "loss": 0.712, "step": 2747 }, { "epoch": 0.8477556686719111, "grad_norm": 0.0, "learning_rate": 1.1897853369285738e-06, "loss": 0.6558, "step": 2748 }, { "epoch": 0.8480641678235384, "grad_norm": 0.0, "learning_rate": 1.185061094283224e-06, "loss": 0.6414, "step": 2749 }, { "epoch": 0.8483726669751658, "grad_norm": 0.0, "learning_rate": 1.180345658683658e-06, "loss": 0.6253, "step": 2750 }, { "epoch": 0.8486811661267931, "grad_norm": 0.0, "learning_rate": 1.1756390348410819e-06, "loss": 0.699, "step": 2751 }, { "epoch": 0.8489896652784205, "grad_norm": 0.0, "learning_rate": 1.170941227457909e-06, "loss": 0.6701, "step": 2752 }, { "epoch": 0.8492981644300478, "grad_norm": 0.0, "learning_rate": 1.1662522412277388e-06, "loss": 0.7196, "step": 2753 }, { "epoch": 0.8496066635816751, "grad_norm": 0.0, "learning_rate": 1.1615720808353604e-06, "loss": 0.7144, "step": 2754 }, { "epoch": 0.8499151627333025, "grad_norm": 0.0, "learning_rate": 1.156900750956742e-06, "loss": 0.7172, "step": 2755 }, { "epoch": 0.8502236618849298, "grad_norm": 0.0, "learning_rate": 1.1522382562590305e-06, "loss": 0.6855, "step": 2756 }, { "epoch": 0.8505321610365572, "grad_norm": 0.0, "learning_rate": 1.1475846014005477e-06, "loss": 0.6519, "step": 2757 }, { "epoch": 0.8508406601881845, "grad_norm": 0.0, "learning_rate": 1.1429397910307794e-06, "loss": 0.6767, "step": 2758 }, { "epoch": 0.8511491593398118, "grad_norm": 0.0, "learning_rate": 1.138303829790378e-06, "loss": 0.7498, "step": 2759 }, { "epoch": 0.8514576584914392, "grad_norm": 0.0, "learning_rate": 1.1336767223111521e-06, "loss": 0.7421, "step": 2760 }, { "epoch": 0.8517661576430665, "grad_norm": 0.0, "learning_rate": 1.129058473216066e-06, "loss": 0.6301, "step": 2761 }, { "epoch": 0.8520746567946939, "grad_norm": 0.0, "learning_rate": 1.1244490871192382e-06, "loss": 0.6477, "step": 2762 }, { "epoch": 0.8523831559463212, "grad_norm": 0.0, "learning_rate": 1.1198485686259208e-06, "loss": 0.6272, "step": 2763 }, { "epoch": 0.8526916550979485, "grad_norm": 0.0, "learning_rate": 1.1152569223325171e-06, "loss": 0.698, "step": 2764 }, { "epoch": 0.8530001542495758, "grad_norm": 0.0, "learning_rate": 1.1106741528265585e-06, "loss": 0.6155, "step": 2765 }, { "epoch": 0.8533086534012031, "grad_norm": 0.0, "learning_rate": 1.1061002646867159e-06, "loss": 0.6813, "step": 2766 }, { "epoch": 0.8536171525528304, "grad_norm": 0.0, "learning_rate": 1.101535262482778e-06, "loss": 0.6624, "step": 2767 }, { "epoch": 0.8539256517044578, "grad_norm": 0.0, "learning_rate": 1.0969791507756588e-06, "loss": 0.9815, "step": 2768 }, { "epoch": 0.8542341508560851, "grad_norm": 0.0, "learning_rate": 1.0924319341173938e-06, "loss": 0.7458, "step": 2769 }, { "epoch": 0.8545426500077125, "grad_norm": 0.0, "learning_rate": 1.0878936170511246e-06, "loss": 0.7608, "step": 2770 }, { "epoch": 0.8548511491593398, "grad_norm": 0.0, "learning_rate": 1.0833642041111102e-06, "loss": 0.666, "step": 2771 }, { "epoch": 0.8551596483109671, "grad_norm": 0.0, "learning_rate": 1.0788436998226991e-06, "loss": 0.7159, "step": 2772 }, { "epoch": 0.8554681474625945, "grad_norm": 0.0, "learning_rate": 1.0743321087023528e-06, "loss": 0.6403, "step": 2773 }, { "epoch": 0.8557766466142218, "grad_norm": 0.0, "learning_rate": 1.0698294352576232e-06, "loss": 0.706, "step": 2774 }, { "epoch": 0.8560851457658492, "grad_norm": 0.0, "learning_rate": 1.065335683987152e-06, "loss": 0.634, "step": 2775 }, { "epoch": 0.8563936449174765, "grad_norm": 0.0, "learning_rate": 1.0608508593806655e-06, "loss": 0.7068, "step": 2776 }, { "epoch": 0.8567021440691038, "grad_norm": 0.0, "learning_rate": 1.0563749659189703e-06, "loss": 0.7045, "step": 2777 }, { "epoch": 0.8570106432207312, "grad_norm": 0.0, "learning_rate": 1.0519080080739562e-06, "loss": 0.6228, "step": 2778 }, { "epoch": 0.8573191423723585, "grad_norm": 0.0, "learning_rate": 1.047449990308579e-06, "loss": 0.767, "step": 2779 }, { "epoch": 0.8576276415239859, "grad_norm": 0.0, "learning_rate": 1.043000917076863e-06, "loss": 0.714, "step": 2780 }, { "epoch": 0.8579361406756132, "grad_norm": 0.0, "learning_rate": 1.0385607928239017e-06, "loss": 0.6613, "step": 2781 }, { "epoch": 0.8582446398272405, "grad_norm": 0.0, "learning_rate": 1.0341296219858398e-06, "loss": 0.6747, "step": 2782 }, { "epoch": 0.8585531389788678, "grad_norm": 0.0, "learning_rate": 1.029707408989884e-06, "loss": 0.7062, "step": 2783 }, { "epoch": 0.8588616381304951, "grad_norm": 0.0, "learning_rate": 1.0252941582542852e-06, "loss": 0.5831, "step": 2784 }, { "epoch": 0.8591701372821224, "grad_norm": 0.0, "learning_rate": 1.02088987418834e-06, "loss": 0.6918, "step": 2785 }, { "epoch": 0.8594786364337498, "grad_norm": 0.0, "learning_rate": 1.0164945611923948e-06, "loss": 0.7209, "step": 2786 }, { "epoch": 0.8597871355853771, "grad_norm": 0.0, "learning_rate": 1.0121082236578205e-06, "loss": 0.6809, "step": 2787 }, { "epoch": 0.8600956347370045, "grad_norm": 0.0, "learning_rate": 1.0077308659670348e-06, "loss": 0.7467, "step": 2788 }, { "epoch": 0.8604041338886318, "grad_norm": 0.0, "learning_rate": 1.003362492493466e-06, "loss": 0.6971, "step": 2789 }, { "epoch": 0.8607126330402591, "grad_norm": 0.0, "learning_rate": 9.990031076015838e-07, "loss": 0.7136, "step": 2790 }, { "epoch": 0.8610211321918865, "grad_norm": 0.0, "learning_rate": 9.946527156468643e-07, "loss": 0.7539, "step": 2791 }, { "epoch": 0.8613296313435138, "grad_norm": 0.0, "learning_rate": 9.903113209758098e-07, "loss": 0.7467, "step": 2792 }, { "epoch": 0.8616381304951412, "grad_norm": 0.0, "learning_rate": 9.859789279259225e-07, "loss": 0.6418, "step": 2793 }, { "epoch": 0.8619466296467685, "grad_norm": 0.0, "learning_rate": 9.816555408257177e-07, "loss": 0.7238, "step": 2794 }, { "epoch": 0.8622551287983958, "grad_norm": 0.0, "learning_rate": 9.773411639947117e-07, "loss": 0.6799, "step": 2795 }, { "epoch": 0.8625636279500232, "grad_norm": 0.0, "learning_rate": 9.730358017434194e-07, "loss": 0.6993, "step": 2796 }, { "epoch": 0.8628721271016505, "grad_norm": 0.0, "learning_rate": 9.687394583733435e-07, "loss": 0.659, "step": 2797 }, { "epoch": 0.8631806262532778, "grad_norm": 0.0, "learning_rate": 9.644521381769855e-07, "loss": 0.7527, "step": 2798 }, { "epoch": 0.8634891254049052, "grad_norm": 0.0, "learning_rate": 9.601738454378229e-07, "loss": 0.7055, "step": 2799 }, { "epoch": 0.8637976245565324, "grad_norm": 0.0, "learning_rate": 9.559045844303193e-07, "loss": 0.9917, "step": 2800 }, { "epoch": 0.8641061237081598, "grad_norm": 0.0, "learning_rate": 9.516443594199143e-07, "loss": 0.677, "step": 2801 }, { "epoch": 0.8644146228597871, "grad_norm": 0.0, "learning_rate": 9.47393174663016e-07, "loss": 0.7474, "step": 2802 }, { "epoch": 0.8647231220114144, "grad_norm": 0.0, "learning_rate": 9.431510344070005e-07, "loss": 0.7235, "step": 2803 }, { "epoch": 0.8650316211630418, "grad_norm": 0.0, "learning_rate": 9.389179428902139e-07, "loss": 0.5918, "step": 2804 }, { "epoch": 0.8653401203146691, "grad_norm": 0.0, "learning_rate": 9.346939043419568e-07, "loss": 0.6162, "step": 2805 }, { "epoch": 0.8656486194662965, "grad_norm": 0.0, "learning_rate": 9.304789229824806e-07, "loss": 0.7186, "step": 2806 }, { "epoch": 0.8659571186179238, "grad_norm": 0.0, "learning_rate": 9.262730030229983e-07, "loss": 0.9891, "step": 2807 }, { "epoch": 0.8662656177695511, "grad_norm": 0.0, "learning_rate": 9.220761486656593e-07, "loss": 0.7034, "step": 2808 }, { "epoch": 0.8665741169211785, "grad_norm": 0.0, "learning_rate": 9.178883641035652e-07, "loss": 0.7197, "step": 2809 }, { "epoch": 0.8668826160728058, "grad_norm": 0.0, "learning_rate": 9.137096535207457e-07, "loss": 0.6205, "step": 2810 }, { "epoch": 0.8671911152244332, "grad_norm": 0.0, "learning_rate": 9.095400210921712e-07, "loss": 0.6383, "step": 2811 }, { "epoch": 0.8674996143760605, "grad_norm": 0.0, "learning_rate": 9.053794709837415e-07, "loss": 0.7297, "step": 2812 }, { "epoch": 0.8678081135276878, "grad_norm": 0.0, "learning_rate": 9.012280073522816e-07, "loss": 0.6416, "step": 2813 }, { "epoch": 0.8681166126793152, "grad_norm": 0.0, "learning_rate": 8.97085634345537e-07, "loss": 0.752, "step": 2814 }, { "epoch": 0.8684251118309425, "grad_norm": 0.0, "learning_rate": 8.929523561021691e-07, "loss": 0.6442, "step": 2815 }, { "epoch": 0.8687336109825698, "grad_norm": 0.0, "learning_rate": 8.888281767517582e-07, "loss": 0.6653, "step": 2816 }, { "epoch": 0.8690421101341971, "grad_norm": 0.0, "learning_rate": 8.847131004147891e-07, "loss": 0.7447, "step": 2817 }, { "epoch": 0.8693506092858244, "grad_norm": 0.0, "learning_rate": 8.806071312026554e-07, "loss": 0.7057, "step": 2818 }, { "epoch": 0.8696591084374518, "grad_norm": 0.0, "learning_rate": 8.765102732176489e-07, "loss": 0.7113, "step": 2819 }, { "epoch": 0.8699676075890791, "grad_norm": 0.0, "learning_rate": 8.724225305529577e-07, "loss": 0.7002, "step": 2820 }, { "epoch": 0.8702761067407064, "grad_norm": 0.0, "learning_rate": 8.683439072926658e-07, "loss": 0.7051, "step": 2821 }, { "epoch": 0.8705846058923338, "grad_norm": 0.0, "learning_rate": 8.642744075117448e-07, "loss": 0.6729, "step": 2822 }, { "epoch": 0.8708931050439611, "grad_norm": 0.0, "learning_rate": 8.602140352760469e-07, "loss": 0.6511, "step": 2823 }, { "epoch": 0.8712016041955885, "grad_norm": 0.0, "learning_rate": 8.561627946423134e-07, "loss": 0.6652, "step": 2824 }, { "epoch": 0.8715101033472158, "grad_norm": 0.0, "learning_rate": 8.521206896581535e-07, "loss": 0.6922, "step": 2825 }, { "epoch": 0.8718186024988431, "grad_norm": 0.0, "learning_rate": 8.480877243620578e-07, "loss": 0.7121, "step": 2826 }, { "epoch": 0.8721271016504705, "grad_norm": 0.0, "learning_rate": 8.44063902783373e-07, "loss": 0.7083, "step": 2827 }, { "epoch": 0.8724356008020978, "grad_norm": 0.0, "learning_rate": 8.400492289423235e-07, "loss": 0.8806, "step": 2828 }, { "epoch": 0.8727440999537251, "grad_norm": 0.0, "learning_rate": 8.360437068499849e-07, "loss": 0.6684, "step": 2829 }, { "epoch": 0.8730525991053525, "grad_norm": 0.0, "learning_rate": 8.320473405082952e-07, "loss": 0.7055, "step": 2830 }, { "epoch": 0.8733610982569798, "grad_norm": 0.0, "learning_rate": 8.280601339100436e-07, "loss": 0.6907, "step": 2831 }, { "epoch": 0.8736695974086072, "grad_norm": 0.0, "learning_rate": 8.240820910388614e-07, "loss": 0.6585, "step": 2832 }, { "epoch": 0.8739780965602345, "grad_norm": 0.0, "learning_rate": 8.201132158692327e-07, "loss": 0.7069, "step": 2833 }, { "epoch": 0.8742865957118618, "grad_norm": 0.0, "learning_rate": 8.161535123664776e-07, "loss": 0.6241, "step": 2834 }, { "epoch": 0.8745950948634891, "grad_norm": 0.0, "learning_rate": 8.122029844867562e-07, "loss": 0.6609, "step": 2835 }, { "epoch": 0.8749035940151164, "grad_norm": 0.0, "learning_rate": 8.082616361770579e-07, "loss": 0.7406, "step": 2836 }, { "epoch": 0.8752120931667438, "grad_norm": 0.0, "learning_rate": 8.043294713751992e-07, "loss": 0.6594, "step": 2837 }, { "epoch": 0.8755205923183711, "grad_norm": 0.0, "learning_rate": 8.004064940098277e-07, "loss": 0.6504, "step": 2838 }, { "epoch": 0.8758290914699984, "grad_norm": 0.0, "learning_rate": 7.964927080004059e-07, "loss": 0.6821, "step": 2839 }, { "epoch": 0.8761375906216258, "grad_norm": 0.0, "learning_rate": 7.925881172572169e-07, "loss": 0.7181, "step": 2840 }, { "epoch": 0.8764460897732531, "grad_norm": 0.0, "learning_rate": 7.886927256813514e-07, "loss": 0.6449, "step": 2841 }, { "epoch": 0.8767545889248805, "grad_norm": 0.0, "learning_rate": 7.848065371647162e-07, "loss": 0.6459, "step": 2842 }, { "epoch": 0.8770630880765078, "grad_norm": 0.0, "learning_rate": 7.809295555900231e-07, "loss": 0.7463, "step": 2843 }, { "epoch": 0.8773715872281351, "grad_norm": 0.0, "learning_rate": 7.770617848307749e-07, "loss": 0.6198, "step": 2844 }, { "epoch": 0.8776800863797625, "grad_norm": 0.0, "learning_rate": 7.732032287512847e-07, "loss": 0.7149, "step": 2845 }, { "epoch": 0.8779885855313898, "grad_norm": 0.0, "learning_rate": 7.6935389120665e-07, "loss": 0.6899, "step": 2846 }, { "epoch": 0.8782970846830171, "grad_norm": 0.0, "learning_rate": 7.655137760427645e-07, "loss": 0.6932, "step": 2847 }, { "epoch": 0.8786055838346445, "grad_norm": 0.0, "learning_rate": 7.616828870963023e-07, "loss": 0.6516, "step": 2848 }, { "epoch": 0.8789140829862718, "grad_norm": 0.0, "learning_rate": 7.578612281947229e-07, "loss": 0.6304, "step": 2849 }, { "epoch": 0.8792225821378992, "grad_norm": 0.0, "learning_rate": 7.540488031562642e-07, "loss": 0.6516, "step": 2850 }, { "epoch": 0.8795310812895265, "grad_norm": 0.0, "learning_rate": 7.502456157899351e-07, "loss": 0.6183, "step": 2851 }, { "epoch": 0.8798395804411537, "grad_norm": 0.0, "learning_rate": 7.464516698955226e-07, "loss": 0.6367, "step": 2852 }, { "epoch": 0.8801480795927811, "grad_norm": 0.0, "learning_rate": 7.426669692635691e-07, "loss": 0.673, "step": 2853 }, { "epoch": 0.8804565787444084, "grad_norm": 0.0, "learning_rate": 7.388915176753886e-07, "loss": 0.7608, "step": 2854 }, { "epoch": 0.8807650778960358, "grad_norm": 0.0, "learning_rate": 7.351253189030538e-07, "loss": 0.7474, "step": 2855 }, { "epoch": 0.8810735770476631, "grad_norm": 0.0, "learning_rate": 7.313683767093915e-07, "loss": 0.669, "step": 2856 }, { "epoch": 0.8813820761992904, "grad_norm": 0.0, "learning_rate": 7.276206948479769e-07, "loss": 0.633, "step": 2857 }, { "epoch": 0.8816905753509178, "grad_norm": 0.0, "learning_rate": 7.23882277063136e-07, "loss": 0.6076, "step": 2858 }, { "epoch": 0.8819990745025451, "grad_norm": 0.0, "learning_rate": 7.20153127089942e-07, "loss": 0.6447, "step": 2859 }, { "epoch": 0.8823075736541725, "grad_norm": 0.0, "learning_rate": 7.164332486542036e-07, "loss": 0.7517, "step": 2860 }, { "epoch": 0.8826160728057998, "grad_norm": 0.0, "learning_rate": 7.127226454724689e-07, "loss": 0.7321, "step": 2861 }, { "epoch": 0.8829245719574271, "grad_norm": 0.0, "learning_rate": 7.090213212520191e-07, "loss": 0.6784, "step": 2862 }, { "epoch": 0.8832330711090545, "grad_norm": 0.0, "learning_rate": 7.053292796908629e-07, "loss": 0.7292, "step": 2863 }, { "epoch": 0.8835415702606818, "grad_norm": 0.0, "learning_rate": 7.01646524477737e-07, "loss": 0.6657, "step": 2864 }, { "epoch": 0.8838500694123091, "grad_norm": 0.0, "learning_rate": 6.979730592920997e-07, "loss": 0.7044, "step": 2865 }, { "epoch": 0.8841585685639365, "grad_norm": 0.0, "learning_rate": 6.943088878041238e-07, "loss": 0.7455, "step": 2866 }, { "epoch": 0.8844670677155638, "grad_norm": 0.0, "learning_rate": 6.906540136747031e-07, "loss": 0.6747, "step": 2867 }, { "epoch": 0.8847755668671912, "grad_norm": 0.0, "learning_rate": 6.870084405554367e-07, "loss": 0.6485, "step": 2868 }, { "epoch": 0.8850840660188184, "grad_norm": 0.0, "learning_rate": 6.833721720886377e-07, "loss": 0.7415, "step": 2869 }, { "epoch": 0.8853925651704457, "grad_norm": 0.0, "learning_rate": 6.797452119073111e-07, "loss": 0.6579, "step": 2870 }, { "epoch": 0.8857010643220731, "grad_norm": 0.0, "learning_rate": 6.76127563635176e-07, "loss": 0.7267, "step": 2871 }, { "epoch": 0.8860095634737004, "grad_norm": 0.0, "learning_rate": 6.725192308866368e-07, "loss": 0.7453, "step": 2872 }, { "epoch": 0.8863180626253278, "grad_norm": 0.0, "learning_rate": 6.689202172667996e-07, "loss": 0.6308, "step": 2873 }, { "epoch": 0.8866265617769551, "grad_norm": 0.0, "learning_rate": 6.653305263714526e-07, "loss": 0.6391, "step": 2874 }, { "epoch": 0.8869350609285824, "grad_norm": 0.0, "learning_rate": 6.617501617870726e-07, "loss": 0.6722, "step": 2875 }, { "epoch": 0.8872435600802098, "grad_norm": 0.0, "learning_rate": 6.581791270908189e-07, "loss": 0.657, "step": 2876 }, { "epoch": 0.8875520592318371, "grad_norm": 0.0, "learning_rate": 6.546174258505289e-07, "loss": 0.6685, "step": 2877 }, { "epoch": 0.8878605583834644, "grad_norm": 0.0, "learning_rate": 6.510650616247116e-07, "loss": 0.6154, "step": 2878 }, { "epoch": 0.8881690575350918, "grad_norm": 0.0, "learning_rate": 6.475220379625546e-07, "loss": 0.7037, "step": 2879 }, { "epoch": 0.8884775566867191, "grad_norm": 0.0, "learning_rate": 6.439883584039064e-07, "loss": 0.6844, "step": 2880 }, { "epoch": 0.8887860558383465, "grad_norm": 0.0, "learning_rate": 6.404640264792839e-07, "loss": 0.7061, "step": 2881 }, { "epoch": 0.8890945549899738, "grad_norm": 0.0, "learning_rate": 6.369490457098626e-07, "loss": 0.7333, "step": 2882 }, { "epoch": 0.8894030541416011, "grad_norm": 0.0, "learning_rate": 6.334434196074757e-07, "loss": 0.7895, "step": 2883 }, { "epoch": 0.8897115532932285, "grad_norm": 0.0, "learning_rate": 6.299471516746092e-07, "loss": 0.6718, "step": 2884 }, { "epoch": 0.8900200524448558, "grad_norm": 0.0, "learning_rate": 6.264602454044033e-07, "loss": 0.7163, "step": 2885 }, { "epoch": 0.8903285515964832, "grad_norm": 0.0, "learning_rate": 6.229827042806403e-07, "loss": 0.7093, "step": 2886 }, { "epoch": 0.8906370507481104, "grad_norm": 0.0, "learning_rate": 6.195145317777452e-07, "loss": 0.6634, "step": 2887 }, { "epoch": 0.8909455498997377, "grad_norm": 0.0, "learning_rate": 6.160557313607884e-07, "loss": 0.72, "step": 2888 }, { "epoch": 0.8912540490513651, "grad_norm": 0.0, "learning_rate": 6.126063064854715e-07, "loss": 0.7211, "step": 2889 }, { "epoch": 0.8915625482029924, "grad_norm": 0.0, "learning_rate": 6.091662605981319e-07, "loss": 0.7169, "step": 2890 }, { "epoch": 0.8918710473546198, "grad_norm": 0.0, "learning_rate": 6.057355971357359e-07, "loss": 0.6511, "step": 2891 }, { "epoch": 0.8921795465062471, "grad_norm": 0.0, "learning_rate": 6.023143195258708e-07, "loss": 0.7037, "step": 2892 }, { "epoch": 0.8924880456578744, "grad_norm": 0.0, "learning_rate": 5.989024311867553e-07, "loss": 0.7011, "step": 2893 }, { "epoch": 0.8927965448095018, "grad_norm": 0.0, "learning_rate": 5.954999355272207e-07, "loss": 0.7156, "step": 2894 }, { "epoch": 0.8931050439611291, "grad_norm": 0.0, "learning_rate": 5.921068359467164e-07, "loss": 0.7205, "step": 2895 }, { "epoch": 0.8934135431127564, "grad_norm": 0.0, "learning_rate": 5.887231358353018e-07, "loss": 0.7144, "step": 2896 }, { "epoch": 0.8937220422643838, "grad_norm": 0.0, "learning_rate": 5.853488385736506e-07, "loss": 0.6505, "step": 2897 }, { "epoch": 0.8940305414160111, "grad_norm": 0.0, "learning_rate": 5.819839475330358e-07, "loss": 0.69, "step": 2898 }, { "epoch": 0.8943390405676385, "grad_norm": 0.0, "learning_rate": 5.786284660753372e-07, "loss": 0.7309, "step": 2899 }, { "epoch": 0.8946475397192658, "grad_norm": 0.0, "learning_rate": 5.752823975530308e-07, "loss": 0.6849, "step": 2900 }, { "epoch": 0.8949560388708931, "grad_norm": 0.0, "learning_rate": 5.719457453091881e-07, "loss": 0.6756, "step": 2901 }, { "epoch": 0.8952645380225205, "grad_norm": 0.0, "learning_rate": 5.686185126774757e-07, "loss": 0.6996, "step": 2902 }, { "epoch": 0.8955730371741478, "grad_norm": 0.0, "learning_rate": 5.653007029821467e-07, "loss": 0.719, "step": 2903 }, { "epoch": 0.895881536325775, "grad_norm": 0.0, "learning_rate": 5.619923195380372e-07, "loss": 0.7229, "step": 2904 }, { "epoch": 0.8961900354774024, "grad_norm": 0.0, "learning_rate": 5.586933656505711e-07, "loss": 0.7041, "step": 2905 }, { "epoch": 0.8964985346290297, "grad_norm": 0.0, "learning_rate": 5.554038446157473e-07, "loss": 0.6751, "step": 2906 }, { "epoch": 0.8968070337806571, "grad_norm": 0.0, "learning_rate": 5.521237597201423e-07, "loss": 0.7222, "step": 2907 }, { "epoch": 0.8971155329322844, "grad_norm": 0.0, "learning_rate": 5.488531142409015e-07, "loss": 0.6142, "step": 2908 }, { "epoch": 0.8974240320839117, "grad_norm": 0.0, "learning_rate": 5.455919114457442e-07, "loss": 0.6486, "step": 2909 }, { "epoch": 0.8977325312355391, "grad_norm": 0.0, "learning_rate": 5.423401545929518e-07, "loss": 0.6912, "step": 2910 }, { "epoch": 0.8980410303871664, "grad_norm": 0.0, "learning_rate": 5.390978469313701e-07, "loss": 0.7119, "step": 2911 }, { "epoch": 0.8983495295387938, "grad_norm": 0.0, "learning_rate": 5.358649917004033e-07, "loss": 0.6782, "step": 2912 }, { "epoch": 0.8986580286904211, "grad_norm": 0.0, "learning_rate": 5.326415921300099e-07, "loss": 0.7136, "step": 2913 }, { "epoch": 0.8989665278420484, "grad_norm": 0.0, "learning_rate": 5.294276514407071e-07, "loss": 0.6287, "step": 2914 }, { "epoch": 0.8992750269936758, "grad_norm": 0.0, "learning_rate": 5.262231728435541e-07, "loss": 0.732, "step": 2915 }, { "epoch": 0.8995835261453031, "grad_norm": 0.0, "learning_rate": 5.230281595401621e-07, "loss": 0.6561, "step": 2916 }, { "epoch": 0.8998920252969305, "grad_norm": 0.0, "learning_rate": 5.198426147226843e-07, "loss": 0.6318, "step": 2917 }, { "epoch": 0.9002005244485578, "grad_norm": 0.0, "learning_rate": 5.166665415738093e-07, "loss": 0.6749, "step": 2918 }, { "epoch": 0.9005090236001851, "grad_norm": 0.0, "learning_rate": 5.134999432667708e-07, "loss": 0.6787, "step": 2919 }, { "epoch": 0.9008175227518125, "grad_norm": 0.0, "learning_rate": 5.103428229653296e-07, "loss": 0.7511, "step": 2920 }, { "epoch": 0.9011260219034397, "grad_norm": 0.0, "learning_rate": 5.071951838237787e-07, "loss": 0.665, "step": 2921 }, { "epoch": 0.901434521055067, "grad_norm": 0.0, "learning_rate": 5.040570289869384e-07, "loss": 0.6458, "step": 2922 }, { "epoch": 0.9017430202066944, "grad_norm": 0.0, "learning_rate": 5.009283615901561e-07, "loss": 0.669, "step": 2923 }, { "epoch": 0.9020515193583217, "grad_norm": 0.0, "learning_rate": 4.978091847592959e-07, "loss": 0.6912, "step": 2924 }, { "epoch": 0.9023600185099491, "grad_norm": 0.0, "learning_rate": 4.946995016107392e-07, "loss": 0.6912, "step": 2925 }, { "epoch": 0.9026685176615764, "grad_norm": 0.0, "learning_rate": 4.915993152513887e-07, "loss": 0.7632, "step": 2926 }, { "epoch": 0.9029770168132037, "grad_norm": 0.0, "learning_rate": 4.885086287786522e-07, "loss": 0.6636, "step": 2927 }, { "epoch": 0.9032855159648311, "grad_norm": 0.0, "learning_rate": 4.854274452804519e-07, "loss": 0.7389, "step": 2928 }, { "epoch": 0.9035940151164584, "grad_norm": 0.0, "learning_rate": 4.823557678352097e-07, "loss": 0.6548, "step": 2929 }, { "epoch": 0.9039025142680858, "grad_norm": 0.0, "learning_rate": 4.792935995118509e-07, "loss": 0.7511, "step": 2930 }, { "epoch": 0.9042110134197131, "grad_norm": 0.0, "learning_rate": 4.7624094336980565e-07, "loss": 0.7295, "step": 2931 }, { "epoch": 0.9045195125713404, "grad_norm": 0.0, "learning_rate": 4.731978024589945e-07, "loss": 0.65, "step": 2932 }, { "epoch": 0.9048280117229678, "grad_norm": 0.0, "learning_rate": 4.701641798198353e-07, "loss": 0.6513, "step": 2933 }, { "epoch": 0.9051365108745951, "grad_norm": 0.0, "learning_rate": 4.671400784832314e-07, "loss": 0.7507, "step": 2934 }, { "epoch": 0.9054450100262225, "grad_norm": 0.0, "learning_rate": 4.641255014705781e-07, "loss": 0.6802, "step": 2935 }, { "epoch": 0.9057535091778498, "grad_norm": 0.0, "learning_rate": 4.6112045179375286e-07, "loss": 0.6565, "step": 2936 }, { "epoch": 0.9060620083294771, "grad_norm": 0.0, "learning_rate": 4.58124932455114e-07, "loss": 0.7055, "step": 2937 }, { "epoch": 0.9063705074811045, "grad_norm": 0.0, "learning_rate": 4.5513894644750087e-07, "loss": 0.7324, "step": 2938 }, { "epoch": 0.9066790066327317, "grad_norm": 0.0, "learning_rate": 4.5216249675422153e-07, "loss": 0.7231, "step": 2939 }, { "epoch": 0.906987505784359, "grad_norm": 0.0, "learning_rate": 4.491955863490649e-07, "loss": 0.69, "step": 2940 }, { "epoch": 0.9072960049359864, "grad_norm": 0.0, "learning_rate": 4.462382181962832e-07, "loss": 0.6883, "step": 2941 }, { "epoch": 0.9076045040876137, "grad_norm": 0.0, "learning_rate": 4.43290395250594e-07, "loss": 0.6147, "step": 2942 }, { "epoch": 0.9079130032392411, "grad_norm": 0.0, "learning_rate": 4.4035212045718366e-07, "loss": 0.7843, "step": 2943 }, { "epoch": 0.9082215023908684, "grad_norm": 0.0, "learning_rate": 4.37423396751695e-07, "loss": 0.7698, "step": 2944 }, { "epoch": 0.9085300015424957, "grad_norm": 0.0, "learning_rate": 4.3450422706022956e-07, "loss": 0.7347, "step": 2945 }, { "epoch": 0.9088385006941231, "grad_norm": 0.0, "learning_rate": 4.315946142993432e-07, "loss": 0.6458, "step": 2946 }, { "epoch": 0.9091469998457504, "grad_norm": 0.0, "learning_rate": 4.286945613760429e-07, "loss": 0.7321, "step": 2947 }, { "epoch": 0.9094554989973778, "grad_norm": 0.0, "learning_rate": 4.258040711877842e-07, "loss": 0.7338, "step": 2948 }, { "epoch": 0.9097639981490051, "grad_norm": 0.0, "learning_rate": 4.229231466224693e-07, "loss": 0.6162, "step": 2949 }, { "epoch": 0.9100724973006324, "grad_norm": 0.0, "learning_rate": 4.200517905584467e-07, "loss": 0.632, "step": 2950 }, { "epoch": 0.9103809964522598, "grad_norm": 0.0, "learning_rate": 4.1719000586449507e-07, "loss": 0.662, "step": 2951 }, { "epoch": 0.9106894956038871, "grad_norm": 0.0, "learning_rate": 4.143377953998418e-07, "loss": 0.7428, "step": 2952 }, { "epoch": 0.9109979947555145, "grad_norm": 0.0, "learning_rate": 4.1149516201413944e-07, "loss": 0.6866, "step": 2953 }, { "epoch": 0.9113064939071418, "grad_norm": 0.0, "learning_rate": 4.0866210854747956e-07, "loss": 0.6504, "step": 2954 }, { "epoch": 0.9116149930587691, "grad_norm": 0.0, "learning_rate": 4.058386378303769e-07, "loss": 0.6522, "step": 2955 }, { "epoch": 0.9119234922103964, "grad_norm": 0.0, "learning_rate": 4.0302475268377386e-07, "loss": 0.6448, "step": 2956 }, { "epoch": 0.9122319913620237, "grad_norm": 0.0, "learning_rate": 4.00220455919037e-07, "loss": 0.6592, "step": 2957 }, { "epoch": 0.912540490513651, "grad_norm": 0.0, "learning_rate": 3.974257503379508e-07, "loss": 0.6448, "step": 2958 }, { "epoch": 0.9128489896652784, "grad_norm": 0.0, "learning_rate": 3.9464063873272064e-07, "loss": 0.6848, "step": 2959 }, { "epoch": 0.9131574888169057, "grad_norm": 0.0, "learning_rate": 3.9186512388596053e-07, "loss": 0.7141, "step": 2960 }, { "epoch": 0.9134659879685331, "grad_norm": 0.0, "learning_rate": 3.8909920857070237e-07, "loss": 0.6235, "step": 2961 }, { "epoch": 0.9137744871201604, "grad_norm": 0.0, "learning_rate": 3.863428955503856e-07, "loss": 0.6826, "step": 2962 }, { "epoch": 0.9140829862717877, "grad_norm": 0.0, "learning_rate": 3.835961875788552e-07, "loss": 0.6908, "step": 2963 }, { "epoch": 0.9143914854234151, "grad_norm": 0.0, "learning_rate": 3.8085908740035706e-07, "loss": 0.6579, "step": 2964 }, { "epoch": 0.9146999845750424, "grad_norm": 0.0, "learning_rate": 3.7813159774954256e-07, "loss": 0.7451, "step": 2965 }, { "epoch": 0.9150084837266698, "grad_norm": 0.0, "learning_rate": 3.754137213514586e-07, "loss": 0.6747, "step": 2966 }, { "epoch": 0.9153169828782971, "grad_norm": 0.0, "learning_rate": 3.7270546092154856e-07, "loss": 0.7102, "step": 2967 }, { "epoch": 0.9156254820299244, "grad_norm": 0.0, "learning_rate": 3.7000681916564583e-07, "loss": 0.7626, "step": 2968 }, { "epoch": 0.9159339811815518, "grad_norm": 0.0, "learning_rate": 3.673177987799781e-07, "loss": 0.6208, "step": 2969 }, { "epoch": 0.9162424803331791, "grad_norm": 0.0, "learning_rate": 3.646384024511562e-07, "loss": 0.7493, "step": 2970 }, { "epoch": 0.9165509794848065, "grad_norm": 0.0, "learning_rate": 3.6196863285618e-07, "loss": 0.7094, "step": 2971 }, { "epoch": 0.9168594786364338, "grad_norm": 0.0, "learning_rate": 3.5930849266242465e-07, "loss": 0.6915, "step": 2972 }, { "epoch": 0.917167977788061, "grad_norm": 0.0, "learning_rate": 3.5665798452764966e-07, "loss": 0.6739, "step": 2973 }, { "epoch": 0.9174764769396884, "grad_norm": 0.0, "learning_rate": 3.540171110999913e-07, "loss": 0.6801, "step": 2974 }, { "epoch": 0.9177849760913157, "grad_norm": 0.0, "learning_rate": 3.5138587501795884e-07, "loss": 0.6038, "step": 2975 }, { "epoch": 0.918093475242943, "grad_norm": 0.0, "learning_rate": 3.487642789104295e-07, "loss": 0.6319, "step": 2976 }, { "epoch": 0.9184019743945704, "grad_norm": 0.0, "learning_rate": 3.4615232539665254e-07, "loss": 0.7123, "step": 2977 }, { "epoch": 0.9187104735461977, "grad_norm": 0.0, "learning_rate": 3.4355001708624493e-07, "loss": 0.651, "step": 2978 }, { "epoch": 0.9190189726978251, "grad_norm": 0.0, "learning_rate": 3.4095735657918127e-07, "loss": 0.6915, "step": 2979 }, { "epoch": 0.9193274718494524, "grad_norm": 0.0, "learning_rate": 3.3837434646580514e-07, "loss": 0.8021, "step": 2980 }, { "epoch": 0.9196359710010797, "grad_norm": 0.0, "learning_rate": 3.3580098932680994e-07, "loss": 0.6535, "step": 2981 }, { "epoch": 0.9199444701527071, "grad_norm": 0.0, "learning_rate": 3.3323728773324904e-07, "loss": 0.6707, "step": 2982 }, { "epoch": 0.9202529693043344, "grad_norm": 0.0, "learning_rate": 3.306832442465302e-07, "loss": 0.6688, "step": 2983 }, { "epoch": 0.9205614684559618, "grad_norm": 0.0, "learning_rate": 3.281388614184089e-07, "loss": 0.6877, "step": 2984 }, { "epoch": 0.9208699676075891, "grad_norm": 0.0, "learning_rate": 3.256041417909894e-07, "loss": 0.6977, "step": 2985 }, { "epoch": 0.9211784667592164, "grad_norm": 0.0, "learning_rate": 3.230790878967216e-07, "loss": 0.6933, "step": 2986 }, { "epoch": 0.9214869659108438, "grad_norm": 0.0, "learning_rate": 3.205637022583985e-07, "loss": 0.6552, "step": 2987 }, { "epoch": 0.9217954650624711, "grad_norm": 0.0, "learning_rate": 3.180579873891565e-07, "loss": 0.6738, "step": 2988 }, { "epoch": 0.9221039642140985, "grad_norm": 0.0, "learning_rate": 3.155619457924608e-07, "loss": 0.6119, "step": 2989 }, { "epoch": 0.9224124633657258, "grad_norm": 0.0, "learning_rate": 3.130755799621221e-07, "loss": 0.643, "step": 2990 }, { "epoch": 0.922720962517353, "grad_norm": 0.0, "learning_rate": 3.1059889238227893e-07, "loss": 0.6854, "step": 2991 }, { "epoch": 0.9230294616689804, "grad_norm": 0.0, "learning_rate": 3.0813188552740067e-07, "loss": 0.6532, "step": 2992 }, { "epoch": 0.9233379608206077, "grad_norm": 0.0, "learning_rate": 3.0567456186228694e-07, "loss": 0.6868, "step": 2993 }, { "epoch": 0.923646459972235, "grad_norm": 0.0, "learning_rate": 3.032269238420582e-07, "loss": 0.7246, "step": 2994 }, { "epoch": 0.9239549591238624, "grad_norm": 0.0, "learning_rate": 3.0078897391216387e-07, "loss": 0.6767, "step": 2995 }, { "epoch": 0.9242634582754897, "grad_norm": 0.0, "learning_rate": 2.9836071450836776e-07, "loss": 0.6672, "step": 2996 }, { "epoch": 0.9245719574271171, "grad_norm": 0.0, "learning_rate": 2.9594214805675703e-07, "loss": 0.6284, "step": 2997 }, { "epoch": 0.9248804565787444, "grad_norm": 0.0, "learning_rate": 2.935332769737331e-07, "loss": 0.703, "step": 2998 }, { "epoch": 0.9251889557303717, "grad_norm": 0.0, "learning_rate": 2.911341036660065e-07, "loss": 0.7389, "step": 2999 }, { "epoch": 0.9254974548819991, "grad_norm": 0.0, "learning_rate": 2.887446305306074e-07, "loss": 0.6566, "step": 3000 }, { "epoch": 0.9258059540336264, "grad_norm": 0.0, "learning_rate": 2.863648599548652e-07, "loss": 0.6818, "step": 3001 }, { "epoch": 0.9261144531852538, "grad_norm": 0.0, "learning_rate": 2.839947943164223e-07, "loss": 0.6706, "step": 3002 }, { "epoch": 0.9264229523368811, "grad_norm": 0.0, "learning_rate": 2.81634435983219e-07, "loss": 0.6665, "step": 3003 }, { "epoch": 0.9267314514885084, "grad_norm": 0.0, "learning_rate": 2.792837873135035e-07, "loss": 0.6966, "step": 3004 }, { "epoch": 0.9270399506401358, "grad_norm": 0.0, "learning_rate": 2.7694285065581807e-07, "loss": 0.7047, "step": 3005 }, { "epoch": 0.9273484497917631, "grad_norm": 0.0, "learning_rate": 2.746116283490019e-07, "loss": 0.7193, "step": 3006 }, { "epoch": 0.9276569489433905, "grad_norm": 0.0, "learning_rate": 2.722901227221919e-07, "loss": 0.6711, "step": 3007 }, { "epoch": 0.9279654480950177, "grad_norm": 0.0, "learning_rate": 2.6997833609481384e-07, "loss": 0.6848, "step": 3008 }, { "epoch": 0.928273947246645, "grad_norm": 0.0, "learning_rate": 2.6767627077658563e-07, "loss": 0.6251, "step": 3009 }, { "epoch": 0.9285824463982724, "grad_norm": 0.0, "learning_rate": 2.653839290675109e-07, "loss": 0.6504, "step": 3010 }, { "epoch": 0.9288909455498997, "grad_norm": 0.0, "learning_rate": 2.6310131325787634e-07, "loss": 0.5879, "step": 3011 }, { "epoch": 0.929199444701527, "grad_norm": 0.0, "learning_rate": 2.608284256282567e-07, "loss": 0.7294, "step": 3012 }, { "epoch": 0.9295079438531544, "grad_norm": 0.0, "learning_rate": 2.5856526844950324e-07, "loss": 0.7099, "step": 3013 }, { "epoch": 0.9298164430047817, "grad_norm": 0.0, "learning_rate": 2.5631184398274834e-07, "loss": 0.6724, "step": 3014 }, { "epoch": 0.9301249421564091, "grad_norm": 0.0, "learning_rate": 2.540681544793955e-07, "loss": 0.7407, "step": 3015 }, { "epoch": 0.9304334413080364, "grad_norm": 0.0, "learning_rate": 2.518342021811271e-07, "loss": 0.6528, "step": 3016 }, { "epoch": 0.9307419404596637, "grad_norm": 0.0, "learning_rate": 2.496099893198944e-07, "loss": 0.7048, "step": 3017 }, { "epoch": 0.9310504396112911, "grad_norm": 0.0, "learning_rate": 2.4739551811791996e-07, "loss": 0.7183, "step": 3018 }, { "epoch": 0.9313589387629184, "grad_norm": 0.0, "learning_rate": 2.451907907876916e-07, "loss": 0.6742, "step": 3019 }, { "epoch": 0.9316674379145458, "grad_norm": 0.0, "learning_rate": 2.4299580953196066e-07, "loss": 0.6479, "step": 3020 }, { "epoch": 0.9319759370661731, "grad_norm": 0.0, "learning_rate": 2.408105765437452e-07, "loss": 0.6814, "step": 3021 }, { "epoch": 0.9322844362178004, "grad_norm": 0.0, "learning_rate": 2.386350940063209e-07, "loss": 0.6509, "step": 3022 }, { "epoch": 0.9325929353694278, "grad_norm": 0.0, "learning_rate": 2.364693640932214e-07, "loss": 0.6871, "step": 3023 }, { "epoch": 0.9329014345210551, "grad_norm": 0.0, "learning_rate": 2.3431338896823917e-07, "loss": 0.6811, "step": 3024 }, { "epoch": 0.9332099336726823, "grad_norm": 0.0, "learning_rate": 2.321671707854156e-07, "loss": 0.6711, "step": 3025 }, { "epoch": 0.9335184328243097, "grad_norm": 0.0, "learning_rate": 2.300307116890521e-07, "loss": 0.6324, "step": 3026 }, { "epoch": 0.933826931975937, "grad_norm": 0.0, "learning_rate": 2.279040138136901e-07, "loss": 0.6814, "step": 3027 }, { "epoch": 0.9341354311275644, "grad_norm": 0.0, "learning_rate": 2.2578707928412545e-07, "loss": 0.6596, "step": 3028 }, { "epoch": 0.9344439302791917, "grad_norm": 0.0, "learning_rate": 2.2367991021539637e-07, "loss": 0.679, "step": 3029 }, { "epoch": 0.934752429430819, "grad_norm": 0.0, "learning_rate": 2.215825087127843e-07, "loss": 0.601, "step": 3030 }, { "epoch": 0.9350609285824464, "grad_norm": 0.0, "learning_rate": 2.1949487687181525e-07, "loss": 0.691, "step": 3031 }, { "epoch": 0.9353694277340737, "grad_norm": 0.0, "learning_rate": 2.1741701677824966e-07, "loss": 0.7404, "step": 3032 }, { "epoch": 0.9356779268857011, "grad_norm": 0.0, "learning_rate": 2.1534893050808802e-07, "loss": 0.654, "step": 3033 }, { "epoch": 0.9359864260373284, "grad_norm": 0.0, "learning_rate": 2.1329062012756308e-07, "loss": 0.622, "step": 3034 }, { "epoch": 0.9362949251889557, "grad_norm": 0.0, "learning_rate": 2.1124208769314424e-07, "loss": 0.7046, "step": 3035 }, { "epoch": 0.9366034243405831, "grad_norm": 0.0, "learning_rate": 2.092033352515288e-07, "loss": 0.6661, "step": 3036 }, { "epoch": 0.9369119234922104, "grad_norm": 0.0, "learning_rate": 2.0717436483964293e-07, "loss": 0.7741, "step": 3037 }, { "epoch": 0.9372204226438378, "grad_norm": 0.0, "learning_rate": 2.0515517848464284e-07, "loss": 0.6348, "step": 3038 }, { "epoch": 0.9375289217954651, "grad_norm": 0.0, "learning_rate": 2.031457782039037e-07, "loss": 0.6986, "step": 3039 }, { "epoch": 0.9378374209470924, "grad_norm": 0.0, "learning_rate": 2.0114616600502845e-07, "loss": 0.6558, "step": 3040 }, { "epoch": 0.9381459200987198, "grad_norm": 0.0, "learning_rate": 1.991563438858368e-07, "loss": 0.6811, "step": 3041 }, { "epoch": 0.9384544192503471, "grad_norm": 0.0, "learning_rate": 1.9717631383437075e-07, "loss": 0.6629, "step": 3042 }, { "epoch": 0.9387629184019743, "grad_norm": 0.0, "learning_rate": 1.952060778288889e-07, "loss": 0.6583, "step": 3043 }, { "epoch": 0.9390714175536017, "grad_norm": 0.0, "learning_rate": 1.9324563783785888e-07, "loss": 0.6649, "step": 3044 }, { "epoch": 0.939379916705229, "grad_norm": 0.0, "learning_rate": 1.9129499581996945e-07, "loss": 0.7522, "step": 3045 }, { "epoch": 0.9396884158568564, "grad_norm": 0.0, "learning_rate": 1.893541537241128e-07, "loss": 0.6718, "step": 3046 }, { "epoch": 0.9399969150084837, "grad_norm": 0.0, "learning_rate": 1.8742311348939558e-07, "loss": 0.6987, "step": 3047 }, { "epoch": 0.940305414160111, "grad_norm": 0.0, "learning_rate": 1.855018770451278e-07, "loss": 0.6032, "step": 3048 }, { "epoch": 0.9406139133117384, "grad_norm": 0.0, "learning_rate": 1.83590446310824e-07, "loss": 0.701, "step": 3049 }, { "epoch": 0.9409224124633657, "grad_norm": 0.0, "learning_rate": 1.8168882319620663e-07, "loss": 0.7369, "step": 3050 }, { "epoch": 0.9412309116149931, "grad_norm": 0.0, "learning_rate": 1.797970096011936e-07, "loss": 0.7331, "step": 3051 }, { "epoch": 0.9415394107666204, "grad_norm": 0.0, "learning_rate": 1.779150074159064e-07, "loss": 0.6953, "step": 3052 }, { "epoch": 0.9418479099182477, "grad_norm": 0.0, "learning_rate": 1.7604281852065973e-07, "loss": 0.7046, "step": 3053 }, { "epoch": 0.9421564090698751, "grad_norm": 0.0, "learning_rate": 1.7418044478596742e-07, "loss": 0.7163, "step": 3054 }, { "epoch": 0.9424649082215024, "grad_norm": 0.0, "learning_rate": 1.7232788807253654e-07, "loss": 0.6754, "step": 3055 }, { "epoch": 0.9427734073731298, "grad_norm": 0.0, "learning_rate": 1.7048515023126323e-07, "loss": 0.6923, "step": 3056 }, { "epoch": 0.9430819065247571, "grad_norm": 0.0, "learning_rate": 1.6865223310323586e-07, "loss": 0.6807, "step": 3057 }, { "epoch": 0.9433904056763844, "grad_norm": 0.0, "learning_rate": 1.6682913851972959e-07, "loss": 0.6595, "step": 3058 }, { "epoch": 0.9436989048280118, "grad_norm": 0.0, "learning_rate": 1.6501586830220852e-07, "loss": 0.7242, "step": 3059 }, { "epoch": 0.944007403979639, "grad_norm": 0.0, "learning_rate": 1.6321242426231787e-07, "loss": 0.9792, "step": 3060 }, { "epoch": 0.9443159031312663, "grad_norm": 0.0, "learning_rate": 1.6141880820188749e-07, "loss": 0.6886, "step": 3061 }, { "epoch": 0.9446244022828937, "grad_norm": 0.0, "learning_rate": 1.596350219129261e-07, "loss": 0.6266, "step": 3062 }, { "epoch": 0.944932901434521, "grad_norm": 0.0, "learning_rate": 1.5786106717762373e-07, "loss": 0.6921, "step": 3063 }, { "epoch": 0.9452414005861484, "grad_norm": 0.0, "learning_rate": 1.5609694576834698e-07, "loss": 0.682, "step": 3064 }, { "epoch": 0.9455498997377757, "grad_norm": 0.0, "learning_rate": 1.5434265944763717e-07, "loss": 0.609, "step": 3065 }, { "epoch": 0.945858398889403, "grad_norm": 0.0, "learning_rate": 1.5259820996820884e-07, "loss": 0.6423, "step": 3066 }, { "epoch": 0.9461668980410304, "grad_norm": 0.0, "learning_rate": 1.5086359907295124e-07, "loss": 0.6499, "step": 3067 }, { "epoch": 0.9464753971926577, "grad_norm": 0.0, "learning_rate": 1.4913882849492022e-07, "loss": 0.6977, "step": 3068 }, { "epoch": 0.9467838963442851, "grad_norm": 0.0, "learning_rate": 1.4742389995734519e-07, "loss": 0.7011, "step": 3069 }, { "epoch": 0.9470923954959124, "grad_norm": 0.0, "learning_rate": 1.457188151736144e-07, "loss": 0.6401, "step": 3070 }, { "epoch": 0.9474008946475397, "grad_norm": 0.0, "learning_rate": 1.4402357584728966e-07, "loss": 0.7007, "step": 3071 }, { "epoch": 0.9477093937991671, "grad_norm": 0.0, "learning_rate": 1.423381836720894e-07, "loss": 0.6718, "step": 3072 }, { "epoch": 0.9480178929507944, "grad_norm": 0.0, "learning_rate": 1.4066264033190002e-07, "loss": 0.6217, "step": 3073 }, { "epoch": 0.9483263921024218, "grad_norm": 0.0, "learning_rate": 1.3899694750076241e-07, "loss": 0.6629, "step": 3074 }, { "epoch": 0.9486348912540491, "grad_norm": 0.0, "learning_rate": 1.3734110684287761e-07, "loss": 0.6666, "step": 3075 }, { "epoch": 0.9489433904056764, "grad_norm": 0.0, "learning_rate": 1.3569512001260444e-07, "loss": 0.7184, "step": 3076 }, { "epoch": 0.9492518895573037, "grad_norm": 0.0, "learning_rate": 1.3405898865445522e-07, "loss": 0.6494, "step": 3077 }, { "epoch": 0.949560388708931, "grad_norm": 0.0, "learning_rate": 1.3243271440309679e-07, "loss": 0.7152, "step": 3078 }, { "epoch": 0.9498688878605583, "grad_norm": 0.0, "learning_rate": 1.3081629888334834e-07, "loss": 0.6792, "step": 3079 }, { "epoch": 0.9501773870121857, "grad_norm": 0.0, "learning_rate": 1.292097437101747e-07, "loss": 0.6981, "step": 3080 }, { "epoch": 0.950485886163813, "grad_norm": 0.0, "learning_rate": 1.276130504886963e-07, "loss": 0.6675, "step": 3081 }, { "epoch": 0.9507943853154404, "grad_norm": 0.0, "learning_rate": 1.260262208141738e-07, "loss": 0.6856, "step": 3082 }, { "epoch": 0.9511028844670677, "grad_norm": 0.0, "learning_rate": 1.2444925627201786e-07, "loss": 0.7035, "step": 3083 }, { "epoch": 0.951411383618695, "grad_norm": 0.0, "learning_rate": 1.2288215843777928e-07, "loss": 0.652, "step": 3084 }, { "epoch": 0.9517198827703224, "grad_norm": 0.0, "learning_rate": 1.2132492887715454e-07, "loss": 0.7783, "step": 3085 }, { "epoch": 0.9520283819219497, "grad_norm": 0.0, "learning_rate": 1.1977756914597792e-07, "loss": 0.682, "step": 3086 }, { "epoch": 0.9523368810735771, "grad_norm": 0.0, "learning_rate": 1.1824008079022286e-07, "loss": 0.6684, "step": 3087 }, { "epoch": 0.9526453802252044, "grad_norm": 0.0, "learning_rate": 1.1671246534600167e-07, "loss": 0.6782, "step": 3088 }, { "epoch": 0.9529538793768317, "grad_norm": 0.0, "learning_rate": 1.1519472433956125e-07, "loss": 0.6979, "step": 3089 }, { "epoch": 0.9532623785284591, "grad_norm": 0.0, "learning_rate": 1.136868592872864e-07, "loss": 0.6788, "step": 3090 }, { "epoch": 0.9535708776800864, "grad_norm": 0.0, "learning_rate": 1.1218887169568871e-07, "loss": 0.7168, "step": 3091 }, { "epoch": 0.9538793768317138, "grad_norm": 0.0, "learning_rate": 1.107007630614143e-07, "loss": 0.6758, "step": 3092 }, { "epoch": 0.9541878759833411, "grad_norm": 0.0, "learning_rate": 1.0922253487124279e-07, "loss": 0.7599, "step": 3093 }, { "epoch": 0.9544963751349684, "grad_norm": 0.0, "learning_rate": 1.0775418860207498e-07, "loss": 0.6001, "step": 3094 }, { "epoch": 0.9548048742865957, "grad_norm": 0.0, "learning_rate": 1.0629572572094515e-07, "loss": 0.6489, "step": 3095 }, { "epoch": 0.955113373438223, "grad_norm": 0.0, "learning_rate": 1.048471476850077e-07, "loss": 0.6668, "step": 3096 }, { "epoch": 0.9554218725898503, "grad_norm": 0.0, "learning_rate": 1.0340845594154603e-07, "loss": 0.6543, "step": 3097 }, { "epoch": 0.9557303717414777, "grad_norm": 0.0, "learning_rate": 1.0197965192796145e-07, "loss": 0.7041, "step": 3098 }, { "epoch": 0.956038870893105, "grad_norm": 0.0, "learning_rate": 1.0056073707177982e-07, "loss": 0.7049, "step": 3099 }, { "epoch": 0.9563473700447324, "grad_norm": 0.0, "learning_rate": 9.915171279064606e-08, "loss": 0.7802, "step": 3100 }, { "epoch": 0.9566558691963597, "grad_norm": 0.0, "learning_rate": 9.775258049232072e-08, "loss": 0.6623, "step": 3101 }, { "epoch": 0.956964368347987, "grad_norm": 0.0, "learning_rate": 9.636334157468563e-08, "loss": 0.7155, "step": 3102 }, { "epoch": 0.9572728674996144, "grad_norm": 0.0, "learning_rate": 9.498399742573495e-08, "loss": 0.7015, "step": 3103 }, { "epoch": 0.9575813666512417, "grad_norm": 0.0, "learning_rate": 9.361454942357628e-08, "loss": 0.6937, "step": 3104 }, { "epoch": 0.9578898658028691, "grad_norm": 0.0, "learning_rate": 9.225499893643297e-08, "loss": 0.6619, "step": 3105 }, { "epoch": 0.9581983649544964, "grad_norm": 0.0, "learning_rate": 9.090534732263734e-08, "loss": 0.6644, "step": 3106 }, { "epoch": 0.9585068641061237, "grad_norm": 0.0, "learning_rate": 8.956559593063408e-08, "loss": 0.6596, "step": 3107 }, { "epoch": 0.9588153632577511, "grad_norm": 0.0, "learning_rate": 8.823574609897134e-08, "loss": 0.6561, "step": 3108 }, { "epoch": 0.9591238624093784, "grad_norm": 0.0, "learning_rate": 8.691579915631299e-08, "loss": 0.6519, "step": 3109 }, { "epoch": 0.9594323615610058, "grad_norm": 0.0, "learning_rate": 8.560575642141966e-08, "loss": 0.7131, "step": 3110 }, { "epoch": 0.9597408607126331, "grad_norm": 0.0, "learning_rate": 8.430561920316438e-08, "loss": 0.755, "step": 3111 }, { "epoch": 0.9600493598642603, "grad_norm": 0.0, "learning_rate": 8.301538880051808e-08, "loss": 0.6812, "step": 3112 }, { "epoch": 0.9603578590158877, "grad_norm": 0.0, "learning_rate": 8.173506650255625e-08, "loss": 0.6656, "step": 3113 }, { "epoch": 0.960666358167515, "grad_norm": 0.0, "learning_rate": 8.046465358845568e-08, "loss": 0.6969, "step": 3114 }, { "epoch": 0.9609748573191423, "grad_norm": 0.0, "learning_rate": 7.920415132748993e-08, "loss": 0.6985, "step": 3115 }, { "epoch": 0.9612833564707697, "grad_norm": 0.0, "learning_rate": 7.795356097903495e-08, "loss": 0.654, "step": 3116 }, { "epoch": 0.961591855622397, "grad_norm": 0.0, "learning_rate": 7.671288379256015e-08, "loss": 0.783, "step": 3117 }, { "epoch": 0.9619003547740244, "grad_norm": 0.0, "learning_rate": 7.548212100763063e-08, "loss": 0.7332, "step": 3118 }, { "epoch": 0.9622088539256517, "grad_norm": 0.0, "learning_rate": 7.426127385390947e-08, "loss": 0.6841, "step": 3119 }, { "epoch": 0.962517353077279, "grad_norm": 0.0, "learning_rate": 7.30503435511487e-08, "loss": 0.6868, "step": 3120 }, { "epoch": 0.9628258522289064, "grad_norm": 0.0, "learning_rate": 7.184933130919614e-08, "loss": 0.6421, "step": 3121 }, { "epoch": 0.9631343513805337, "grad_norm": 0.0, "learning_rate": 7.065823832798524e-08, "loss": 0.7666, "step": 3122 }, { "epoch": 0.963442850532161, "grad_norm": 0.0, "learning_rate": 6.947706579754632e-08, "loss": 0.701, "step": 3123 }, { "epoch": 0.9637513496837884, "grad_norm": 0.0, "learning_rate": 6.830581489799204e-08, "loss": 1.0016, "step": 3124 }, { "epoch": 0.9640598488354157, "grad_norm": 0.0, "learning_rate": 6.714448679952524e-08, "loss": 0.6931, "step": 3125 }, { "epoch": 0.9643683479870431, "grad_norm": 0.0, "learning_rate": 6.599308266243443e-08, "loss": 0.6796, "step": 3126 }, { "epoch": 0.9646768471386704, "grad_norm": 0.0, "learning_rate": 6.485160363709053e-08, "loss": 0.6407, "step": 3127 }, { "epoch": 0.9649853462902978, "grad_norm": 0.0, "learning_rate": 6.37200508639535e-08, "loss": 0.7245, "step": 3128 }, { "epoch": 0.965293845441925, "grad_norm": 0.0, "learning_rate": 6.259842547356231e-08, "loss": 0.6961, "step": 3129 }, { "epoch": 0.9656023445935523, "grad_norm": 0.0, "learning_rate": 6.148672858653615e-08, "loss": 0.7058, "step": 3130 }, { "epoch": 0.9659108437451797, "grad_norm": 0.0, "learning_rate": 6.038496131357874e-08, "loss": 0.6755, "step": 3131 }, { "epoch": 0.966219342896807, "grad_norm": 0.0, "learning_rate": 5.929312475546845e-08, "loss": 0.7858, "step": 3132 }, { "epoch": 0.9665278420484343, "grad_norm": 0.0, "learning_rate": 5.8211220003067114e-08, "loss": 0.6399, "step": 3133 }, { "epoch": 0.9668363412000617, "grad_norm": 0.0, "learning_rate": 5.7139248137307865e-08, "loss": 0.6506, "step": 3134 }, { "epoch": 0.967144840351689, "grad_norm": 0.0, "learning_rate": 5.607721022920398e-08, "loss": 0.6636, "step": 3135 }, { "epoch": 0.9674533395033164, "grad_norm": 0.0, "learning_rate": 5.5025107339842234e-08, "loss": 0.7279, "step": 3136 }, { "epoch": 0.9677618386549437, "grad_norm": 0.0, "learning_rate": 5.3982940520382885e-08, "loss": 0.659, "step": 3137 }, { "epoch": 0.968070337806571, "grad_norm": 0.0, "learning_rate": 5.295071081206194e-08, "loss": 0.6571, "step": 3138 }, { "epoch": 0.9683788369581984, "grad_norm": 0.0, "learning_rate": 5.1928419246181085e-08, "loss": 0.6417, "step": 3139 }, { "epoch": 0.9686873361098257, "grad_norm": 0.0, "learning_rate": 5.091606684411998e-08, "loss": 0.6781, "step": 3140 }, { "epoch": 0.968995835261453, "grad_norm": 0.0, "learning_rate": 4.9913654617322894e-08, "loss": 0.7228, "step": 3141 }, { "epoch": 0.9693043344130804, "grad_norm": 0.0, "learning_rate": 4.892118356730313e-08, "loss": 0.9788, "step": 3142 }, { "epoch": 0.9696128335647077, "grad_norm": 0.0, "learning_rate": 4.79386546856464e-08, "loss": 0.6991, "step": 3143 }, { "epoch": 0.9699213327163351, "grad_norm": 0.0, "learning_rate": 4.696606895399858e-08, "loss": 0.6679, "step": 3144 }, { "epoch": 0.9702298318679624, "grad_norm": 0.0, "learning_rate": 4.60034273440757e-08, "loss": 0.7438, "step": 3145 }, { "epoch": 0.9705383310195898, "grad_norm": 0.0, "learning_rate": 4.505073081765843e-08, "loss": 0.6752, "step": 3146 }, { "epoch": 0.970846830171217, "grad_norm": 0.0, "learning_rate": 4.4107980326589806e-08, "loss": 0.6864, "step": 3147 }, { "epoch": 0.9711553293228443, "grad_norm": 0.0, "learning_rate": 4.317517681277528e-08, "loss": 0.6253, "step": 3148 }, { "epoch": 0.9714638284744717, "grad_norm": 0.0, "learning_rate": 4.225232120818268e-08, "loss": 0.6613, "step": 3149 }, { "epoch": 0.971772327626099, "grad_norm": 0.0, "learning_rate": 4.133941443484335e-08, "loss": 0.6816, "step": 3150 }, { "epoch": 0.9720808267777263, "grad_norm": 0.0, "learning_rate": 4.043645740484436e-08, "loss": 0.6524, "step": 3151 }, { "epoch": 0.9723893259293537, "grad_norm": 0.0, "learning_rate": 3.95434510203363e-08, "loss": 0.6208, "step": 3152 }, { "epoch": 0.972697825080981, "grad_norm": 0.0, "learning_rate": 3.866039617352324e-08, "loss": 0.7372, "step": 3153 }, { "epoch": 0.9730063242326084, "grad_norm": 0.0, "learning_rate": 3.778729374667278e-08, "loss": 0.7214, "step": 3154 }, { "epoch": 0.9733148233842357, "grad_norm": 0.0, "learning_rate": 3.6924144612102695e-08, "loss": 0.6511, "step": 3155 }, { "epoch": 0.973623322535863, "grad_norm": 0.0, "learning_rate": 3.6070949632190934e-08, "loss": 0.6205, "step": 3156 }, { "epoch": 0.9739318216874904, "grad_norm": 0.0, "learning_rate": 3.5227709659367834e-08, "loss": 0.7322, "step": 3157 }, { "epoch": 0.9742403208391177, "grad_norm": 0.0, "learning_rate": 3.439442553611727e-08, "loss": 0.9699, "step": 3158 }, { "epoch": 0.974548819990745, "grad_norm": 0.0, "learning_rate": 3.3571098094978825e-08, "loss": 0.6859, "step": 3159 }, { "epoch": 0.9748573191423724, "grad_norm": 0.0, "learning_rate": 3.275772815854228e-08, "loss": 0.69, "step": 3160 }, { "epoch": 0.9751658182939997, "grad_norm": 0.0, "learning_rate": 3.195431653944869e-08, "loss": 0.6999, "step": 3161 }, { "epoch": 0.9754743174456271, "grad_norm": 0.0, "learning_rate": 3.1160864040391534e-08, "loss": 0.6989, "step": 3162 }, { "epoch": 0.9757828165972544, "grad_norm": 0.0, "learning_rate": 3.0377371454112234e-08, "loss": 0.6986, "step": 3163 }, { "epoch": 0.9760913157488816, "grad_norm": 0.0, "learning_rate": 2.9603839563400183e-08, "loss": 0.6149, "step": 3164 }, { "epoch": 0.976399814900509, "grad_norm": 0.0, "learning_rate": 2.8840269141097165e-08, "loss": 0.729, "step": 3165 }, { "epoch": 0.9767083140521363, "grad_norm": 0.0, "learning_rate": 2.8086660950088497e-08, "loss": 0.6686, "step": 3166 }, { "epoch": 0.9770168132037637, "grad_norm": 0.0, "learning_rate": 2.7343015743307443e-08, "loss": 0.6762, "step": 3167 }, { "epoch": 0.977325312355391, "grad_norm": 0.0, "learning_rate": 2.660933426373413e-08, "loss": 0.7482, "step": 3168 }, { "epoch": 0.9776338115070183, "grad_norm": 0.0, "learning_rate": 2.5885617244392204e-08, "loss": 0.7199, "step": 3169 }, { "epoch": 0.9779423106586457, "grad_norm": 0.0, "learning_rate": 2.5171865408352147e-08, "loss": 0.7257, "step": 3170 }, { "epoch": 0.978250809810273, "grad_norm": 0.0, "learning_rate": 2.4468079468724647e-08, "loss": 0.6469, "step": 3171 }, { "epoch": 0.9785593089619004, "grad_norm": 0.0, "learning_rate": 2.3774260128667238e-08, "loss": 0.8081, "step": 3172 }, { "epoch": 0.9788678081135277, "grad_norm": 0.0, "learning_rate": 2.3090408081376525e-08, "loss": 0.6983, "step": 3173 }, { "epoch": 0.979176307265155, "grad_norm": 0.0, "learning_rate": 2.2416524010092644e-08, "loss": 0.6102, "step": 3174 }, { "epoch": 0.9794848064167824, "grad_norm": 0.0, "learning_rate": 2.1752608588097024e-08, "loss": 0.6399, "step": 3175 }, { "epoch": 0.9797933055684097, "grad_norm": 0.0, "learning_rate": 2.1098662478710173e-08, "loss": 0.6928, "step": 3176 }, { "epoch": 0.980101804720037, "grad_norm": 0.0, "learning_rate": 2.0454686335292794e-08, "loss": 0.8059, "step": 3177 }, { "epoch": 0.9804103038716644, "grad_norm": 0.0, "learning_rate": 1.9820680801243554e-08, "loss": 0.6157, "step": 3178 }, { "epoch": 0.9807188030232917, "grad_norm": 0.0, "learning_rate": 1.9196646510001303e-08, "loss": 0.6005, "step": 3179 }, { "epoch": 0.9810273021749191, "grad_norm": 0.0, "learning_rate": 1.8582584085041765e-08, "loss": 0.7006, "step": 3180 }, { "epoch": 0.9813358013265463, "grad_norm": 0.0, "learning_rate": 1.797849413987529e-08, "loss": 0.7217, "step": 3181 }, { "epoch": 0.9816443004781736, "grad_norm": 0.0, "learning_rate": 1.7384377278053533e-08, "loss": 0.7453, "step": 3182 }, { "epoch": 0.981952799629801, "grad_norm": 0.0, "learning_rate": 1.680023409316056e-08, "loss": 0.7559, "step": 3183 }, { "epoch": 0.9822612987814283, "grad_norm": 0.0, "learning_rate": 1.6226065168816198e-08, "loss": 0.6353, "step": 3184 }, { "epoch": 0.9825697979330557, "grad_norm": 0.0, "learning_rate": 1.5661871078674895e-08, "loss": 0.7237, "step": 3185 }, { "epoch": 0.982878297084683, "grad_norm": 0.0, "learning_rate": 1.510765238642575e-08, "loss": 0.7412, "step": 3186 }, { "epoch": 0.9831867962363103, "grad_norm": 0.0, "learning_rate": 1.4563409645792503e-08, "loss": 0.6923, "step": 3187 }, { "epoch": 0.9834952953879377, "grad_norm": 0.0, "learning_rate": 1.4029143400529077e-08, "loss": 0.662, "step": 3188 }, { "epoch": 0.983803794539565, "grad_norm": 0.0, "learning_rate": 1.3504854184422932e-08, "loss": 0.5827, "step": 3189 }, { "epoch": 0.9841122936911924, "grad_norm": 0.0, "learning_rate": 1.2990542521295057e-08, "loss": 0.6758, "step": 3190 }, { "epoch": 0.9844207928428197, "grad_norm": 0.0, "learning_rate": 1.2486208924996634e-08, "loss": 0.7292, "step": 3191 }, { "epoch": 0.984729291994447, "grad_norm": 0.0, "learning_rate": 1.1991853899409044e-08, "loss": 0.6786, "step": 3192 }, { "epoch": 0.9850377911460744, "grad_norm": 0.0, "learning_rate": 1.1507477938443868e-08, "loss": 0.6944, "step": 3193 }, { "epoch": 0.9853462902977017, "grad_norm": 0.0, "learning_rate": 1.1033081526045098e-08, "loss": 0.6941, "step": 3194 }, { "epoch": 0.985654789449329, "grad_norm": 0.0, "learning_rate": 1.0568665136183597e-08, "loss": 0.6545, "step": 3195 }, { "epoch": 0.9859632886009564, "grad_norm": 0.0, "learning_rate": 1.0114229232859319e-08, "loss": 0.7082, "step": 3196 }, { "epoch": 0.9862717877525837, "grad_norm": 0.0, "learning_rate": 9.669774270102406e-09, "loss": 0.6543, "step": 3197 }, { "epoch": 0.9865802869042111, "grad_norm": 0.0, "learning_rate": 9.235300691969872e-09, "loss": 0.6542, "step": 3198 }, { "epoch": 0.9868887860558383, "grad_norm": 0.0, "learning_rate": 8.81080893254449e-09, "loss": 0.6826, "step": 3199 }, { "epoch": 0.9871972852074656, "grad_norm": 0.0, "learning_rate": 8.396299415939223e-09, "loss": 0.6522, "step": 3200 }, { "epoch": 0.987505784359093, "grad_norm": 0.0, "learning_rate": 7.991772556291689e-09, "loss": 0.6831, "step": 3201 }, { "epoch": 0.9878142835107203, "grad_norm": 0.0, "learning_rate": 7.597228757767472e-09, "loss": 0.6952, "step": 3202 }, { "epoch": 0.9881227826623477, "grad_norm": 0.0, "learning_rate": 7.212668414556811e-09, "loss": 0.7034, "step": 3203 }, { "epoch": 0.988431281813975, "grad_norm": 0.0, "learning_rate": 6.8380919108757e-09, "loss": 0.7407, "step": 3204 }, { "epoch": 0.9887397809656023, "grad_norm": 0.0, "learning_rate": 6.473499620965884e-09, "loss": 0.7395, "step": 3205 }, { "epoch": 0.9890482801172297, "grad_norm": 0.0, "learning_rate": 6.11889190909043e-09, "loss": 0.7463, "step": 3206 }, { "epoch": 0.989356779268857, "grad_norm": 0.0, "learning_rate": 5.774269129543708e-09, "loss": 0.7032, "step": 3207 }, { "epoch": 0.9896652784204844, "grad_norm": 0.0, "learning_rate": 5.439631626635855e-09, "loss": 0.6845, "step": 3208 }, { "epoch": 0.9899737775721117, "grad_norm": 0.0, "learning_rate": 5.114979734707204e-09, "loss": 0.6526, "step": 3209 }, { "epoch": 0.990282276723739, "grad_norm": 0.0, "learning_rate": 4.8003137781194035e-09, "loss": 0.6862, "step": 3210 }, { "epoch": 0.9905907758753664, "grad_norm": 0.0, "learning_rate": 4.495634071254307e-09, "loss": 0.6675, "step": 3211 }, { "epoch": 0.9908992750269937, "grad_norm": 0.0, "learning_rate": 4.200940918520635e-09, "loss": 0.6661, "step": 3212 }, { "epoch": 0.991207774178621, "grad_norm": 0.0, "learning_rate": 3.916234614346204e-09, "loss": 0.685, "step": 3213 }, { "epoch": 0.9915162733302484, "grad_norm": 0.0, "learning_rate": 3.6415154431845846e-09, "loss": 0.6182, "step": 3214 }, { "epoch": 0.9918247724818757, "grad_norm": 0.0, "learning_rate": 3.3767836795062235e-09, "loss": 0.7722, "step": 3215 }, { "epoch": 0.992133271633503, "grad_norm": 0.0, "learning_rate": 3.1220395878084343e-09, "loss": 0.6592, "step": 3216 }, { "epoch": 0.9924417707851303, "grad_norm": 0.0, "learning_rate": 2.8772834226054036e-09, "loss": 0.6126, "step": 3217 }, { "epoch": 0.9927502699367576, "grad_norm": 0.0, "learning_rate": 2.6425154284359655e-09, "loss": 0.7914, "step": 3218 }, { "epoch": 0.993058769088385, "grad_norm": 0.0, "learning_rate": 2.4177358398558283e-09, "loss": 0.6343, "step": 3219 }, { "epoch": 0.9933672682400123, "grad_norm": 0.0, "learning_rate": 2.2029448814431254e-09, "loss": 0.6759, "step": 3220 }, { "epoch": 0.9936757673916397, "grad_norm": 0.0, "learning_rate": 1.9981427677995267e-09, "loss": 0.7549, "step": 3221 }, { "epoch": 0.993984266543267, "grad_norm": 0.0, "learning_rate": 1.8033297035402463e-09, "loss": 0.7574, "step": 3222 }, { "epoch": 0.9942927656948943, "grad_norm": 0.0, "learning_rate": 1.6185058833062538e-09, "loss": 0.6929, "step": 3223 }, { "epoch": 0.9946012648465217, "grad_norm": 0.0, "learning_rate": 1.4436714917542839e-09, "loss": 0.6079, "step": 3224 }, { "epoch": 0.994909763998149, "grad_norm": 0.0, "learning_rate": 1.278826703563496e-09, "loss": 0.6822, "step": 3225 }, { "epoch": 0.9952182631497763, "grad_norm": 0.0, "learning_rate": 1.1239716834310354e-09, "loss": 0.9537, "step": 3226 }, { "epoch": 0.9955267623014037, "grad_norm": 0.0, "learning_rate": 9.791065860720316e-10, "loss": 0.6772, "step": 3227 }, { "epoch": 0.995835261453031, "grad_norm": 0.0, "learning_rate": 8.442315562229298e-10, "loss": 0.686, "step": 3228 }, { "epoch": 0.9961437606046584, "grad_norm": 0.0, "learning_rate": 7.193467286370492e-10, "loss": 0.68, "step": 3229 }, { "epoch": 0.9964522597562857, "grad_norm": 0.0, "learning_rate": 6.044522280890252e-10, "loss": 0.6778, "step": 3230 }, { "epoch": 0.996760758907913, "grad_norm": 0.0, "learning_rate": 4.995481693681469e-10, "loss": 0.6982, "step": 3231 }, { "epoch": 0.9970692580595404, "grad_norm": 0.0, "learning_rate": 4.0463465728723947e-10, "loss": 0.6542, "step": 3232 }, { "epoch": 0.9973777572111676, "grad_norm": 0.0, "learning_rate": 3.1971178667267176e-10, "loss": 0.6921, "step": 3233 }, { "epoch": 0.997686256362795, "grad_norm": 0.0, "learning_rate": 2.4477964237212823e-10, "loss": 0.71, "step": 3234 }, { "epoch": 0.9979947555144223, "grad_norm": 0.0, "learning_rate": 1.7983829925016793e-10, "loss": 0.7023, "step": 3235 }, { "epoch": 0.9983032546660496, "grad_norm": 0.0, "learning_rate": 1.248878221904448e-10, "loss": 0.6873, "step": 3236 }, { "epoch": 0.998611753817677, "grad_norm": 0.0, "learning_rate": 7.992826609459769e-11, "loss": 0.6443, "step": 3237 }, { "epoch": 0.9989202529693043, "grad_norm": 0.0, "learning_rate": 4.4959675881139916e-11, "loss": 0.6832, "step": 3238 }, { "epoch": 0.9992287521209317, "grad_norm": 0.0, "learning_rate": 1.9982086487679853e-11, "loss": 0.6543, "step": 3239 }, { "epoch": 0.999537251272559, "grad_norm": 0.0, "learning_rate": 4.995522869810643e-12, "loss": 0.7005, "step": 3240 }, { "epoch": 0.9998457504241863, "grad_norm": 0.0, "learning_rate": 0.0, "loss": 0.8583, "step": 3241 }, { "epoch": 0.9998457504241863, "step": 3241, "total_flos": 1.4116677844331397e+19, "train_loss": 0.5099010296678588, "train_runtime": 75843.298, "train_samples_per_second": 5.47, "train_steps_per_second": 0.043 } ], "logging_steps": 1.0, "max_steps": 3241, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.4116677844331397e+19, "train_batch_size": 1, "trial_name": null, "trial_params": null }