{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.0,
  "eval_steps": 500,
  "global_step": 2514,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.002386634844868735,
      "grad_norm": 79.25410461425781,
      "learning_rate": 2.3809523809523808e-06,
      "loss": 9.018,
      "step": 3
    },
    {
      "epoch": 0.00477326968973747,
      "grad_norm": 69.27806854248047,
      "learning_rate": 4.7619047619047615e-06,
      "loss": 8.1397,
      "step": 6
    },
    {
      "epoch": 0.007159904534606206,
      "grad_norm": 44.50214767456055,
      "learning_rate": 7.142857142857143e-06,
      "loss": 6.5055,
      "step": 9
    },
    {
      "epoch": 0.00954653937947494,
      "grad_norm": 22.070268630981445,
      "learning_rate": 9.523809523809523e-06,
      "loss": 5.3274,
      "step": 12
    },
    {
      "epoch": 0.011933174224343675,
      "grad_norm": 17.811525344848633,
      "learning_rate": 1.1904761904761905e-05,
      "loss": 5.0629,
      "step": 15
    },
    {
      "epoch": 0.014319809069212411,
      "grad_norm": 13.21521282196045,
      "learning_rate": 1.4285714285714285e-05,
      "loss": 4.8887,
      "step": 18
    },
    {
      "epoch": 0.016706443914081145,
      "grad_norm": 85.0751724243164,
      "learning_rate": 1.6666666666666667e-05,
      "loss": 4.6865,
      "step": 21
    },
    {
      "epoch": 0.01909307875894988,
      "grad_norm": 11.005032539367676,
      "learning_rate": 1.9047619047619046e-05,
      "loss": 4.4609,
      "step": 24
    },
    {
      "epoch": 0.021479713603818614,
      "grad_norm": 9.891936302185059,
      "learning_rate": 2.1428571428571428e-05,
      "loss": 4.4208,
      "step": 27
    },
    {
      "epoch": 0.02386634844868735,
      "grad_norm": 6.92100715637207,
      "learning_rate": 2.380952380952381e-05,
      "loss": 4.3251,
      "step": 30
    },
    {
      "epoch": 0.026252983293556086,
      "grad_norm": 4.971158504486084,
      "learning_rate": 2.6190476190476192e-05,
      "loss": 3.9813,
      "step": 33
    },
    {
      "epoch": 0.028639618138424822,
      "grad_norm": 4.140079021453857,
      "learning_rate": 2.857142857142857e-05,
      "loss": 4.0783,
      "step": 36
    },
    {
      "epoch": 0.031026252983293555,
      "grad_norm": 4.761113166809082,
      "learning_rate": 3.095238095238095e-05,
      "loss": 4.1418,
      "step": 39
    },
    {
      "epoch": 0.03341288782816229,
      "grad_norm": 5.746503829956055,
      "learning_rate": 3.3333333333333335e-05,
      "loss": 3.8944,
      "step": 42
    },
    {
      "epoch": 0.03579952267303103,
      "grad_norm": 4.554972171783447,
      "learning_rate": 3.571428571428572e-05,
      "loss": 3.7807,
      "step": 45
    },
    {
      "epoch": 0.03818615751789976,
      "grad_norm": 3.873955726623535,
      "learning_rate": 3.809523809523809e-05,
      "loss": 3.607,
      "step": 48
    },
    {
      "epoch": 0.0405727923627685,
      "grad_norm": 3.376633644104004,
      "learning_rate": 4.047619047619048e-05,
      "loss": 3.4685,
      "step": 51
    },
    {
      "epoch": 0.04295942720763723,
      "grad_norm": 5.888181686401367,
      "learning_rate": 4.2857142857142856e-05,
      "loss": 3.4832,
      "step": 54
    },
    {
      "epoch": 0.045346062052505964,
      "grad_norm": 5.094118595123291,
      "learning_rate": 4.523809523809524e-05,
      "loss": 3.3647,
      "step": 57
    },
    {
      "epoch": 0.0477326968973747,
      "grad_norm": 4.940067291259766,
      "learning_rate": 4.761904761904762e-05,
      "loss": 3.2268,
      "step": 60
    },
    {
      "epoch": 0.050119331742243436,
      "grad_norm": 3.514155149459839,
      "learning_rate": 5e-05,
      "loss": 3.1229,
      "step": 63
    },
    {
      "epoch": 0.05250596658711217,
      "grad_norm": 3.922811985015869,
      "learning_rate": 5.2380952380952384e-05,
      "loss": 3.0612,
      "step": 66
    },
    {
      "epoch": 0.05489260143198091,
      "grad_norm": 3.203274726867676,
      "learning_rate": 5.4761904761904766e-05,
      "loss": 2.8537,
      "step": 69
    },
    {
      "epoch": 0.057279236276849645,
      "grad_norm": 2.8945529460906982,
      "learning_rate": 5.714285714285714e-05,
      "loss": 2.9128,
      "step": 72
    },
    {
      "epoch": 0.059665871121718374,
      "grad_norm": 3.287409543991089,
      "learning_rate": 5.9523809523809524e-05,
      "loss": 2.8958,
      "step": 75
    },
    {
      "epoch": 0.06205250596658711,
      "grad_norm": 3.5249319076538086,
      "learning_rate": 6.19047619047619e-05,
      "loss": 2.8925,
      "step": 78
    },
    {
      "epoch": 0.06443914081145585,
      "grad_norm": 3.4028635025024414,
      "learning_rate": 6.428571428571429e-05,
      "loss": 2.8153,
      "step": 81
    },
    {
      "epoch": 0.06682577565632458,
      "grad_norm": 3.1509742736816406,
      "learning_rate": 6.666666666666667e-05,
      "loss": 2.7562,
      "step": 84
    },
    {
      "epoch": 0.06921241050119331,
      "grad_norm": 2.745244264602661,
      "learning_rate": 6.904761904761905e-05,
      "loss": 2.6552,
      "step": 87
    },
    {
      "epoch": 0.07159904534606205,
      "grad_norm": 2.562229633331299,
      "learning_rate": 7.142857142857143e-05,
      "loss": 2.6686,
      "step": 90
    },
    {
      "epoch": 0.07398568019093078,
      "grad_norm": 3.0172386169433594,
      "learning_rate": 7.380952380952382e-05,
      "loss": 2.782,
      "step": 93
    },
    {
      "epoch": 0.07637231503579953,
      "grad_norm": 3.038167953491211,
      "learning_rate": 7.619047619047618e-05,
      "loss": 2.6845,
      "step": 96
    },
    {
      "epoch": 0.07875894988066826,
      "grad_norm": 3.3530476093292236,
      "learning_rate": 7.857142857142858e-05,
      "loss": 2.5982,
      "step": 99
    },
    {
      "epoch": 0.081145584725537,
      "grad_norm": 2.9540939331054688,
      "learning_rate": 8.095238095238096e-05,
      "loss": 2.5005,
      "step": 102
    },
    {
      "epoch": 0.08353221957040573,
      "grad_norm": 2.9407269954681396,
      "learning_rate": 8.333333333333334e-05,
      "loss": 2.5421,
      "step": 105
    },
    {
      "epoch": 0.08591885441527446,
      "grad_norm": 2.8846545219421387,
      "learning_rate": 8.571428571428571e-05,
      "loss": 2.5134,
      "step": 108
    },
    {
      "epoch": 0.0883054892601432,
      "grad_norm": 3.2429230213165283,
      "learning_rate": 8.80952380952381e-05,
      "loss": 2.4542,
      "step": 111
    },
    {
      "epoch": 0.09069212410501193,
      "grad_norm": 2.5776901245117188,
      "learning_rate": 9.047619047619048e-05,
      "loss": 2.5908,
      "step": 114
    },
    {
      "epoch": 0.09307875894988067,
      "grad_norm": 2.4036340713500977,
      "learning_rate": 9.285714285714286e-05,
      "loss": 2.4376,
      "step": 117
    },
    {
      "epoch": 0.0954653937947494,
      "grad_norm": 3.9573254585266113,
      "learning_rate": 9.523809523809524e-05,
      "loss": 2.4042,
      "step": 120
    },
    {
      "epoch": 0.09785202863961814,
      "grad_norm": 3.6064600944519043,
      "learning_rate": 9.761904761904762e-05,
      "loss": 2.5074,
      "step": 123
    },
    {
      "epoch": 0.10023866348448687,
      "grad_norm": 3.3975508213043213,
      "learning_rate": 0.0001,
      "loss": 2.2322,
      "step": 126
    },
    {
      "epoch": 0.1026252983293556,
      "grad_norm": 2.3529391288757324,
      "learning_rate": 9.999961058466053e-05,
      "loss": 2.4629,
      "step": 129
    },
    {
      "epoch": 0.10501193317422435,
      "grad_norm": 2.434084892272949,
      "learning_rate": 9.999844234470782e-05,
      "loss": 2.3296,
      "step": 132
    },
    {
      "epoch": 0.10739856801909307,
      "grad_norm": 2.450005054473877,
      "learning_rate": 9.999649529833915e-05,
      "loss": 2.3464,
      "step": 135
    },
    {
      "epoch": 0.10978520286396182,
      "grad_norm": 3.4513914585113525,
      "learning_rate": 9.999376947588288e-05,
      "loss": 2.4644,
      "step": 138
    },
    {
      "epoch": 0.11217183770883055,
      "grad_norm": 3.2405099868774414,
      "learning_rate": 9.999026491979808e-05,
      "loss": 2.3977,
      "step": 141
    },
    {
      "epoch": 0.11455847255369929,
      "grad_norm": 1.8948777914047241,
      "learning_rate": 9.99859816846739e-05,
      "loss": 2.3372,
      "step": 144
    },
    {
      "epoch": 0.11694510739856802,
      "grad_norm": 2.3878233432769775,
      "learning_rate": 9.998091983722863e-05,
      "loss": 2.3408,
      "step": 147
    },
    {
      "epoch": 0.11933174224343675,
      "grad_norm": 2.373782157897949,
      "learning_rate": 9.99750794563087e-05,
      "loss": 2.1927,
      "step": 150
    },
    {
      "epoch": 0.12171837708830549,
      "grad_norm": 2.473146677017212,
      "learning_rate": 9.996846063288747e-05,
      "loss": 2.2674,
      "step": 153
    },
    {
      "epoch": 0.12410501193317422,
      "grad_norm": 2.17854905128479,
      "learning_rate": 9.996106347006379e-05,
      "loss": 2.4093,
      "step": 156
    },
    {
      "epoch": 0.12649164677804295,
      "grad_norm": 2.668506622314453,
      "learning_rate": 9.99528880830604e-05,
      "loss": 2.234,
      "step": 159
    },
    {
      "epoch": 0.1288782816229117,
      "grad_norm": 2.30295991897583,
      "learning_rate": 9.994393459922218e-05,
      "loss": 2.174,
      "step": 162
    },
    {
      "epoch": 0.13126491646778043,
      "grad_norm": 2.299704074859619,
      "learning_rate": 9.993420315801406e-05,
      "loss": 2.1369,
      "step": 165
    },
    {
      "epoch": 0.13365155131264916,
      "grad_norm": 4.072019100189209,
      "learning_rate": 9.992369391101895e-05,
      "loss": 2.2051,
      "step": 168
    },
    {
      "epoch": 0.1360381861575179,
      "grad_norm": 2.203756809234619,
      "learning_rate": 9.991240702193532e-05,
      "loss": 2.3608,
      "step": 171
    },
    {
      "epoch": 0.13842482100238662,
      "grad_norm": 2.2594192028045654,
      "learning_rate": 9.990034266657467e-05,
      "loss": 2.2503,
      "step": 174
    },
    {
      "epoch": 0.14081145584725538,
      "grad_norm": 2.214170217514038,
      "learning_rate": 9.988750103285883e-05,
      "loss": 2.1698,
      "step": 177
    },
    {
      "epoch": 0.1431980906921241,
      "grad_norm": 2.1277706623077393,
      "learning_rate": 9.987388232081694e-05,
      "loss": 2.2199,
      "step": 180
    },
    {
      "epoch": 0.14558472553699284,
      "grad_norm": 2.1861696243286133,
      "learning_rate": 9.985948674258243e-05,
      "loss": 2.1487,
      "step": 183
    },
    {
      "epoch": 0.14797136038186157,
      "grad_norm": 2.062450647354126,
      "learning_rate": 9.984431452238967e-05,
      "loss": 2.2716,
      "step": 186
    },
    {
      "epoch": 0.15035799522673032,
      "grad_norm": 2.335073471069336,
      "learning_rate": 9.982836589657043e-05,
      "loss": 2.1853,
      "step": 189
    },
    {
      "epoch": 0.15274463007159905,
      "grad_norm": 1.7519389390945435,
      "learning_rate": 9.981164111355035e-05,
      "loss": 2.2452,
      "step": 192
    },
    {
      "epoch": 0.15513126491646778,
      "grad_norm": 4.789851665496826,
      "learning_rate": 9.979414043384485e-05,
      "loss": 2.224,
      "step": 195
    },
    {
      "epoch": 0.1575178997613365,
      "grad_norm": 2.454585552215576,
      "learning_rate": 9.977586413005531e-05,
      "loss": 2.2943,
      "step": 198
    },
    {
      "epoch": 0.15990453460620524,
      "grad_norm": 2.2278664112091064,
      "learning_rate": 9.975681248686461e-05,
      "loss": 2.2947,
      "step": 201
    },
    {
      "epoch": 0.162291169451074,
      "grad_norm": 2.118945837020874,
      "learning_rate": 9.973698580103285e-05,
      "loss": 2.2039,
      "step": 204
    },
    {
      "epoch": 0.16467780429594273,
      "grad_norm": 2.1315999031066895,
      "learning_rate": 9.971638438139266e-05,
      "loss": 2.3493,
      "step": 207
    },
    {
      "epoch": 0.16706443914081145,
      "grad_norm": 2.902245283126831,
      "learning_rate": 9.96950085488444e-05,
      "loss": 2.0854,
      "step": 210
    },
    {
      "epoch": 0.16945107398568018,
      "grad_norm": 2.160647392272949,
      "learning_rate": 9.967285863635112e-05,
      "loss": 2.3386,
      "step": 213
    },
    {
      "epoch": 0.1718377088305489,
      "grad_norm": 3.592740774154663,
      "learning_rate": 9.964993498893349e-05,
      "loss": 2.2387,
      "step": 216
    },
    {
      "epoch": 0.17422434367541767,
      "grad_norm": 3.9422338008880615,
      "learning_rate": 9.962623796366429e-05,
      "loss": 2.2304,
      "step": 219
    },
    {
      "epoch": 0.1766109785202864,
      "grad_norm": 3.213841438293457,
      "learning_rate": 9.960176792966289e-05,
      "loss": 2.1835,
      "step": 222
    },
    {
      "epoch": 0.17899761336515513,
      "grad_norm": 2.4797472953796387,
      "learning_rate": 9.95765252680896e-05,
      "loss": 2.2491,
      "step": 225
    },
    {
      "epoch": 0.18138424821002386,
      "grad_norm": 2.0377020835876465,
      "learning_rate": 9.95505103721396e-05,
      "loss": 2.1921,
      "step": 228
    },
    {
      "epoch": 0.18377088305489261,
      "grad_norm": 2.2581214904785156,
      "learning_rate": 9.952372364703687e-05,
      "loss": 2.2655,
      "step": 231
    },
    {
      "epoch": 0.18615751789976134,
      "grad_norm": 2.89260196685791,
      "learning_rate": 9.949616551002787e-05,
      "loss": 2.1804,
      "step": 234
    },
    {
      "epoch": 0.18854415274463007,
      "grad_norm": 2.4222378730773926,
      "learning_rate": 9.946783639037504e-05,
      "loss": 2.1525,
      "step": 237
    },
    {
      "epoch": 0.1909307875894988,
      "grad_norm": 2.1491341590881348,
      "learning_rate": 9.943873672935014e-05,
      "loss": 2.1605,
      "step": 240
    },
    {
      "epoch": 0.19331742243436753,
      "grad_norm": 1.9782716035842896,
      "learning_rate": 9.940886698022734e-05,
      "loss": 2.0402,
      "step": 243
    },
    {
      "epoch": 0.1957040572792363,
      "grad_norm": 1.9801812171936035,
      "learning_rate": 9.93782276082762e-05,
      "loss": 2.1493,
      "step": 246
    },
    {
      "epoch": 0.19809069212410502,
      "grad_norm": 1.7495440244674683,
      "learning_rate": 9.934681909075434e-05,
      "loss": 2.1087,
      "step": 249
    },
    {
      "epoch": 0.20047732696897375,
      "grad_norm": 1.9998902082443237,
      "learning_rate": 9.931464191690015e-05,
      "loss": 2.1841,
      "step": 252
    },
    {
      "epoch": 0.20286396181384247,
      "grad_norm": 2.2117197513580322,
      "learning_rate": 9.928169658792498e-05,
      "loss": 2.1848,
      "step": 255
    },
    {
      "epoch": 0.2052505966587112,
      "grad_norm": 2.210057497024536,
      "learning_rate": 9.924798361700553e-05,
      "loss": 2.1324,
      "step": 258
    },
    {
      "epoch": 0.20763723150357996,
      "grad_norm": 1.9545291662216187,
      "learning_rate": 9.92135035292757e-05,
      "loss": 2.1454,
      "step": 261
    },
    {
      "epoch": 0.2100238663484487,
      "grad_norm": 2.3102903366088867,
      "learning_rate": 9.91782568618185e-05,
      "loss": 2.183,
      "step": 264
    },
    {
      "epoch": 0.21241050119331742,
      "grad_norm": 1.937774419784546,
      "learning_rate": 9.914224416365764e-05,
      "loss": 2.2033,
      "step": 267
    },
    {
      "epoch": 0.21479713603818615,
      "grad_norm": 1.8689621686935425,
      "learning_rate": 9.910546599574902e-05,
      "loss": 2.3252,
      "step": 270
    },
    {
      "epoch": 0.2171837708830549,
      "grad_norm": 2.4020369052886963,
      "learning_rate": 9.906792293097194e-05,
      "loss": 2.1485,
      "step": 273
    },
    {
      "epoch": 0.21957040572792363,
      "grad_norm": 4.170408248901367,
      "learning_rate": 9.90296155541202e-05,
      "loss": 2.1413,
      "step": 276
    },
    {
      "epoch": 0.22195704057279236,
      "grad_norm": 1.9683239459991455,
      "learning_rate": 9.899054446189304e-05,
      "loss": 1.9998,
      "step": 279
    },
    {
      "epoch": 0.2243436754176611,
      "grad_norm": 1.9885108470916748,
      "learning_rate": 9.895071026288574e-05,
      "loss": 2.0373,
      "step": 282
    },
    {
      "epoch": 0.22673031026252982,
      "grad_norm": 2.297595977783203,
      "learning_rate": 9.891011357758022e-05,
      "loss": 2.0459,
      "step": 285
    },
    {
      "epoch": 0.22911694510739858,
      "grad_norm": 2.0850205421447754,
      "learning_rate": 9.886875503833536e-05,
      "loss": 2.2041,
      "step": 288
    },
    {
      "epoch": 0.2315035799522673,
      "grad_norm": 2.3734419345855713,
      "learning_rate": 9.882663528937717e-05,
      "loss": 2.1135,
      "step": 291
    },
    {
      "epoch": 0.23389021479713604,
      "grad_norm": 2.0763649940490723,
      "learning_rate": 9.87837549867887e-05,
      "loss": 1.955,
      "step": 294
    },
    {
      "epoch": 0.23627684964200477,
      "grad_norm": 2.171724557876587,
      "learning_rate": 9.87401147984998e-05,
      "loss": 2.2409,
      "step": 297
    },
    {
      "epoch": 0.2386634844868735,
      "grad_norm": 2.6933743953704834,
      "learning_rate": 9.869571540427689e-05,
      "loss": 2.1241,
      "step": 300
    },
    {
      "epoch": 0.24105011933174225,
      "grad_norm": 1.917708396911621,
      "learning_rate": 9.865055749571215e-05,
      "loss": 2.1745,
      "step": 303
    },
    {
      "epoch": 0.24343675417661098,
      "grad_norm": 2.3014838695526123,
      "learning_rate": 9.860464177621284e-05,
      "loss": 1.9313,
      "step": 306
    },
    {
      "epoch": 0.2458233890214797,
      "grad_norm": 2.2263333797454834,
      "learning_rate": 9.855796896099045e-05,
      "loss": 2.1881,
      "step": 309
    },
    {
      "epoch": 0.24821002386634844,
      "grad_norm": 2.161557197570801,
      "learning_rate": 9.851053977704931e-05,
      "loss": 1.9702,
      "step": 312
    },
    {
      "epoch": 0.25059665871121717,
      "grad_norm": 2.1806318759918213,
      "learning_rate": 9.846235496317555e-05,
      "loss": 1.9893,
      "step": 315
    },
    {
      "epoch": 0.2529832935560859,
      "grad_norm": 2.226500988006592,
      "learning_rate": 9.841341526992536e-05,
      "loss": 2.0987,
      "step": 318
    },
    {
      "epoch": 0.2553699284009546,
      "grad_norm": 1.8919053077697754,
      "learning_rate": 9.836372145961345e-05,
      "loss": 2.1568,
      "step": 321
    },
    {
      "epoch": 0.2577565632458234,
      "grad_norm": 1.9861871004104614,
      "learning_rate": 9.83132743063011e-05,
      "loss": 2.1504,
      "step": 324
    },
    {
      "epoch": 0.26014319809069214,
      "grad_norm": 2.438837766647339,
      "learning_rate": 9.826207459578411e-05,
      "loss": 1.9779,
      "step": 327
    },
    {
      "epoch": 0.26252983293556087,
      "grad_norm": 2.6724672317504883,
      "learning_rate": 9.821012312558058e-05,
      "loss": 1.9427,
      "step": 330
    },
    {
      "epoch": 0.2649164677804296,
      "grad_norm": 1.813621163368225,
      "learning_rate": 9.815742070491852e-05,
      "loss": 2.1153,
      "step": 333
    },
    {
      "epoch": 0.26730310262529833,
      "grad_norm": 2.0258703231811523,
      "learning_rate": 9.810396815472314e-05,
      "loss": 2.02,
      "step": 336
    },
    {
      "epoch": 0.26968973747016706,
      "grad_norm": 1.9743555784225464,
      "learning_rate": 9.804976630760419e-05,
      "loss": 2.0681,
      "step": 339
    },
    {
      "epoch": 0.2720763723150358,
      "grad_norm": 1.8202836513519287,
      "learning_rate": 9.799481600784286e-05,
      "loss": 1.9733,
      "step": 342
    },
    {
      "epoch": 0.2744630071599045,
      "grad_norm": 2.296698808670044,
      "learning_rate": 9.793911811137875e-05,
      "loss": 2.0407,
      "step": 345
    },
    {
      "epoch": 0.27684964200477324,
      "grad_norm": 2.002511501312256,
      "learning_rate": 9.788267348579648e-05,
      "loss": 2.0666,
      "step": 348
    },
    {
      "epoch": 0.27923627684964203,
      "grad_norm": 2.08190655708313,
      "learning_rate": 9.782548301031217e-05,
      "loss": 2.0803,
      "step": 351
    },
    {
      "epoch": 0.28162291169451076,
      "grad_norm": 2.150238513946533,
      "learning_rate": 9.776754757575975e-05,
      "loss": 2.0132,
      "step": 354
    },
    {
      "epoch": 0.2840095465393795,
      "grad_norm": 2.2527501583099365,
      "learning_rate": 9.770886808457709e-05,
      "loss": 2.0905,
      "step": 357
    },
    {
      "epoch": 0.2863961813842482,
      "grad_norm": 2.284032106399536,
      "learning_rate": 9.764944545079196e-05,
      "loss": 2.1845,
      "step": 360
    },
    {
      "epoch": 0.28878281622911695,
      "grad_norm": 1.89547598361969,
      "learning_rate": 9.758928060000778e-05,
      "loss": 2.1135,
      "step": 363
    },
    {
      "epoch": 0.2911694510739857,
      "grad_norm": 2.5930140018463135,
      "learning_rate": 9.752837446938915e-05,
      "loss": 1.9517,
      "step": 366
    },
    {
      "epoch": 0.2935560859188544,
      "grad_norm": 1.86557137966156,
      "learning_rate": 9.746672800764735e-05,
      "loss": 1.9579,
      "step": 369
    },
    {
      "epoch": 0.29594272076372313,
      "grad_norm": 1.8821232318878174,
      "learning_rate": 9.740434217502547e-05,
      "loss": 1.9665,
      "step": 372
    },
    {
      "epoch": 0.29832935560859186,
      "grad_norm": 2.0216808319091797,
      "learning_rate": 9.734121794328357e-05,
      "loss": 2.0612,
      "step": 375
    },
    {
      "epoch": 0.30071599045346065,
      "grad_norm": 1.8245267868041992,
      "learning_rate": 9.727735629568336e-05,
      "loss": 2.0857,
      "step": 378
    },
    {
      "epoch": 0.3031026252983294,
      "grad_norm": 1.9001407623291016,
      "learning_rate": 9.721275822697306e-05,
      "loss": 1.9954,
      "step": 381
    },
    {
      "epoch": 0.3054892601431981,
      "grad_norm": 1.7628698348999023,
      "learning_rate": 9.714742474337186e-05,
      "loss": 2.1095,
      "step": 384
    },
    {
      "epoch": 0.30787589498806683,
      "grad_norm": 1.687436819076538,
      "learning_rate": 9.708135686255416e-05,
      "loss": 2.1919,
      "step": 387
    },
    {
      "epoch": 0.31026252983293556,
      "grad_norm": 2.219071388244629,
      "learning_rate": 9.701455561363379e-05,
      "loss": 1.9392,
      "step": 390
    },
    {
      "epoch": 0.3126491646778043,
      "grad_norm": 1.9545831680297852,
      "learning_rate": 9.6947022037148e-05,
      "loss": 1.9879,
      "step": 393
    },
    {
      "epoch": 0.315035799522673,
      "grad_norm": 2.1863789558410645,
      "learning_rate": 9.687875718504126e-05,
      "loss": 1.9631,
      "step": 396
    },
    {
      "epoch": 0.31742243436754175,
      "grad_norm": 3.1137101650238037,
      "learning_rate": 9.680976212064874e-05,
      "loss": 2.0387,
      "step": 399
    },
    {
      "epoch": 0.3198090692124105,
      "grad_norm": 1.9021557569503784,
      "learning_rate": 9.674003791867991e-05,
      "loss": 2.0447,
      "step": 402
    },
    {
      "epoch": 0.3221957040572792,
      "grad_norm": 1.8783704042434692,
      "learning_rate": 9.666958566520174e-05,
      "loss": 2.0777,
      "step": 405
    },
    {
      "epoch": 0.324582338902148,
      "grad_norm": 1.910521388053894,
      "learning_rate": 9.659840645762175e-05,
      "loss": 2.1084,
      "step": 408
    },
    {
      "epoch": 0.3269689737470167,
      "grad_norm": 1.9456645250320435,
      "learning_rate": 9.652650140467093e-05,
      "loss": 2.0317,
      "step": 411
    },
    {
      "epoch": 0.32935560859188545,
      "grad_norm": 1.8211055994033813,
      "learning_rate": 9.645387162638652e-05,
      "loss": 2.0386,
      "step": 414
    },
    {
      "epoch": 0.3317422434367542,
      "grad_norm": 2.032345771789551,
      "learning_rate": 9.638051825409453e-05,
      "loss": 2.2154,
      "step": 417
    },
    {
      "epoch": 0.3341288782816229,
      "grad_norm": 1.869388461112976,
      "learning_rate": 9.630644243039207e-05,
      "loss": 1.9595,
      "step": 420
    },
    {
      "epoch": 0.33651551312649164,
      "grad_norm": 1.9663021564483643,
      "learning_rate": 9.623164530912963e-05,
      "loss": 2.1678,
      "step": 423
    },
    {
      "epoch": 0.33890214797136037,
      "grad_norm": 1.684556484222412,
      "learning_rate": 9.615612805539305e-05,
      "loss": 1.9458,
      "step": 426
    },
    {
      "epoch": 0.3412887828162291,
      "grad_norm": 2.050321102142334,
      "learning_rate": 9.607989184548543e-05,
      "loss": 2.0412,
      "step": 429
    },
    {
      "epoch": 0.3436754176610978,
      "grad_norm": 1.753670334815979,
      "learning_rate": 9.600293786690872e-05,
      "loss": 2.037,
      "step": 432
    },
    {
      "epoch": 0.3460620525059666,
      "grad_norm": 1.8368828296661377,
      "learning_rate": 9.592526731834537e-05,
      "loss": 2.1845,
      "step": 435
    },
    {
      "epoch": 0.34844868735083534,
      "grad_norm": 1.8000643253326416,
      "learning_rate": 9.584688140963944e-05,
      "loss": 1.8592,
      "step": 438
    },
    {
      "epoch": 0.35083532219570407,
      "grad_norm": 1.7274373769760132,
      "learning_rate": 9.576778136177798e-05,
      "loss": 2.0366,
      "step": 441
    },
    {
      "epoch": 0.3532219570405728,
      "grad_norm": 1.8421188592910767,
      "learning_rate": 9.568796840687184e-05,
      "loss": 1.9281,
      "step": 444
    },
    {
      "epoch": 0.3556085918854415,
      "grad_norm": 1.8605895042419434,
      "learning_rate": 9.560744378813659e-05,
      "loss": 2.0214,
      "step": 447
    },
    {
      "epoch": 0.35799522673031026,
      "grad_norm": 2.4788737297058105,
      "learning_rate": 9.552620875987311e-05,
      "loss": 1.9457,
      "step": 450
    },
    {
      "epoch": 0.360381861575179,
      "grad_norm": 3.01055645942688,
      "learning_rate": 9.544426458744804e-05,
      "loss": 1.879,
      "step": 453
    },
    {
      "epoch": 0.3627684964200477,
      "grad_norm": 2.6994807720184326,
      "learning_rate": 9.536161254727408e-05,
      "loss": 1.9495,
      "step": 456
    },
    {
      "epoch": 0.36515513126491644,
      "grad_norm": 1.736722469329834,
      "learning_rate": 9.527825392679012e-05,
      "loss": 1.942,
      "step": 459
    },
    {
      "epoch": 0.36754176610978523,
      "grad_norm": 2.2707650661468506,
      "learning_rate": 9.51941900244412e-05,
      "loss": 1.9525,
      "step": 462
    },
    {
      "epoch": 0.36992840095465396,
      "grad_norm": 1.7479629516601562,
      "learning_rate": 9.51094221496582e-05,
      "loss": 1.9385,
      "step": 465
    },
    {
      "epoch": 0.3723150357995227,
      "grad_norm": 2.1110448837280273,
      "learning_rate": 9.502395162283759e-05,
      "loss": 1.8335,
      "step": 468
    },
    {
      "epoch": 0.3747016706443914,
      "grad_norm": 1.8206532001495361,
      "learning_rate": 9.493777977532072e-05,
      "loss": 1.9642,
      "step": 471
    },
    {
      "epoch": 0.37708830548926014,
      "grad_norm": 2.622044801712036,
      "learning_rate": 9.485090794937319e-05,
      "loss": 1.9383,
      "step": 474
    },
    {
      "epoch": 0.3794749403341289,
      "grad_norm": 2.016352653503418,
      "learning_rate": 9.476333749816382e-05,
      "loss": 1.8639,
      "step": 477
    },
    {
      "epoch": 0.3818615751789976,
      "grad_norm": 2.027357816696167,
      "learning_rate": 9.467506978574371e-05,
      "loss": 1.862,
      "step": 480
    },
    {
      "epoch": 0.38424821002386633,
      "grad_norm": 1.91681969165802,
      "learning_rate": 9.45861061870249e-05,
      "loss": 2.0105,
      "step": 483
    },
    {
      "epoch": 0.38663484486873506,
      "grad_norm": 1.8423808813095093,
      "learning_rate": 9.449644808775902e-05,
      "loss": 1.9303,
      "step": 486
    },
    {
      "epoch": 0.38902147971360385,
      "grad_norm": 1.5539026260375977,
      "learning_rate": 9.44060968845156e-05,
      "loss": 1.9071,
      "step": 489
    },
    {
      "epoch": 0.3914081145584726,
      "grad_norm": 1.8300362825393677,
      "learning_rate": 9.431505398466045e-05,
      "loss": 2.0194,
      "step": 492
    },
    {
      "epoch": 0.3937947494033413,
      "grad_norm": 1.6660507917404175,
      "learning_rate": 9.42233208063336e-05,
      "loss": 1.8226,
      "step": 495
    },
    {
      "epoch": 0.39618138424821003,
      "grad_norm": 1.9865005016326904,
      "learning_rate": 9.413089877842736e-05,
      "loss": 2.0689,
      "step": 498
    },
    {
      "epoch": 0.39856801909307876,
      "grad_norm": 1.9386086463928223,
      "learning_rate": 9.403778934056391e-05,
      "loss": 2.0289,
      "step": 501
    },
    {
      "epoch": 0.4009546539379475,
      "grad_norm": 1.9398423433303833,
      "learning_rate": 9.394399394307303e-05,
      "loss": 2.2213,
      "step": 504
    },
    {
      "epoch": 0.4033412887828162,
      "grad_norm": 1.864970326423645,
      "learning_rate": 9.384951404696933e-05,
      "loss": 1.8574,
      "step": 507
    },
    {
      "epoch": 0.40572792362768495,
      "grad_norm": 1.9465175867080688,
      "learning_rate": 9.375435112392969e-05,
      "loss": 2.0628,
      "step": 510
    },
    {
      "epoch": 0.4081145584725537,
      "grad_norm": 1.808294415473938,
      "learning_rate": 9.365850665627016e-05,
      "loss": 1.9223,
      "step": 513
    },
    {
      "epoch": 0.4105011933174224,
      "grad_norm": 2.4403979778289795,
      "learning_rate": 9.356198213692297e-05,
      "loss": 1.8865,
      "step": 516
    },
    {
      "epoch": 0.4128878281622912,
      "grad_norm": 1.8101935386657715,
      "learning_rate": 9.346477906941331e-05,
      "loss": 1.8335,
      "step": 519
    },
    {
      "epoch": 0.4152744630071599,
      "grad_norm": 1.718172311782837,
      "learning_rate": 9.336689896783573e-05,
      "loss": 1.8691,
      "step": 522
    },
    {
      "epoch": 0.41766109785202865,
      "grad_norm": 1.8754642009735107,
      "learning_rate": 9.32683433568308e-05,
      "loss": 1.9212,
      "step": 525
    },
    {
      "epoch": 0.4200477326968974,
      "grad_norm": 2.471646785736084,
      "learning_rate": 9.316911377156117e-05,
      "loss": 2.0121,
      "step": 528
    },
    {
      "epoch": 0.4224343675417661,
      "grad_norm": 2.1250839233398438,
      "learning_rate": 9.306921175768775e-05,
      "loss": 1.9309,
      "step": 531
    },
    {
      "epoch": 0.42482100238663484,
      "grad_norm": 2.0609402656555176,
      "learning_rate": 9.29686388713456e-05,
      "loss": 2.1438,
      "step": 534
    },
    {
      "epoch": 0.42720763723150357,
      "grad_norm": 1.7126508951187134,
      "learning_rate": 9.286739667911972e-05,
      "loss": 1.9621,
      "step": 537
    },
    {
      "epoch": 0.4295942720763723,
      "grad_norm": 1.7493348121643066,
      "learning_rate": 9.276548675802059e-05,
      "loss": 2.0063,
      "step": 540
    },
    {
      "epoch": 0.431980906921241,
      "grad_norm": 1.8076331615447998,
      "learning_rate": 9.266291069545972e-05,
      "loss": 1.9256,
      "step": 543
    },
    {
      "epoch": 0.4343675417661098,
      "grad_norm": 1.6762983798980713,
      "learning_rate": 9.255967008922474e-05,
      "loss": 2.0052,
      "step": 546
    },
    {
      "epoch": 0.43675417661097854,
      "grad_norm": 1.4335881471633911,
      "learning_rate": 9.245576654745471e-05,
      "loss": 2.0899,
      "step": 549
    },
    {
      "epoch": 0.43914081145584727,
      "grad_norm": 1.6479798555374146,
      "learning_rate": 9.235120168861496e-05,
      "loss": 1.7962,
      "step": 552
    },
    {
      "epoch": 0.441527446300716,
      "grad_norm": 2.1050121784210205,
      "learning_rate": 9.224597714147186e-05,
      "loss": 2.0109,
      "step": 555
    },
    {
      "epoch": 0.4439140811455847,
      "grad_norm": 1.6112617254257202,
      "learning_rate": 9.214009454506753e-05,
      "loss": 1.8432,
      "step": 558
    },
    {
      "epoch": 0.44630071599045346,
      "grad_norm": 1.8361741304397583,
      "learning_rate": 9.203355554869428e-05,
      "loss": 1.9433,
      "step": 561
    },
    {
      "epoch": 0.4486873508353222,
      "grad_norm": 3.137519121170044,
      "learning_rate": 9.192636181186888e-05,
      "loss": 1.7776,
      "step": 564
    },
    {
      "epoch": 0.4510739856801909,
      "grad_norm": 1.79214346408844,
      "learning_rate": 9.181851500430673e-05,
      "loss": 1.8203,
      "step": 567
    },
    {
      "epoch": 0.45346062052505964,
      "grad_norm": 2.010784149169922,
      "learning_rate": 9.171001680589588e-05,
      "loss": 1.8505,
      "step": 570
    },
    {
      "epoch": 0.45584725536992843,
      "grad_norm": 2.2128775119781494,
      "learning_rate": 9.160086890667086e-05,
      "loss": 1.9007,
      "step": 573
    },
    {
      "epoch": 0.45823389021479716,
      "grad_norm": 1.6658575534820557,
      "learning_rate": 9.14910730067863e-05,
      "loss": 1.9516,
      "step": 576
    },
    {
      "epoch": 0.4606205250596659,
      "grad_norm": 1.6554961204528809,
      "learning_rate": 9.138063081649051e-05,
      "loss": 2.0685,
      "step": 579
    },
    {
      "epoch": 0.4630071599045346,
      "grad_norm": 1.6959861516952515,
      "learning_rate": 9.126954405609882e-05,
      "loss": 1.9156,
      "step": 582
    },
    {
      "epoch": 0.46539379474940334,
      "grad_norm": 1.795530915260315,
      "learning_rate": 9.115781445596676e-05,
      "loss": 1.7886,
      "step": 585
    },
    {
      "epoch": 0.4677804295942721,
      "grad_norm": 1.9987218379974365,
      "learning_rate": 9.104544375646313e-05,
      "loss": 2.1443,
      "step": 588
    },
    {
      "epoch": 0.4701670644391408,
      "grad_norm": 1.8751397132873535,
      "learning_rate": 9.093243370794291e-05,
      "loss": 1.8975,
      "step": 591
    },
    {
      "epoch": 0.47255369928400953,
      "grad_norm": 1.921032428741455,
      "learning_rate": 9.081878607071996e-05,
      "loss": 1.9969,
      "step": 594
    },
    {
      "epoch": 0.47494033412887826,
      "grad_norm": 4.745877742767334,
      "learning_rate": 9.07045026150396e-05,
      "loss": 1.9351,
      "step": 597
    },
    {
      "epoch": 0.477326968973747,
      "grad_norm": 1.8512818813323975,
      "learning_rate": 9.058958512105104e-05,
      "loss": 1.8773,
      "step": 600
    },
    {
      "epoch": 0.4797136038186158,
      "grad_norm": 2.7076666355133057,
      "learning_rate": 9.047403537877971e-05,
      "loss": 1.8907,
      "step": 603
    },
    {
      "epoch": 0.4821002386634845,
      "grad_norm": 1.9602298736572266,
      "learning_rate": 9.035785518809927e-05,
      "loss": 1.955,
      "step": 606
    },
    {
      "epoch": 0.48448687350835323,
      "grad_norm": 1.8718470335006714,
      "learning_rate": 9.024104635870368e-05,
      "loss": 1.9148,
      "step": 609
    },
    {
      "epoch": 0.48687350835322196,
      "grad_norm": 1.708706259727478,
      "learning_rate": 9.012361071007891e-05,
      "loss": 1.9202,
      "step": 612
    },
    {
      "epoch": 0.4892601431980907,
      "grad_norm": 2.0447752475738525,
      "learning_rate": 9.000555007147469e-05,
      "loss": 1.9347,
      "step": 615
    },
    {
      "epoch": 0.4916467780429594,
      "grad_norm": 2.0071353912353516,
      "learning_rate": 8.988686628187597e-05,
      "loss": 1.9392,
      "step": 618
    },
    {
      "epoch": 0.49403341288782815,
      "grad_norm": 1.9587384462356567,
      "learning_rate": 8.976756118997427e-05,
      "loss": 1.9572,
      "step": 621
    },
    {
      "epoch": 0.4964200477326969,
      "grad_norm": 2.829688549041748,
      "learning_rate": 8.964763665413893e-05,
      "loss": 1.7861,
      "step": 624
    },
    {
      "epoch": 0.4988066825775656,
      "grad_norm": 1.878251075744629,
      "learning_rate": 8.952709454238808e-05,
      "loss": 2.0488,
      "step": 627
    },
    {
      "epoch": 0.5011933174224343,
      "grad_norm": 1.6959253549575806,
      "learning_rate": 8.940593673235962e-05,
      "loss": 2.0164,
      "step": 630
    },
    {
      "epoch": 0.5035799522673031,
      "grad_norm": 1.8719433546066284,
      "learning_rate": 8.928416511128195e-05,
      "loss": 1.8288,
      "step": 633
    },
    {
      "epoch": 0.5059665871121718,
      "grad_norm": 1.9740793704986572,
      "learning_rate": 8.916178157594453e-05,
      "loss": 1.8924,
      "step": 636
    },
    {
      "epoch": 0.5083532219570406,
      "grad_norm": 2.177084445953369,
      "learning_rate": 8.903878803266841e-05,
      "loss": 2.0888,
      "step": 639
    },
    {
      "epoch": 0.5107398568019093,
      "grad_norm": 1.6153382062911987,
      "learning_rate": 8.891518639727649e-05,
      "loss": 1.7814,
      "step": 642
    },
    {
      "epoch": 0.513126491646778,
      "grad_norm": 2.0001626014709473,
      "learning_rate": 8.879097859506372e-05,
      "loss": 1.9432,
      "step": 645
    },
    {
      "epoch": 0.5155131264916468,
      "grad_norm": 2.0495731830596924,
      "learning_rate": 8.866616656076696e-05,
      "loss": 1.8339,
      "step": 648
    },
    {
      "epoch": 0.5178997613365155,
      "grad_norm": 1.7960389852523804,
      "learning_rate": 8.854075223853508e-05,
      "loss": 1.9401,
      "step": 651
    },
    {
      "epoch": 0.5202863961813843,
      "grad_norm": 1.9565211534500122,
      "learning_rate": 8.841473758189854e-05,
      "loss": 1.8112,
      "step": 654
    },
    {
      "epoch": 0.522673031026253,
      "grad_norm": 2.022819995880127,
      "learning_rate": 8.828812455373891e-05,
      "loss": 2.0094,
      "step": 657
    },
    {
      "epoch": 0.5250596658711217,
      "grad_norm": 1.8336989879608154,
      "learning_rate": 8.816091512625843e-05,
      "loss": 1.9463,
      "step": 660
    },
    {
      "epoch": 0.5274463007159904,
      "grad_norm": 2.031402111053467,
      "learning_rate": 8.803311128094918e-05,
      "loss": 1.9657,
      "step": 663
    },
    {
      "epoch": 0.5298329355608592,
      "grad_norm": 2.0874717235565186,
      "learning_rate": 8.790471500856228e-05,
      "loss": 2.0375,
      "step": 666
    },
    {
      "epoch": 0.5322195704057279,
      "grad_norm": 1.7618159055709839,
      "learning_rate": 8.777572830907684e-05,
      "loss": 2.1104,
      "step": 669
    },
    {
      "epoch": 0.5346062052505967,
      "grad_norm": 1.6236835718154907,
      "learning_rate": 8.764615319166886e-05,
      "loss": 2.0333,
      "step": 672
    },
    {
      "epoch": 0.5369928400954654,
      "grad_norm": 1.613146424293518,
      "learning_rate": 8.751599167467985e-05,
      "loss": 1.8055,
      "step": 675
    },
    {
      "epoch": 0.5393794749403341,
      "grad_norm": 1.5570297241210938,
      "learning_rate": 8.738524578558547e-05,
      "loss": 1.8801,
      "step": 678
    },
    {
      "epoch": 0.5417661097852029,
      "grad_norm": 1.7564152479171753,
      "learning_rate": 8.72539175609639e-05,
      "loss": 1.8072,
      "step": 681
    },
    {
      "epoch": 0.5441527446300716,
      "grad_norm": 1.7274627685546875,
      "learning_rate": 8.712200904646416e-05,
      "loss": 1.787,
      "step": 684
    },
    {
      "epoch": 0.5465393794749404,
      "grad_norm": 2.1072866916656494,
      "learning_rate": 8.698952229677422e-05,
      "loss": 1.8179,
      "step": 687
    },
    {
      "epoch": 0.548926014319809,
      "grad_norm": 2.1879334449768066,
      "learning_rate": 8.685645937558896e-05,
      "loss": 2.0755,
      "step": 690
    },
    {
      "epoch": 0.5513126491646778,
      "grad_norm": 1.7754478454589844,
      "learning_rate": 8.67228223555781e-05,
      "loss": 1.8438,
      "step": 693
    },
    {
      "epoch": 0.5536992840095465,
      "grad_norm": 2.0312633514404297,
      "learning_rate": 8.658861331835385e-05,
      "loss": 1.9058,
      "step": 696
    },
    {
      "epoch": 0.5560859188544153,
      "grad_norm": 1.9634501934051514,
      "learning_rate": 8.645383435443852e-05,
      "loss": 1.8278,
      "step": 699
    },
    {
      "epoch": 0.5584725536992841,
      "grad_norm": 1.915229082107544,
      "learning_rate": 8.631848756323197e-05,
      "loss": 1.9127,
      "step": 702
    },
    {
      "epoch": 0.5608591885441527,
      "grad_norm": 1.7499381303787231,
      "learning_rate": 8.618257505297886e-05,
      "loss": 1.9196,
      "step": 705
    },
    {
      "epoch": 0.5632458233890215,
      "grad_norm": 2.883967161178589,
      "learning_rate": 8.604609894073584e-05,
      "loss": 1.7157,
      "step": 708
    },
    {
      "epoch": 0.5656324582338902,
      "grad_norm": 1.8204519748687744,
      "learning_rate": 8.590906135233854e-05,
      "loss": 1.845,
      "step": 711
    },
    {
      "epoch": 0.568019093078759,
      "grad_norm": 3.1434381008148193,
      "learning_rate": 8.577146442236857e-05,
      "loss": 1.9142,
      "step": 714
    },
    {
      "epoch": 0.5704057279236276,
      "grad_norm": 1.7107082605361938,
      "learning_rate": 8.563331029412012e-05,
      "loss": 1.9358,
      "step": 717
    },
    {
      "epoch": 0.5727923627684964,
      "grad_norm": 2.010882616043091,
      "learning_rate": 8.549460111956664e-05,
      "loss": 1.8745,
      "step": 720
    },
    {
      "epoch": 0.5751789976133651,
      "grad_norm": 2.873108148574829,
      "learning_rate": 8.535533905932738e-05,
      "loss": 1.8725,
      "step": 723
    },
    {
      "epoch": 0.5775656324582339,
      "grad_norm": 2.0034258365631104,
      "learning_rate": 8.521552628263362e-05,
      "loss": 1.8666,
      "step": 726
    },
    {
      "epoch": 0.5799522673031027,
      "grad_norm": 1.7691940069198608,
      "learning_rate": 8.507516496729495e-05,
      "loss": 1.8096,
      "step": 729
    },
    {
      "epoch": 0.5823389021479713,
      "grad_norm": 1.77200186252594,
      "learning_rate": 8.493425729966534e-05,
      "loss": 1.9294,
      "step": 732
    },
    {
      "epoch": 0.5847255369928401,
      "grad_norm": 1.6291214227676392,
      "learning_rate": 8.479280547460907e-05,
      "loss": 1.8242,
      "step": 735
    },
    {
      "epoch": 0.5871121718377088,
      "grad_norm": 1.5914812088012695,
      "learning_rate": 8.465081169546659e-05,
      "loss": 1.9836,
      "step": 738
    },
    {
      "epoch": 0.5894988066825776,
      "grad_norm": 1.6103566884994507,
      "learning_rate": 8.450827817402011e-05,
      "loss": 1.9699,
      "step": 741
    },
    {
      "epoch": 0.5918854415274463,
      "grad_norm": 2.9773311614990234,
      "learning_rate": 8.436520713045922e-05,
      "loss": 1.7708,
      "step": 744
    },
    {
      "epoch": 0.594272076372315,
      "grad_norm": 1.5203982591629028,
      "learning_rate": 8.422160079334628e-05,
      "loss": 1.8533,
      "step": 747
    },
    {
      "epoch": 0.5966587112171837,
      "grad_norm": 3.8291232585906982,
      "learning_rate": 8.40774613995817e-05,
      "loss": 1.8964,
      "step": 750
    },
    {
      "epoch": 0.5990453460620525,
      "grad_norm": 1.650585412979126,
      "learning_rate": 8.393279119436912e-05,
      "loss": 1.9163,
      "step": 753
    },
    {
      "epoch": 0.6014319809069213,
      "grad_norm": 1.588335633277893,
      "learning_rate": 8.378759243118044e-05,
      "loss": 2.0618,
      "step": 756
    },
    {
      "epoch": 0.60381861575179,
      "grad_norm": 1.9786241054534912,
      "learning_rate": 8.364186737172068e-05,
      "loss": 1.8235,
      "step": 759
    },
    {
      "epoch": 0.6062052505966588,
      "grad_norm": 1.7208276987075806,
      "learning_rate": 8.349561828589277e-05,
      "loss": 2.0045,
      "step": 762
    },
    {
      "epoch": 0.6085918854415274,
      "grad_norm": 1.713976502418518,
      "learning_rate": 8.33488474517622e-05,
      "loss": 1.8602,
      "step": 765
    },
    {
      "epoch": 0.6109785202863962,
      "grad_norm": 1.655760407447815,
      "learning_rate": 8.320155715552155e-05,
      "loss": 1.8096,
      "step": 768
    },
    {
      "epoch": 0.6133651551312649,
      "grad_norm": 1.82340669631958,
      "learning_rate": 8.305374969145488e-05,
      "loss": 1.9755,
      "step": 771
    },
    {
      "epoch": 0.6157517899761337,
      "grad_norm": 1.6505107879638672,
      "learning_rate": 8.290542736190188e-05,
      "loss": 1.7543,
      "step": 774
    },
    {
      "epoch": 0.6181384248210023,
      "grad_norm": 1.6107587814331055,
      "learning_rate": 8.275659247722222e-05,
      "loss": 1.7788,
      "step": 777
    },
    {
      "epoch": 0.6205250596658711,
      "grad_norm": 1.7392557859420776,
      "learning_rate": 8.260724735575933e-05,
      "loss": 1.8713,
      "step": 780
    },
    {
      "epoch": 0.6229116945107399,
      "grad_norm": 1.8423359394073486,
      "learning_rate": 8.24573943238045e-05,
      "loss": 1.9501,
      "step": 783
    },
    {
      "epoch": 0.6252983293556086,
      "grad_norm": 1.596801996231079,
      "learning_rate": 8.230703571556048e-05,
      "loss": 1.7561,
      "step": 786
    },
    {
      "epoch": 0.6276849642004774,
      "grad_norm": 1.6264513731002808,
      "learning_rate": 8.215617387310524e-05,
      "loss": 1.812,
      "step": 789
    },
    {
      "epoch": 0.630071599045346,
      "grad_norm": 1.8065801858901978,
      "learning_rate": 8.200481114635536e-05,
      "loss": 1.8587,
      "step": 792
    },
    {
      "epoch": 0.6324582338902148,
      "grad_norm": 1.6027936935424805,
      "learning_rate": 8.185294989302958e-05,
      "loss": 1.7951,
      "step": 795
    },
    {
      "epoch": 0.6348448687350835,
      "grad_norm": 1.863053560256958,
      "learning_rate": 8.170059247861194e-05,
      "loss": 1.791,
      "step": 798
    },
    {
      "epoch": 0.6372315035799523,
      "grad_norm": 1.7930762767791748,
      "learning_rate": 8.154774127631501e-05,
      "loss": 1.7575,
      "step": 801
    },
    {
      "epoch": 0.639618138424821,
      "grad_norm": 2.0538759231567383,
      "learning_rate": 8.139439866704293e-05,
      "loss": 1.8417,
      "step": 804
    },
    {
      "epoch": 0.6420047732696897,
      "grad_norm": 2.5710806846618652,
      "learning_rate": 8.124056703935423e-05,
      "loss": 1.8187,
      "step": 807
    },
    {
      "epoch": 0.6443914081145584,
      "grad_norm": 1.6980230808258057,
      "learning_rate": 8.108624878942477e-05,
      "loss": 1.8364,
      "step": 810
    },
    {
      "epoch": 0.6467780429594272,
      "grad_norm": 1.7313123941421509,
      "learning_rate": 8.093144632101026e-05,
      "loss": 1.7538,
      "step": 813
    },
    {
      "epoch": 0.649164677804296,
      "grad_norm": 1.6911081075668335,
      "learning_rate": 8.077616204540897e-05,
      "loss": 1.8258,
      "step": 816
    },
    {
      "epoch": 0.6515513126491647,
      "grad_norm": 1.9907560348510742,
      "learning_rate": 8.062039838142402e-05,
      "loss": 1.7978,
      "step": 819
    },
    {
      "epoch": 0.6539379474940334,
      "grad_norm": 1.9501363039016724,
      "learning_rate": 8.046415775532585e-05,
      "loss": 1.8116,
      "step": 822
    },
    {
      "epoch": 0.6563245823389021,
      "grad_norm": 1.7937824726104736,
      "learning_rate": 8.030744260081426e-05,
      "loss": 1.8347,
      "step": 825
    },
    {
      "epoch": 0.6587112171837709,
      "grad_norm": 1.682985782623291,
      "learning_rate": 8.015025535898073e-05,
      "loss": 1.8879,
      "step": 828
    },
    {
      "epoch": 0.6610978520286396,
      "grad_norm": 1.7274394035339355,
      "learning_rate": 7.999259847827015e-05,
      "loss": 1.8931,
      "step": 831
    },
    {
      "epoch": 0.6634844868735084,
      "grad_norm": 1.7429416179656982,
      "learning_rate": 7.983447441444281e-05,
      "loss": 1.7171,
      "step": 834
    },
    {
      "epoch": 0.665871121718377,
      "grad_norm": 1.949879765510559,
      "learning_rate": 7.967588563053616e-05,
      "loss": 1.7779,
      "step": 837
    },
    {
      "epoch": 0.6682577565632458,
      "grad_norm": 1.5538753271102905,
      "learning_rate": 7.951683459682641e-05,
      "loss": 1.8087,
      "step": 840
    },
    {
      "epoch": 0.6706443914081146,
      "grad_norm": 1.7967875003814697,
      "learning_rate": 7.935732379079008e-05,
      "loss": 2.0304,
      "step": 843
    },
    {
      "epoch": 0.6730310262529833,
      "grad_norm": 1.7717353105545044,
      "learning_rate": 7.919735569706533e-05,
      "loss": 1.904,
      "step": 846
    },
    {
      "epoch": 0.6754176610978521,
      "grad_norm": 1.6083266735076904,
      "learning_rate": 7.903693280741331e-05,
      "loss": 1.8501,
      "step": 849
    },
    {
      "epoch": 0.6778042959427207,
      "grad_norm": 1.6468119621276855,
      "learning_rate": 7.887605762067945e-05,
      "loss": 1.7535,
      "step": 852
    },
    {
      "epoch": 0.6801909307875895,
      "grad_norm": 1.6218470335006714,
      "learning_rate": 7.871473264275429e-05,
      "loss": 1.7495,
      "step": 855
    },
    {
      "epoch": 0.6825775656324582,
      "grad_norm": 1.6359236240386963,
      "learning_rate": 7.855296038653475e-05,
      "loss": 2.0507,
      "step": 858
    },
    {
      "epoch": 0.684964200477327,
      "grad_norm": 1.4922749996185303,
      "learning_rate": 7.83907433718847e-05,
      "loss": 1.7931,
      "step": 861
    },
    {
      "epoch": 0.6873508353221957,
      "grad_norm": 1.5041239261627197,
      "learning_rate": 7.82280841255959e-05,
      "loss": 1.7704,
      "step": 864
    },
    {
      "epoch": 0.6897374701670644,
      "grad_norm": 2.032655954360962,
      "learning_rate": 7.80649851813486e-05,
      "loss": 1.9174,
      "step": 867
    },
    {
      "epoch": 0.6921241050119332,
      "grad_norm": 1.7632269859313965,
      "learning_rate": 7.790144907967201e-05,
      "loss": 1.7885,
      "step": 870
    },
    {
      "epoch": 0.6945107398568019,
      "grad_norm": 1.7323729991912842,
      "learning_rate": 7.773747836790481e-05,
      "loss": 1.9919,
      "step": 873
    },
    {
      "epoch": 0.6968973747016707,
      "grad_norm": 2.3218891620635986,
      "learning_rate": 7.757307560015538e-05,
      "loss": 1.7896,
      "step": 876
    },
    {
      "epoch": 0.6992840095465394,
      "grad_norm": 1.620492935180664,
      "learning_rate": 7.740824333726213e-05,
      "loss": 1.748,
      "step": 879
    },
    {
      "epoch": 0.7016706443914081,
      "grad_norm": 1.6885743141174316,
      "learning_rate": 7.724298414675353e-05,
      "loss": 1.7732,
      "step": 882
    },
    {
      "epoch": 0.7040572792362768,
      "grad_norm": 1.8093699216842651,
      "learning_rate": 7.707730060280812e-05,
      "loss": 1.9147,
      "step": 885
    },
    {
      "epoch": 0.7064439140811456,
      "grad_norm": 1.7837680578231812,
      "learning_rate": 7.691119528621444e-05,
      "loss": 1.8391,
      "step": 888
    },
    {
      "epoch": 0.7088305489260143,
      "grad_norm": 1.75551176071167,
      "learning_rate": 7.674467078433081e-05,
      "loss": 1.9519,
      "step": 891
    },
    {
      "epoch": 0.711217183770883,
      "grad_norm": 1.791812777519226,
      "learning_rate": 7.657772969104508e-05,
      "loss": 1.6442,
      "step": 894
    },
    {
      "epoch": 0.7136038186157518,
      "grad_norm": 1.6638917922973633,
      "learning_rate": 7.641037460673412e-05,
      "loss": 1.6225,
      "step": 897
    },
    {
      "epoch": 0.7159904534606205,
      "grad_norm": 1.479506015777588,
      "learning_rate": 7.624260813822342e-05,
      "loss": 1.7162,
      "step": 900
    },
    {
      "epoch": 0.7183770883054893,
      "grad_norm": 1.9166977405548096,
      "learning_rate": 7.607443289874642e-05,
      "loss": 1.8657,
      "step": 903
    },
    {
      "epoch": 0.720763723150358,
      "grad_norm": 1.4827370643615723,
      "learning_rate": 7.590585150790389e-05,
      "loss": 1.9136,
      "step": 906
    },
    {
      "epoch": 0.7231503579952268,
      "grad_norm": 1.5117080211639404,
      "learning_rate": 7.573686659162293e-05,
      "loss": 1.8548,
      "step": 909
    },
    {
      "epoch": 0.7255369928400954,
      "grad_norm": 1.8482357263565063,
      "learning_rate": 7.556748078211635e-05,
      "loss": 1.9555,
      "step": 912
    },
    {
      "epoch": 0.7279236276849642,
      "grad_norm": 1.6880775690078735,
      "learning_rate": 7.53976967178414e-05,
      "loss": 1.8775,
      "step": 915
    },
    {
      "epoch": 0.7303102625298329,
      "grad_norm": 1.5047417879104614,
      "learning_rate": 7.522751704345887e-05,
      "loss": 1.8849,
      "step": 918
    },
    {
      "epoch": 0.7326968973747017,
      "grad_norm": 1.5307697057724,
      "learning_rate": 7.505694440979178e-05,
      "loss": 1.8404,
      "step": 921
    },
    {
      "epoch": 0.7350835322195705,
      "grad_norm": 1.508344054222107,
      "learning_rate": 7.488598147378416e-05,
      "loss": 1.666,
      "step": 924
    },
    {
      "epoch": 0.7374701670644391,
      "grad_norm": 1.7632466554641724,
      "learning_rate": 7.471463089845956e-05,
      "loss": 1.7466,
      "step": 927
    },
    {
      "epoch": 0.7398568019093079,
      "grad_norm": 1.6543248891830444,
      "learning_rate": 7.454289535287968e-05,
      "loss": 1.7259,
      "step": 930
    },
    {
      "epoch": 0.7422434367541766,
      "grad_norm": 2.329713821411133,
      "learning_rate": 7.437077751210279e-05,
      "loss": 1.9443,
      "step": 933
    },
    {
      "epoch": 0.7446300715990454,
      "grad_norm": 1.585302472114563,
      "learning_rate": 7.419828005714194e-05,
      "loss": 1.8221,
      "step": 936
    },
    {
      "epoch": 0.747016706443914,
      "grad_norm": 1.523518681526184,
      "learning_rate": 7.402540567492337e-05,
      "loss": 1.7825,
      "step": 939
    },
    {
      "epoch": 0.7494033412887828,
      "grad_norm": 1.5896198749542236,
      "learning_rate": 7.385215705824449e-05,
      "loss": 1.9895,
      "step": 942
    },
    {
      "epoch": 0.7517899761336515,
      "grad_norm": 1.7857471704483032,
      "learning_rate": 7.367853690573208e-05,
      "loss": 1.6531,
      "step": 945
    },
    {
      "epoch": 0.7541766109785203,
      "grad_norm": 1.8661036491394043,
      "learning_rate": 7.350454792180016e-05,
      "loss": 1.7411,
      "step": 948
    },
    {
      "epoch": 0.7565632458233891,
      "grad_norm": 1.7312443256378174,
      "learning_rate": 7.333019281660789e-05,
      "loss": 1.9503,
      "step": 951
    },
    {
      "epoch": 0.7589498806682577,
      "grad_norm": 1.8219163417816162,
      "learning_rate": 7.31554743060174e-05,
      "loss": 1.7237,
      "step": 954
    },
    {
      "epoch": 0.7613365155131265,
      "grad_norm": 1.7200877666473389,
      "learning_rate": 7.298039511155138e-05,
      "loss": 1.8042,
      "step": 957
    },
    {
      "epoch": 0.7637231503579952,
      "grad_norm": 1.5407986640930176,
      "learning_rate": 7.280495796035079e-05,
      "loss": 1.8225,
      "step": 960
    },
    {
      "epoch": 0.766109785202864,
      "grad_norm": 1.5745642185211182,
      "learning_rate": 7.262916558513237e-05,
      "loss": 1.6478,
      "step": 963
    },
    {
      "epoch": 0.7684964200477327,
      "grad_norm": 1.8857331275939941,
      "learning_rate": 7.245302072414601e-05,
      "loss": 1.8026,
      "step": 966
    },
    {
      "epoch": 0.7708830548926014,
      "grad_norm": 1.4270589351654053,
      "learning_rate": 7.227652612113213e-05,
      "loss": 1.6531,
      "step": 969
    },
    {
      "epoch": 0.7732696897374701,
      "grad_norm": 1.530493140220642,
      "learning_rate": 7.209968452527896e-05,
      "loss": 1.7553,
      "step": 972
    },
    {
      "epoch": 0.7756563245823389,
      "grad_norm": 1.6771613359451294,
      "learning_rate": 7.192249869117971e-05,
      "loss": 1.8374,
      "step": 975
    },
    {
      "epoch": 0.7780429594272077,
      "grad_norm": 1.7160065174102783,
      "learning_rate": 7.174497137878966e-05,
      "loss": 1.7429,
      "step": 978
    },
    {
      "epoch": 0.7804295942720764,
      "grad_norm": 1.518904685974121,
      "learning_rate": 7.156710535338312e-05,
      "loss": 1.8843,
      "step": 981
    },
    {
      "epoch": 0.7828162291169452,
      "grad_norm": 1.7113001346588135,
      "learning_rate": 7.138890338551048e-05,
      "loss": 1.8249,
      "step": 984
    },
    {
      "epoch": 0.7852028639618138,
      "grad_norm": 1.6391565799713135,
      "learning_rate": 7.121036825095492e-05,
      "loss": 1.6807,
      "step": 987
    },
    {
      "epoch": 0.7875894988066826,
      "grad_norm": 1.6043463945388794,
      "learning_rate": 7.103150273068921e-05,
      "loss": 1.7299,
      "step": 990
    },
    {
      "epoch": 0.7899761336515513,
      "grad_norm": 1.5004619359970093,
      "learning_rate": 7.085230961083249e-05,
      "loss": 1.8501,
      "step": 993
    },
    {
      "epoch": 0.7923627684964201,
      "grad_norm": 1.893096685409546,
      "learning_rate": 7.067279168260671e-05,
      "loss": 1.8326,
      "step": 996
    },
    {
      "epoch": 0.7947494033412887,
      "grad_norm": 1.5315916538238525,
      "learning_rate": 7.04929517422933e-05,
      "loss": 1.7833,
      "step": 999
    },
    {
      "epoch": 0.7971360381861575,
      "grad_norm": 1.5632952451705933,
      "learning_rate": 7.031279259118946e-05,
      "loss": 1.6346,
      "step": 1002
    },
    {
      "epoch": 0.7995226730310262,
      "grad_norm": 1.9762367010116577,
      "learning_rate": 7.013231703556471e-05,
      "loss": 1.8849,
      "step": 1005
    },
    {
      "epoch": 0.801909307875895,
      "grad_norm": 1.5724525451660156,
      "learning_rate": 6.995152788661705e-05,
      "loss": 1.7792,
      "step": 1008
    },
    {
      "epoch": 0.8042959427207638,
      "grad_norm": 1.4417201280593872,
      "learning_rate": 6.977042796042917e-05,
      "loss": 1.7516,
      "step": 1011
    },
    {
      "epoch": 0.8066825775656324,
      "grad_norm": 1.6131057739257812,
      "learning_rate": 6.958902007792466e-05,
      "loss": 1.7614,
      "step": 1014
    },
    {
      "epoch": 0.8090692124105012,
      "grad_norm": 2.5175702571868896,
      "learning_rate": 6.940730706482399e-05,
      "loss": 1.8208,
      "step": 1017
    },
    {
      "epoch": 0.8114558472553699,
      "grad_norm": 1.5602608919143677,
      "learning_rate": 6.922529175160054e-05,
      "loss": 1.7046,
      "step": 1020
    },
    {
      "epoch": 0.8138424821002387,
      "grad_norm": 1.5071675777435303,
      "learning_rate": 6.904297697343655e-05,
      "loss": 1.9148,
      "step": 1023
    },
    {
      "epoch": 0.8162291169451074,
      "grad_norm": 1.4578834772109985,
      "learning_rate": 6.886036557017881e-05,
      "loss": 1.8546,
      "step": 1026
    },
    {
      "epoch": 0.8186157517899761,
      "grad_norm": 1.7864019870758057,
      "learning_rate": 6.867746038629462e-05,
      "loss": 1.9423,
      "step": 1029
    },
    {
      "epoch": 0.8210023866348448,
      "grad_norm": 1.4294066429138184,
      "learning_rate": 6.849426427082735e-05,
      "loss": 1.7797,
      "step": 1032
    },
    {
      "epoch": 0.8233890214797136,
      "grad_norm": 2.900899648666382,
      "learning_rate": 6.83107800773521e-05,
      "loss": 1.8395,
      "step": 1035
    },
    {
      "epoch": 0.8257756563245824,
      "grad_norm": 1.5912294387817383,
      "learning_rate": 6.812701066393124e-05,
      "loss": 1.7345,
      "step": 1038
    },
    {
      "epoch": 0.8281622911694511,
      "grad_norm": 1.6191726922988892,
      "learning_rate": 6.79429588930699e-05,
      "loss": 1.7563,
      "step": 1041
    },
    {
      "epoch": 0.8305489260143198,
      "grad_norm": 1.926048755645752,
      "learning_rate": 6.775862763167142e-05,
      "loss": 1.7473,
      "step": 1044
    },
    {
      "epoch": 0.8329355608591885,
      "grad_norm": 1.5704717636108398,
      "learning_rate": 6.757401975099262e-05,
      "loss": 1.6788,
      "step": 1047
    },
    {
      "epoch": 0.8353221957040573,
      "grad_norm": 2.986739158630371,
      "learning_rate": 6.738913812659912e-05,
      "loss": 1.9091,
      "step": 1050
    },
    {
      "epoch": 0.837708830548926,
      "grad_norm": 2.8111684322357178,
      "learning_rate": 6.720398563832055e-05,
      "loss": 1.7738,
      "step": 1053
    },
    {
      "epoch": 0.8400954653937948,
      "grad_norm": 1.5233489274978638,
      "learning_rate": 6.701856517020565e-05,
      "loss": 1.869,
      "step": 1056
    },
    {
      "epoch": 0.8424821002386634,
      "grad_norm": 1.679288387298584,
      "learning_rate": 6.683287961047742e-05,
      "loss": 1.977,
      "step": 1059
    },
    {
      "epoch": 0.8448687350835322,
      "grad_norm": 1.5280253887176514,
      "learning_rate": 6.664693185148807e-05,
      "loss": 1.7278,
      "step": 1062
    },
    {
      "epoch": 0.847255369928401,
      "grad_norm": 1.5906157493591309,
      "learning_rate": 6.646072478967397e-05,
      "loss": 1.8965,
      "step": 1065
    },
    {
      "epoch": 0.8496420047732697,
      "grad_norm": 1.6531226634979248,
      "learning_rate": 6.627426132551058e-05,
      "loss": 1.77,
      "step": 1068
    },
    {
      "epoch": 0.8520286396181385,
      "grad_norm": 1.5413316488265991,
      "learning_rate": 6.608754436346725e-05,
      "loss": 1.7051,
      "step": 1071
    },
    {
      "epoch": 0.8544152744630071,
      "grad_norm": 1.7978452444076538,
      "learning_rate": 6.590057681196191e-05,
      "loss": 1.6797,
      "step": 1074
    },
    {
      "epoch": 0.8568019093078759,
      "grad_norm": 1.6512346267700195,
      "learning_rate": 6.571336158331589e-05,
      "loss": 1.9775,
      "step": 1077
    },
    {
      "epoch": 0.8591885441527446,
      "grad_norm": 1.7628110647201538,
      "learning_rate": 6.552590159370844e-05,
      "loss": 1.6468,
      "step": 1080
    },
    {
      "epoch": 0.8615751789976134,
      "grad_norm": 1.659655213356018,
      "learning_rate": 6.53381997631314e-05,
      "loss": 1.8676,
      "step": 1083
    },
    {
      "epoch": 0.863961813842482,
      "grad_norm": 1.5994445085525513,
      "learning_rate": 6.515025901534364e-05,
      "loss": 1.7799,
      "step": 1086
    },
    {
      "epoch": 0.8663484486873508,
      "grad_norm": 4.107300758361816,
      "learning_rate": 6.496208227782556e-05,
      "loss": 1.7622,
      "step": 1089
    },
    {
      "epoch": 0.8687350835322196,
      "grad_norm": 1.6633572578430176,
      "learning_rate": 6.477367248173352e-05,
      "loss": 1.7943,
      "step": 1092
    },
    {
      "epoch": 0.8711217183770883,
      "grad_norm": 1.5715115070343018,
      "learning_rate": 6.458503256185404e-05,
      "loss": 1.791,
      "step": 1095
    },
    {
      "epoch": 0.8735083532219571,
      "grad_norm": 1.4537943601608276,
      "learning_rate": 6.439616545655834e-05,
      "loss": 1.6835,
      "step": 1098
    },
    {
      "epoch": 0.8758949880668258,
      "grad_norm": 1.4551641941070557,
      "learning_rate": 6.420707410775626e-05,
      "loss": 1.8273,
      "step": 1101
    },
    {
      "epoch": 0.8782816229116945,
      "grad_norm": 1.7414036989212036,
      "learning_rate": 6.401776146085072e-05,
      "loss": 2.0934,
      "step": 1104
    },
    {
      "epoch": 0.8806682577565632,
      "grad_norm": 1.602457046508789,
      "learning_rate": 6.382823046469167e-05,
      "loss": 1.8388,
      "step": 1107
    },
    {
      "epoch": 0.883054892601432,
      "grad_norm": 1.549912929534912,
      "learning_rate": 6.363848407153016e-05,
      "loss": 1.6429,
      "step": 1110
    },
{ |
|
"epoch": 0.8854415274463007, |
|
"grad_norm": 1.4284569025039673, |
|
"learning_rate": 6.344852523697247e-05, |
|
"loss": 1.8545, |
|
"step": 1113 |
|
}, |
|
{ |
|
"epoch": 0.8878281622911695, |
|
"grad_norm": 1.83491849899292, |
|
"learning_rate": 6.325835691993394e-05, |
|
"loss": 1.574, |
|
"step": 1116 |
|
}, |
|
{ |
|
"epoch": 0.8902147971360382, |
|
"grad_norm": 1.6155121326446533, |
|
"learning_rate": 6.306798208259297e-05, |
|
"loss": 1.7415, |
|
"step": 1119 |
|
}, |
|
{ |
|
"epoch": 0.8926014319809069, |
|
"grad_norm": 1.5498522520065308, |
|
"learning_rate": 6.287740369034485e-05, |
|
"loss": 1.5622, |
|
"step": 1122 |
|
}, |
|
{ |
|
"epoch": 0.8949880668257757, |
|
"grad_norm": 1.6388617753982544, |
|
"learning_rate": 6.26866247117555e-05, |
|
"loss": 1.6144, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.8973747016706444, |
|
"grad_norm": 1.4823899269104004, |
|
"learning_rate": 6.249564811851543e-05, |
|
"loss": 1.8225, |
|
"step": 1128 |
|
}, |
|
{ |
|
"epoch": 0.8997613365155132, |
|
"grad_norm": 1.4698792695999146, |
|
"learning_rate": 6.230447688539316e-05, |
|
"loss": 1.6339, |
|
"step": 1131 |
|
}, |
|
{ |
|
"epoch": 0.9021479713603818, |
|
"grad_norm": 1.6909016370773315, |
|
"learning_rate": 6.211311399018916e-05, |
|
"loss": 1.7918, |
|
"step": 1134 |
|
}, |
|
{ |
|
"epoch": 0.9045346062052506, |
|
"grad_norm": 1.4534494876861572, |
|
"learning_rate": 6.192156241368929e-05, |
|
"loss": 1.7715, |
|
"step": 1137 |
|
}, |
|
{ |
|
"epoch": 0.9069212410501193, |
|
"grad_norm": 1.719106912612915, |
|
"learning_rate": 6.172982513961845e-05, |
|
"loss": 1.7261, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.9093078758949881, |
|
"grad_norm": 1.6923831701278687, |
|
"learning_rate": 6.153790515459404e-05, |
|
"loss": 1.6554, |
|
"step": 1143 |
|
}, |
|
{ |
|
"epoch": 0.9116945107398569, |
|
"grad_norm": 1.6443957090377808, |
|
"learning_rate": 6.13458054480795e-05, |
|
"loss": 1.8556, |
|
"step": 1146 |
|
}, |
|
{ |
|
"epoch": 0.9140811455847255, |
|
"grad_norm": 1.5689623355865479, |
|
"learning_rate": 6.115352901233779e-05, |
|
"loss": 1.8041, |
|
"step": 1149 |
|
}, |
|
{ |
|
"epoch": 0.9164677804295943, |
|
"grad_norm": 1.46951425075531, |
|
"learning_rate": 6.096107884238458e-05, |
|
"loss": 1.6515, |
|
"step": 1152 |
|
}, |
|
{ |
|
"epoch": 0.918854415274463, |
|
"grad_norm": 1.5866256952285767, |
|
"learning_rate": 6.0768457935941817e-05, |
|
"loss": 1.8007, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 0.9212410501193318, |
|
"grad_norm": 1.9523183107376099, |
|
"learning_rate": 6.0575669293390954e-05, |
|
"loss": 1.8313, |
|
"step": 1158 |
|
}, |
|
{ |
|
"epoch": 0.9236276849642004, |
|
"grad_norm": 1.6216835975646973, |
|
"learning_rate": 6.038271591772615e-05, |
|
"loss": 1.8399, |
|
"step": 1161 |
|
}, |
|
{ |
|
"epoch": 0.9260143198090692, |
|
"grad_norm": 1.8278216123580933, |
|
"learning_rate": 6.0189600814507604e-05, |
|
"loss": 1.9067, |
|
"step": 1164 |
|
}, |
|
{ |
|
"epoch": 0.9284009546539379, |
|
"grad_norm": 1.2732576131820679, |
|
"learning_rate": 5.9996326991814654e-05, |
|
"loss": 1.6042, |
|
"step": 1167 |
|
}, |
|
{ |
|
"epoch": 0.9307875894988067, |
|
"grad_norm": 1.75897216796875, |
|
"learning_rate": 5.980289746019892e-05, |
|
"loss": 1.8796, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.9331742243436754, |
|
"grad_norm": 1.5754450559616089, |
|
"learning_rate": 5.9609315232637483e-05, |
|
"loss": 1.5981, |
|
"step": 1173 |
|
}, |
|
{ |
|
"epoch": 0.9355608591885441, |
|
"grad_norm": 1.7975653409957886, |
|
"learning_rate": 5.941558332448589e-05, |
|
"loss": 1.638, |
|
"step": 1176 |
|
}, |
|
{ |
|
"epoch": 0.9379474940334129, |
|
"grad_norm": 1.6017708778381348, |
|
"learning_rate": 5.922170475343125e-05, |
|
"loss": 1.6904, |
|
"step": 1179 |
|
}, |
|
{ |
|
"epoch": 0.9403341288782816, |
|
"grad_norm": 1.6121450662612915, |
|
"learning_rate": 5.9027682539445104e-05, |
|
"loss": 1.69, |
|
"step": 1182 |
|
}, |
|
{ |
|
"epoch": 0.9427207637231504, |
|
"grad_norm": 1.5319181680679321, |
|
"learning_rate": 5.883351970473654e-05, |
|
"loss": 1.8468, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 0.9451073985680191, |
|
"grad_norm": 1.7490177154541016, |
|
"learning_rate": 5.863921927370498e-05, |
|
"loss": 1.7477, |
|
"step": 1188 |
|
}, |
|
{ |
|
"epoch": 0.9474940334128878, |
|
"grad_norm": 1.546705722808838, |
|
"learning_rate": 5.8444784272893175e-05, |
|
"loss": 1.6837, |
|
"step": 1191 |
|
}, |
|
{ |
|
"epoch": 0.9498806682577565, |
|
"grad_norm": 1.576828956604004, |
|
"learning_rate": 5.8250217730939973e-05, |
|
"loss": 1.6976, |
|
"step": 1194 |
|
}, |
|
{ |
|
"epoch": 0.9522673031026253, |
|
"grad_norm": 1.443121075630188, |
|
"learning_rate": 5.8055522678533225e-05, |
|
"loss": 1.6661, |
|
"step": 1197 |
|
}, |
|
{ |
|
"epoch": 0.954653937947494, |
|
"grad_norm": 1.619332194328308, |
|
"learning_rate": 5.786070214836254e-05, |
|
"loss": 1.6282, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.9570405727923628, |
|
"grad_norm": 1.516404628753662, |
|
"learning_rate": 5.7665759175072034e-05, |
|
"loss": 1.8743, |
|
"step": 1203 |
|
}, |
|
{ |
|
"epoch": 0.9594272076372315, |
|
"grad_norm": 1.842561960220337, |
|
"learning_rate": 5.747069679521305e-05, |
|
"loss": 1.7742, |
|
"step": 1206 |
|
}, |
|
{ |
|
"epoch": 0.9618138424821002, |
|
"grad_norm": 1.5098801851272583, |
|
"learning_rate": 5.727551804719693e-05, |
|
"loss": 1.7087, |
|
"step": 1209 |
|
}, |
|
{ |
|
"epoch": 0.964200477326969, |
|
"grad_norm": 1.611047387123108, |
|
"learning_rate": 5.708022597124758e-05, |
|
"loss": 1.6934, |
|
"step": 1212 |
|
}, |
|
{ |
|
"epoch": 0.9665871121718377, |
|
"grad_norm": 1.5745174884796143, |
|
"learning_rate": 5.688482360935423e-05, |
|
"loss": 1.8729, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 0.9689737470167065, |
|
"grad_norm": 1.4478429555892944, |
|
"learning_rate": 5.668931400522396e-05, |
|
"loss": 1.801, |
|
"step": 1218 |
|
}, |
|
{ |
|
"epoch": 0.9713603818615751, |
|
"grad_norm": 2.9802427291870117, |
|
"learning_rate": 5.649370020423431e-05, |
|
"loss": 1.6937, |
|
"step": 1221 |
|
}, |
|
{ |
|
"epoch": 0.9737470167064439, |
|
"grad_norm": 1.6159980297088623, |
|
"learning_rate": 5.629798525338589e-05, |
|
"loss": 1.712, |
|
"step": 1224 |
|
}, |
|
{ |
|
"epoch": 0.9761336515513126, |
|
"grad_norm": 1.6687465906143188, |
|
"learning_rate": 5.6102172201254835e-05, |
|
"loss": 1.7582, |
|
"step": 1227 |
|
}, |
|
{ |
|
"epoch": 0.9785202863961814, |
|
"grad_norm": 1.318992018699646, |
|
"learning_rate": 5.5906264097945407e-05, |
|
"loss": 1.7913, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.9809069212410502, |
|
"grad_norm": 1.6671638488769531, |
|
"learning_rate": 5.5710263995042434e-05, |
|
"loss": 1.8547, |
|
"step": 1233 |
|
}, |
|
{ |
|
"epoch": 0.9832935560859188, |
|
"grad_norm": 1.460523247718811, |
|
"learning_rate": 5.551417494556376e-05, |
|
"loss": 1.7699, |
|
"step": 1236 |
|
}, |
|
{ |
|
"epoch": 0.9856801909307876, |
|
"grad_norm": 1.4564532041549683, |
|
"learning_rate": 5.531800000391275e-05, |
|
"loss": 1.7636, |
|
"step": 1239 |
|
}, |
|
{ |
|
"epoch": 0.9880668257756563, |
|
"grad_norm": 1.7387609481811523, |
|
"learning_rate": 5.5121742225830665e-05, |
|
"loss": 1.8602, |
|
"step": 1242 |
|
}, |
|
{ |
|
"epoch": 0.9904534606205251, |
|
"grad_norm": 1.8153102397918701, |
|
"learning_rate": 5.4925404668349076e-05, |
|
"loss": 1.797, |
|
"step": 1245 |
|
}, |
|
{ |
|
"epoch": 0.9928400954653938, |
|
"grad_norm": 1.4723916053771973, |
|
"learning_rate": 5.472899038974225e-05, |
|
"loss": 1.7051, |
|
"step": 1248 |
|
}, |
|
{ |
|
"epoch": 0.9952267303102625, |
|
"grad_norm": 2.961282968521118, |
|
"learning_rate": 5.45325024494795e-05, |
|
"loss": 1.7661, |
|
"step": 1251 |
|
}, |
|
{ |
|
"epoch": 0.9976133651551312, |
|
"grad_norm": 1.6182713508605957, |
|
"learning_rate": 5.433594390817756e-05, |
|
"loss": 1.875, |
|
"step": 1254 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 2.34993839263916, |
|
"learning_rate": 5.413931782755283e-05, |
|
"loss": 1.8514, |
|
"step": 1257 |
|
}, |
|
{ |
|
"epoch": 1.0023866348448687, |
|
"grad_norm": 1.439794659614563, |
|
"learning_rate": 5.3942627270373826e-05, |
|
"loss": 1.5045, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 1.0047732696897376, |
|
"grad_norm": 1.7519071102142334, |
|
"learning_rate": 5.374587530041335e-05, |
|
"loss": 1.5372, |
|
"step": 1263 |
|
}, |
|
{ |
|
"epoch": 1.0071599045346062, |
|
"grad_norm": 1.4893503189086914, |
|
"learning_rate": 5.35490649824008e-05, |
|
"loss": 1.5061, |
|
"step": 1266 |
|
}, |
|
{ |
|
"epoch": 1.009546539379475, |
|
"grad_norm": 1.4957739114761353, |
|
"learning_rate": 5.335219938197445e-05, |
|
"loss": 1.4709, |
|
"step": 1269 |
|
}, |
|
{ |
|
"epoch": 1.0119331742243436, |
|
"grad_norm": 1.5801351070404053, |
|
"learning_rate": 5.315528156563367e-05, |
|
"loss": 1.522, |
|
"step": 1272 |
|
}, |
|
{ |
|
"epoch": 1.0143198090692125, |
|
"grad_norm": 1.5916804075241089, |
|
"learning_rate": 5.295831460069124e-05, |
|
"loss": 1.429, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 1.0167064439140812, |
|
"grad_norm": 1.598103404045105, |
|
"learning_rate": 5.276130155522541e-05, |
|
"loss": 1.5911, |
|
"step": 1278 |
|
}, |
|
{ |
|
"epoch": 1.0190930787589498, |
|
"grad_norm": 1.575002670288086, |
|
"learning_rate": 5.256424549803228e-05, |
|
"loss": 1.5334, |
|
"step": 1281 |
|
}, |
|
{ |
|
"epoch": 1.0214797136038185, |
|
"grad_norm": 1.5843360424041748, |
|
"learning_rate": 5.236714949857791e-05, |
|
"loss": 1.4366, |
|
"step": 1284 |
|
}, |
|
{ |
|
"epoch": 1.0238663484486874, |
|
"grad_norm": 1.5547505617141724, |
|
"learning_rate": 5.2170016626950505e-05, |
|
"loss": 1.5106, |
|
"step": 1287 |
|
}, |
|
{ |
|
"epoch": 1.026252983293556, |
|
"grad_norm": 1.4847965240478516, |
|
"learning_rate": 5.1972849953812644e-05, |
|
"loss": 1.4397, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 1.0286396181384247, |
|
"grad_norm": 2.0349180698394775, |
|
"learning_rate": 5.1775652550353405e-05, |
|
"loss": 1.5765, |
|
"step": 1293 |
|
}, |
|
{ |
|
"epoch": 1.0310262529832936, |
|
"grad_norm": 1.6574598550796509, |
|
"learning_rate": 5.157842748824053e-05, |
|
"loss": 1.433, |
|
"step": 1296 |
|
}, |
|
{ |
|
"epoch": 1.0334128878281623, |
|
"grad_norm": 1.5641635656356812, |
|
"learning_rate": 5.138117783957261e-05, |
|
"loss": 1.5666, |
|
"step": 1299 |
|
}, |
|
{ |
|
"epoch": 1.035799522673031, |
|
"grad_norm": 1.6239501237869263, |
|
"learning_rate": 5.1183906676831197e-05, |
|
"loss": 1.6223, |
|
"step": 1302 |
|
}, |
|
{ |
|
"epoch": 1.0381861575178997, |
|
"grad_norm": 1.5002102851867676, |
|
"learning_rate": 5.098661707283298e-05, |
|
"loss": 1.4733, |
|
"step": 1305 |
|
}, |
|
{ |
|
"epoch": 1.0405727923627686, |
|
"grad_norm": 1.5246349573135376, |
|
"learning_rate": 5.078931210068185e-05, |
|
"loss": 1.4459, |
|
"step": 1308 |
|
}, |
|
{ |
|
"epoch": 1.0429594272076372, |
|
"grad_norm": 1.4616323709487915, |
|
"learning_rate": 5.059199483372114e-05, |
|
"loss": 1.4595, |
|
"step": 1311 |
|
}, |
|
{ |
|
"epoch": 1.045346062052506, |
|
"grad_norm": 1.4437859058380127, |
|
"learning_rate": 5.039466834548568e-05, |
|
"loss": 1.594, |
|
"step": 1314 |
|
}, |
|
{ |
|
"epoch": 1.0477326968973748, |
|
"grad_norm": 1.609554409980774, |
|
"learning_rate": 5.0197335709653883e-05, |
|
"loss": 1.3251, |
|
"step": 1317 |
|
}, |
|
{ |
|
"epoch": 1.0501193317422435, |
|
"grad_norm": 1.502581238746643, |
|
"learning_rate": 5e-05, |
|
"loss": 1.5505, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 1.0525059665871122, |
|
"grad_norm": 1.5816670656204224, |
|
"learning_rate": 4.980266429034613e-05, |
|
"loss": 1.4974, |
|
"step": 1323 |
|
}, |
|
{ |
|
"epoch": 1.0548926014319808, |
|
"grad_norm": 1.714355230331421, |
|
"learning_rate": 4.960533165451435e-05, |
|
"loss": 1.387, |
|
"step": 1326 |
|
}, |
|
{ |
|
"epoch": 1.0572792362768497, |
|
"grad_norm": 1.506137490272522, |
|
"learning_rate": 4.9408005166278855e-05, |
|
"loss": 1.4963, |
|
"step": 1329 |
|
}, |
|
{ |
|
"epoch": 1.0596658711217184, |
|
"grad_norm": 1.5406689643859863, |
|
"learning_rate": 4.921068789931816e-05, |
|
"loss": 1.4865, |
|
"step": 1332 |
|
}, |
|
{ |
|
"epoch": 1.062052505966587, |
|
"grad_norm": 1.6917808055877686, |
|
"learning_rate": 4.901338292716704e-05, |
|
"loss": 1.3597, |
|
"step": 1335 |
|
}, |
|
{ |
|
"epoch": 1.064439140811456, |
|
"grad_norm": 1.4597982168197632, |
|
"learning_rate": 4.8816093323168815e-05, |
|
"loss": 1.4529, |
|
"step": 1338 |
|
}, |
|
{ |
|
"epoch": 1.0668257756563246, |
|
"grad_norm": 1.3898138999938965, |
|
"learning_rate": 4.8618822160427406e-05, |
|
"loss": 1.4942, |
|
"step": 1341 |
|
}, |
|
{ |
|
"epoch": 1.0692124105011933, |
|
"grad_norm": 1.3161375522613525, |
|
"learning_rate": 4.842157251175947e-05, |
|
"loss": 1.4459, |
|
"step": 1344 |
|
}, |
|
{ |
|
"epoch": 1.071599045346062, |
|
"grad_norm": 1.6536246538162231, |
|
"learning_rate": 4.822434744964661e-05, |
|
"loss": 1.4766, |
|
"step": 1347 |
|
}, |
|
{ |
|
"epoch": 1.0739856801909309, |
|
"grad_norm": 1.7592108249664307, |
|
"learning_rate": 4.802715004618737e-05, |
|
"loss": 1.5014, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 1.0763723150357996, |
|
"grad_norm": 1.4345988035202026, |
|
"learning_rate": 4.7829983373049507e-05, |
|
"loss": 1.4605, |
|
"step": 1353 |
|
}, |
|
{ |
|
"epoch": 1.0787589498806682, |
|
"grad_norm": 1.2879067659378052, |
|
"learning_rate": 4.763285050142211e-05, |
|
"loss": 1.3331, |
|
"step": 1356 |
|
}, |
|
{ |
|
"epoch": 1.081145584725537, |
|
"grad_norm": 1.392535924911499, |
|
"learning_rate": 4.743575450196773e-05, |
|
"loss": 1.4701, |
|
"step": 1359 |
|
}, |
|
{ |
|
"epoch": 1.0835322195704058, |
|
"grad_norm": 1.4662644863128662, |
|
"learning_rate": 4.7238698444774595e-05, |
|
"loss": 1.4321, |
|
"step": 1362 |
|
}, |
|
{ |
|
"epoch": 1.0859188544152745, |
|
"grad_norm": 1.4287410974502563, |
|
"learning_rate": 4.704168539930878e-05, |
|
"loss": 1.3805, |
|
"step": 1365 |
|
}, |
|
{ |
|
"epoch": 1.0883054892601431, |
|
"grad_norm": 1.4799420833587646, |
|
"learning_rate": 4.6844718434366334e-05, |
|
"loss": 1.6115, |
|
"step": 1368 |
|
}, |
|
{ |
|
"epoch": 1.0906921241050118, |
|
"grad_norm": 1.6349695920944214, |
|
"learning_rate": 4.664780061802557e-05, |
|
"loss": 1.4725, |
|
"step": 1371 |
|
}, |
|
{ |
|
"epoch": 1.0930787589498807, |
|
"grad_norm": 1.5499435663223267, |
|
"learning_rate": 4.64509350175992e-05, |
|
"loss": 1.4677, |
|
"step": 1374 |
|
}, |
|
{ |
|
"epoch": 1.0954653937947494, |
|
"grad_norm": 1.5121248960494995, |
|
"learning_rate": 4.6254124699586656e-05, |
|
"loss": 1.5272, |
|
"step": 1377 |
|
}, |
|
{ |
|
"epoch": 1.097852028639618, |
|
"grad_norm": 1.491830825805664, |
|
"learning_rate": 4.605737272962618e-05, |
|
"loss": 1.4693, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 1.100238663484487, |
|
"grad_norm": 1.4596081972122192, |
|
"learning_rate": 4.5860682172447184e-05, |
|
"loss": 1.491, |
|
"step": 1383 |
|
}, |
|
{ |
|
"epoch": 1.1026252983293556, |
|
"grad_norm": 1.7243481874465942, |
|
"learning_rate": 4.566405609182247e-05, |
|
"loss": 1.5289, |
|
"step": 1386 |
|
}, |
|
{ |
|
"epoch": 1.1050119331742243, |
|
"grad_norm": 1.5089308023452759, |
|
"learning_rate": 4.546749755052051e-05, |
|
"loss": 1.5428, |
|
"step": 1389 |
|
}, |
|
{ |
|
"epoch": 1.107398568019093, |
|
"grad_norm": 1.5854674577713013, |
|
"learning_rate": 4.527100961025776e-05, |
|
"loss": 1.5129, |
|
"step": 1392 |
|
}, |
|
{ |
|
"epoch": 1.1097852028639619, |
|
"grad_norm": 1.55560302734375, |
|
"learning_rate": 4.507459533165093e-05, |
|
"loss": 1.482, |
|
"step": 1395 |
|
}, |
|
{ |
|
"epoch": 1.1121718377088305, |
|
"grad_norm": 1.4460105895996094, |
|
"learning_rate": 4.4878257774169346e-05, |
|
"loss": 1.4073, |
|
"step": 1398 |
|
}, |
|
{ |
|
"epoch": 1.1145584725536992, |
|
"grad_norm": 1.5131725072860718, |
|
"learning_rate": 4.4681999996087274e-05, |
|
"loss": 1.4992, |
|
"step": 1401 |
|
}, |
|
{ |
|
"epoch": 1.1169451073985681, |
|
"grad_norm": 1.5037047863006592, |
|
"learning_rate": 4.448582505443625e-05, |
|
"loss": 1.5421, |
|
"step": 1404 |
|
}, |
|
{ |
|
"epoch": 1.1193317422434368, |
|
"grad_norm": 1.4700491428375244, |
|
"learning_rate": 4.4289736004957585e-05, |
|
"loss": 1.4587, |
|
"step": 1407 |
|
}, |
|
{ |
|
"epoch": 1.1217183770883055, |
|
"grad_norm": 1.3639819622039795, |
|
"learning_rate": 4.4093735902054605e-05, |
|
"loss": 1.4711, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 1.1241050119331741, |
|
"grad_norm": 1.565079689025879, |
|
"learning_rate": 4.3897827798745183e-05, |
|
"loss": 1.4546, |
|
"step": 1413 |
|
}, |
|
{ |
|
"epoch": 1.126491646778043, |
|
"grad_norm": 1.5039900541305542, |
|
"learning_rate": 4.3702014746614136e-05, |
|
"loss": 1.4998, |
|
"step": 1416 |
|
}, |
|
{ |
|
"epoch": 1.1288782816229117, |
|
"grad_norm": 1.6283437013626099, |
|
"learning_rate": 4.350629979576569e-05, |
|
"loss": 1.4458, |
|
"step": 1419 |
|
}, |
|
{ |
|
"epoch": 1.1312649164677804, |
|
"grad_norm": 1.4039489030838013, |
|
"learning_rate": 4.331068599477605e-05, |
|
"loss": 1.3474, |
|
"step": 1422 |
|
}, |
|
{ |
|
"epoch": 1.1336515513126493, |
|
"grad_norm": 1.4971529245376587, |
|
"learning_rate": 4.311517639064578e-05, |
|
"loss": 1.3097, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 1.136038186157518, |
|
"grad_norm": 1.9998141527175903, |
|
"learning_rate": 4.2919774028752436e-05, |
|
"loss": 1.4228, |
|
"step": 1428 |
|
}, |
|
{ |
|
"epoch": 1.1384248210023866, |
|
"grad_norm": 1.3852521181106567, |
|
"learning_rate": 4.27244819528031e-05, |
|
"loss": 1.4883, |
|
"step": 1431 |
|
}, |
|
{ |
|
"epoch": 1.1408114558472553, |
|
"grad_norm": 1.5615485906600952, |
|
"learning_rate": 4.2529303204786953e-05, |
|
"loss": 1.5153, |
|
"step": 1434 |
|
}, |
|
{ |
|
"epoch": 1.1431980906921242, |
|
"grad_norm": 1.5283887386322021, |
|
"learning_rate": 4.233424082492797e-05, |
|
"loss": 1.527, |
|
"step": 1437 |
|
}, |
|
{ |
|
"epoch": 1.1455847255369929, |
|
"grad_norm": 1.385176420211792, |
|
"learning_rate": 4.213929785163747e-05, |
|
"loss": 1.4805, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 1.1479713603818615, |
|
"grad_norm": 1.3835763931274414, |
|
"learning_rate": 4.1944477321466786e-05, |
|
"loss": 1.4868, |
|
"step": 1443 |
|
}, |
|
{ |
|
"epoch": 1.1503579952267304, |
|
"grad_norm": 1.4247292280197144, |
|
"learning_rate": 4.1749782269060045e-05, |
|
"loss": 1.498, |
|
"step": 1446 |
|
}, |
|
{ |
|
"epoch": 1.152744630071599, |
|
"grad_norm": 1.5267618894577026, |
|
"learning_rate": 4.1555215727106844e-05, |
|
"loss": 1.4659, |
|
"step": 1449 |
|
}, |
|
{ |
|
"epoch": 1.1551312649164678, |
|
"grad_norm": 1.5051010847091675, |
|
"learning_rate": 4.136078072629503e-05, |
|
"loss": 1.4474, |
|
"step": 1452 |
|
}, |
|
{ |
|
"epoch": 1.1575178997613365, |
|
"grad_norm": 2.2243285179138184, |
|
"learning_rate": 4.116648029526347e-05, |
|
"loss": 1.453, |
|
"step": 1455 |
|
}, |
|
{ |
|
"epoch": 1.1599045346062051, |
|
"grad_norm": 1.4427586793899536, |
|
"learning_rate": 4.097231746055491e-05, |
|
"loss": 1.532, |
|
"step": 1458 |
|
}, |
|
{ |
|
"epoch": 1.162291169451074, |
|
"grad_norm": 1.9031730890274048, |
|
"learning_rate": 4.077829524656877e-05, |
|
"loss": 1.3974, |
|
"step": 1461 |
|
}, |
|
{ |
|
"epoch": 1.1646778042959427, |
|
"grad_norm": 1.4072078466415405, |
|
"learning_rate": 4.05844166755141e-05, |
|
"loss": 1.4742, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 1.1670644391408114, |
|
"grad_norm": 1.4678648710250854, |
|
"learning_rate": 4.039068476736253e-05, |
|
"loss": 1.4408, |
|
"step": 1467 |
|
}, |
|
{ |
|
"epoch": 1.1694510739856803, |
|
"grad_norm": 7.8452582359313965, |
|
"learning_rate": 4.01971025398011e-05, |
|
"loss": 1.3489, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 1.171837708830549, |
|
"grad_norm": 1.6050971746444702, |
|
"learning_rate": 4.000367300818537e-05, |
|
"loss": 1.6608, |
|
"step": 1473 |
|
}, |
|
{ |
|
"epoch": 1.1742243436754176, |
|
"grad_norm": 1.71634840965271, |
|
"learning_rate": 3.98103991854924e-05, |
|
"loss": 1.4679, |
|
"step": 1476 |
|
}, |
|
{ |
|
"epoch": 1.1766109785202863, |
|
"grad_norm": 1.4481014013290405, |
|
"learning_rate": 3.961728408227384e-05, |
|
"loss": 1.5657, |
|
"step": 1479 |
|
}, |
|
{ |
|
"epoch": 1.1789976133651552, |
|
"grad_norm": 1.6103709936141968, |
|
"learning_rate": 3.942433070660905e-05, |
|
"loss": 1.4409, |
|
"step": 1482 |
|
}, |
|
{ |
|
"epoch": 1.1813842482100239, |
|
"grad_norm": 1.411056399345398, |
|
"learning_rate": 3.923154206405819e-05, |
|
"loss": 1.4865, |
|
"step": 1485 |
|
}, |
|
{ |
|
"epoch": 1.1837708830548925, |
|
"grad_norm": 1.497053623199463, |
|
"learning_rate": 3.9038921157615444e-05, |
|
"loss": 1.4072, |
|
"step": 1488 |
|
}, |
|
{ |
|
"epoch": 1.1861575178997614, |
|
"grad_norm": 1.6520947217941284, |
|
"learning_rate": 3.884647098766224e-05, |
|
"loss": 1.4393, |
|
"step": 1491 |
|
}, |
|
{ |
|
"epoch": 1.18854415274463, |
|
"grad_norm": 1.442406177520752, |
|
"learning_rate": 3.8654194551920485e-05, |
|
"loss": 1.4458, |
|
"step": 1494 |
|
}, |
|
{ |
|
"epoch": 1.1909307875894988, |
|
"grad_norm": 1.3878071308135986, |
|
"learning_rate": 3.846209484540597e-05, |
|
"loss": 1.4374, |
|
"step": 1497 |
|
}, |
|
{ |
|
"epoch": 1.1933174224343674, |
|
"grad_norm": 1.401583194732666, |
|
"learning_rate": 3.827017486038157e-05, |
|
"loss": 1.3965, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.1957040572792363, |
|
"grad_norm": 1.4490373134613037, |
|
"learning_rate": 3.8078437586310716e-05, |
|
"loss": 1.6232, |
|
"step": 1503 |
|
}, |
|
{ |
|
"epoch": 1.198090692124105, |
|
"grad_norm": 1.2965726852416992, |
|
"learning_rate": 3.788688600981085e-05, |
|
"loss": 1.4681, |
|
"step": 1506 |
|
}, |
|
{ |
|
"epoch": 1.2004773269689737, |
|
"grad_norm": 1.355893850326538, |
|
"learning_rate": 3.769552311460684e-05, |
|
"loss": 1.488, |
|
"step": 1509 |
|
}, |
|
{ |
|
"epoch": 1.2028639618138426, |
|
"grad_norm": 1.4382792711257935, |
|
"learning_rate": 3.750435188148459e-05, |
|
"loss": 1.5472, |
|
"step": 1512 |
|
}, |
|
{ |
|
"epoch": 1.2052505966587113, |
|
"grad_norm": 1.3497835397720337, |
|
"learning_rate": 3.73133752882445e-05, |
|
"loss": 1.3225, |
|
"step": 1515 |
|
}, |
|
{ |
|
"epoch": 1.20763723150358, |
|
"grad_norm": 1.448410153388977, |
|
"learning_rate": 3.712259630965518e-05, |
|
"loss": 1.4645, |
|
"step": 1518 |
|
}, |
|
{ |
|
"epoch": 1.2100238663484486, |
|
"grad_norm": 1.470038890838623, |
|
"learning_rate": 3.6932017917407045e-05, |
|
"loss": 1.4681, |
|
"step": 1521 |
|
}, |
|
{ |
|
"epoch": 1.2124105011933175, |
|
"grad_norm": 1.2767425775527954, |
|
"learning_rate": 3.6741643080066065e-05, |
|
"loss": 1.4428, |
|
"step": 1524 |
|
}, |
|
{ |
|
"epoch": 1.2147971360381862, |
|
"grad_norm": 1.5355818271636963, |
|
"learning_rate": 3.655147476302754e-05, |
|
"loss": 1.528, |
|
"step": 1527 |
|
}, |
|
{ |
|
"epoch": 1.2171837708830548, |
|
"grad_norm": 1.5351886749267578, |
|
"learning_rate": 3.636151592846985e-05, |
|
"loss": 1.3914, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 1.2195704057279237, |
|
"grad_norm": 1.580076813697815, |
|
"learning_rate": 3.617176953530835e-05, |
|
"loss": 1.3934, |
|
"step": 1533 |
|
}, |
|
{ |
|
"epoch": 1.2219570405727924, |
|
"grad_norm": 1.2656506299972534, |
|
"learning_rate": 3.5982238539149285e-05, |
|
"loss": 1.305, |
|
"step": 1536 |
|
}, |
|
{ |
|
"epoch": 1.224343675417661, |
|
"grad_norm": 2.035010814666748, |
|
"learning_rate": 3.579292589224375e-05, |
|
"loss": 1.4774, |
|
"step": 1539 |
|
}, |
|
{ |
|
"epoch": 1.2267303102625298, |
|
"grad_norm": 1.4457292556762695, |
|
"learning_rate": 3.560383454344168e-05, |
|
"loss": 1.5794, |
|
"step": 1542 |
|
}, |
|
{ |
|
"epoch": 1.2291169451073987, |
|
"grad_norm": 1.2905712127685547, |
|
"learning_rate": 3.541496743814596e-05, |
|
"loss": 1.4821, |
|
"step": 1545 |
|
}, |
|
{ |
|
"epoch": 1.2315035799522673, |
|
"grad_norm": 1.505958914756775, |
|
"learning_rate": 3.522632751826651e-05, |
|
"loss": 1.3573, |
|
"step": 1548 |
|
}, |
|
{ |
|
"epoch": 1.233890214797136, |
|
"grad_norm": 1.4743762016296387, |
|
"learning_rate": 3.503791772217445e-05, |
|
"loss": 1.5104, |
|
"step": 1551 |
|
}, |
|
{ |
|
"epoch": 1.2362768496420047, |
|
"grad_norm": 1.4788120985031128, |
|
"learning_rate": 3.484974098465636e-05, |
|
"loss": 1.3989, |
|
"step": 1554 |
|
}, |
|
{ |
|
"epoch": 1.2386634844868736, |
|
"grad_norm": 1.3256484270095825, |
|
"learning_rate": 3.4661800236868604e-05, |
|
"loss": 1.4617, |
|
"step": 1557 |
|
}, |
|
{ |
|
"epoch": 1.2410501193317423, |
|
"grad_norm": 1.5638628005981445, |
|
"learning_rate": 3.447409840629156e-05, |
|
"loss": 1.402, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 1.243436754176611, |
|
"grad_norm": 1.7883927822113037, |
|
"learning_rate": 3.428663841668412e-05, |
|
"loss": 1.5829, |
|
"step": 1563 |
|
}, |
|
{ |
|
"epoch": 1.2458233890214796, |
|
"grad_norm": 1.4335147142410278, |
|
"learning_rate": 3.409942318803809e-05, |
|
"loss": 1.4597, |
|
"step": 1566 |
|
}, |
|
{ |
|
"epoch": 1.2482100238663485, |
|
"grad_norm": 1.4358636140823364, |
|
"learning_rate": 3.391245563653276e-05, |
|
"loss": 1.5638, |
|
"step": 1569 |
|
}, |
|
{ |
|
"epoch": 1.2505966587112172, |
|
"grad_norm": 1.3342747688293457, |
|
"learning_rate": 3.3725738674489414e-05, |
|
"loss": 1.447, |
|
"step": 1572 |
|
}, |
|
{ |
|
"epoch": 1.2529832935560858, |
|
"grad_norm": 1.3842703104019165, |
|
"learning_rate": 3.3539275210326044e-05, |
|
"loss": 1.4634, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 1.2553699284009547, |
|
"grad_norm": 1.5342031717300415, |
|
"learning_rate": 3.335306814851196e-05, |
|
"loss": 1.458, |
|
"step": 1578 |
|
}, |
|
{ |
|
"epoch": 1.2577565632458234, |
|
"grad_norm": 1.4148904085159302, |
|
"learning_rate": 3.31671203895226e-05, |
|
"loss": 1.3945, |
|
"step": 1581 |
|
}, |
|
{ |
|
"epoch": 1.260143198090692, |
|
"grad_norm": 1.3674441576004028, |
|
"learning_rate": 3.298143482979436e-05, |
|
"loss": 1.3962, |
|
"step": 1584 |
|
}, |
|
{ |
|
"epoch": 1.2625298329355608, |
|
"grad_norm": 1.42054283618927, |
|
"learning_rate": 3.2796014361679464e-05, |
|
"loss": 1.5179, |
|
"step": 1587 |
|
}, |
|
{ |
|
"epoch": 1.2649164677804297, |
|
"grad_norm": 1.5487267971038818, |
|
"learning_rate": 3.261086187340088e-05, |
|
"loss": 1.4281, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 1.2673031026252983, |
|
"grad_norm": 1.5014619827270508, |
|
"learning_rate": 3.242598024900738e-05, |
|
"loss": 1.4789, |
|
"step": 1593 |
|
}, |
|
{ |
|
"epoch": 1.269689737470167, |
|
"grad_norm": 1.533458948135376, |
|
"learning_rate": 3.224137236832859e-05, |
|
"loss": 1.3491, |
|
"step": 1596 |
|
}, |
|
{ |
|
"epoch": 1.272076372315036, |
|
"grad_norm": 1.3559014797210693, |
|
"learning_rate": 3.2057041106930104e-05, |
|
"loss": 1.3915, |
|
"step": 1599 |
|
}, |
|
{ |
|
"epoch": 1.2744630071599046, |
|
"grad_norm": 1.3522697687149048, |
|
"learning_rate": 3.187298933606878e-05, |
|
"loss": 1.496, |
|
"step": 1602 |
|
}, |
|
{ |
|
"epoch": 1.2768496420047732, |
|
"grad_norm": 1.3634637594223022, |
|
"learning_rate": 3.1689219922647924e-05, |
|
"loss": 1.3662, |
|
"step": 1605 |
|
}, |
|
{ |
|
"epoch": 1.279236276849642, |
|
"grad_norm": 1.4531528949737549, |
|
"learning_rate": 3.150573572917267e-05, |
|
"loss": 1.5501, |
|
"step": 1608 |
|
}, |
|
{ |
|
"epoch": 1.2816229116945108, |
|
"grad_norm": 1.39664626121521, |
|
"learning_rate": 3.13225396137054e-05, |
|
"loss": 1.42, |
|
"step": 1611 |
|
}, |
|
{ |
|
"epoch": 1.2840095465393795, |
|
"grad_norm": 1.4954712390899658, |
|
"learning_rate": 3.11396344298212e-05, |
|
"loss": 1.5471, |
|
"step": 1614 |
|
}, |
|
{ |
|
"epoch": 1.2863961813842482, |
|
"grad_norm": 1.3956115245819092, |
|
"learning_rate": 3.095702302656347e-05, |
|
"loss": 1.4936, |
|
"step": 1617 |
|
}, |
|
{ |
|
"epoch": 1.288782816229117, |
|
"grad_norm": 1.4721133708953857, |
|
"learning_rate": 3.077470824839947e-05, |
|
"loss": 1.4429, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 1.2911694510739857, |
|
"grad_norm": 6.485837936401367, |
|
"learning_rate": 3.059269293517603e-05, |
|
"loss": 1.4545, |
|
"step": 1623 |
|
}, |
|
{ |
|
"epoch": 1.2935560859188544, |
|
"grad_norm": 1.7930669784545898, |
|
"learning_rate": 3.0410979922075343e-05, |
|
"loss": 1.3401, |
|
"step": 1626 |
|
}, |
|
{ |
|
"epoch": 1.295942720763723, |
|
"grad_norm": 1.4317888021469116, |
|
"learning_rate": 3.022957203957083e-05, |
|
"loss": 1.5389, |
|
"step": 1629 |
|
}, |
|
{ |
|
"epoch": 1.2983293556085918, |
|
"grad_norm": 1.3846065998077393, |
|
"learning_rate": 3.004847211338295e-05, |
|
"loss": 1.355, |
|
"step": 1632 |
|
}, |
|
{ |
|
"epoch": 1.3007159904534606, |
|
"grad_norm": 1.3602261543273926, |
|
"learning_rate": 2.9867682964435294e-05, |
|
"loss": 1.4359, |
|
"step": 1635 |
|
}, |
|
{ |
|
"epoch": 1.3031026252983293, |
|
"grad_norm": 1.3559253215789795, |
|
"learning_rate": 2.9687207408810557e-05, |
|
"loss": 1.493, |
|
"step": 1638 |
|
}, |
|
{ |
|
"epoch": 1.3054892601431982, |
|
"grad_norm": 1.6385109424591064, |
|
"learning_rate": 2.9507048257706727e-05, |
|
"loss": 1.5694, |
|
"step": 1641 |
|
}, |
|
{ |
|
"epoch": 1.307875894988067, |
|
"grad_norm": 1.4390101432800293, |
|
"learning_rate": 2.9327208317393303e-05, |
|
"loss": 1.5722, |
|
"step": 1644 |
|
}, |
|
{ |
|
"epoch": 1.3102625298329356, |
|
"grad_norm": 1.4474018812179565, |
|
"learning_rate": 2.9147690389167514e-05, |
|
"loss": 1.4355, |
|
"step": 1647 |
|
}, |
|
{ |
|
"epoch": 1.3126491646778042, |
|
"grad_norm": 1.3009108304977417, |
|
"learning_rate": 2.8968497269310803e-05, |
|
"loss": 1.3249, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 1.315035799522673, |
|
"grad_norm": 1.4271756410598755, |
|
"learning_rate": 2.8789631749045097e-05, |
|
"loss": 1.3821, |
|
"step": 1653 |
|
}, |
|
{ |
|
"epoch": 1.3174224343675418, |
|
"grad_norm": 1.4093137979507446, |
|
"learning_rate": 2.8611096614489518e-05, |
|
"loss": 1.3932, |
|
"step": 1656 |
|
}, |
|
{ |
|
"epoch": 1.3198090692124105, |
|
"grad_norm": 1.5373637676239014, |
|
"learning_rate": 2.8432894646616885e-05, |
|
"loss": 1.3887, |
|
"step": 1659 |
|
}, |
|
{ |
|
"epoch": 1.3221957040572792, |
|
"grad_norm": 1.3347370624542236, |
|
"learning_rate": 2.8255028621210355e-05, |
|
"loss": 1.4542, |
|
"step": 1662 |
|
}, |
|
{ |
|
"epoch": 1.324582338902148, |
|
"grad_norm": 1.5229403972625732, |
|
"learning_rate": 2.8077501308820308e-05, |
|
"loss": 1.5258, |
|
"step": 1665 |
|
}, |
|
{ |
|
"epoch": 1.3269689737470167, |
|
"grad_norm": 1.38504958152771, |
|
"learning_rate": 2.790031547472105e-05, |
|
"loss": 1.4561, |
|
"step": 1668 |
|
}, |
|
{ |
|
"epoch": 1.3293556085918854, |
|
"grad_norm": 1.4247602224349976, |
|
"learning_rate": 2.7723473878867877e-05, |
|
"loss": 1.5028, |
|
"step": 1671 |
|
}, |
|
{ |
|
"epoch": 1.331742243436754, |
|
"grad_norm": 1.437752604484558, |
|
"learning_rate": 2.754697927585399e-05, |
|
"loss": 1.4035, |
|
"step": 1674 |
|
}, |
|
{ |
|
"epoch": 1.334128878281623, |
|
"grad_norm": 1.3439934253692627, |
|
"learning_rate": 2.737083441486763e-05, |
|
"loss": 1.3615, |
|
"step": 1677 |
|
}, |
|
{ |
|
"epoch": 1.3365155131264916, |
|
"grad_norm": 1.3507174253463745, |
|
"learning_rate": 2.71950420396492e-05, |
|
"loss": 1.416, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 1.3389021479713603, |
|
"grad_norm": 1.3720000982284546, |
|
"learning_rate": 2.7019604888448642e-05, |
|
"loss": 1.4143, |
|
"step": 1683 |
|
}, |
|
{ |
|
"epoch": 1.3412887828162292, |
|
"grad_norm": 1.4216835498809814, |
|
"learning_rate": 2.6844525693982613e-05, |
|
"loss": 1.4053, |
|
"step": 1686 |
|
}, |
|
{ |
|
"epoch": 1.3436754176610979, |
|
"grad_norm": 1.3386591672897339, |
|
"learning_rate": 2.666980718339211e-05, |
|
"loss": 1.4513, |
|
"step": 1689 |
|
}, |
|
{ |
|
"epoch": 1.3460620525059666, |
|
"grad_norm": 1.423043966293335, |
|
"learning_rate": 2.6495452078199863e-05, |
|
"loss": 1.4137, |
|
"step": 1692 |
|
}, |
|
{ |
|
"epoch": 1.3484486873508352, |
|
"grad_norm": 1.4139893054962158, |
|
"learning_rate": 2.6321463094267934e-05, |
|
"loss": 1.395, |
|
"step": 1695 |
|
}, |
|
{ |
|
"epoch": 1.3508353221957041, |
|
"grad_norm": 1.7168787717819214, |
|
"learning_rate": 2.614784294175554e-05, |
|
"loss": 1.5379, |
|
"step": 1698 |
|
}, |
|
{ |
|
"epoch": 1.3532219570405728, |
|
"grad_norm": 1.528499722480774, |
|
"learning_rate": 2.597459432507664e-05, |
|
"loss": 1.4597, |
|
"step": 1701 |
|
}, |
|
{ |
|
"epoch": 1.3556085918854415, |
|
"grad_norm": 1.4057003259658813, |
|
"learning_rate": 2.5801719942858065e-05, |
|
"loss": 1.4797, |
|
"step": 1704 |
|
}, |
|
{ |
|
"epoch": 1.3579952267303104, |
|
"grad_norm": 1.324141025543213, |
|
"learning_rate": 2.562922248789722e-05, |
|
"loss": 1.4355, |
|
"step": 1707 |
|
}, |
|
{ |
|
"epoch": 1.360381861575179, |
|
"grad_norm": 1.3396581411361694, |
|
"learning_rate": 2.5457104647120322e-05, |
|
"loss": 1.5498, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 1.3627684964200477, |
|
"grad_norm": 1.3867429494857788, |
|
"learning_rate": 2.5285369101540445e-05, |
|
"loss": 1.4706, |
|
"step": 1713 |
|
}, |
|
{ |
|
"epoch": 1.3651551312649164, |
|
"grad_norm": 1.4631327390670776, |
|
"learning_rate": 2.5114018526215844e-05, |
|
"loss": 1.4652, |
|
"step": 1716 |
|
}, |
|
{ |
|
"epoch": 1.3675417661097853, |
|
"grad_norm": 1.4228324890136719, |
|
"learning_rate": 2.494305559020822e-05, |
|
"loss": 1.5147, |
|
"step": 1719 |
|
}, |
|
{ |
|
"epoch": 1.369928400954654, |
|
"grad_norm": 1.3669848442077637, |
|
"learning_rate": 2.4772482956541132e-05, |
|
"loss": 1.3945, |
|
"step": 1722 |
|
}, |
|
{ |
|
"epoch": 1.3723150357995226, |
|
"grad_norm": 1.466894268989563, |
|
"learning_rate": 2.4602303282158616e-05, |
|
"loss": 1.3822, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 1.3747016706443915, |
|
"grad_norm": 1.5458747148513794, |
|
"learning_rate": 2.4432519217883676e-05, |
|
"loss": 1.46, |
|
"step": 1728 |
|
}, |
|
{ |
|
"epoch": 1.3770883054892602, |
|
"grad_norm": 1.4828535318374634, |
|
"learning_rate": 2.4263133408377076e-05, |
|
"loss": 1.5053, |
|
"step": 1731 |
|
}, |
|
{ |
|
"epoch": 1.3794749403341289, |
|
"grad_norm": 1.3600937128067017, |
|
"learning_rate": 2.4094148492096125e-05, |
|
"loss": 1.5814, |
|
"step": 1734 |
|
}, |
|
{ |
|
"epoch": 1.3818615751789975, |
|
"grad_norm": 1.5162534713745117, |
|
"learning_rate": 2.3925567101253576e-05, |
|
"loss": 1.5373, |
|
"step": 1737 |
|
}, |
|
{ |
|
"epoch": 1.3842482100238662, |
|
"grad_norm": 1.4318009614944458, |
|
"learning_rate": 2.3757391861776585e-05, |
|
"loss": 1.4308, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 1.3866348448687351, |
|
"grad_norm": 1.285593032836914, |
|
"learning_rate": 2.3589625393265895e-05, |
|
"loss": 1.4115, |
|
"step": 1743 |
|
}, |
|
{ |
|
"epoch": 1.3890214797136038, |
|
"grad_norm": 1.41255521774292, |
|
"learning_rate": 2.3422270308954934e-05, |
|
"loss": 1.4724, |
|
"step": 1746 |
|
}, |
|
{ |
|
"epoch": 1.3914081145584727, |
|
"grad_norm": 1.221402883529663, |
|
"learning_rate": 2.3255329215669185e-05, |
|
"loss": 1.3923, |
|
"step": 1749 |
|
}, |
|
{ |
|
"epoch": 1.3937947494033414, |
|
"grad_norm": 1.7999006509780884, |
|
"learning_rate": 2.3088804713785584e-05, |
|
"loss": 1.5016, |
|
"step": 1752 |
|
}, |
|
{ |
|
"epoch": 1.39618138424821, |
|
"grad_norm": 1.2943382263183594, |
|
"learning_rate": 2.2922699397191893e-05, |
|
"loss": 1.4305, |
|
"step": 1755 |
|
}, |
|
{ |
|
"epoch": 1.3985680190930787, |
|
"grad_norm": 1.470240831375122, |
|
"learning_rate": 2.2757015853246493e-05, |
|
"loss": 1.3706, |
|
"step": 1758 |
|
}, |
|
{ |
|
"epoch": 1.4009546539379474, |
|
"grad_norm": 1.269652247428894, |
|
"learning_rate": 2.2591756662737862e-05, |
|
"loss": 1.4425, |
|
"step": 1761 |
|
}, |
|
{ |
|
"epoch": 1.4033412887828163, |
|
"grad_norm": 1.3773661851882935, |
|
"learning_rate": 2.242692439984463e-05, |
|
"loss": 1.4063, |
|
"step": 1764 |
|
}, |
|
{ |
|
"epoch": 1.405727923627685, |
|
"grad_norm": 1.4183642864227295, |
|
"learning_rate": 2.2262521632095203e-05, |
|
"loss": 1.5086, |
|
"step": 1767 |
|
}, |
|
{ |
|
"epoch": 1.4081145584725536, |
|
"grad_norm": 1.3887887001037598, |
|
"learning_rate": 2.2098550920327998e-05, |
|
"loss": 1.311, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 1.4105011933174225, |
|
"grad_norm": 1.5611753463745117, |
|
"learning_rate": 2.1935014818651405e-05, |
|
"loss": 1.3815, |
|
"step": 1773 |
|
}, |
|
{ |
|
"epoch": 1.4128878281622912, |
|
"grad_norm": 1.2606338262557983, |
|
"learning_rate": 2.177191587440409e-05, |
|
"loss": 1.4286, |
|
"step": 1776 |
|
}, |
|
{ |
|
"epoch": 1.4152744630071599, |
|
"grad_norm": 1.3460246324539185, |
|
"learning_rate": 2.1609256628115316e-05, |
|
"loss": 1.541, |
|
"step": 1779 |
|
}, |
|
{ |
|
"epoch": 1.4176610978520285, |
|
"grad_norm": 1.4279568195343018, |
|
"learning_rate": 2.1447039613465265e-05, |
|
"loss": 1.4517, |
|
"step": 1782 |
|
}, |
|
{ |
|
"epoch": 1.4200477326968974, |
|
"grad_norm": 1.4444867372512817, |
|
"learning_rate": 2.128526735724572e-05, |
|
"loss": 1.4325, |
|
"step": 1785 |
|
}, |
|
{ |
|
"epoch": 1.422434367541766, |
|
"grad_norm": 2.7751872539520264, |
|
"learning_rate": 2.1123942379320576e-05, |
|
"loss": 1.4161, |
|
"step": 1788 |
|
}, |
|
{ |
|
"epoch": 1.4248210023866348, |
|
"grad_norm": 1.5731838941574097, |
|
"learning_rate": 2.096306719258669e-05, |
|
"loss": 1.3889, |
|
"step": 1791 |
|
}, |
|
{ |
|
"epoch": 1.4272076372315037, |
|
"grad_norm": 1.5036033391952515, |
|
"learning_rate": 2.0802644302934683e-05, |
|
"loss": 1.4823, |
|
"step": 1794 |
|
}, |
|
{ |
|
"epoch": 1.4295942720763724, |
|
"grad_norm": 1.5555312633514404, |
|
"learning_rate": 2.0642676209209934e-05, |
|
"loss": 1.5452, |
|
"step": 1797 |
|
}, |
|
{ |
|
"epoch": 1.431980906921241, |
|
"grad_norm": 1.653014898300171, |
|
"learning_rate": 2.0483165403173583e-05, |
|
"loss": 1.4651, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.4343675417661097, |
|
"grad_norm": 1.4742029905319214, |
|
"learning_rate": 2.0324114369463855e-05, |
|
"loss": 1.4215, |
|
"step": 1803 |
|
}, |
|
{ |
|
"epoch": 1.4367541766109786, |
|
"grad_norm": 1.3069920539855957, |
|
"learning_rate": 2.0165525585557204e-05, |
|
"loss": 1.352, |
|
"step": 1806 |
|
}, |
|
{ |
|
"epoch": 1.4391408114558473, |
|
"grad_norm": 1.3675094842910767, |
|
"learning_rate": 2.0007401521729863e-05, |
|
"loss": 1.3925, |
|
"step": 1809 |
|
}, |
|
{ |
|
"epoch": 1.441527446300716, |
|
"grad_norm": 1.5048547983169556, |
|
"learning_rate": 1.984974464101928e-05, |
|
"loss": 1.4392, |
|
"step": 1812 |
|
}, |
|
{ |
|
"epoch": 1.4439140811455848, |
|
"grad_norm": 1.3629268407821655, |
|
"learning_rate": 1.9692557399185734e-05, |
|
"loss": 1.6123, |
|
"step": 1815 |
|
}, |
|
{ |
|
"epoch": 1.4463007159904535, |
|
"grad_norm": 1.4186464548110962, |
|
"learning_rate": 1.953584224467418e-05, |
|
"loss": 1.4375, |
|
"step": 1818 |
|
}, |
|
{ |
|
"epoch": 1.4486873508353222, |
|
"grad_norm": 1.264143943786621, |
|
"learning_rate": 1.9379601618575977e-05, |
|
"loss": 1.3714, |
|
"step": 1821 |
|
}, |
|
{ |
|
"epoch": 1.4510739856801909, |
|
"grad_norm": 1.2267049551010132, |
|
"learning_rate": 1.9223837954591046e-05, |
|
"loss": 1.442, |
|
"step": 1824 |
|
}, |
|
{ |
|
"epoch": 1.4534606205250595, |
|
"grad_norm": 1.4130151271820068, |
|
"learning_rate": 1.9068553678989736e-05, |
|
"loss": 1.5417, |
|
"step": 1827 |
|
}, |
|
{ |
|
"epoch": 1.4558472553699284, |
|
"grad_norm": 1.3902508020401, |
|
"learning_rate": 1.8913751210575248e-05, |
|
"loss": 1.4484, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 1.458233890214797, |
|
"grad_norm": 1.3493434190750122, |
|
"learning_rate": 1.8759432960645774e-05, |
|
"loss": 1.4089, |
|
"step": 1833 |
|
}, |
|
{ |
|
"epoch": 1.460620525059666, |
|
"grad_norm": 1.3696714639663696, |
|
"learning_rate": 1.8605601332957077e-05, |
|
"loss": 1.3673, |
|
"step": 1836 |
|
}, |
|
{ |
|
"epoch": 1.4630071599045347, |
|
"grad_norm": 1.3794286251068115, |
|
"learning_rate": 1.8452258723684995e-05, |
|
"loss": 1.3348, |
|
"step": 1839 |
|
}, |
|
{ |
|
"epoch": 1.4653937947494033, |
|
"grad_norm": 1.4056599140167236, |
|
"learning_rate": 1.8299407521388067e-05, |
|
"loss": 1.3715, |
|
"step": 1842 |
|
}, |
|
{ |
|
"epoch": 1.467780429594272, |
|
"grad_norm": 1.3033989667892456, |
|
"learning_rate": 1.8147050106970437e-05, |
|
"loss": 1.4756, |
|
"step": 1845 |
|
}, |
|
{ |
|
"epoch": 1.4701670644391407, |
|
"grad_norm": 1.2725988626480103, |
|
"learning_rate": 1.7995188853644646e-05, |
|
"loss": 1.4429, |
|
"step": 1848 |
|
}, |
|
{ |
|
"epoch": 1.4725536992840096, |
|
"grad_norm": 1.2975175380706787, |
|
"learning_rate": 1.784382612689477e-05, |
|
"loss": 1.4233, |
|
"step": 1851 |
|
}, |
|
{ |
|
"epoch": 1.4749403341288783, |
|
"grad_norm": 1.3403589725494385, |
|
"learning_rate": 1.7692964284439505e-05, |
|
"loss": 1.3441, |
|
"step": 1854 |
|
}, |
|
{ |
|
"epoch": 1.477326968973747, |
|
"grad_norm": 1.395530343055725, |
|
"learning_rate": 1.7542605676195506e-05, |
|
"loss": 1.3968, |
|
"step": 1857 |
|
}, |
|
{ |
|
"epoch": 1.4797136038186158, |
|
"grad_norm": 1.2565813064575195, |
|
"learning_rate": 1.739275264424067e-05, |
|
"loss": 1.4858, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 1.4821002386634845, |
|
"grad_norm": 1.4718109369277954, |
|
"learning_rate": 1.7243407522777806e-05, |
|
"loss": 1.4901, |
|
"step": 1863 |
|
}, |
|
{ |
|
"epoch": 1.4844868735083532, |
|
"grad_norm": 1.2175111770629883, |
|
"learning_rate": 1.7094572638098123e-05, |
|
"loss": 1.4708, |
|
"step": 1866 |
|
}, |
|
{ |
|
"epoch": 1.4868735083532219, |
|
"grad_norm": 1.3653630018234253, |
|
"learning_rate": 1.6946250308545125e-05, |
|
"loss": 1.3542, |
|
"step": 1869 |
|
}, |
|
{ |
|
"epoch": 1.4892601431980907, |
|
"grad_norm": 1.325291633605957, |
|
"learning_rate": 1.6798442844478445e-05, |
|
"loss": 1.3565, |
|
"step": 1872 |
|
}, |
|
{ |
|
"epoch": 1.4916467780429594, |
|
"grad_norm": 1.3131422996520996, |
|
"learning_rate": 1.6651152548237802e-05, |
|
"loss": 1.3708, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 1.494033412887828, |
|
"grad_norm": 1.3751031160354614, |
|
"learning_rate": 1.6504381714107252e-05, |
|
"loss": 1.4521, |
|
"step": 1878 |
|
}, |
|
{ |
|
"epoch": 1.496420047732697, |
|
"grad_norm": 1.3174288272857666, |
|
"learning_rate": 1.6358132628279322e-05, |
|
"loss": 1.3748, |
|
"step": 1881 |
|
}, |
|
{ |
|
"epoch": 1.4988066825775657, |
|
"grad_norm": 1.3484026193618774, |
|
"learning_rate": 1.6212407568819565e-05, |
|
"loss": 1.3542, |
|
"step": 1884 |
|
}, |
|
{ |
|
"epoch": 1.5011933174224343, |
|
"grad_norm": 1.417079210281372, |
|
"learning_rate": 1.6067208805630877e-05, |
|
"loss": 1.4029, |
|
"step": 1887 |
|
}, |
|
{ |
|
"epoch": 1.503579952267303, |
|
"grad_norm": 2.4393136501312256, |
|
"learning_rate": 1.5922538600418318e-05, |
|
"loss": 1.3775, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 1.5059665871121717, |
|
"grad_norm": 1.4242373704910278, |
|
"learning_rate": 1.5778399206653734e-05, |
|
"loss": 1.3828, |
|
"step": 1893 |
|
}, |
|
{ |
|
"epoch": 1.5083532219570406, |
|
"grad_norm": 1.3555493354797363, |
|
"learning_rate": 1.563479286954078e-05, |
|
"loss": 1.4257, |
|
"step": 1896 |
|
}, |
|
{ |
|
"epoch": 1.5107398568019093, |
|
"grad_norm": 1.3086305856704712, |
|
"learning_rate": 1.54917218259799e-05, |
|
"loss": 1.366, |
|
"step": 1899 |
|
}, |
|
{ |
|
"epoch": 1.5131264916467781, |
|
"grad_norm": 1.3633023500442505, |
|
"learning_rate": 1.5349188304533413e-05, |
|
"loss": 1.4599, |
|
"step": 1902 |
|
}, |
|
{ |
|
"epoch": 1.5155131264916468, |
|
"grad_norm": 1.3130401372909546, |
|
"learning_rate": 1.5207194525390938e-05, |
|
"loss": 1.4543, |
|
"step": 1905 |
|
}, |
|
{ |
|
"epoch": 1.5178997613365155, |
|
"grad_norm": 1.3946834802627563, |
|
"learning_rate": 1.5065742700334678e-05, |
|
"loss": 1.4115, |
|
"step": 1908 |
|
}, |
|
{ |
|
"epoch": 1.5202863961813842, |
|
"grad_norm": 1.3722360134124756, |
|
"learning_rate": 1.4924835032705064e-05, |
|
"loss": 1.4059, |
|
"step": 1911 |
|
}, |
|
{ |
|
"epoch": 1.5226730310262528, |
|
"grad_norm": 1.297034740447998, |
|
"learning_rate": 1.4784473717366387e-05, |
|
"loss": 1.5423, |
|
"step": 1914 |
|
}, |
|
{ |
|
"epoch": 1.5250596658711217, |
|
"grad_norm": 1.2313867807388306, |
|
"learning_rate": 1.4644660940672627e-05, |
|
"loss": 1.4202, |
|
"step": 1917 |
|
}, |
|
{ |
|
"epoch": 1.5274463007159904, |
|
"grad_norm": 1.2203267812728882, |
|
"learning_rate": 1.4505398880433369e-05, |
|
"loss": 1.3289, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 1.5298329355608593, |
|
"grad_norm": 2.824936628341675, |
|
"learning_rate": 1.4366689705879898e-05, |
|
"loss": 1.4151, |
|
"step": 1923 |
|
}, |
|
{ |
|
"epoch": 1.532219570405728, |
|
"grad_norm": 1.2896000146865845, |
|
"learning_rate": 1.4228535577631442e-05, |
|
"loss": 1.3452, |
|
"step": 1926 |
|
}, |
|
{ |
|
"epoch": 1.5346062052505967, |
|
"grad_norm": 1.2921373844146729, |
|
"learning_rate": 1.4090938647661461e-05, |
|
"loss": 1.4469, |
|
"step": 1929 |
|
}, |
|
{ |
|
"epoch": 1.5369928400954653, |
|
"grad_norm": 1.4541573524475098, |
|
"learning_rate": 1.3953901059264191e-05, |
|
"loss": 1.5048, |
|
"step": 1932 |
|
}, |
|
{ |
|
"epoch": 1.539379474940334, |
|
"grad_norm": 1.5273715257644653, |
|
"learning_rate": 1.3817424947021151e-05, |
|
"loss": 1.425, |
|
"step": 1935 |
|
}, |
|
{ |
|
"epoch": 1.541766109785203, |
|
"grad_norm": 1.4470243453979492, |
|
"learning_rate": 1.3681512436768045e-05, |
|
"loss": 1.6023, |
|
"step": 1938 |
|
}, |
|
{ |
|
"epoch": 1.5441527446300716, |
|
"grad_norm": 1.274420976638794, |
|
"learning_rate": 1.3546165645561487e-05, |
|
"loss": 1.3682, |
|
"step": 1941 |
|
}, |
|
{ |
|
"epoch": 1.5465393794749405, |
|
"grad_norm": 1.537539005279541, |
|
"learning_rate": 1.3411386681646164e-05, |
|
"loss": 1.3933, |
|
"step": 1944 |
|
}, |
|
{ |
|
"epoch": 1.5489260143198091, |
|
"grad_norm": 1.2480096817016602, |
|
"learning_rate": 1.3277177644421924e-05, |
|
"loss": 1.3532, |
|
"step": 1947 |
|
}, |
|
{ |
|
"epoch": 1.5513126491646778, |
|
"grad_norm": 1.4736180305480957, |
|
"learning_rate": 1.314354062441106e-05, |
|
"loss": 1.5258, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 1.5536992840095465, |
|
"grad_norm": 1.2729185819625854, |
|
"learning_rate": 1.301047770322581e-05, |
|
"loss": 1.3904, |
|
"step": 1953 |
|
}, |
|
{ |
|
"epoch": 1.5560859188544152, |
|
"grad_norm": 1.2870765924453735, |
|
"learning_rate": 1.287799095353584e-05, |
|
"loss": 1.3343, |
|
"step": 1956 |
|
}, |
|
{ |
|
"epoch": 1.558472553699284, |
|
"grad_norm": 1.385016679763794, |
|
"learning_rate": 1.2746082439036117e-05, |
|
"loss": 1.4185, |
|
"step": 1959 |
|
}, |
|
{ |
|
"epoch": 1.5608591885441527, |
|
"grad_norm": 1.3921257257461548, |
|
"learning_rate": 1.2614754214414548e-05, |
|
"loss": 1.3932, |
|
"step": 1962 |
|
}, |
|
{ |
|
"epoch": 1.5632458233890216, |
|
"grad_norm": 1.4554414749145508, |
|
"learning_rate": 1.2484008325320174e-05, |
|
"loss": 1.4237, |
|
"step": 1965 |
|
}, |
|
{ |
|
"epoch": 1.5656324582338903, |
|
"grad_norm": 1.4019906520843506, |
|
"learning_rate": 1.2353846808331154e-05, |
|
"loss": 1.3849, |
|
"step": 1968 |
|
}, |
|
{ |
|
"epoch": 1.568019093078759, |
|
"grad_norm": 1.3316291570663452, |
|
"learning_rate": 1.2224271690923155e-05, |
|
"loss": 1.3343, |
|
"step": 1971 |
|
}, |
|
{ |
|
"epoch": 1.5704057279236276, |
|
"grad_norm": 1.251207709312439, |
|
"learning_rate": 1.2095284991437733e-05, |
|
"loss": 1.333, |
|
"step": 1974 |
|
}, |
|
{ |
|
"epoch": 1.5727923627684963, |
|
"grad_norm": 1.3229224681854248, |
|
"learning_rate": 1.1966888719050829e-05, |
|
"loss": 1.4419, |
|
"step": 1977 |
|
}, |
|
{ |
|
"epoch": 1.575178997613365, |
|
"grad_norm": 1.3271231651306152, |
|
"learning_rate": 1.1839084873741584e-05, |
|
"loss": 1.421, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 1.577565632458234, |
|
"grad_norm": 1.4479427337646484, |
|
"learning_rate": 1.1711875446261094e-05, |
|
"loss": 1.4322, |
|
"step": 1983 |
|
}, |
|
{ |
|
"epoch": 1.5799522673031028, |
|
"grad_norm": 1.4587756395339966, |
|
"learning_rate": 1.1585262418101467e-05, |
|
"loss": 1.4832, |
|
"step": 1986 |
|
}, |
|
{ |
|
"epoch": 1.5823389021479715, |
|
"grad_norm": 1.3749325275421143, |
|
"learning_rate": 1.1459247761464909e-05, |
|
"loss": 1.423, |
|
"step": 1989 |
|
}, |
|
{ |
|
"epoch": 1.5847255369928401, |
|
"grad_norm": 1.3663976192474365, |
|
"learning_rate": 1.1333833439233055e-05, |
|
"loss": 1.4133, |
|
"step": 1992 |
|
}, |
|
{ |
|
"epoch": 1.5871121718377088, |
|
"grad_norm": 1.3849143981933594, |
|
"learning_rate": 1.1209021404936304e-05, |
|
"loss": 1.3823, |
|
"step": 1995 |
|
}, |
|
{ |
|
"epoch": 1.5894988066825775, |
|
"grad_norm": 1.3283625841140747, |
|
"learning_rate": 1.1084813602723515e-05, |
|
"loss": 1.4437, |
|
"step": 1998 |
|
}, |
|
{ |
|
"epoch": 1.5918854415274462, |
|
"grad_norm": 1.3943214416503906, |
|
"learning_rate": 1.0961211967331597e-05, |
|
"loss": 1.4566, |
|
"step": 2001 |
|
}, |
|
{ |
|
"epoch": 1.594272076372315, |
|
"grad_norm": 1.340326189994812, |
|
"learning_rate": 1.083821842405548e-05, |
|
"loss": 1.3319, |
|
"step": 2004 |
|
}, |
|
{ |
|
"epoch": 1.5966587112171837, |
|
"grad_norm": 1.458629846572876, |
|
"learning_rate": 1.0715834888718074e-05, |
|
"loss": 1.307, |
|
"step": 2007 |
|
}, |
|
{ |
|
"epoch": 1.5990453460620526, |
|
"grad_norm": 1.327406883239746, |
|
"learning_rate": 1.0594063267640386e-05, |
|
"loss": 1.3367, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 1.6014319809069213, |
|
"grad_norm": 1.3545573949813843, |
|
"learning_rate": 1.0472905457611936e-05, |
|
"loss": 1.43, |
|
"step": 2013 |
|
}, |
|
{ |
|
"epoch": 1.60381861575179, |
|
"grad_norm": 1.3321373462677002, |
|
"learning_rate": 1.0352363345861065e-05, |
|
"loss": 1.3416, |
|
"step": 2016 |
|
}, |
|
{ |
|
"epoch": 1.6062052505966586, |
|
"grad_norm": 1.3040752410888672, |
|
"learning_rate": 1.023243881002573e-05, |
|
"loss": 1.6122, |
|
"step": 2019 |
|
}, |
|
{ |
|
"epoch": 1.6085918854415273, |
|
"grad_norm": 1.7471809387207031, |
|
"learning_rate": 1.0113133718124035e-05, |
|
"loss": 1.5219, |
|
"step": 2022 |
|
}, |
|
{ |
|
"epoch": 1.6109785202863962, |
|
"grad_norm": 1.362330436706543, |
|
"learning_rate": 9.994449928525324e-06, |
|
"loss": 1.4859, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 1.6133651551312649, |
|
"grad_norm": 1.3588142395019531, |
|
"learning_rate": 9.876389289921106e-06, |
|
"loss": 1.5388, |
|
"step": 2028 |
|
}, |
|
{ |
|
"epoch": 1.6157517899761338, |
|
"grad_norm": 1.2795350551605225, |
|
"learning_rate": 9.758953641296331e-06, |
|
"loss": 1.4129, |
|
"step": 2031 |
|
}, |
|
{ |
|
"epoch": 1.6181384248210025, |
|
"grad_norm": 1.3939927816390991, |
|
"learning_rate": 9.642144811900739e-06, |
|
"loss": 1.407, |
|
"step": 2034 |
|
}, |
|
{ |
|
"epoch": 1.6205250596658711, |
|
"grad_norm": 1.3605296611785889, |
|
"learning_rate": 9.5259646212203e-06, |
|
"loss": 1.3686, |
|
"step": 2037 |
|
}, |
|
{ |
|
"epoch": 1.6229116945107398, |
|
"grad_norm": 3.3502025604248047, |
|
"learning_rate": 9.410414878948975e-06, |
|
"loss": 1.3942, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 1.6252983293556085, |
|
"grad_norm": 1.2541710138320923, |
|
"learning_rate": 9.295497384960416e-06, |
|
"loss": 1.4175, |
|
"step": 2043 |
|
}, |
|
{ |
|
"epoch": 1.6276849642004774, |
|
"grad_norm": 1.4015976190567017, |
|
"learning_rate": 9.181213929280046e-06, |
|
"loss": 1.4867, |
|
"step": 2046 |
|
}, |
|
{ |
|
"epoch": 1.630071599045346, |
|
"grad_norm": 1.2918376922607422, |
|
"learning_rate": 9.067566292057084e-06, |
|
"loss": 1.4243, |
|
"step": 2049 |
|
}, |
|
{ |
|
"epoch": 1.632458233890215, |
|
"grad_norm": 1.341584324836731, |
|
"learning_rate": 8.954556243536877e-06, |
|
"loss": 1.309, |
|
"step": 2052 |
|
}, |
|
{ |
|
"epoch": 1.6348448687350836, |
|
"grad_norm": 1.364698052406311, |
|
"learning_rate": 8.842185544033255e-06, |
|
"loss": 1.4609, |
|
"step": 2055 |
|
}, |
|
{ |
|
"epoch": 1.6372315035799523, |
|
"grad_norm": 1.3149210214614868, |
|
"learning_rate": 8.7304559439012e-06, |
|
"loss": 1.4338, |
|
"step": 2058 |
|
}, |
|
{ |
|
"epoch": 1.639618138424821, |
|
"grad_norm": 1.2939684391021729, |
|
"learning_rate": 8.619369183509501e-06, |
|
"loss": 1.3857, |
|
"step": 2061 |
|
}, |
|
{ |
|
"epoch": 1.6420047732696896, |
|
"grad_norm": 1.364255428314209, |
|
"learning_rate": 8.508926993213712e-06, |
|
"loss": 1.4484, |
|
"step": 2064 |
|
}, |
|
{ |
|
"epoch": 1.6443914081145583, |
|
"grad_norm": 1.3580390214920044, |
|
"learning_rate": 8.39913109332916e-06, |
|
"loss": 1.377, |
|
"step": 2067 |
|
}, |
|
{ |
|
"epoch": 1.6467780429594272, |
|
"grad_norm": 1.354833960533142, |
|
"learning_rate": 8.28998319410413e-06, |
|
"loss": 1.3848, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 1.649164677804296, |
|
"grad_norm": 1.2995808124542236, |
|
"learning_rate": 8.181484995693295e-06, |
|
"loss": 1.369, |
|
"step": 2073 |
|
}, |
|
{ |
|
"epoch": 1.6515513126491648, |
|
"grad_norm": 1.387732982635498, |
|
"learning_rate": 8.073638188131128e-06, |
|
"loss": 1.3963, |
|
"step": 2076 |
|
}, |
|
{ |
|
"epoch": 1.6539379474940334, |
|
"grad_norm": 1.349213719367981, |
|
"learning_rate": 7.966444451305726e-06, |
|
"loss": 1.4368, |
|
"step": 2079 |
|
}, |
|
{ |
|
"epoch": 1.6563245823389021, |
|
"grad_norm": 1.3115229606628418, |
|
"learning_rate": 7.859905454932471e-06, |
|
"loss": 1.3239, |
|
"step": 2082 |
|
}, |
|
{ |
|
"epoch": 1.6587112171837708, |
|
"grad_norm": 1.2257990837097168, |
|
"learning_rate": 7.75402285852816e-06, |
|
"loss": 1.3398, |
|
"step": 2085 |
|
}, |
|
{ |
|
"epoch": 1.6610978520286395, |
|
"grad_norm": 1.3116490840911865, |
|
"learning_rate": 7.648798311385058e-06, |
|
"loss": 1.3408, |
|
"step": 2088 |
|
}, |
|
{ |
|
"epoch": 1.6634844868735084, |
|
"grad_norm": 1.3405076265335083, |
|
"learning_rate": 7.5442334525452964e-06, |
|
"loss": 1.3239, |
|
"step": 2091 |
|
}, |
|
{ |
|
"epoch": 1.665871121718377, |
|
"grad_norm": 1.328359842300415, |
|
"learning_rate": 7.440329910775273e-06, |
|
"loss": 1.3864, |
|
"step": 2094 |
|
}, |
|
{ |
|
"epoch": 1.668257756563246, |
|
"grad_norm": 1.4465019702911377, |
|
"learning_rate": 7.337089304540301e-06, |
|
"loss": 1.3507, |
|
"step": 2097 |
|
}, |
|
{ |
|
"epoch": 1.6706443914081146, |
|
"grad_norm": 1.334693431854248, |
|
"learning_rate": 7.234513241979418e-06, |
|
"loss": 1.41, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.6730310262529833, |
|
"grad_norm": 1.2191022634506226, |
|
"learning_rate": 7.132603320880294e-06, |
|
"loss": 1.3517, |
|
"step": 2103 |
|
}, |
|
{ |
|
"epoch": 1.675417661097852, |
|
"grad_norm": 1.286347508430481, |
|
"learning_rate": 7.031361128654401e-06, |
|
"loss": 1.4724, |
|
"step": 2106 |
|
}, |
|
{ |
|
"epoch": 1.6778042959427206, |
|
"grad_norm": 1.2741565704345703, |
|
"learning_rate": 6.930788242312253e-06, |
|
"loss": 1.3599, |
|
"step": 2109 |
|
}, |
|
{ |
|
"epoch": 1.6801909307875895, |
|
"grad_norm": 1.331213116645813, |
|
"learning_rate": 6.830886228438837e-06, |
|
"loss": 1.448, |
|
"step": 2112 |
|
}, |
|
{ |
|
"epoch": 1.6825775656324582, |
|
"grad_norm": 1.2380222082138062, |
|
"learning_rate": 6.731656643169204e-06, |
|
"loss": 1.362, |
|
"step": 2115 |
|
}, |
|
{ |
|
"epoch": 1.684964200477327, |
|
"grad_norm": 1.3948215246200562, |
|
"learning_rate": 6.633101032164274e-06, |
|
"loss": 1.4777, |
|
"step": 2118 |
|
}, |
|
{ |
|
"epoch": 1.6873508353221958, |
|
"grad_norm": 1.2766612768173218, |
|
"learning_rate": 6.535220930586705e-06, |
|
"loss": 1.5417, |
|
"step": 2121 |
|
}, |
|
{ |
|
"epoch": 1.6897374701670644, |
|
"grad_norm": 1.5349231958389282, |
|
"learning_rate": 6.4380178630770225e-06, |
|
"loss": 1.4201, |
|
"step": 2124 |
|
}, |
|
{ |
|
"epoch": 1.692124105011933, |
|
"grad_norm": 1.3303968906402588, |
|
"learning_rate": 6.341493343729854e-06, |
|
"loss": 1.4746, |
|
"step": 2127 |
|
}, |
|
{ |
|
"epoch": 1.6945107398568018, |
|
"grad_norm": 1.2858116626739502, |
|
"learning_rate": 6.2456488760703205e-06, |
|
"loss": 1.4834, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 1.6968973747016707, |
|
"grad_norm": 1.3412748575210571, |
|
"learning_rate": 6.150485953030677e-06, |
|
"loss": 1.2398, |
|
"step": 2133 |
|
}, |
|
{ |
|
"epoch": 1.6992840095465394, |
|
"grad_norm": 1.3060575723648071, |
|
"learning_rate": 6.056006056926977e-06, |
|
"loss": 1.5145, |
|
"step": 2136 |
|
}, |
|
{ |
|
"epoch": 1.7016706443914082, |
|
"grad_norm": 1.4394913911819458, |
|
"learning_rate": 5.962210659436091e-06, |
|
"loss": 1.3623, |
|
"step": 2139 |
|
}, |
|
{ |
|
"epoch": 1.704057279236277, |
|
"grad_norm": 1.2894078493118286, |
|
"learning_rate": 5.869101221572654e-06, |
|
"loss": 1.327, |
|
"step": 2142 |
|
}, |
|
{ |
|
"epoch": 1.7064439140811456, |
|
"grad_norm": 1.6368987560272217, |
|
"learning_rate": 5.776679193666412e-06, |
|
"loss": 1.5371, |
|
"step": 2145 |
|
}, |
|
{ |
|
"epoch": 1.7088305489260143, |
|
"grad_norm": 1.4706789255142212, |
|
"learning_rate": 5.6849460153395706e-06, |
|
"loss": 1.3617, |
|
"step": 2148 |
|
}, |
|
{ |
|
"epoch": 1.711217183770883, |
|
"grad_norm": 1.358765721321106, |
|
"learning_rate": 5.5939031154844e-06, |
|
"loss": 1.3666, |
|
"step": 2151 |
|
}, |
|
{ |
|
"epoch": 1.7136038186157518, |
|
"grad_norm": 1.3404169082641602, |
|
"learning_rate": 5.5035519122409895e-06, |
|
"loss": 1.4213, |
|
"step": 2154 |
|
}, |
|
{ |
|
"epoch": 1.7159904534606205, |
|
"grad_norm": 1.9059109687805176, |
|
"learning_rate": 5.413893812975096e-06, |
|
"loss": 1.4891, |
|
"step": 2157 |
|
}, |
|
{ |
|
"epoch": 1.7183770883054894, |
|
"grad_norm": 1.5331205129623413, |
|
"learning_rate": 5.324930214256302e-06, |
|
"loss": 1.4278, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 1.720763723150358, |
|
"grad_norm": 1.3307032585144043, |
|
"learning_rate": 5.236662501836192e-06, |
|
"loss": 1.389, |
|
"step": 2163 |
|
}, |
|
{ |
|
"epoch": 1.7231503579952268, |
|
"grad_norm": 1.5163909196853638, |
|
"learning_rate": 5.149092050626825e-06, |
|
"loss": 1.5462, |
|
"step": 2166 |
|
}, |
|
{ |
|
"epoch": 1.7255369928400954, |
|
"grad_norm": 1.3799465894699097, |
|
"learning_rate": 5.062220224679276e-06, |
|
"loss": 1.3583, |
|
"step": 2169 |
|
}, |
|
{ |
|
"epoch": 1.727923627684964, |
|
"grad_norm": 1.288385272026062, |
|
"learning_rate": 4.9760483771624236e-06, |
|
"loss": 1.401, |
|
"step": 2172 |
|
}, |
|
{ |
|
"epoch": 1.7303102625298328, |
|
"grad_norm": 1.3018258810043335, |
|
"learning_rate": 4.89057785034181e-06, |
|
"loss": 1.3998, |
|
"step": 2175 |
|
}, |
|
{ |
|
"epoch": 1.7326968973747017, |
|
"grad_norm": 1.4465045928955078, |
|
"learning_rate": 4.805809975558828e-06, |
|
"loss": 1.4118, |
|
"step": 2178 |
|
}, |
|
{ |
|
"epoch": 1.7350835322195706, |
|
"grad_norm": 1.3118780851364136, |
|
"learning_rate": 4.721746073209893e-06, |
|
"loss": 1.3574, |
|
"step": 2181 |
|
}, |
|
{ |
|
"epoch": 1.7374701670644392, |
|
"grad_norm": 1.653915524482727, |
|
"learning_rate": 4.6383874527259345e-06, |
|
"loss": 1.5086, |
|
"step": 2184 |
|
}, |
|
{ |
|
"epoch": 1.739856801909308, |
|
"grad_norm": 1.3008549213409424, |
|
"learning_rate": 4.555735412551975e-06, |
|
"loss": 1.4131, |
|
"step": 2187 |
|
}, |
|
{ |
|
"epoch": 1.7422434367541766, |
|
"grad_norm": 1.714281678199768, |
|
"learning_rate": 4.47379124012689e-06, |
|
"loss": 1.4335, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 1.7446300715990453, |
|
"grad_norm": 1.379380702972412, |
|
"learning_rate": 4.3925562118634135e-06, |
|
"loss": 1.4987, |
|
"step": 2193 |
|
}, |
|
{ |
|
"epoch": 1.747016706443914, |
|
"grad_norm": 1.3546345233917236, |
|
"learning_rate": 4.312031593128163e-06, |
|
"loss": 1.5424, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 1.7494033412887828, |
|
"grad_norm": 1.4139165878295898, |
|
"learning_rate": 4.232218638222029e-06, |
|
"loss": 1.3599, |
|
"step": 2199 |
|
}, |
|
{ |
|
"epoch": 1.7517899761336515, |
|
"grad_norm": 1.326416015625, |
|
"learning_rate": 4.153118590360561e-06, |
|
"loss": 1.3698, |
|
"step": 2202 |
|
}, |
|
{ |
|
"epoch": 1.7541766109785204, |
|
"grad_norm": 1.2726656198501587, |
|
"learning_rate": 4.074732681654647e-06, |
|
"loss": 1.3478, |
|
"step": 2205 |
|
}, |
|
{ |
|
"epoch": 1.756563245823389, |
|
"grad_norm": 1.295027494430542, |
|
"learning_rate": 3.997062133091284e-06, |
|
"loss": 1.3318, |
|
"step": 2208 |
|
}, |
|
{ |
|
"epoch": 1.7589498806682577, |
|
"grad_norm": 1.240146279335022, |
|
"learning_rate": 3.920108154514585e-06, |
|
"loss": 1.2902, |
|
"step": 2211 |
|
}, |
|
{ |
|
"epoch": 1.7613365155131264, |
|
"grad_norm": 1.2674936056137085, |
|
"learning_rate": 3.843871944606969e-06, |
|
"loss": 1.3331, |
|
"step": 2214 |
|
}, |
|
{ |
|
"epoch": 1.763723150357995, |
|
"grad_norm": 1.4124763011932373, |
|
"learning_rate": 3.7683546908703903e-06, |
|
"loss": 1.432, |
|
"step": 2217 |
|
}, |
|
{ |
|
"epoch": 1.766109785202864, |
|
"grad_norm": 1.3157274723052979, |
|
"learning_rate": 3.693557569607947e-06, |
|
"loss": 1.3372, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 1.7684964200477327, |
|
"grad_norm": 1.4040486812591553, |
|
"learning_rate": 3.6194817459054676e-06, |
|
"loss": 1.4255, |
|
"step": 2223 |
|
}, |
|
{ |
|
"epoch": 1.7708830548926016, |
|
"grad_norm": 1.2398067712783813, |
|
"learning_rate": 3.5461283736134722e-06, |
|
"loss": 1.3448, |
|
"step": 2226 |
|
}, |
|
{ |
|
"epoch": 1.7732696897374702, |
|
"grad_norm": 1.2771934270858765, |
|
"learning_rate": 3.4734985953290778e-06, |
|
"loss": 1.4079, |
|
"step": 2229 |
|
}, |
|
{ |
|
"epoch": 1.775656324582339, |
|
"grad_norm": 1.1697748899459839, |
|
"learning_rate": 3.401593542378262e-06, |
|
"loss": 1.4184, |
|
"step": 2232 |
|
}, |
|
{ |
|
"epoch": 1.7780429594272076, |
|
"grad_norm": 1.355383276939392, |
|
"learning_rate": 3.330414334798265e-06, |
|
"loss": 1.31, |
|
"step": 2235 |
|
}, |
|
{ |
|
"epoch": 1.7804295942720763, |
|
"grad_norm": 1.2719684839248657, |
|
"learning_rate": 3.2599620813200837e-06, |
|
"loss": 1.4189, |
|
"step": 2238 |
|
}, |
|
{ |
|
"epoch": 1.7828162291169452, |
|
"grad_norm": 1.4642714262008667, |
|
"learning_rate": 3.1902378793512657e-06, |
|
"loss": 1.4552, |
|
"step": 2241 |
|
}, |
|
{ |
|
"epoch": 1.7852028639618138, |
|
"grad_norm": 1.2354283332824707, |
|
"learning_rate": 3.121242814958747e-06, |
|
"loss": 1.3951, |
|
"step": 2244 |
|
}, |
|
{ |
|
"epoch": 1.7875894988066827, |
|
"grad_norm": 1.3236439228057861, |
|
"learning_rate": 3.0529779628519992e-06, |
|
"loss": 1.4105, |
|
"step": 2247 |
|
}, |
|
{ |
|
"epoch": 1.7899761336515514, |
|
"grad_norm": 1.3168965578079224, |
|
"learning_rate": 2.9854443863662262e-06, |
|
"loss": 1.4434, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 1.79236276849642, |
|
"grad_norm": 2.0770509243011475, |
|
"learning_rate": 2.918643137445859e-06, |
|
"loss": 1.4209, |
|
"step": 2253 |
|
}, |
|
{ |
|
"epoch": 1.7947494033412887, |
|
"grad_norm": 1.3210294246673584, |
|
"learning_rate": 2.8525752566281482e-06, |
|
"loss": 1.4219, |
|
"step": 2256 |
|
}, |
|
{ |
|
"epoch": 1.7971360381861574, |
|
"grad_norm": 1.3929189443588257, |
|
"learning_rate": 2.787241773026933e-06, |
|
"loss": 1.4382, |
|
"step": 2259 |
|
}, |
|
{ |
|
"epoch": 1.799522673031026, |
|
"grad_norm": 1.3046364784240723, |
|
"learning_rate": 2.722643704316652e-06, |
|
"loss": 1.4634, |
|
"step": 2262 |
|
}, |
|
{ |
|
"epoch": 1.801909307875895, |
|
"grad_norm": 1.324135422706604, |
|
"learning_rate": 2.658782056716441e-06, |
|
"loss": 1.4041, |
|
"step": 2265 |
|
}, |
|
{ |
|
"epoch": 1.8042959427207639, |
|
"grad_norm": 1.3277792930603027, |
|
"learning_rate": 2.5956578249745236e-06, |
|
"loss": 1.3838, |
|
"step": 2268 |
|
}, |
|
{ |
|
"epoch": 1.8066825775656326, |
|
"grad_norm": 1.5090585947036743, |
|
"learning_rate": 2.533271992352659e-06, |
|
"loss": 1.4224, |
|
"step": 2271 |
|
}, |
|
{ |
|
"epoch": 1.8090692124105012, |
|
"grad_norm": 1.2901520729064941, |
|
"learning_rate": 2.4716255306108605e-06, |
|
"loss": 1.3893, |
|
"step": 2274 |
|
}, |
|
{ |
|
"epoch": 1.81145584725537, |
|
"grad_norm": 1.3586432933807373, |
|
"learning_rate": 2.4107193999922286e-06, |
|
"loss": 1.3146, |
|
"step": 2277 |
|
}, |
|
{ |
|
"epoch": 1.8138424821002386, |
|
"grad_norm": 1.2898615598678589, |
|
"learning_rate": 2.3505545492080395e-06, |
|
"loss": 1.2849, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 1.8162291169451072, |
|
"grad_norm": 1.3389803171157837, |
|
"learning_rate": 2.291131915422917e-06, |
|
"loss": 1.3749, |
|
"step": 2283 |
|
}, |
|
{ |
|
"epoch": 1.8186157517899761, |
|
"grad_norm": 1.3909133672714233, |
|
"learning_rate": 2.2324524242402613e-06, |
|
"loss": 1.4045, |
|
"step": 2286 |
|
}, |
|
{ |
|
"epoch": 1.8210023866348448, |
|
"grad_norm": 1.992772102355957, |
|
"learning_rate": 2.1745169896878414e-06, |
|
"loss": 1.3947, |
|
"step": 2289 |
|
}, |
|
{ |
|
"epoch": 1.8233890214797137, |
|
"grad_norm": 1.3467578887939453, |
|
"learning_rate": 2.117326514203527e-06, |
|
"loss": 1.4358, |
|
"step": 2292 |
|
}, |
|
{ |
|
"epoch": 1.8257756563245824, |
|
"grad_norm": 1.316362738609314, |
|
"learning_rate": 2.0608818886212576e-06, |
|
"loss": 1.3924, |
|
"step": 2295 |
|
}, |
|
{ |
|
"epoch": 1.828162291169451, |
|
"grad_norm": 1.303235411643982, |
|
"learning_rate": 2.0051839921571448e-06, |
|
"loss": 1.4439, |
|
"step": 2298 |
|
}, |
|
{ |
|
"epoch": 1.8305489260143197, |
|
"grad_norm": 1.4706928730010986, |
|
"learning_rate": 1.9502336923958255e-06, |
|
"loss": 1.2834, |
|
"step": 2301 |
|
}, |
|
{ |
|
"epoch": 1.8329355608591884, |
|
"grad_norm": 1.2729629278182983, |
|
"learning_rate": 1.8960318452768577e-06, |
|
"loss": 1.3582, |
|
"step": 2304 |
|
}, |
|
{ |
|
"epoch": 1.8353221957040573, |
|
"grad_norm": 1.5145806074142456, |
|
"learning_rate": 1.8425792950814868e-06, |
|
"loss": 1.492, |
|
"step": 2307 |
|
}, |
|
{ |
|
"epoch": 1.837708830548926, |
|
"grad_norm": 1.286137342453003, |
|
"learning_rate": 1.7898768744194162e-06, |
|
"loss": 1.4031, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 1.8400954653937949, |
|
"grad_norm": 1.309260368347168, |
|
"learning_rate": 1.7379254042158955e-06, |
|
"loss": 1.3831, |
|
"step": 2313 |
|
}, |
|
{ |
|
"epoch": 1.8424821002386635, |
|
"grad_norm": 1.5175321102142334, |
|
"learning_rate": 1.6867256936989096e-06, |
|
"loss": 1.4786, |
|
"step": 2316 |
|
}, |
|
{ |
|
"epoch": 1.8448687350835322, |
|
"grad_norm": 1.2964720726013184, |
|
"learning_rate": 1.6362785403865488e-06, |
|
"loss": 1.398, |
|
"step": 2319 |
|
}, |
|
{ |
|
"epoch": 1.847255369928401, |
|
"grad_norm": 1.2901564836502075, |
|
"learning_rate": 1.5865847300746417e-06, |
|
"loss": 1.3338, |
|
"step": 2322 |
|
}, |
|
{ |
|
"epoch": 1.8496420047732696, |
|
"grad_norm": 1.3462107181549072, |
|
"learning_rate": 1.5376450368244589e-06, |
|
"loss": 1.3809, |
|
"step": 2325 |
|
}, |
|
{ |
|
"epoch": 1.8520286396181385, |
|
"grad_norm": 1.3782678842544556, |
|
"learning_rate": 1.4894602229506892e-06, |
|
"loss": 1.3993, |
|
"step": 2328 |
|
}, |
|
{ |
|
"epoch": 1.8544152744630071, |
|
"grad_norm": 1.1769006252288818, |
|
"learning_rate": 1.4420310390095615e-06, |
|
"loss": 1.291, |
|
"step": 2331 |
|
}, |
|
{ |
|
"epoch": 1.856801909307876, |
|
"grad_norm": 1.279466152191162, |
|
"learning_rate": 1.3953582237871521e-06, |
|
"loss": 1.4328, |
|
"step": 2334 |
|
}, |
|
{ |
|
"epoch": 1.8591885441527447, |
|
"grad_norm": 1.3210725784301758, |
|
"learning_rate": 1.3494425042878622e-06, |
|
"loss": 1.4165, |
|
"step": 2337 |
|
}, |
|
{ |
|
"epoch": 1.8615751789976134, |
|
"grad_norm": 1.393333911895752, |
|
"learning_rate": 1.3042845957231153e-06, |
|
"loss": 1.3581, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 1.863961813842482, |
|
"grad_norm": 1.3860208988189697, |
|
"learning_rate": 1.2598852015001994e-06, |
|
"loss": 1.3974, |
|
"step": 2343 |
|
}, |
|
{ |
|
"epoch": 1.8663484486873507, |
|
"grad_norm": 1.3636202812194824, |
|
"learning_rate": 1.2162450132113201e-06, |
|
"loss": 1.5009, |
|
"step": 2346 |
|
}, |
|
{ |
|
"epoch": 1.8687350835322196, |
|
"grad_norm": 1.2739925384521484, |
|
"learning_rate": 1.1733647106228375e-06, |
|
"loss": 1.4206, |
|
"step": 2349 |
|
}, |
|
{ |
|
"epoch": 1.8711217183770883, |
|
"grad_norm": 2.2390100955963135, |
|
"learning_rate": 1.1312449616646403e-06, |
|
"loss": 1.3318, |
|
"step": 2352 |
|
}, |
|
{ |
|
"epoch": 1.8735083532219572, |
|
"grad_norm": 1.1843377351760864, |
|
"learning_rate": 1.0898864224197946e-06, |
|
"loss": 1.289, |
|
"step": 2355 |
|
}, |
|
{ |
|
"epoch": 1.8758949880668259, |
|
"grad_norm": 1.2790746688842773, |
|
"learning_rate": 1.049289737114273e-06, |
|
"loss": 1.4026, |
|
"step": 2358 |
|
}, |
|
{ |
|
"epoch": 1.8782816229116945, |
|
"grad_norm": 1.1583595275878906, |
|
"learning_rate": 1.009455538106968e-06, |
|
"loss": 1.2771, |
|
"step": 2361 |
|
}, |
|
{ |
|
"epoch": 1.8806682577565632, |
|
"grad_norm": 4.7106781005859375, |
|
"learning_rate": 9.703844458797962e-07, |
|
"loss": 1.4771, |
|
"step": 2364 |
|
}, |
|
{ |
|
"epoch": 1.8830548926014319, |
|
"grad_norm": 1.3764762878417969, |
|
"learning_rate": 9.320770690280645e-07, |
|
"loss": 1.4295, |
|
"step": 2367 |
|
}, |
|
{ |
|
"epoch": 1.8854415274463006, |
|
"grad_norm": 1.3544838428497314, |
|
"learning_rate": 8.945340042509797e-07, |
|
"loss": 1.405, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 1.8878281622911695, |
|
"grad_norm": 1.3262426853179932, |
|
"learning_rate": 8.577558363423554e-07, |
|
"loss": 1.4135, |
|
"step": 2373 |
|
}, |
|
{ |
|
"epoch": 1.8902147971360383, |
|
"grad_norm": 1.2534743547439575, |
|
"learning_rate": 8.217431381815077e-07, |
|
"loss": 1.4168, |
|
"step": 2376 |
|
}, |
|
{ |
|
"epoch": 1.892601431980907, |
|
"grad_norm": 1.4368257522583008, |
|
"learning_rate": 7.864964707243072e-07, |
|
"loss": 1.3518, |
|
"step": 2379 |
|
}, |
|
{ |
|
"epoch": 1.8949880668257757, |
|
"grad_norm": 1.3582005500793457, |
|
"learning_rate": 7.520163829944804e-07, |
|
"loss": 1.3315, |
|
"step": 2382 |
|
}, |
|
{ |
|
"epoch": 1.8973747016706444, |
|
"grad_norm": 1.4341644048690796, |
|
"learning_rate": 7.183034120750221e-07, |
|
"loss": 1.3689, |
|
"step": 2385 |
|
}, |
|
{ |
|
"epoch": 1.899761336515513, |
|
"grad_norm": 1.368857741355896, |
|
"learning_rate": 6.85358083099863e-07, |
|
"loss": 1.384, |
|
"step": 2388 |
|
}, |
|
{ |
|
"epoch": 1.9021479713603817, |
|
"grad_norm": 1.307237148284912, |
|
"learning_rate": 6.531809092456598e-07, |
|
"loss": 1.3101, |
|
"step": 2391 |
|
}, |
|
{ |
|
"epoch": 1.9045346062052506, |
|
"grad_norm": 1.274276614189148, |
|
"learning_rate": 6.217723917238128e-07, |
|
"loss": 1.4943, |
|
"step": 2394 |
|
}, |
|
{ |
|
"epoch": 1.9069212410501193, |
|
"grad_norm": 1.3081694841384888, |
|
"learning_rate": 5.911330197726661e-07, |
|
"loss": 1.3365, |
|
"step": 2397 |
|
}, |
|
{ |
|
"epoch": 1.9093078758949882, |
|
"grad_norm": 1.2196581363677979, |
|
"learning_rate": 5.612632706498755e-07, |
|
"loss": 1.3927, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.9116945107398569, |
|
"grad_norm": 1.33291494846344, |
|
"learning_rate": 5.321636096249749e-07, |
|
"loss": 1.4337, |
|
"step": 2403 |
|
}, |
|
{ |
|
"epoch": 1.9140811455847255, |
|
"grad_norm": 1.355838418006897, |
|
"learning_rate": 5.038344899721436e-07, |
|
"loss": 1.3511, |
|
"step": 2406 |
|
}, |
|
{ |
|
"epoch": 1.9164677804295942, |
|
"grad_norm": 1.2884796857833862, |
|
"learning_rate": 4.762763529631342e-07, |
|
"loss": 1.3787, |
|
"step": 2409 |
|
}, |
|
{ |
|
"epoch": 1.9188544152744629, |
|
"grad_norm": 1.4186152219772339, |
|
"learning_rate": 4.4948962786039437e-07, |
|
"loss": 1.4141, |
|
"step": 2412 |
|
}, |
|
{ |
|
"epoch": 1.9212410501193318, |
|
"grad_norm": 1.1370840072631836, |
|
"learning_rate": 4.234747319103949e-07, |
|
"loss": 1.2792, |
|
"step": 2415 |
|
}, |
|
{ |
|
"epoch": 1.9236276849642004, |
|
"grad_norm": 1.3362590074539185, |
|
"learning_rate": 3.9823207033710676e-07, |
|
"loss": 1.43, |
|
"step": 2418 |
|
}, |
|
{ |
|
"epoch": 1.9260143198090693, |
|
"grad_norm": 1.2540643215179443, |
|
"learning_rate": 3.737620363357286e-07, |
|
"loss": 1.2947, |
|
"step": 2421 |
|
}, |
|
{ |
|
"epoch": 1.928400954653938, |
|
"grad_norm": 1.3183681964874268, |
|
"learning_rate": 3.5006501106651937e-07, |
|
"loss": 1.3768, |
|
"step": 2424 |
|
}, |
|
{ |
|
"epoch": 1.9307875894988067, |
|
"grad_norm": 1.2523298263549805, |
|
"learning_rate": 3.2714136364888073e-07, |
|
"loss": 1.3564, |
|
"step": 2427 |
|
}, |
|
{ |
|
"epoch": 1.9331742243436754, |
|
"grad_norm": 1.7795356512069702, |
|
"learning_rate": 3.0499145115561176e-07, |
|
"loss": 1.518, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 1.935560859188544, |
|
"grad_norm": 1.2850757837295532, |
|
"learning_rate": 2.836156186073413e-07, |
|
"loss": 1.3588, |
|
"step": 2433 |
|
}, |
|
{ |
|
"epoch": 1.937947494033413, |
|
"grad_norm": 1.30345618724823, |
|
"learning_rate": 2.630141989671542e-07, |
|
"loss": 1.3733, |
|
"step": 2436 |
|
}, |
|
{ |
|
"epoch": 1.9403341288782816, |
|
"grad_norm": 1.3604350090026855, |
|
"learning_rate": 2.431875131354011e-07, |
|
"loss": 1.4814, |
|
"step": 2439 |
|
}, |
|
{ |
|
"epoch": 1.9427207637231505, |
|
"grad_norm": 1.4250744581222534, |
|
"learning_rate": 2.2413586994470825e-07, |
|
"loss": 1.3531, |
|
"step": 2442 |
|
}, |
|
{ |
|
"epoch": 1.9451073985680192, |
|
"grad_norm": 1.2653945684432983, |
|
"learning_rate": 2.0585956615515323e-07, |
|
"loss": 1.3951, |
|
"step": 2445 |
|
}, |
|
{ |
|
"epoch": 1.9474940334128878, |
|
"grad_norm": 1.3960016965866089, |
|
"learning_rate": 1.8835888644966325e-07, |
|
"loss": 1.3927, |
|
"step": 2448 |
|
}, |
|
{ |
|
"epoch": 1.9498806682577565, |
|
"grad_norm": 1.4156994819641113, |
|
"learning_rate": 1.7163410342956875e-07, |
|
"loss": 1.4211, |
|
"step": 2451 |
|
}, |
|
{ |
|
"epoch": 1.9522673031026252, |
|
"grad_norm": 1.5609006881713867, |
|
"learning_rate": 1.5568547761034004e-07, |
|
"loss": 1.3577, |
|
"step": 2454 |
|
}, |
|
{ |
|
"epoch": 1.9546539379474939, |
|
"grad_norm": 1.5074419975280762, |
|
"learning_rate": 1.4051325741756828e-07, |
|
"loss": 1.4627, |
|
"step": 2457 |
|
}, |
|
{ |
|
"epoch": 1.9570405727923628, |
|
"grad_norm": 1.231116771697998, |
|
"learning_rate": 1.2611767918306316e-07, |
|
"loss": 1.3873, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 1.9594272076372317, |
|
"grad_norm": 1.2997921705245972, |
|
"learning_rate": 1.1249896714117802e-07, |
|
"loss": 1.3963, |
|
"step": 2463 |
|
}, |
|
{ |
|
"epoch": 1.9618138424821003, |
|
"grad_norm": 1.2703529596328735, |
|
"learning_rate": 9.965733342532924e-08, |
|
"loss": 1.371, |
|
"step": 2466 |
|
}, |
|
{ |
|
"epoch": 1.964200477326969, |
|
"grad_norm": 1.2716647386550903, |
|
"learning_rate": 8.759297806469335e-08, |
|
"loss": 1.3068, |
|
"step": 2469 |
|
}, |
|
{ |
|
"epoch": 1.9665871121718377, |
|
"grad_norm": 1.36246657371521, |
|
"learning_rate": 7.630608898105962e-08, |
|
"loss": 1.3863, |
|
"step": 2472 |
|
}, |
|
{ |
|
"epoch": 1.9689737470167064, |
|
"grad_norm": 1.224776029586792, |
|
"learning_rate": 6.579684198594338e-08, |
|
"loss": 1.3254, |
|
"step": 2475 |
|
}, |
|
{ |
|
"epoch": 1.971360381861575, |
|
"grad_norm": 1.215903878211975, |
|
"learning_rate": 5.606540077782163e-08, |
|
"loss": 1.3006, |
|
"step": 2478 |
|
}, |
|
{ |
|
"epoch": 1.973747016706444, |
|
"grad_norm": 1.5066276788711548, |
|
"learning_rate": 4.711191693959616e-08, |
|
"loss": 1.3676, |
|
"step": 2481 |
|
}, |
|
{ |
|
"epoch": 1.9761336515513126, |
|
"grad_norm": 1.3745481967926025, |
|
"learning_rate": 3.893652993621766e-08, |
|
"loss": 1.372, |
|
"step": 2484 |
|
}, |
|
{ |
|
"epoch": 1.9785202863961815, |
|
"grad_norm": 1.4172685146331787, |
|
"learning_rate": 3.1539367112543014e-08, |
|
"loss": 1.482, |
|
"step": 2487 |
|
}, |
|
{ |
|
"epoch": 1.9809069212410502, |
|
"grad_norm": 1.3960031270980835, |
|
"learning_rate": 2.4920543691309138e-08, |
|
"loss": 1.3987, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 1.9832935560859188, |
|
"grad_norm": 1.2992031574249268, |
|
"learning_rate": 1.9080162771378808e-08, |
|
"loss": 1.3605, |
|
"step": 2493 |
|
}, |
|
{ |
|
"epoch": 1.9856801909307875, |
|
"grad_norm": 1.2885679006576538, |
|
"learning_rate": 1.4018315326103094e-08, |
|
"loss": 1.3816, |
|
"step": 2496 |
|
}, |
|
{ |
|
"epoch": 1.9880668257756562, |
|
"grad_norm": 1.2640992403030396, |
|
"learning_rate": 9.735080201922487e-09, |
|
"loss": 1.3142, |
|
"step": 2499 |
|
}, |
|
{ |
|
"epoch": 1.990453460620525, |
|
"grad_norm": 1.2508050203323364, |
|
"learning_rate": 6.2305241171345395e-09, |
|
"loss": 1.4008, |
|
"step": 2502 |
|
}, |
|
{ |
|
"epoch": 1.9928400954653938, |
|
"grad_norm": 1.2504699230194092, |
|
"learning_rate": 3.5047016608613647e-09, |
|
"loss": 1.3362, |
|
"step": 2505 |
|
}, |
|
{ |
|
"epoch": 1.9952267303102627, |
|
"grad_norm": 1.4421279430389404, |
|
"learning_rate": 1.5576552921836574e-09, |
|
"loss": 1.4037, |
|
"step": 2508 |
|
}, |
|
{ |
|
"epoch": 1.9976133651551313, |
|
"grad_norm": 1.2950758934020996, |
|
"learning_rate": 3.89415339491217e-10, |
|
"loss": 1.3817, |
|
"step": 2511 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 1.6945881843566895, |
|
"learning_rate": 0.0, |
|
"loss": 1.4222, |
|
"step": 2514 |
|
} |
|
], |
|
"logging_steps": 3, |
|
"max_steps": 2514, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 1257, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 4.254235526619464e+17, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|