|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.0, |
|
"eval_steps": 500, |
|
"global_step": 3022, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0026472534745201853, |
|
"grad_norm": 24.50491714477539, |
|
"learning_rate": 2.631578947368421e-06, |
|
"loss": 6.5473, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.005294506949040371, |
|
"grad_norm": 17.426511764526367, |
|
"learning_rate": 5.263157894736842e-06, |
|
"loss": 6.2116, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.007941760423560556, |
|
"grad_norm": 6.35976505279541, |
|
"learning_rate": 7.894736842105263e-06, |
|
"loss": 5.7967, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.010589013898080741, |
|
"grad_norm": 5.454939842224121, |
|
"learning_rate": 1.0526315789473684e-05, |
|
"loss": 5.3365, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.013236267372600927, |
|
"grad_norm": 4.607099533081055, |
|
"learning_rate": 1.3157894736842106e-05, |
|
"loss": 4.7105, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01588352084712111, |
|
"grad_norm": 3.6498019695281982, |
|
"learning_rate": 1.5789473684210526e-05, |
|
"loss": 4.286, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.018530774321641297, |
|
"grad_norm": 4.196900844573975, |
|
"learning_rate": 1.8421052631578947e-05, |
|
"loss": 4.3134, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.021178027796161483, |
|
"grad_norm": 3.617469072341919, |
|
"learning_rate": 2.105263157894737e-05, |
|
"loss": 3.7494, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.02382528127068167, |
|
"grad_norm": 3.05267333984375, |
|
"learning_rate": 2.368421052631579e-05, |
|
"loss": 3.8046, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.026472534745201854, |
|
"grad_norm": 2.607614517211914, |
|
"learning_rate": 2.6315789473684212e-05, |
|
"loss": 3.385, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.02911978821972204, |
|
"grad_norm": 2.536888837814331, |
|
"learning_rate": 2.8947368421052634e-05, |
|
"loss": 3.3516, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.03176704169424222, |
|
"grad_norm": 2.315871000289917, |
|
"learning_rate": 3.157894736842105e-05, |
|
"loss": 3.0795, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.03441429516876241, |
|
"grad_norm": 2.3058571815490723, |
|
"learning_rate": 3.421052631578947e-05, |
|
"loss": 3.0708, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.037061548643282594, |
|
"grad_norm": 2.067796230316162, |
|
"learning_rate": 3.6842105263157895e-05, |
|
"loss": 2.8311, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.03970880211780278, |
|
"grad_norm": 1.9578440189361572, |
|
"learning_rate": 3.9473684210526316e-05, |
|
"loss": 2.696, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.042356055592322965, |
|
"grad_norm": 2.043933629989624, |
|
"learning_rate": 4.210526315789474e-05, |
|
"loss": 2.7501, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.04500330906684315, |
|
"grad_norm": 1.82830810546875, |
|
"learning_rate": 4.473684210526316e-05, |
|
"loss": 2.5058, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.04765056254136334, |
|
"grad_norm": 1.8841806650161743, |
|
"learning_rate": 4.736842105263158e-05, |
|
"loss": 2.5708, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.05029781601588352, |
|
"grad_norm": 1.9775539636611938, |
|
"learning_rate": 5e-05, |
|
"loss": 2.6332, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.05294506949040371, |
|
"grad_norm": 1.7908610105514526, |
|
"learning_rate": 5.2631578947368424e-05, |
|
"loss": 2.5441, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.05559232296492389, |
|
"grad_norm": 1.977647066116333, |
|
"learning_rate": 5.526315789473685e-05, |
|
"loss": 2.3617, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.05823957643944408, |
|
"grad_norm": 2.008470296859741, |
|
"learning_rate": 5.789473684210527e-05, |
|
"loss": 2.3994, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.06088682991396426, |
|
"grad_norm": 2.070720911026001, |
|
"learning_rate": 6.052631578947369e-05, |
|
"loss": 2.3509, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.06353408338848444, |
|
"grad_norm": 2.0442869663238525, |
|
"learning_rate": 6.31578947368421e-05, |
|
"loss": 2.35, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.06618133686300463, |
|
"grad_norm": 1.8274725675582886, |
|
"learning_rate": 6.578947368421054e-05, |
|
"loss": 2.2802, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.06882859033752482, |
|
"grad_norm": 1.9744892120361328, |
|
"learning_rate": 6.842105263157895e-05, |
|
"loss": 2.4711, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.071475843812045, |
|
"grad_norm": 1.881946086883545, |
|
"learning_rate": 7.105263157894737e-05, |
|
"loss": 2.3495, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.07412309728656519, |
|
"grad_norm": 1.7632906436920166, |
|
"learning_rate": 7.368421052631579e-05, |
|
"loss": 2.1906, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.07677035076108538, |
|
"grad_norm": 1.8465447425842285, |
|
"learning_rate": 7.631578947368422e-05, |
|
"loss": 2.4193, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.07941760423560557, |
|
"grad_norm": 1.978273868560791, |
|
"learning_rate": 7.894736842105263e-05, |
|
"loss": 2.3229, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.08206485771012574, |
|
"grad_norm": 1.9878270626068115, |
|
"learning_rate": 8.157894736842105e-05, |
|
"loss": 2.3028, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.08471211118464593, |
|
"grad_norm": 1.7065322399139404, |
|
"learning_rate": 8.421052631578948e-05, |
|
"loss": 2.244, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.08735936465916612, |
|
"grad_norm": 1.8170701265335083, |
|
"learning_rate": 8.68421052631579e-05, |
|
"loss": 2.1112, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.0900066181336863, |
|
"grad_norm": 1.9288476705551147, |
|
"learning_rate": 8.947368421052632e-05, |
|
"loss": 2.3551, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.09265387160820648, |
|
"grad_norm": 1.8695253133773804, |
|
"learning_rate": 9.210526315789474e-05, |
|
"loss": 2.2814, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.09530112508272667, |
|
"grad_norm": 1.7066093683242798, |
|
"learning_rate": 9.473684210526316e-05, |
|
"loss": 1.9036, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.09794837855724686, |
|
"grad_norm": 1.8588757514953613, |
|
"learning_rate": 9.736842105263158e-05, |
|
"loss": 2.0139, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.10059563203176704, |
|
"grad_norm": 1.789518117904663, |
|
"learning_rate": 0.0001, |
|
"loss": 2.1809, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.10324288550628723, |
|
"grad_norm": 1.9242740869522095, |
|
"learning_rate": 9.999952071344157e-05, |
|
"loss": 2.301, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.10589013898080742, |
|
"grad_norm": 1.7974549531936646, |
|
"learning_rate": 9.999808286295485e-05, |
|
"loss": 2.2312, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.10853739245532759, |
|
"grad_norm": 1.7276233434677124, |
|
"learning_rate": 9.999568647610555e-05, |
|
"loss": 2.1109, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.11118464592984778, |
|
"grad_norm": 1.8286519050598145, |
|
"learning_rate": 9.999233159883593e-05, |
|
"loss": 2.0782, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.11383189940436797, |
|
"grad_norm": 1.919313907623291, |
|
"learning_rate": 9.998801829546386e-05, |
|
"loss": 2.0693, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.11647915287888816, |
|
"grad_norm": 1.6544960737228394, |
|
"learning_rate": 9.998274664868173e-05, |
|
"loss": 2.0982, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.11912640635340833, |
|
"grad_norm": 1.8223872184753418, |
|
"learning_rate": 9.997651675955466e-05, |
|
"loss": 2.1379, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.12177365982792852, |
|
"grad_norm": 1.7743052244186401, |
|
"learning_rate": 9.996932874751877e-05, |
|
"loss": 2.0637, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.12442091330244871, |
|
"grad_norm": 1.7228261232376099, |
|
"learning_rate": 9.996118275037873e-05, |
|
"loss": 2.1696, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.1270681667769689, |
|
"grad_norm": 1.6266913414001465, |
|
"learning_rate": 9.995207892430524e-05, |
|
"loss": 2.1247, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.12971542025148908, |
|
"grad_norm": 1.8206615447998047, |
|
"learning_rate": 9.994201744383196e-05, |
|
"loss": 2.1831, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.13236267372600927, |
|
"grad_norm": 1.943579912185669, |
|
"learning_rate": 9.993099850185216e-05, |
|
"loss": 1.9262, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.13500992720052946, |
|
"grad_norm": 1.89098060131073, |
|
"learning_rate": 9.991902230961511e-05, |
|
"loss": 2.2636, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.13765718067504965, |
|
"grad_norm": 1.8418017625808716, |
|
"learning_rate": 9.99060890967219e-05, |
|
"loss": 2.2454, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.14030443414956983, |
|
"grad_norm": 1.7433375120162964, |
|
"learning_rate": 9.989219911112113e-05, |
|
"loss": 2.2591, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.14295168762409, |
|
"grad_norm": 1.885964035987854, |
|
"learning_rate": 9.987735261910417e-05, |
|
"loss": 1.9402, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.14559894109861019, |
|
"grad_norm": 1.626397728919983, |
|
"learning_rate": 9.986154990529995e-05, |
|
"loss": 2.119, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.14824619457313037, |
|
"grad_norm": 1.5490047931671143, |
|
"learning_rate": 9.984479127266961e-05, |
|
"loss": 1.8635, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.15089344804765056, |
|
"grad_norm": 1.5588316917419434, |
|
"learning_rate": 9.982707704250065e-05, |
|
"loss": 1.8135, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.15354070152217075, |
|
"grad_norm": 1.9416462182998657, |
|
"learning_rate": 9.980840755440075e-05, |
|
"loss": 2.2288, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.15618795499669094, |
|
"grad_norm": 1.5774728059768677, |
|
"learning_rate": 9.978878316629133e-05, |
|
"loss": 1.9254, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.15883520847121113, |
|
"grad_norm": 1.6661707162857056, |
|
"learning_rate": 9.976820425440058e-05, |
|
"loss": 2.0111, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.1614824619457313, |
|
"grad_norm": 1.5805509090423584, |
|
"learning_rate": 9.974667121325634e-05, |
|
"loss": 2.0657, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.16412971542025148, |
|
"grad_norm": 1.7854478359222412, |
|
"learning_rate": 9.972418445567851e-05, |
|
"loss": 1.8586, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.16677696889477167, |
|
"grad_norm": 1.61441171169281, |
|
"learning_rate": 9.97007444127711e-05, |
|
"loss": 1.9234, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.16942422236929186, |
|
"grad_norm": 2.154454469680786, |
|
"learning_rate": 9.967635153391401e-05, |
|
"loss": 1.949, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.17207147584381205, |
|
"grad_norm": 1.5182636976242065, |
|
"learning_rate": 9.965100628675441e-05, |
|
"loss": 2.013, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.17471872931833224, |
|
"grad_norm": 1.751714825630188, |
|
"learning_rate": 9.962470915719775e-05, |
|
"loss": 1.9629, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.17736598279285243, |
|
"grad_norm": 1.5807703733444214, |
|
"learning_rate": 9.959746064939846e-05, |
|
"loss": 1.8705, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.1800132362673726, |
|
"grad_norm": 1.7142225503921509, |
|
"learning_rate": 9.956926128575026e-05, |
|
"loss": 2.0033, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.18266048974189278, |
|
"grad_norm": 1.555530309677124, |
|
"learning_rate": 9.954011160687622e-05, |
|
"loss": 1.8995, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.18530774321641297, |
|
"grad_norm": 1.5679693222045898, |
|
"learning_rate": 9.951001217161829e-05, |
|
"loss": 2.042, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.18795499669093316, |
|
"grad_norm": 1.6399418115615845, |
|
"learning_rate": 9.947896355702666e-05, |
|
"loss": 2.0388, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.19060225016545335, |
|
"grad_norm": 1.7505602836608887, |
|
"learning_rate": 9.944696635834867e-05, |
|
"loss": 1.9648, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.19324950363997354, |
|
"grad_norm": 1.4888848066329956, |
|
"learning_rate": 9.941402118901744e-05, |
|
"loss": 1.8595, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.19589675711449372, |
|
"grad_norm": 1.4739277362823486, |
|
"learning_rate": 9.938012868064e-05, |
|
"loss": 1.7959, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.1985440105890139, |
|
"grad_norm": 1.5393471717834473, |
|
"learning_rate": 9.934528948298533e-05, |
|
"loss": 1.9469, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.20119126406353408, |
|
"grad_norm": 1.5673627853393555, |
|
"learning_rate": 9.930950426397179e-05, |
|
"loss": 1.9332, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.20383851753805426, |
|
"grad_norm": 1.6461111307144165, |
|
"learning_rate": 9.927277370965435e-05, |
|
"loss": 1.8055, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.20648577101257445, |
|
"grad_norm": 1.5950462818145752, |
|
"learning_rate": 9.923509852421145e-05, |
|
"loss": 1.8414, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.20913302448709464, |
|
"grad_norm": 1.433727741241455, |
|
"learning_rate": 9.919647942993148e-05, |
|
"loss": 1.9514, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.21178027796161483, |
|
"grad_norm": 1.445776343345642, |
|
"learning_rate": 9.915691716719898e-05, |
|
"loss": 1.7297, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.21442753143613502, |
|
"grad_norm": 1.9325745105743408, |
|
"learning_rate": 9.911641249448036e-05, |
|
"loss": 1.9855, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.21707478491065518, |
|
"grad_norm": 1.494813323020935, |
|
"learning_rate": 9.907496618830942e-05, |
|
"loss": 1.7916, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.21972203838517537, |
|
"grad_norm": 1.4863932132720947, |
|
"learning_rate": 9.903257904327249e-05, |
|
"loss": 1.8029, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.22236929185969556, |
|
"grad_norm": 1.594827651977539, |
|
"learning_rate": 9.898925187199308e-05, |
|
"loss": 1.9516, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.22501654533421575, |
|
"grad_norm": 1.5738781690597534, |
|
"learning_rate": 9.894498550511646e-05, |
|
"loss": 1.8997, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.22766379880873594, |
|
"grad_norm": 1.5598024129867554, |
|
"learning_rate": 9.88997807912936e-05, |
|
"loss": 1.9249, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.23031105228325613, |
|
"grad_norm": 1.4761321544647217, |
|
"learning_rate": 9.885363859716497e-05, |
|
"loss": 1.7571, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.23295830575777632, |
|
"grad_norm": 1.4266904592514038, |
|
"learning_rate": 9.88065598073439e-05, |
|
"loss": 1.9811, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.23560555923229648, |
|
"grad_norm": 1.5371057987213135, |
|
"learning_rate": 9.875854532439964e-05, |
|
"loss": 1.8021, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.23825281270681667, |
|
"grad_norm": 1.380096673965454, |
|
"learning_rate": 9.870959606884004e-05, |
|
"loss": 1.8223, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.24090006618133686, |
|
"grad_norm": 1.632664680480957, |
|
"learning_rate": 9.865971297909393e-05, |
|
"loss": 2.006, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.24354731965585705, |
|
"grad_norm": 1.3765276670455933, |
|
"learning_rate": 9.860889701149307e-05, |
|
"loss": 1.7893, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.24619457313037724, |
|
"grad_norm": 1.5789958238601685, |
|
"learning_rate": 9.855714914025384e-05, |
|
"loss": 1.9381, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.24884182660489743, |
|
"grad_norm": 1.8294042348861694, |
|
"learning_rate": 9.850447035745866e-05, |
|
"loss": 1.8584, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.2514890800794176, |
|
"grad_norm": 1.5388972759246826, |
|
"learning_rate": 9.845086167303679e-05, |
|
"loss": 1.8763, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.2541363335539378, |
|
"grad_norm": 1.5301390886306763, |
|
"learning_rate": 9.839632411474513e-05, |
|
"loss": 2.0612, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.256783587028458, |
|
"grad_norm": 1.546277642250061, |
|
"learning_rate": 9.83408587281484e-05, |
|
"loss": 1.9085, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.25943084050297816, |
|
"grad_norm": 1.5818853378295898, |
|
"learning_rate": 9.828446657659918e-05, |
|
"loss": 2.0181, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.26207809397749837, |
|
"grad_norm": 1.2648255825042725, |
|
"learning_rate": 9.82271487412175e-05, |
|
"loss": 1.6947, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.26472534745201853, |
|
"grad_norm": 1.541934847831726, |
|
"learning_rate": 9.816890632087006e-05, |
|
"loss": 1.8053, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.2673726009265387, |
|
"grad_norm": 1.5966472625732422, |
|
"learning_rate": 9.810974043214922e-05, |
|
"loss": 1.8733, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.2700198544010589, |
|
"grad_norm": 1.5871154069900513, |
|
"learning_rate": 9.804965220935161e-05, |
|
"loss": 1.896, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.2726671078755791, |
|
"grad_norm": 1.4850573539733887, |
|
"learning_rate": 9.798864280445632e-05, |
|
"loss": 1.8494, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.2753143613500993, |
|
"grad_norm": 1.4737725257873535, |
|
"learning_rate": 9.792671338710285e-05, |
|
"loss": 1.8145, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.27796161482461945, |
|
"grad_norm": 1.5895408391952515, |
|
"learning_rate": 9.786386514456872e-05, |
|
"loss": 1.9279, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.28060886829913967, |
|
"grad_norm": 1.522838830947876, |
|
"learning_rate": 9.780009928174661e-05, |
|
"loss": 1.9103, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.28325612177365983, |
|
"grad_norm": 1.4890238046646118, |
|
"learning_rate": 9.773541702112137e-05, |
|
"loss": 1.9306, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.28590337524818, |
|
"grad_norm": 1.5047945976257324, |
|
"learning_rate": 9.766981960274653e-05, |
|
"loss": 1.8459, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.2885506287227002, |
|
"grad_norm": 1.4997539520263672, |
|
"learning_rate": 9.760330828422053e-05, |
|
"loss": 1.7442, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.29119788219722037, |
|
"grad_norm": 1.389294981956482, |
|
"learning_rate": 9.753588434066258e-05, |
|
"loss": 1.9077, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.2938451356717406, |
|
"grad_norm": 1.3641945123672485, |
|
"learning_rate": 9.746754906468832e-05, |
|
"loss": 1.8979, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.29649238914626075, |
|
"grad_norm": 1.5315138101577759, |
|
"learning_rate": 9.73983037663849e-05, |
|
"loss": 1.8207, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.29913964262078097, |
|
"grad_norm": 1.5057647228240967, |
|
"learning_rate": 9.732814977328592e-05, |
|
"loss": 1.911, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.3017868960953011, |
|
"grad_norm": 1.368912696838379, |
|
"learning_rate": 9.725708843034605e-05, |
|
"loss": 1.8377, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.3044341495698213, |
|
"grad_norm": 1.389817714691162, |
|
"learning_rate": 9.718512109991514e-05, |
|
"loss": 1.7907, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.3070814030443415, |
|
"grad_norm": 1.7318735122680664, |
|
"learning_rate": 9.711224916171215e-05, |
|
"loss": 1.9412, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.30972865651886167, |
|
"grad_norm": 1.4791710376739502, |
|
"learning_rate": 9.703847401279871e-05, |
|
"loss": 1.7754, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.3123759099933819, |
|
"grad_norm": 1.3618526458740234, |
|
"learning_rate": 9.69637970675523e-05, |
|
"loss": 1.73, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.31502316346790205, |
|
"grad_norm": 1.5649083852767944, |
|
"learning_rate": 9.688821975763918e-05, |
|
"loss": 1.9635, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.31767041694242226, |
|
"grad_norm": 1.3701534271240234, |
|
"learning_rate": 9.681174353198687e-05, |
|
"loss": 1.6581, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.3203176704169424, |
|
"grad_norm": 1.4764872789382935, |
|
"learning_rate": 9.673436985675645e-05, |
|
"loss": 1.794, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.3229649238914626, |
|
"grad_norm": 1.4432624578475952, |
|
"learning_rate": 9.665610021531447e-05, |
|
"loss": 1.9016, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.3256121773659828, |
|
"grad_norm": 1.572975993156433, |
|
"learning_rate": 9.657693610820437e-05, |
|
"loss": 2.035, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.32825943084050296, |
|
"grad_norm": 1.5382163524627686, |
|
"learning_rate": 9.649687905311785e-05, |
|
"loss": 2.0041, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.3309066843150232, |
|
"grad_norm": 1.3413423299789429, |
|
"learning_rate": 9.641593058486574e-05, |
|
"loss": 1.7448, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.33355393778954334, |
|
"grad_norm": 1.4374409914016724, |
|
"learning_rate": 9.633409225534855e-05, |
|
"loss": 1.7816, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 0.33620119126406356, |
|
"grad_norm": 1.4096835851669312, |
|
"learning_rate": 9.625136563352671e-05, |
|
"loss": 1.772, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 0.3388484447385837, |
|
"grad_norm": 2.1890769004821777, |
|
"learning_rate": 9.616775230539057e-05, |
|
"loss": 1.8641, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.3414956982131039, |
|
"grad_norm": 1.4621169567108154, |
|
"learning_rate": 9.608325387392986e-05, |
|
"loss": 1.7406, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.3441429516876241, |
|
"grad_norm": 1.4140963554382324, |
|
"learning_rate": 9.599787195910313e-05, |
|
"loss": 1.6127, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.34679020516214426, |
|
"grad_norm": 1.459409236907959, |
|
"learning_rate": 9.591160819780649e-05, |
|
"loss": 1.7579, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 0.3494374586366645, |
|
"grad_norm": 1.7444220781326294, |
|
"learning_rate": 9.582446424384242e-05, |
|
"loss": 1.8177, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 0.35208471211118464, |
|
"grad_norm": 1.4114232063293457, |
|
"learning_rate": 9.573644176788794e-05, |
|
"loss": 1.7955, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 0.35473196558570486, |
|
"grad_norm": 1.4076716899871826, |
|
"learning_rate": 9.564754245746264e-05, |
|
"loss": 1.9122, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 0.357379219060225, |
|
"grad_norm": 1.4209445714950562, |
|
"learning_rate": 9.555776801689632e-05, |
|
"loss": 1.8108, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.3600264725347452, |
|
"grad_norm": 1.5626829862594604, |
|
"learning_rate": 9.546712016729624e-05, |
|
"loss": 1.9285, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 0.3626737260092654, |
|
"grad_norm": 1.4253438711166382, |
|
"learning_rate": 9.537560064651427e-05, |
|
"loss": 1.6505, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 0.36532097948378556, |
|
"grad_norm": 1.447141170501709, |
|
"learning_rate": 9.528321120911346e-05, |
|
"loss": 1.8303, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 0.3679682329583058, |
|
"grad_norm": 1.4913408756256104, |
|
"learning_rate": 9.51899536263344e-05, |
|
"loss": 1.8382, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 0.37061548643282594, |
|
"grad_norm": 1.5191394090652466, |
|
"learning_rate": 9.509582968606136e-05, |
|
"loss": 1.7477, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.37326273990734615, |
|
"grad_norm": 1.3612414598464966, |
|
"learning_rate": 9.500084119278788e-05, |
|
"loss": 1.7101, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 0.3759099933818663, |
|
"grad_norm": 1.3365185260772705, |
|
"learning_rate": 9.49049899675823e-05, |
|
"loss": 1.8855, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 0.3785572468563865, |
|
"grad_norm": 1.4907687902450562, |
|
"learning_rate": 9.480827784805278e-05, |
|
"loss": 1.8158, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 0.3812045003309067, |
|
"grad_norm": 1.2549834251403809, |
|
"learning_rate": 9.471070668831208e-05, |
|
"loss": 1.6304, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 0.38385175380542685, |
|
"grad_norm": 1.6914743185043335, |
|
"learning_rate": 9.4612278358942e-05, |
|
"loss": 1.6976, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.38649900727994707, |
|
"grad_norm": 1.5349342823028564, |
|
"learning_rate": 9.451299474695754e-05, |
|
"loss": 1.7323, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 0.38914626075446723, |
|
"grad_norm": 1.4379171133041382, |
|
"learning_rate": 9.441285775577075e-05, |
|
"loss": 1.7762, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 0.39179351422898745, |
|
"grad_norm": 1.360475778579712, |
|
"learning_rate": 9.431186930515419e-05, |
|
"loss": 1.7328, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 0.3944407677035076, |
|
"grad_norm": 1.4364429712295532, |
|
"learning_rate": 9.421003133120412e-05, |
|
"loss": 1.7363, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 0.3970880211780278, |
|
"grad_norm": 1.4598385095596313, |
|
"learning_rate": 9.410734578630343e-05, |
|
"loss": 1.6917, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.399735274652548, |
|
"grad_norm": 1.3313078880310059, |
|
"learning_rate": 9.400381463908416e-05, |
|
"loss": 1.8008, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 0.40238252812706815, |
|
"grad_norm": 1.5070075988769531, |
|
"learning_rate": 9.389943987438983e-05, |
|
"loss": 1.669, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 0.40502978160158837, |
|
"grad_norm": 1.3858133554458618, |
|
"learning_rate": 9.379422349323728e-05, |
|
"loss": 1.6599, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 0.40767703507610853, |
|
"grad_norm": 1.3775012493133545, |
|
"learning_rate": 9.368816751277843e-05, |
|
"loss": 1.628, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 0.41032428855062875, |
|
"grad_norm": 1.3733761310577393, |
|
"learning_rate": 9.358127396626147e-05, |
|
"loss": 1.6797, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.4129715420251489, |
|
"grad_norm": 1.760237455368042, |
|
"learning_rate": 9.347354490299205e-05, |
|
"loss": 1.7479, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 0.41561879549966907, |
|
"grad_norm": 1.2483643293380737, |
|
"learning_rate": 9.336498238829384e-05, |
|
"loss": 1.6595, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 0.4182660489741893, |
|
"grad_norm": 2.099116563796997, |
|
"learning_rate": 9.325558850346897e-05, |
|
"loss": 1.6933, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 0.42091330244870945, |
|
"grad_norm": 1.3913215398788452, |
|
"learning_rate": 9.31453653457582e-05, |
|
"loss": 1.6433, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 0.42356055592322966, |
|
"grad_norm": 1.3813973665237427, |
|
"learning_rate": 9.303431502830065e-05, |
|
"loss": 1.6652, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.4262078093977498, |
|
"grad_norm": 1.496819019317627, |
|
"learning_rate": 9.292243968009331e-05, |
|
"loss": 1.747, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 0.42885506287227004, |
|
"grad_norm": 1.37201988697052, |
|
"learning_rate": 9.280974144595018e-05, |
|
"loss": 1.6331, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 0.4315023163467902, |
|
"grad_norm": 1.505353331565857, |
|
"learning_rate": 9.269622248646124e-05, |
|
"loss": 1.7717, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 0.43414956982131037, |
|
"grad_norm": 1.8498897552490234, |
|
"learning_rate": 9.258188497795093e-05, |
|
"loss": 1.6643, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 0.4367968232958306, |
|
"grad_norm": 1.2886799573898315, |
|
"learning_rate": 9.24667311124365e-05, |
|
"loss": 1.777, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.43944407677035074, |
|
"grad_norm": 1.283218502998352, |
|
"learning_rate": 9.23507630975859e-05, |
|
"loss": 1.6958, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 0.44209133024487096, |
|
"grad_norm": 1.3919546604156494, |
|
"learning_rate": 9.223398315667561e-05, |
|
"loss": 1.6515, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 0.4447385837193911, |
|
"grad_norm": 1.4083247184753418, |
|
"learning_rate": 9.211639352854787e-05, |
|
"loss": 1.7531, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 0.44738583719391134, |
|
"grad_norm": 1.2739989757537842, |
|
"learning_rate": 9.199799646756777e-05, |
|
"loss": 1.7694, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 0.4500330906684315, |
|
"grad_norm": 1.4435306787490845, |
|
"learning_rate": 9.187879424358014e-05, |
|
"loss": 1.8044, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.45268034414295166, |
|
"grad_norm": 1.4848833084106445, |
|
"learning_rate": 9.17587891418659e-05, |
|
"loss": 1.6531, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 0.4553275976174719, |
|
"grad_norm": 1.527485966682434, |
|
"learning_rate": 9.163798346309837e-05, |
|
"loss": 1.8783, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 0.45797485109199204, |
|
"grad_norm": 1.2369976043701172, |
|
"learning_rate": 9.151637952329903e-05, |
|
"loss": 1.5479, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 0.46062210456651226, |
|
"grad_norm": 1.4693775177001953, |
|
"learning_rate": 9.139397965379327e-05, |
|
"loss": 1.7891, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 0.4632693580410324, |
|
"grad_norm": 1.6788188219070435, |
|
"learning_rate": 9.127078620116556e-05, |
|
"loss": 1.7637, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.46591661151555264, |
|
"grad_norm": 1.3309741020202637, |
|
"learning_rate": 9.114680152721453e-05, |
|
"loss": 1.6053, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 0.4685638649900728, |
|
"grad_norm": 1.509023904800415, |
|
"learning_rate": 9.102202800890772e-05, |
|
"loss": 1.8784, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 0.47121111846459296, |
|
"grad_norm": 1.3232872486114502, |
|
"learning_rate": 9.089646803833589e-05, |
|
"loss": 1.6745, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 0.4738583719391132, |
|
"grad_norm": 1.3540325164794922, |
|
"learning_rate": 9.077012402266731e-05, |
|
"loss": 1.6668, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 0.47650562541363334, |
|
"grad_norm": 1.3100489377975464, |
|
"learning_rate": 9.064299838410152e-05, |
|
"loss": 1.6188, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.47915287888815355, |
|
"grad_norm": 1.3783172369003296, |
|
"learning_rate": 9.051509355982293e-05, |
|
"loss": 1.6491, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 0.4818001323626737, |
|
"grad_norm": 1.27851402759552, |
|
"learning_rate": 9.038641200195404e-05, |
|
"loss": 1.8925, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 0.48444738583719393, |
|
"grad_norm": 1.4370380640029907, |
|
"learning_rate": 9.025695617750848e-05, |
|
"loss": 1.7996, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 0.4870946393117141, |
|
"grad_norm": 1.4078205823898315, |
|
"learning_rate": 9.012672856834373e-05, |
|
"loss": 1.8554, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 0.48974189278623426, |
|
"grad_norm": 1.3553669452667236, |
|
"learning_rate": 8.999573167111348e-05, |
|
"loss": 1.5417, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.4923891462607545, |
|
"grad_norm": 1.4759166240692139, |
|
"learning_rate": 8.986396799721983e-05, |
|
"loss": 1.6143, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 0.49503639973527463, |
|
"grad_norm": 1.3601372241973877, |
|
"learning_rate": 8.973144007276508e-05, |
|
"loss": 1.7011, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 0.49768365320979485, |
|
"grad_norm": 1.425181269645691, |
|
"learning_rate": 8.959815043850336e-05, |
|
"loss": 1.672, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 0.500330906684315, |
|
"grad_norm": 1.440303921699524, |
|
"learning_rate": 8.946410164979184e-05, |
|
"loss": 1.8008, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 0.5029781601588352, |
|
"grad_norm": 1.4576961994171143, |
|
"learning_rate": 8.932929627654185e-05, |
|
"loss": 1.5234, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.5056254136333554, |
|
"grad_norm": 1.3088816404342651, |
|
"learning_rate": 8.919373690316952e-05, |
|
"loss": 1.701, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 0.5082726671078756, |
|
"grad_norm": 3.7521555423736572, |
|
"learning_rate": 8.905742612854628e-05, |
|
"loss": 1.6714, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 0.5109199205823958, |
|
"grad_norm": 1.4540220499038696, |
|
"learning_rate": 8.892036656594898e-05, |
|
"loss": 1.6276, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 0.513567174056916, |
|
"grad_norm": 1.3043605089187622, |
|
"learning_rate": 8.87825608430099e-05, |
|
"loss": 1.635, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 0.5162144275314361, |
|
"grad_norm": 1.3931020498275757, |
|
"learning_rate": 8.864401160166624e-05, |
|
"loss": 1.5822, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.5188616810059563, |
|
"grad_norm": 1.3738582134246826, |
|
"learning_rate": 8.85047214981096e-05, |
|
"loss": 1.694, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 0.5215089344804765, |
|
"grad_norm": 1.3968422412872314, |
|
"learning_rate": 8.83646932027349e-05, |
|
"loss": 1.6673, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 0.5241561879549967, |
|
"grad_norm": 1.4195423126220703, |
|
"learning_rate": 8.822392940008937e-05, |
|
"loss": 1.5422, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 0.5268034414295168, |
|
"grad_norm": 1.2660058736801147, |
|
"learning_rate": 8.808243278882094e-05, |
|
"loss": 1.4875, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 0.5294506949040371, |
|
"grad_norm": 1.3500608205795288, |
|
"learning_rate": 8.794020608162656e-05, |
|
"loss": 1.6946, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.5320979483785573, |
|
"grad_norm": 1.6274265050888062, |
|
"learning_rate": 8.779725200520021e-05, |
|
"loss": 1.6943, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 0.5347452018530774, |
|
"grad_norm": 1.2186963558197021, |
|
"learning_rate": 8.765357330018056e-05, |
|
"loss": 1.4563, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 0.5373924553275976, |
|
"grad_norm": 1.501142978668213, |
|
"learning_rate": 8.750917272109848e-05, |
|
"loss": 1.6729, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 0.5400397088021178, |
|
"grad_norm": 1.372517466545105, |
|
"learning_rate": 8.736405303632427e-05, |
|
"loss": 1.636, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 0.542686962276638, |
|
"grad_norm": 1.4448741674423218, |
|
"learning_rate": 8.721821702801449e-05, |
|
"loss": 1.6977, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.5453342157511581, |
|
"grad_norm": 1.4774208068847656, |
|
"learning_rate": 8.707166749205866e-05, |
|
"loss": 1.7892, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 0.5479814692256784, |
|
"grad_norm": 1.3137487173080444, |
|
"learning_rate": 8.692440723802571e-05, |
|
"loss": 1.5086, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 0.5506287227001986, |
|
"grad_norm": 1.4480420351028442, |
|
"learning_rate": 8.677643908911007e-05, |
|
"loss": 1.6694, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 0.5532759761747187, |
|
"grad_norm": 1.4660981893539429, |
|
"learning_rate": 8.662776588207747e-05, |
|
"loss": 1.632, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 0.5559232296492389, |
|
"grad_norm": 1.2639222145080566, |
|
"learning_rate": 8.647839046721076e-05, |
|
"loss": 1.5101, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.5585704831237591, |
|
"grad_norm": 1.3556458950042725, |
|
"learning_rate": 8.632831570825508e-05, |
|
"loss": 1.7912, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 0.5612177365982793, |
|
"grad_norm": 1.2261251211166382, |
|
"learning_rate": 8.617754448236298e-05, |
|
"loss": 1.6547, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 0.5638649900727994, |
|
"grad_norm": 1.2850754261016846, |
|
"learning_rate": 8.602607968003935e-05, |
|
"loss": 1.5365, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 0.5665122435473197, |
|
"grad_norm": 1.3346043825149536, |
|
"learning_rate": 8.587392420508598e-05, |
|
"loss": 1.6175, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 0.5691594970218399, |
|
"grad_norm": 1.5381152629852295, |
|
"learning_rate": 8.572108097454578e-05, |
|
"loss": 1.7967, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.57180675049636, |
|
"grad_norm": 1.2237263917922974, |
|
"learning_rate": 8.556755291864701e-05, |
|
"loss": 1.6057, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 0.5744540039708802, |
|
"grad_norm": 1.233619213104248, |
|
"learning_rate": 8.541334298074701e-05, |
|
"loss": 1.7107, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 0.5771012574454004, |
|
"grad_norm": 1.2423778772354126, |
|
"learning_rate": 8.525845411727581e-05, |
|
"loss": 1.4729, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 0.5797485109199206, |
|
"grad_norm": 7.3384480476379395, |
|
"learning_rate": 8.51028892976794e-05, |
|
"loss": 1.6363, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 0.5823957643944407, |
|
"grad_norm": 1.3198407888412476, |
|
"learning_rate": 8.494665150436288e-05, |
|
"loss": 1.646, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.585043017868961, |
|
"grad_norm": 1.172568678855896, |
|
"learning_rate": 8.478974373263318e-05, |
|
"loss": 1.4356, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 0.5876902713434812, |
|
"grad_norm": 1.4879450798034668, |
|
"learning_rate": 8.463216899064179e-05, |
|
"loss": 1.7847, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 0.5903375248180013, |
|
"grad_norm": 1.3998438119888306, |
|
"learning_rate": 8.447393029932692e-05, |
|
"loss": 1.7818, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 0.5929847782925215, |
|
"grad_norm": 1.3567726612091064, |
|
"learning_rate": 8.431503069235565e-05, |
|
"loss": 1.5539, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 0.5956320317670417, |
|
"grad_norm": 1.4983903169631958, |
|
"learning_rate": 8.415547321606584e-05, |
|
"loss": 1.6477, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.5982792852415619, |
|
"grad_norm": 1.2646454572677612, |
|
"learning_rate": 8.399526092940768e-05, |
|
"loss": 1.6087, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 0.600926538716082, |
|
"grad_norm": 1.4137752056121826, |
|
"learning_rate": 8.38343969038849e-05, |
|
"loss": 1.7626, |
|
"step": 908 |
|
}, |
|
{ |
|
"epoch": 0.6035737921906023, |
|
"grad_norm": 1.4016697406768799, |
|
"learning_rate": 8.367288422349617e-05, |
|
"loss": 1.6947, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 0.6062210456651225, |
|
"grad_norm": 1.331425666809082, |
|
"learning_rate": 8.351072598467576e-05, |
|
"loss": 1.6358, |
|
"step": 916 |
|
}, |
|
{ |
|
"epoch": 0.6088682991396426, |
|
"grad_norm": 1.2292309999465942, |
|
"learning_rate": 8.334792529623419e-05, |
|
"loss": 1.4613, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.6115155526141628, |
|
"grad_norm": 1.3756728172302246, |
|
"learning_rate": 8.318448527929877e-05, |
|
"loss": 1.5771, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 0.614162806088683, |
|
"grad_norm": 1.4124281406402588, |
|
"learning_rate": 8.302040906725361e-05, |
|
"loss": 1.7364, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 0.6168100595632032, |
|
"grad_norm": 1.298540472984314, |
|
"learning_rate": 8.285569980567964e-05, |
|
"loss": 1.6394, |
|
"step": 932 |
|
}, |
|
{ |
|
"epoch": 0.6194573130377233, |
|
"grad_norm": 1.3905584812164307, |
|
"learning_rate": 8.269036065229427e-05, |
|
"loss": 1.7034, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 0.6221045665122436, |
|
"grad_norm": 1.4072821140289307, |
|
"learning_rate": 8.252439477689082e-05, |
|
"loss": 1.6315, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.6247518199867638, |
|
"grad_norm": 1.239159345626831, |
|
"learning_rate": 8.235780536127787e-05, |
|
"loss": 1.5178, |
|
"step": 944 |
|
}, |
|
{ |
|
"epoch": 0.6273990734612839, |
|
"grad_norm": 1.3636091947555542, |
|
"learning_rate": 8.21905955992181e-05, |
|
"loss": 1.6564, |
|
"step": 948 |
|
}, |
|
{ |
|
"epoch": 0.6300463269358041, |
|
"grad_norm": 1.3506637811660767, |
|
"learning_rate": 8.202276869636713e-05, |
|
"loss": 1.646, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 0.6326935804103243, |
|
"grad_norm": 1.4368304014205933, |
|
"learning_rate": 8.185432787021216e-05, |
|
"loss": 1.5073, |
|
"step": 956 |
|
}, |
|
{ |
|
"epoch": 0.6353408338848445, |
|
"grad_norm": 1.3278450965881348, |
|
"learning_rate": 8.168527635001015e-05, |
|
"loss": 1.5203, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.6379880873593646, |
|
"grad_norm": 1.2450168132781982, |
|
"learning_rate": 8.151561737672591e-05, |
|
"loss": 1.7171, |
|
"step": 964 |
|
}, |
|
{ |
|
"epoch": 0.6406353408338848, |
|
"grad_norm": 1.2755018472671509, |
|
"learning_rate": 8.134535420297008e-05, |
|
"loss": 1.5675, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 0.6432825943084051, |
|
"grad_norm": 1.3066191673278809, |
|
"learning_rate": 8.117449009293668e-05, |
|
"loss": 1.6525, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 0.6459298477829252, |
|
"grad_norm": 1.2875075340270996, |
|
"learning_rate": 8.100302832234056e-05, |
|
"loss": 1.6484, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 0.6485771012574454, |
|
"grad_norm": 1.5069595575332642, |
|
"learning_rate": 8.083097217835461e-05, |
|
"loss": 1.6251, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.6512243547319656, |
|
"grad_norm": 1.334075927734375, |
|
"learning_rate": 8.065832495954668e-05, |
|
"loss": 1.743, |
|
"step": 984 |
|
}, |
|
{ |
|
"epoch": 0.6538716082064858, |
|
"grad_norm": 1.3219469785690308, |
|
"learning_rate": 8.048508997581647e-05, |
|
"loss": 1.6345, |
|
"step": 988 |
|
}, |
|
{ |
|
"epoch": 0.6565188616810059, |
|
"grad_norm": 1.4275529384613037, |
|
"learning_rate": 8.03112705483319e-05, |
|
"loss": 1.7515, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 0.6591661151555261, |
|
"grad_norm": 1.349526286125183, |
|
"learning_rate": 8.013687000946561e-05, |
|
"loss": 1.5209, |
|
"step": 996 |
|
}, |
|
{ |
|
"epoch": 0.6618133686300464, |
|
"grad_norm": 1.3620506525039673, |
|
"learning_rate": 7.996189170273096e-05, |
|
"loss": 1.6789, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.6644606221045665, |
|
"grad_norm": 1.2079874277114868, |
|
"learning_rate": 7.978633898271795e-05, |
|
"loss": 1.3453, |
|
"step": 1004 |
|
}, |
|
{ |
|
"epoch": 0.6671078755790867, |
|
"grad_norm": 1.3527398109436035, |
|
"learning_rate": 7.961021521502895e-05, |
|
"loss": 1.5927, |
|
"step": 1008 |
|
}, |
|
{ |
|
"epoch": 0.6697551290536069, |
|
"grad_norm": 1.3048250675201416, |
|
"learning_rate": 7.943352377621414e-05, |
|
"loss": 1.643, |
|
"step": 1012 |
|
}, |
|
{ |
|
"epoch": 0.6724023825281271, |
|
"grad_norm": 1.2111921310424805, |
|
"learning_rate": 7.925626805370678e-05, |
|
"loss": 1.4432, |
|
"step": 1016 |
|
}, |
|
{ |
|
"epoch": 0.6750496360026472, |
|
"grad_norm": 1.3531336784362793, |
|
"learning_rate": 7.907845144575829e-05, |
|
"loss": 1.6235, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.6776968894771674, |
|
"grad_norm": 1.204720139503479, |
|
"learning_rate": 7.890007736137307e-05, |
|
"loss": 1.5377, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 0.6803441429516877, |
|
"grad_norm": 1.3632683753967285, |
|
"learning_rate": 7.872114922024313e-05, |
|
"loss": 1.5758, |
|
"step": 1028 |
|
}, |
|
{ |
|
"epoch": 0.6829913964262078, |
|
"grad_norm": 1.4058332443237305, |
|
"learning_rate": 7.854167045268264e-05, |
|
"loss": 1.4645, |
|
"step": 1032 |
|
}, |
|
{ |
|
"epoch": 0.685638649900728, |
|
"grad_norm": 1.2490967512130737, |
|
"learning_rate": 7.836164449956199e-05, |
|
"loss": 1.5723, |
|
"step": 1036 |
|
}, |
|
{ |
|
"epoch": 0.6882859033752482, |
|
"grad_norm": 1.3228312730789185, |
|
"learning_rate": 7.818107481224198e-05, |
|
"loss": 1.466, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.6909331568497684, |
|
"grad_norm": 1.3664582967758179, |
|
"learning_rate": 7.799996485250755e-05, |
|
"loss": 1.4823, |
|
"step": 1044 |
|
}, |
|
{ |
|
"epoch": 0.6935804103242885, |
|
"grad_norm": 1.1946579217910767, |
|
"learning_rate": 7.781831809250151e-05, |
|
"loss": 1.6093, |
|
"step": 1048 |
|
}, |
|
{ |
|
"epoch": 0.6962276637988087, |
|
"grad_norm": 1.3534433841705322, |
|
"learning_rate": 7.763613801465786e-05, |
|
"loss": 1.5823, |
|
"step": 1052 |
|
}, |
|
{ |
|
"epoch": 0.698874917273329, |
|
"grad_norm": 1.275877833366394, |
|
"learning_rate": 7.745342811163507e-05, |
|
"loss": 1.508, |
|
"step": 1056 |
|
}, |
|
{ |
|
"epoch": 0.7015221707478491, |
|
"grad_norm": 1.2870965003967285, |
|
"learning_rate": 7.727019188624922e-05, |
|
"loss": 1.6452, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.7041694242223693, |
|
"grad_norm": 1.2805050611495972, |
|
"learning_rate": 7.708643285140667e-05, |
|
"loss": 1.7463, |
|
"step": 1064 |
|
}, |
|
{ |
|
"epoch": 0.7068166776968895, |
|
"grad_norm": 1.331794261932373, |
|
"learning_rate": 7.690215453003684e-05, |
|
"loss": 1.4428, |
|
"step": 1068 |
|
}, |
|
{ |
|
"epoch": 0.7094639311714097, |
|
"grad_norm": 1.3701887130737305, |
|
"learning_rate": 7.671736045502462e-05, |
|
"loss": 1.6868, |
|
"step": 1072 |
|
}, |
|
{ |
|
"epoch": 0.7121111846459298, |
|
"grad_norm": 1.3474302291870117, |
|
"learning_rate": 7.653205416914267e-05, |
|
"loss": 1.4919, |
|
"step": 1076 |
|
}, |
|
{ |
|
"epoch": 0.71475843812045, |
|
"grad_norm": 1.6028352975845337, |
|
"learning_rate": 7.634623922498348e-05, |
|
"loss": 1.5958, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.7174056915949703, |
|
"grad_norm": 1.2263597249984741, |
|
"learning_rate": 7.615991918489125e-05, |
|
"loss": 1.7238, |
|
"step": 1084 |
|
}, |
|
{ |
|
"epoch": 0.7200529450694904, |
|
"grad_norm": 1.4178084135055542, |
|
"learning_rate": 7.597309762089359e-05, |
|
"loss": 1.48, |
|
"step": 1088 |
|
}, |
|
{ |
|
"epoch": 0.7227001985440106, |
|
"grad_norm": 1.3942856788635254, |
|
"learning_rate": 7.57857781146331e-05, |
|
"loss": 1.5336, |
|
"step": 1092 |
|
}, |
|
{ |
|
"epoch": 0.7253474520185308, |
|
"grad_norm": 1.2155961990356445, |
|
"learning_rate": 7.559796425729863e-05, |
|
"loss": 1.4977, |
|
"step": 1096 |
|
}, |
|
{ |
|
"epoch": 0.727994705493051, |
|
"grad_norm": 1.3590655326843262, |
|
"learning_rate": 7.540965964955649e-05, |
|
"loss": 1.6736, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.7306419589675711, |
|
"grad_norm": 1.1585520505905151, |
|
"learning_rate": 7.522086790148133e-05, |
|
"loss": 1.6883, |
|
"step": 1104 |
|
}, |
|
{ |
|
"epoch": 0.7332892124420913, |
|
"grad_norm": 1.2694188356399536, |
|
"learning_rate": 7.503159263248709e-05, |
|
"loss": 1.657, |
|
"step": 1108 |
|
}, |
|
{ |
|
"epoch": 0.7359364659166115, |
|
"grad_norm": 1.2413800954818726, |
|
"learning_rate": 7.484183747125742e-05, |
|
"loss": 1.4757, |
|
"step": 1112 |
|
}, |
|
{ |
|
"epoch": 0.7385837193911317, |
|
"grad_norm": 1.1527191400527954, |
|
"learning_rate": 7.46516060556763e-05, |
|
"loss": 1.5628, |
|
"step": 1116 |
|
}, |
|
{ |
|
"epoch": 0.7412309728656519, |
|
"grad_norm": 1.5187007188796997, |
|
"learning_rate": 7.446090203275809e-05, |
|
"loss": 1.6387, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.7438782263401721, |
|
"grad_norm": 1.3278498649597168, |
|
"learning_rate": 7.426972905857781e-05, |
|
"loss": 1.5212, |
|
"step": 1124 |
|
}, |
|
{ |
|
"epoch": 0.7465254798146923, |
|
"grad_norm": 1.4994242191314697, |
|
"learning_rate": 7.407809079820094e-05, |
|
"loss": 1.7582, |
|
"step": 1128 |
|
}, |
|
{ |
|
"epoch": 0.7491727332892124, |
|
"grad_norm": 1.2623709440231323, |
|
"learning_rate": 7.388599092561315e-05, |
|
"loss": 1.6223, |
|
"step": 1132 |
|
}, |
|
{ |
|
"epoch": 0.7518199867637326, |
|
"grad_norm": 1.3785511255264282, |
|
"learning_rate": 7.369343312364993e-05, |
|
"loss": 1.5051, |
|
"step": 1136 |
|
}, |
|
{ |
|
"epoch": 0.7544672402382528, |
|
"grad_norm": 1.2472020387649536, |
|
"learning_rate": 7.350042108392594e-05, |
|
"loss": 1.419, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.757114493712773, |
|
"grad_norm": 1.6892167329788208, |
|
"learning_rate": 7.330695850676421e-05, |
|
"loss": 1.5718, |
|
"step": 1144 |
|
}, |
|
{ |
|
"epoch": 0.7597617471872932, |
|
"grad_norm": 1.4521297216415405, |
|
"learning_rate": 7.311304910112525e-05, |
|
"loss": 1.6383, |
|
"step": 1148 |
|
}, |
|
{ |
|
"epoch": 0.7624090006618134, |
|
"grad_norm": 1.450149655342102, |
|
"learning_rate": 7.291869658453594e-05, |
|
"loss": 1.771, |
|
"step": 1152 |
|
}, |
|
{ |
|
"epoch": 0.7650562541363336, |
|
"grad_norm": 1.3068790435791016, |
|
"learning_rate": 7.272390468301821e-05, |
|
"loss": 1.6414, |
|
"step": 1156 |
|
}, |
|
{ |
|
"epoch": 0.7677035076108537, |
|
"grad_norm": 1.1887469291687012, |
|
"learning_rate": 7.252867713101771e-05, |
|
"loss": 1.3455, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.7703507610853739, |
|
"grad_norm": 1.2392699718475342, |
|
"learning_rate": 7.233301767133205e-05, |
|
"loss": 1.5139, |
|
"step": 1164 |
|
}, |
|
{ |
|
"epoch": 0.7729980145598941, |
|
"grad_norm": 1.353925347328186, |
|
"learning_rate": 7.213693005503924e-05, |
|
"loss": 1.6324, |
|
"step": 1168 |
|
}, |
|
{ |
|
"epoch": 0.7756452680344142, |
|
"grad_norm": 1.2792888879776, |
|
"learning_rate": 7.194041804142557e-05, |
|
"loss": 1.69, |
|
"step": 1172 |
|
}, |
|
{ |
|
"epoch": 0.7782925215089345, |
|
"grad_norm": 1.1825402975082397, |
|
"learning_rate": 7.174348539791375e-05, |
|
"loss": 1.3613, |
|
"step": 1176 |
|
}, |
|
{ |
|
"epoch": 0.7809397749834547, |
|
"grad_norm": 1.2615066766738892, |
|
"learning_rate": 7.154613589999054e-05, |
|
"loss": 1.6972, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.7835870284579749, |
|
"grad_norm": 1.239867091178894, |
|
"learning_rate": 7.13483733311344e-05, |
|
"loss": 1.403, |
|
"step": 1184 |
|
}, |
|
{ |
|
"epoch": 0.786234281932495, |
|
"grad_norm": 1.3656786680221558, |
|
"learning_rate": 7.115020148274295e-05, |
|
"loss": 1.6528, |
|
"step": 1188 |
|
}, |
|
{ |
|
"epoch": 0.7888815354070152, |
|
"grad_norm": 1.2590436935424805, |
|
"learning_rate": 7.095162415406034e-05, |
|
"loss": 1.5411, |
|
"step": 1192 |
|
}, |
|
{ |
|
"epoch": 0.7915287888815354, |
|
"grad_norm": 1.2784417867660522, |
|
"learning_rate": 7.075264515210435e-05, |
|
"loss": 1.5618, |
|
"step": 1196 |
|
}, |
|
{ |
|
"epoch": 0.7941760423560555, |
|
"grad_norm": 1.3260300159454346, |
|
"learning_rate": 7.055326829159341e-05, |
|
"loss": 1.5295, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.7968232958305758, |
|
"grad_norm": 5.832207202911377, |
|
"learning_rate": 7.03534973948735e-05, |
|
"loss": 1.5864, |
|
"step": 1204 |
|
}, |
|
{ |
|
"epoch": 0.799470549305096, |
|
"grad_norm": 1.2828547954559326, |
|
"learning_rate": 7.015333629184484e-05, |
|
"loss": 1.5081, |
|
"step": 1208 |
|
}, |
|
{ |
|
"epoch": 0.8021178027796162, |
|
"grad_norm": 1.2997095584869385, |
|
"learning_rate": 6.995278881988847e-05, |
|
"loss": 1.5827, |
|
"step": 1212 |
|
}, |
|
{ |
|
"epoch": 0.8047650562541363, |
|
"grad_norm": 1.2829680442810059, |
|
"learning_rate": 6.975185882379271e-05, |
|
"loss": 1.4565, |
|
"step": 1216 |
|
}, |
|
{ |
|
"epoch": 0.8074123097286565, |
|
"grad_norm": 1.3034470081329346, |
|
"learning_rate": 6.955055015567942e-05, |
|
"loss": 1.4973, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.8100595632031767, |
|
"grad_norm": 1.170404314994812, |
|
"learning_rate": 6.934886667493012e-05, |
|
"loss": 1.4518, |
|
"step": 1224 |
|
}, |
|
{ |
|
"epoch": 0.8127068166776968, |
|
"grad_norm": 1.2815779447555542, |
|
"learning_rate": 6.914681224811208e-05, |
|
"loss": 1.546, |
|
"step": 1228 |
|
}, |
|
{ |
|
"epoch": 0.8153540701522171, |
|
"grad_norm": 1.227200984954834, |
|
"learning_rate": 6.894439074890414e-05, |
|
"loss": 1.5478, |
|
"step": 1232 |
|
}, |
|
{ |
|
"epoch": 0.8180013236267373, |
|
"grad_norm": 1.2927132844924927, |
|
"learning_rate": 6.874160605802244e-05, |
|
"loss": 1.6184, |
|
"step": 1236 |
|
}, |
|
{ |
|
"epoch": 0.8206485771012575, |
|
"grad_norm": 1.2327131032943726, |
|
"learning_rate": 6.853846206314605e-05, |
|
"loss": 1.5553, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.8232958305757776, |
|
"grad_norm": 1.1886876821517944, |
|
"learning_rate": 6.833496265884241e-05, |
|
"loss": 1.4956, |
|
"step": 1244 |
|
}, |
|
{ |
|
"epoch": 0.8259430840502978, |
|
"grad_norm": 1.4828628301620483, |
|
"learning_rate": 6.813111174649269e-05, |
|
"loss": 1.7339, |
|
"step": 1248 |
|
}, |
|
{ |
|
"epoch": 0.828590337524818, |
|
"grad_norm": 1.2269375324249268, |
|
"learning_rate": 6.792691323421698e-05, |
|
"loss": 1.5712, |
|
"step": 1252 |
|
}, |
|
{ |
|
"epoch": 0.8312375909993381, |
|
"grad_norm": 1.4898347854614258, |
|
"learning_rate": 6.772237103679937e-05, |
|
"loss": 1.6172, |
|
"step": 1256 |
|
}, |
|
{ |
|
"epoch": 0.8338848444738584, |
|
"grad_norm": 1.1373467445373535, |
|
"learning_rate": 6.751748907561288e-05, |
|
"loss": 1.3869, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.8365320979483786, |
|
"grad_norm": 1.2607003450393677, |
|
"learning_rate": 6.731227127854434e-05, |
|
"loss": 1.5501, |
|
"step": 1264 |
|
}, |
|
{ |
|
"epoch": 0.8391793514228988, |
|
"grad_norm": 1.357080340385437, |
|
"learning_rate": 6.710672157991899e-05, |
|
"loss": 1.5804, |
|
"step": 1268 |
|
}, |
|
{ |
|
"epoch": 0.8418266048974189, |
|
"grad_norm": 1.300445318222046, |
|
"learning_rate": 6.690084392042513e-05, |
|
"loss": 1.4547, |
|
"step": 1272 |
|
}, |
|
{ |
|
"epoch": 0.8444738583719391, |
|
"grad_norm": 1.281031608581543, |
|
"learning_rate": 6.669464224703861e-05, |
|
"loss": 1.5843, |
|
"step": 1276 |
|
}, |
|
{ |
|
"epoch": 0.8471211118464593, |
|
"grad_norm": 1.2201812267303467, |
|
"learning_rate": 6.648812051294697e-05, |
|
"loss": 1.4422, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.8497683653209794, |
|
"grad_norm": 1.2445136308670044, |
|
"learning_rate": 6.628128267747391e-05, |
|
"loss": 1.5826, |
|
"step": 1284 |
|
}, |
|
{ |
|
"epoch": 0.8524156187954997, |
|
"grad_norm": 1.383170247077942, |
|
"learning_rate": 6.607413270600319e-05, |
|
"loss": 1.6194, |
|
"step": 1288 |
|
}, |
|
{ |
|
"epoch": 0.8550628722700199, |
|
"grad_norm": 1.370076060295105, |
|
"learning_rate": 6.586667456990267e-05, |
|
"loss": 1.6408, |
|
"step": 1292 |
|
}, |
|
{ |
|
"epoch": 0.8577101257445401, |
|
"grad_norm": 1.293721318244934, |
|
"learning_rate": 6.565891224644822e-05, |
|
"loss": 1.5066, |
|
"step": 1296 |
|
}, |
|
{ |
|
"epoch": 0.8603573792190602, |
|
"grad_norm": 1.4381659030914307, |
|
"learning_rate": 6.545084971874738e-05, |
|
"loss": 1.5161, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.8630046326935804, |
|
"grad_norm": 1.3525183200836182, |
|
"learning_rate": 6.524249097566306e-05, |
|
"loss": 1.6022, |
|
"step": 1304 |
|
}, |
|
{ |
|
"epoch": 0.8656518861681006, |
|
"grad_norm": 1.1742914915084839, |
|
"learning_rate": 6.503384001173707e-05, |
|
"loss": 1.3307, |
|
"step": 1308 |
|
}, |
|
{ |
|
"epoch": 0.8682991396426207, |
|
"grad_norm": 1.275770664215088, |
|
"learning_rate": 6.48249008271135e-05, |
|
"loss": 1.5092, |
|
"step": 1312 |
|
}, |
|
{ |
|
"epoch": 0.870946393117141, |
|
"grad_norm": 1.3267558813095093, |
|
"learning_rate": 6.461567742746206e-05, |
|
"loss": 1.6288, |
|
"step": 1316 |
|
}, |
|
{ |
|
"epoch": 0.8735936465916612, |
|
"grad_norm": 1.1977699995040894, |
|
"learning_rate": 6.440617382390128e-05, |
|
"loss": 1.5567, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.8762409000661814, |
|
"grad_norm": 1.1399099826812744, |
|
"learning_rate": 6.419639403292161e-05, |
|
"loss": 1.5925, |
|
"step": 1324 |
|
}, |
|
{ |
|
"epoch": 0.8788881535407015, |
|
"grad_norm": 1.3445255756378174, |
|
"learning_rate": 6.398634207630841e-05, |
|
"loss": 1.5288, |
|
"step": 1328 |
|
}, |
|
{ |
|
"epoch": 0.8815354070152217, |
|
"grad_norm": 1.2953174114227295, |
|
"learning_rate": 6.377602198106483e-05, |
|
"loss": 1.5119, |
|
"step": 1332 |
|
}, |
|
{ |
|
"epoch": 0.8841826604897419, |
|
"grad_norm": 1.2466961145401, |
|
"learning_rate": 6.356543777933468e-05, |
|
"loss": 1.4559, |
|
"step": 1336 |
|
}, |
|
{ |
|
"epoch": 0.886829913964262, |
|
"grad_norm": 1.410008430480957, |
|
"learning_rate": 6.335459350832504e-05, |
|
"loss": 1.6239, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.8894771674387822, |
|
"grad_norm": 1.2374393939971924, |
|
"learning_rate": 6.314349321022893e-05, |
|
"loss": 1.4162, |
|
"step": 1344 |
|
}, |
|
{ |
|
"epoch": 0.8921244209133025, |
|
"grad_norm": 1.3700758218765259, |
|
"learning_rate": 6.293214093214775e-05, |
|
"loss": 1.4784, |
|
"step": 1348 |
|
}, |
|
{ |
|
"epoch": 0.8947716743878227, |
|
"grad_norm": 1.345596432685852, |
|
"learning_rate": 6.272054072601374e-05, |
|
"loss": 1.5489, |
|
"step": 1352 |
|
}, |
|
{ |
|
"epoch": 0.8974189278623428, |
|
"grad_norm": 1.1666315793991089, |
|
"learning_rate": 6.250869664851227e-05, |
|
"loss": 1.3515, |
|
"step": 1356 |
|
}, |
|
{ |
|
"epoch": 0.900066181336863, |
|
"grad_norm": 1.2450063228607178, |
|
"learning_rate": 6.229661276100412e-05, |
|
"loss": 1.4763, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.9027134348113832, |
|
"grad_norm": 1.1888995170593262, |
|
"learning_rate": 6.208429312944754e-05, |
|
"loss": 1.4322, |
|
"step": 1364 |
|
}, |
|
{ |
|
"epoch": 0.9053606882859033, |
|
"grad_norm": 1.3319921493530273, |
|
"learning_rate": 6.187174182432033e-05, |
|
"loss": 1.5044, |
|
"step": 1368 |
|
}, |
|
{ |
|
"epoch": 0.9080079417604235, |
|
"grad_norm": 1.2023800611495972, |
|
"learning_rate": 6.165896292054187e-05, |
|
"loss": 1.5033, |
|
"step": 1372 |
|
}, |
|
{ |
|
"epoch": 0.9106551952349438, |
|
"grad_norm": 1.3017017841339111, |
|
"learning_rate": 6.14459604973949e-05, |
|
"loss": 1.4683, |
|
"step": 1376 |
|
}, |
|
{ |
|
"epoch": 0.913302448709464, |
|
"grad_norm": 1.2657389640808105, |
|
"learning_rate": 6.12327386384473e-05, |
|
"loss": 1.5533, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.9159497021839841, |
|
"grad_norm": 1.3227919340133667, |
|
"learning_rate": 6.101930143147395e-05, |
|
"loss": 1.5239, |
|
"step": 1384 |
|
}, |
|
{ |
|
"epoch": 0.9185969556585043, |
|
"grad_norm": 1.3174325227737427, |
|
"learning_rate": 6.080565296837821e-05, |
|
"loss": 1.5259, |
|
"step": 1388 |
|
}, |
|
{ |
|
"epoch": 0.9212442091330245, |
|
"grad_norm": 1.2424542903900146, |
|
"learning_rate": 6.059179734511356e-05, |
|
"loss": 1.3573, |
|
"step": 1392 |
|
}, |
|
{ |
|
"epoch": 0.9238914626075446, |
|
"grad_norm": 1.2109280824661255, |
|
"learning_rate": 6.037773866160502e-05, |
|
"loss": 1.3831, |
|
"step": 1396 |
|
}, |
|
{ |
|
"epoch": 0.9265387160820648, |
|
"grad_norm": 1.2729474306106567, |
|
"learning_rate": 6.0163481021670575e-05, |
|
"loss": 1.674, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.9291859695565851, |
|
"grad_norm": 1.1736104488372803, |
|
"learning_rate": 5.994902853294251e-05, |
|
"loss": 1.4935, |
|
"step": 1404 |
|
}, |
|
{ |
|
"epoch": 0.9318332230311053, |
|
"grad_norm": 1.3021750450134277, |
|
"learning_rate": 5.973438530678861e-05, |
|
"loss": 1.6066, |
|
"step": 1408 |
|
}, |
|
{ |
|
"epoch": 0.9344804765056254, |
|
"grad_norm": 1.3625566959381104, |
|
"learning_rate": 5.951955545823342e-05, |
|
"loss": 1.629, |
|
"step": 1412 |
|
}, |
|
{ |
|
"epoch": 0.9371277299801456, |
|
"grad_norm": 1.1946239471435547, |
|
"learning_rate": 5.930454310587929e-05, |
|
"loss": 1.4444, |
|
"step": 1416 |
|
}, |
|
{ |
|
"epoch": 0.9397749834546658, |
|
"grad_norm": 1.4337393045425415, |
|
"learning_rate": 5.9089352371827446e-05, |
|
"loss": 1.6888, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.9424222369291859, |
|
"grad_norm": 1.3422842025756836, |
|
"learning_rate": 5.8873987381598924e-05, |
|
"loss": 1.6227, |
|
"step": 1424 |
|
}, |
|
{ |
|
"epoch": 0.9450694904037061, |
|
"grad_norm": 1.2459781169891357, |
|
"learning_rate": 5.865845226405553e-05, |
|
"loss": 1.4704, |
|
"step": 1428 |
|
}, |
|
{ |
|
"epoch": 0.9477167438782264, |
|
"grad_norm": 1.5130873918533325, |
|
"learning_rate": 5.844275115132064e-05, |
|
"loss": 1.5029, |
|
"step": 1432 |
|
}, |
|
{ |
|
"epoch": 0.9503639973527466, |
|
"grad_norm": 1.127805471420288, |
|
"learning_rate": 5.822688817870004e-05, |
|
"loss": 1.5289, |
|
"step": 1436 |
|
}, |
|
{ |
|
"epoch": 0.9530112508272667, |
|
"grad_norm": 1.283653736114502, |
|
"learning_rate": 5.801086748460255e-05, |
|
"loss": 1.545, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.9556585043017869, |
|
"grad_norm": 1.290038824081421, |
|
"learning_rate": 5.7794693210460804e-05, |
|
"loss": 1.5588, |
|
"step": 1444 |
|
}, |
|
{ |
|
"epoch": 0.9583057577763071, |
|
"grad_norm": 1.2246005535125732, |
|
"learning_rate": 5.757836950065172e-05, |
|
"loss": 1.4577, |
|
"step": 1448 |
|
}, |
|
{ |
|
"epoch": 0.9609530112508272, |
|
"grad_norm": 1.3036789894104004, |
|
"learning_rate": 5.736190050241719e-05, |
|
"loss": 1.6891, |
|
"step": 1452 |
|
}, |
|
{ |
|
"epoch": 0.9636002647253474, |
|
"grad_norm": 1.2149336338043213, |
|
"learning_rate": 5.714529036578443e-05, |
|
"loss": 1.4114, |
|
"step": 1456 |
|
}, |
|
{ |
|
"epoch": 0.9662475181998676, |
|
"grad_norm": 1.1539721488952637, |
|
"learning_rate": 5.692854324348653e-05, |
|
"loss": 1.5497, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.9688947716743879, |
|
"grad_norm": 1.3573237657546997, |
|
"learning_rate": 5.6711663290882776e-05, |
|
"loss": 1.4812, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 0.971542025148908, |
|
"grad_norm": 1.338049292564392, |
|
"learning_rate": 5.649465466587902e-05, |
|
"loss": 1.6043, |
|
"step": 1468 |
|
}, |
|
{ |
|
"epoch": 0.9741892786234282, |
|
"grad_norm": 1.3066737651824951, |
|
"learning_rate": 5.627752152884794e-05, |
|
"loss": 1.582, |
|
"step": 1472 |
|
}, |
|
{ |
|
"epoch": 0.9768365320979484, |
|
"grad_norm": 1.2373597621917725, |
|
"learning_rate": 5.606026804254931e-05, |
|
"loss": 1.4099, |
|
"step": 1476 |
|
}, |
|
{ |
|
"epoch": 0.9794837855724685, |
|
"grad_norm": 1.1805121898651123, |
|
"learning_rate": 5.584289837205012e-05, |
|
"loss": 1.3914, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.9821310390469887, |
|
"grad_norm": 1.3286755084991455, |
|
"learning_rate": 5.5625416684644874e-05, |
|
"loss": 1.4803, |
|
"step": 1484 |
|
}, |
|
{ |
|
"epoch": 0.984778292521509, |
|
"grad_norm": 1.2538626194000244, |
|
"learning_rate": 5.540782714977549e-05, |
|
"loss": 1.5063, |
|
"step": 1488 |
|
}, |
|
{ |
|
"epoch": 0.9874255459960292, |
|
"grad_norm": 1.2164523601531982, |
|
"learning_rate": 5.51901339389516e-05, |
|
"loss": 1.3555, |
|
"step": 1492 |
|
}, |
|
{ |
|
"epoch": 0.9900727994705493, |
|
"grad_norm": 1.217489242553711, |
|
"learning_rate": 5.4972341225670354e-05, |
|
"loss": 1.4818, |
|
"step": 1496 |
|
}, |
|
{ |
|
"epoch": 0.9927200529450695, |
|
"grad_norm": 1.170462727546692, |
|
"learning_rate": 5.4754453185336586e-05, |
|
"loss": 1.5693, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.9953673064195897, |
|
"grad_norm": 1.2590230703353882, |
|
"learning_rate": 5.453647399518262e-05, |
|
"loss": 1.3735, |
|
"step": 1504 |
|
}, |
|
{ |
|
"epoch": 0.9980145598941098, |
|
"grad_norm": 1.1807870864868164, |
|
"learning_rate": 5.431840783418832e-05, |
|
"loss": 1.3643, |
|
"step": 1508 |
|
}, |
|
{ |
|
"epoch": 1.00066181336863, |
|
"grad_norm": 1.1421568393707275, |
|
"learning_rate": 5.410025888300087e-05, |
|
"loss": 1.4336, |
|
"step": 1512 |
|
}, |
|
{ |
|
"epoch": 1.0033090668431501, |
|
"grad_norm": 1.161148190498352, |
|
"learning_rate": 5.388203132385467e-05, |
|
"loss": 1.2284, |
|
"step": 1516 |
|
}, |
|
{ |
|
"epoch": 1.0059563203176705, |
|
"grad_norm": 1.129975438117981, |
|
"learning_rate": 5.366372934049114e-05, |
|
"loss": 1.2385, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 1.0086035737921906, |
|
"grad_norm": 1.0889602899551392, |
|
"learning_rate": 5.3445357118078545e-05, |
|
"loss": 1.0735, |
|
"step": 1524 |
|
}, |
|
{ |
|
"epoch": 1.011250827266711, |
|
"grad_norm": 1.2157572507858276, |
|
"learning_rate": 5.322691884313172e-05, |
|
"loss": 1.1803, |
|
"step": 1528 |
|
}, |
|
{ |
|
"epoch": 1.013898080741231, |
|
"grad_norm": 1.1153740882873535, |
|
"learning_rate": 5.300841870343183e-05, |
|
"loss": 1.0574, |
|
"step": 1532 |
|
}, |
|
{ |
|
"epoch": 1.016545334215751, |
|
"grad_norm": 1.1907968521118164, |
|
"learning_rate": 5.2789860887946066e-05, |
|
"loss": 1.0691, |
|
"step": 1536 |
|
}, |
|
{ |
|
"epoch": 1.0191925876902714, |
|
"grad_norm": 1.1797744035720825, |
|
"learning_rate": 5.257124958674736e-05, |
|
"loss": 1.1063, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 1.0218398411647915, |
|
"grad_norm": 1.0647462606430054, |
|
"learning_rate": 5.235258899093406e-05, |
|
"loss": 1.0512, |
|
"step": 1544 |
|
}, |
|
{ |
|
"epoch": 1.0244870946393116, |
|
"grad_norm": 1.1768978834152222, |
|
"learning_rate": 5.213388329254949e-05, |
|
"loss": 1.197, |
|
"step": 1548 |
|
}, |
|
{ |
|
"epoch": 1.027134348113832, |
|
"grad_norm": 1.282067060470581, |
|
"learning_rate": 5.191513668450178e-05, |
|
"loss": 1.231, |
|
"step": 1552 |
|
}, |
|
{ |
|
"epoch": 1.029781601588352, |
|
"grad_norm": 1.3294609785079956, |
|
"learning_rate": 5.1696353360483216e-05, |
|
"loss": 1.2719, |
|
"step": 1556 |
|
}, |
|
{ |
|
"epoch": 1.0324288550628722, |
|
"grad_norm": 1.187889814376831, |
|
"learning_rate": 5.1477537514890116e-05, |
|
"loss": 1.2815, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 1.0350761085373925, |
|
"grad_norm": 1.152590036392212, |
|
"learning_rate": 5.125869334274219e-05, |
|
"loss": 1.126, |
|
"step": 1564 |
|
}, |
|
{ |
|
"epoch": 1.0377233620119126, |
|
"grad_norm": 1.1706854104995728, |
|
"learning_rate": 5.103982503960224e-05, |
|
"loss": 1.22, |
|
"step": 1568 |
|
}, |
|
{ |
|
"epoch": 1.0403706154864327, |
|
"grad_norm": 1.1738533973693848, |
|
"learning_rate": 5.082093680149571e-05, |
|
"loss": 1.2386, |
|
"step": 1572 |
|
}, |
|
{ |
|
"epoch": 1.043017868960953, |
|
"grad_norm": 1.299540400505066, |
|
"learning_rate": 5.060203282483022e-05, |
|
"loss": 1.2308, |
|
"step": 1576 |
|
}, |
|
{ |
|
"epoch": 1.0456651224354732, |
|
"grad_norm": 1.1205031871795654, |
|
"learning_rate": 5.038311730631509e-05, |
|
"loss": 1.1254, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 1.0483123759099935, |
|
"grad_norm": 1.1589237451553345, |
|
"learning_rate": 5.016419444288096e-05, |
|
"loss": 1.046, |
|
"step": 1584 |
|
}, |
|
{ |
|
"epoch": 1.0509596293845136, |
|
"grad_norm": 1.1844594478607178, |
|
"learning_rate": 4.9945268431599245e-05, |
|
"loss": 1.1835, |
|
"step": 1588 |
|
}, |
|
{ |
|
"epoch": 1.0536068828590337, |
|
"grad_norm": 1.319905400276184, |
|
"learning_rate": 4.972634346960173e-05, |
|
"loss": 1.2235, |
|
"step": 1592 |
|
}, |
|
{ |
|
"epoch": 1.056254136333554, |
|
"grad_norm": 1.1240413188934326, |
|
"learning_rate": 4.950742375400007e-05, |
|
"loss": 1.0733, |
|
"step": 1596 |
|
}, |
|
{ |
|
"epoch": 1.0589013898080741, |
|
"grad_norm": 1.27524995803833, |
|
"learning_rate": 4.9288513481805374e-05, |
|
"loss": 1.1595, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.0615486432825942, |
|
"grad_norm": 1.2067784070968628, |
|
"learning_rate": 4.906961684984767e-05, |
|
"loss": 1.1771, |
|
"step": 1604 |
|
}, |
|
{ |
|
"epoch": 1.0641958967571146, |
|
"grad_norm": 1.154008150100708, |
|
"learning_rate": 4.8850738054695486e-05, |
|
"loss": 1.1934, |
|
"step": 1608 |
|
}, |
|
{ |
|
"epoch": 1.0668431502316347, |
|
"grad_norm": 1.1568691730499268, |
|
"learning_rate": 4.863188129257539e-05, |
|
"loss": 1.1032, |
|
"step": 1612 |
|
}, |
|
{ |
|
"epoch": 1.0694904037061548, |
|
"grad_norm": 1.1935631036758423, |
|
"learning_rate": 4.8413050759291585e-05, |
|
"loss": 1.1457, |
|
"step": 1616 |
|
}, |
|
{ |
|
"epoch": 1.072137657180675, |
|
"grad_norm": 1.1685223579406738, |
|
"learning_rate": 4.8194250650145374e-05, |
|
"loss": 1.0371, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 1.0747849106551952, |
|
"grad_norm": 1.2918758392333984, |
|
"learning_rate": 4.797548515985481e-05, |
|
"loss": 1.1128, |
|
"step": 1624 |
|
}, |
|
{ |
|
"epoch": 1.0774321641297153, |
|
"grad_norm": 1.232910394668579, |
|
"learning_rate": 4.775675848247427e-05, |
|
"loss": 1.0407, |
|
"step": 1628 |
|
}, |
|
{ |
|
"epoch": 1.0800794176042356, |
|
"grad_norm": 1.27483069896698, |
|
"learning_rate": 4.7538074811313975e-05, |
|
"loss": 1.1523, |
|
"step": 1632 |
|
}, |
|
{ |
|
"epoch": 1.0827266710787558, |
|
"grad_norm": 1.2089005708694458, |
|
"learning_rate": 4.731943833885973e-05, |
|
"loss": 1.0901, |
|
"step": 1636 |
|
}, |
|
{ |
|
"epoch": 1.0853739245532759, |
|
"grad_norm": 1.272049069404602, |
|
"learning_rate": 4.7100853256692406e-05, |
|
"loss": 1.1968, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 1.0880211780277962, |
|
"grad_norm": 1.1610321998596191, |
|
"learning_rate": 4.6882323755407706e-05, |
|
"loss": 1.0379, |
|
"step": 1644 |
|
}, |
|
{ |
|
"epoch": 1.0906684315023163, |
|
"grad_norm": 1.0861836671829224, |
|
"learning_rate": 4.666385402453568e-05, |
|
"loss": 1.1274, |
|
"step": 1648 |
|
}, |
|
{ |
|
"epoch": 1.0933156849768366, |
|
"grad_norm": 1.2042131423950195, |
|
"learning_rate": 4.644544825246059e-05, |
|
"loss": 1.1502, |
|
"step": 1652 |
|
}, |
|
{ |
|
"epoch": 1.0959629384513567, |
|
"grad_norm": 1.5976825952529907, |
|
"learning_rate": 4.622711062634046e-05, |
|
"loss": 1.1527, |
|
"step": 1656 |
|
}, |
|
{ |
|
"epoch": 1.0986101919258768, |
|
"grad_norm": 1.2653815746307373, |
|
"learning_rate": 4.600884533202686e-05, |
|
"loss": 1.0946, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 1.1012574454003972, |
|
"grad_norm": 1.129782795906067, |
|
"learning_rate": 4.579065655398465e-05, |
|
"loss": 1.1376, |
|
"step": 1664 |
|
}, |
|
{ |
|
"epoch": 1.1039046988749173, |
|
"grad_norm": 1.0471429824829102, |
|
"learning_rate": 4.5572548475211805e-05, |
|
"loss": 1.1488, |
|
"step": 1668 |
|
}, |
|
{ |
|
"epoch": 1.1065519523494374, |
|
"grad_norm": 1.281714916229248, |
|
"learning_rate": 4.535452527715911e-05, |
|
"loss": 1.2245, |
|
"step": 1672 |
|
}, |
|
{ |
|
"epoch": 1.1091992058239577, |
|
"grad_norm": 1.1683017015457153, |
|
"learning_rate": 4.5136591139650105e-05, |
|
"loss": 1.1307, |
|
"step": 1676 |
|
}, |
|
{ |
|
"epoch": 1.1118464592984778, |
|
"grad_norm": 1.1847896575927734, |
|
"learning_rate": 4.491875024080088e-05, |
|
"loss": 1.0821, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 1.114493712772998, |
|
"grad_norm": 1.196803331375122, |
|
"learning_rate": 4.470100675694007e-05, |
|
"loss": 1.0633, |
|
"step": 1684 |
|
}, |
|
{ |
|
"epoch": 1.1171409662475182, |
|
"grad_norm": 1.1869444847106934, |
|
"learning_rate": 4.4483364862528646e-05, |
|
"loss": 1.1864, |
|
"step": 1688 |
|
}, |
|
{ |
|
"epoch": 1.1197882197220383, |
|
"grad_norm": 1.221575140953064, |
|
"learning_rate": 4.4265828730079987e-05, |
|
"loss": 1.0547, |
|
"step": 1692 |
|
}, |
|
{ |
|
"epoch": 1.1224354731965587, |
|
"grad_norm": 1.164784550666809, |
|
"learning_rate": 4.404840253007987e-05, |
|
"loss": 1.1614, |
|
"step": 1696 |
|
}, |
|
{ |
|
"epoch": 1.1250827266710788, |
|
"grad_norm": 1.0524084568023682, |
|
"learning_rate": 4.3831090430906484e-05, |
|
"loss": 1.1285, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.1277299801455989, |
|
"grad_norm": 1.6504064798355103, |
|
"learning_rate": 4.361389659875058e-05, |
|
"loss": 1.1689, |
|
"step": 1704 |
|
}, |
|
{ |
|
"epoch": 1.1303772336201192, |
|
"grad_norm": 1.1136175394058228, |
|
"learning_rate": 4.339682519753551e-05, |
|
"loss": 1.0815, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 1.1330244870946393, |
|
"grad_norm": 1.1745281219482422, |
|
"learning_rate": 4.3179880388837496e-05, |
|
"loss": 1.1722, |
|
"step": 1712 |
|
}, |
|
{ |
|
"epoch": 1.1356717405691594, |
|
"grad_norm": 1.0880483388900757, |
|
"learning_rate": 4.2963066331805725e-05, |
|
"loss": 1.0361, |
|
"step": 1716 |
|
}, |
|
{ |
|
"epoch": 1.1383189940436798, |
|
"grad_norm": 1.137968897819519, |
|
"learning_rate": 4.2746387183082755e-05, |
|
"loss": 1.1, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 1.1409662475181999, |
|
"grad_norm": 1.2682772874832153, |
|
"learning_rate": 4.252984709672473e-05, |
|
"loss": 1.134, |
|
"step": 1724 |
|
}, |
|
{ |
|
"epoch": 1.14361350099272, |
|
"grad_norm": 1.128180742263794, |
|
"learning_rate": 4.231345022412174e-05, |
|
"loss": 1.0812, |
|
"step": 1728 |
|
}, |
|
{ |
|
"epoch": 1.1462607544672403, |
|
"grad_norm": 1.0430972576141357, |
|
"learning_rate": 4.2097200713918264e-05, |
|
"loss": 1.034, |
|
"step": 1732 |
|
}, |
|
{ |
|
"epoch": 1.1489080079417604, |
|
"grad_norm": 1.1832259893417358, |
|
"learning_rate": 4.188110271193371e-05, |
|
"loss": 1.1422, |
|
"step": 1736 |
|
}, |
|
{ |
|
"epoch": 1.1515552614162807, |
|
"grad_norm": 1.1320624351501465, |
|
"learning_rate": 4.1665160361082704e-05, |
|
"loss": 1.0688, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 1.1542025148908008, |
|
"grad_norm": 1.2752870321273804, |
|
"learning_rate": 4.144937780129594e-05, |
|
"loss": 1.1926, |
|
"step": 1744 |
|
}, |
|
{ |
|
"epoch": 1.156849768365321, |
|
"grad_norm": 1.2092264890670776, |
|
"learning_rate": 4.123375916944061e-05, |
|
"loss": 1.0973, |
|
"step": 1748 |
|
}, |
|
{ |
|
"epoch": 1.159497021839841, |
|
"grad_norm": 1.1710125207901, |
|
"learning_rate": 4.101830859924124e-05, |
|
"loss": 1.2602, |
|
"step": 1752 |
|
}, |
|
{ |
|
"epoch": 1.1621442753143614, |
|
"grad_norm": 1.4670571088790894, |
|
"learning_rate": 4.080303022120025e-05, |
|
"loss": 1.2005, |
|
"step": 1756 |
|
}, |
|
{ |
|
"epoch": 1.1647915287888815, |
|
"grad_norm": 1.1942548751831055, |
|
"learning_rate": 4.058792816251902e-05, |
|
"loss": 1.2164, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 1.1674387822634018, |
|
"grad_norm": 1.2230584621429443, |
|
"learning_rate": 4.037300654701856e-05, |
|
"loss": 1.0395, |
|
"step": 1764 |
|
}, |
|
{ |
|
"epoch": 1.170086035737922, |
|
"grad_norm": 1.3117454051971436, |
|
"learning_rate": 4.015826949506049e-05, |
|
"loss": 1.1848, |
|
"step": 1768 |
|
}, |
|
{ |
|
"epoch": 1.172733289212442, |
|
"grad_norm": 1.2102235555648804, |
|
"learning_rate": 3.994372112346812e-05, |
|
"loss": 1.1349, |
|
"step": 1772 |
|
}, |
|
{ |
|
"epoch": 1.1753805426869623, |
|
"grad_norm": 1.3425853252410889, |
|
"learning_rate": 3.9729365545447514e-05, |
|
"loss": 1.1756, |
|
"step": 1776 |
|
}, |
|
{ |
|
"epoch": 1.1780277961614825, |
|
"grad_norm": 1.1865317821502686, |
|
"learning_rate": 3.9515206870508534e-05, |
|
"loss": 1.1298, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 1.1806750496360026, |
|
"grad_norm": 1.0945122241973877, |
|
"learning_rate": 3.930124920438616e-05, |
|
"loss": 1.1275, |
|
"step": 1784 |
|
}, |
|
{ |
|
"epoch": 1.1833223031105229, |
|
"grad_norm": 1.2114017009735107, |
|
"learning_rate": 3.908749664896171e-05, |
|
"loss": 1.1958, |
|
"step": 1788 |
|
}, |
|
{ |
|
"epoch": 1.185969556585043, |
|
"grad_norm": 1.1771973371505737, |
|
"learning_rate": 3.887395330218429e-05, |
|
"loss": 1.0868, |
|
"step": 1792 |
|
}, |
|
{ |
|
"epoch": 1.188616810059563, |
|
"grad_norm": 1.2639689445495605, |
|
"learning_rate": 3.866062325799209e-05, |
|
"loss": 1.213, |
|
"step": 1796 |
|
}, |
|
{ |
|
"epoch": 1.1912640635340834, |
|
"grad_norm": 1.1774057149887085, |
|
"learning_rate": 3.844751060623404e-05, |
|
"loss": 1.0974, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.1939113170086035, |
|
"grad_norm": 1.1269370317459106, |
|
"learning_rate": 3.823461943259132e-05, |
|
"loss": 1.1296, |
|
"step": 1804 |
|
}, |
|
{ |
|
"epoch": 1.1965585704831239, |
|
"grad_norm": 1.2880319356918335, |
|
"learning_rate": 3.802195381849901e-05, |
|
"loss": 1.1121, |
|
"step": 1808 |
|
}, |
|
{ |
|
"epoch": 1.199205823957644, |
|
"grad_norm": 1.1425657272338867, |
|
"learning_rate": 3.7809517841067976e-05, |
|
"loss": 1.0818, |
|
"step": 1812 |
|
}, |
|
{ |
|
"epoch": 1.201853077432164, |
|
"grad_norm": 1.1727538108825684, |
|
"learning_rate": 3.759731557300652e-05, |
|
"loss": 1.025, |
|
"step": 1816 |
|
}, |
|
{ |
|
"epoch": 1.2045003309066844, |
|
"grad_norm": 1.2917152643203735, |
|
"learning_rate": 3.738535108254246e-05, |
|
"loss": 1.21, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 1.2071475843812045, |
|
"grad_norm": 1.1989338397979736, |
|
"learning_rate": 3.7173628433345006e-05, |
|
"loss": 1.1712, |
|
"step": 1824 |
|
}, |
|
{ |
|
"epoch": 1.2097948378557246, |
|
"grad_norm": 1.2029826641082764, |
|
"learning_rate": 3.696215168444699e-05, |
|
"loss": 1.1146, |
|
"step": 1828 |
|
}, |
|
{ |
|
"epoch": 1.212442091330245, |
|
"grad_norm": 1.173412561416626, |
|
"learning_rate": 3.675092489016693e-05, |
|
"loss": 1.1237, |
|
"step": 1832 |
|
}, |
|
{ |
|
"epoch": 1.215089344804765, |
|
"grad_norm": 1.250653862953186, |
|
"learning_rate": 3.6539952100031326e-05, |
|
"loss": 1.1326, |
|
"step": 1836 |
|
}, |
|
{ |
|
"epoch": 1.2177365982792852, |
|
"grad_norm": 1.1222728490829468, |
|
"learning_rate": 3.632923735869711e-05, |
|
"loss": 1.1575, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 1.2203838517538055, |
|
"grad_norm": 1.098129153251648, |
|
"learning_rate": 3.611878470587402e-05, |
|
"loss": 1.1357, |
|
"step": 1844 |
|
}, |
|
{ |
|
"epoch": 1.2230311052283256, |
|
"grad_norm": 1.2261312007904053, |
|
"learning_rate": 3.5908598176247124e-05, |
|
"loss": 1.075, |
|
"step": 1848 |
|
}, |
|
{ |
|
"epoch": 1.225678358702846, |
|
"grad_norm": 1.145168423652649, |
|
"learning_rate": 3.569868179939958e-05, |
|
"loss": 1.1333, |
|
"step": 1852 |
|
}, |
|
{ |
|
"epoch": 1.228325612177366, |
|
"grad_norm": 1.1339921951293945, |
|
"learning_rate": 3.5489039599735294e-05, |
|
"loss": 1.0158, |
|
"step": 1856 |
|
}, |
|
{ |
|
"epoch": 1.2309728656518861, |
|
"grad_norm": 1.2139281034469604, |
|
"learning_rate": 3.5279675596401846e-05, |
|
"loss": 1.1726, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 1.2336201191264062, |
|
"grad_norm": 1.2778246402740479, |
|
"learning_rate": 3.5070593803213267e-05, |
|
"loss": 1.182, |
|
"step": 1864 |
|
}, |
|
{ |
|
"epoch": 1.2362673726009266, |
|
"grad_norm": 1.2227150201797485, |
|
"learning_rate": 3.4861798228573325e-05, |
|
"loss": 1.0037, |
|
"step": 1868 |
|
}, |
|
{ |
|
"epoch": 1.2389146260754467, |
|
"grad_norm": 1.2715504169464111, |
|
"learning_rate": 3.465329287539852e-05, |
|
"loss": 1.21, |
|
"step": 1872 |
|
}, |
|
{ |
|
"epoch": 1.241561879549967, |
|
"grad_norm": 1.300766944885254, |
|
"learning_rate": 3.444508174104136e-05, |
|
"loss": 1.1, |
|
"step": 1876 |
|
}, |
|
{ |
|
"epoch": 1.244209133024487, |
|
"grad_norm": 1.1540982723236084, |
|
"learning_rate": 3.423716881721375e-05, |
|
"loss": 1.1127, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 1.2468563864990072, |
|
"grad_norm": 1.4233511686325073, |
|
"learning_rate": 3.402955808991052e-05, |
|
"loss": 1.1692, |
|
"step": 1884 |
|
}, |
|
{ |
|
"epoch": 1.2495036399735275, |
|
"grad_norm": 1.2163995504379272, |
|
"learning_rate": 3.382225353933288e-05, |
|
"loss": 1.0856, |
|
"step": 1888 |
|
}, |
|
{ |
|
"epoch": 1.2521508934480476, |
|
"grad_norm": 1.2361574172973633, |
|
"learning_rate": 3.3615259139812225e-05, |
|
"loss": 1.2024, |
|
"step": 1892 |
|
}, |
|
{ |
|
"epoch": 1.254798146922568, |
|
"grad_norm": 1.0741496086120605, |
|
"learning_rate": 3.340857885973388e-05, |
|
"loss": 1.0447, |
|
"step": 1896 |
|
}, |
|
{ |
|
"epoch": 1.257445400397088, |
|
"grad_norm": 1.1579320430755615, |
|
"learning_rate": 3.320221666146107e-05, |
|
"loss": 1.0772, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.2600926538716082, |
|
"grad_norm": 1.2062878608703613, |
|
"learning_rate": 3.299617650125889e-05, |
|
"loss": 1.1011, |
|
"step": 1904 |
|
}, |
|
{ |
|
"epoch": 1.2627399073461283, |
|
"grad_norm": 1.2862952947616577, |
|
"learning_rate": 3.279046232921852e-05, |
|
"loss": 1.2596, |
|
"step": 1908 |
|
}, |
|
{ |
|
"epoch": 1.2653871608206486, |
|
"grad_norm": 1.2335329055786133, |
|
"learning_rate": 3.2585078089181464e-05, |
|
"loss": 1.2462, |
|
"step": 1912 |
|
}, |
|
{ |
|
"epoch": 1.2680344142951687, |
|
"grad_norm": 1.0968290567398071, |
|
"learning_rate": 3.238002771866391e-05, |
|
"loss": 1.0543, |
|
"step": 1916 |
|
}, |
|
{ |
|
"epoch": 1.270681667769689, |
|
"grad_norm": 1.06516695022583, |
|
"learning_rate": 3.217531514878136e-05, |
|
"loss": 1.1669, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 1.2733289212442092, |
|
"grad_norm": 1.1616246700286865, |
|
"learning_rate": 3.1970944304173126e-05, |
|
"loss": 1.2252, |
|
"step": 1924 |
|
}, |
|
{ |
|
"epoch": 1.2759761747187293, |
|
"grad_norm": 1.1696902513504028, |
|
"learning_rate": 3.176691910292715e-05, |
|
"loss": 1.2329, |
|
"step": 1928 |
|
}, |
|
{ |
|
"epoch": 1.2786234281932494, |
|
"grad_norm": 1.210041880607605, |
|
"learning_rate": 3.156324345650488e-05, |
|
"loss": 1.3271, |
|
"step": 1932 |
|
}, |
|
{ |
|
"epoch": 1.2812706816677697, |
|
"grad_norm": 1.0774304866790771, |
|
"learning_rate": 3.1359921269666324e-05, |
|
"loss": 1.0306, |
|
"step": 1936 |
|
}, |
|
{ |
|
"epoch": 1.2839179351422898, |
|
"grad_norm": 1.166651725769043, |
|
"learning_rate": 3.1156956440395136e-05, |
|
"loss": 1.021, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 1.2865651886168101, |
|
"grad_norm": 1.2745511531829834, |
|
"learning_rate": 3.095435285982387e-05, |
|
"loss": 1.1301, |
|
"step": 1944 |
|
}, |
|
{ |
|
"epoch": 1.2892124420913302, |
|
"grad_norm": 1.0762966871261597, |
|
"learning_rate": 3.075211441215944e-05, |
|
"loss": 1.0831, |
|
"step": 1948 |
|
}, |
|
{ |
|
"epoch": 1.2918596955658503, |
|
"grad_norm": 1.298743486404419, |
|
"learning_rate": 3.055024497460867e-05, |
|
"loss": 1.1705, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 1.2945069490403707, |
|
"grad_norm": 1.243034839630127, |
|
"learning_rate": 3.0348748417303823e-05, |
|
"loss": 1.1282, |
|
"step": 1956 |
|
}, |
|
{ |
|
"epoch": 1.2971542025148908, |
|
"grad_norm": 1.2496618032455444, |
|
"learning_rate": 3.0147628603228594e-05, |
|
"loss": 1.0639, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 1.299801455989411, |
|
"grad_norm": 1.141508936882019, |
|
"learning_rate": 2.9946889388143913e-05, |
|
"loss": 1.1297, |
|
"step": 1964 |
|
}, |
|
{ |
|
"epoch": 1.3024487094639312, |
|
"grad_norm": 1.188610553741455, |
|
"learning_rate": 2.974653462051411e-05, |
|
"loss": 1.1628, |
|
"step": 1968 |
|
}, |
|
{ |
|
"epoch": 1.3050959629384513, |
|
"grad_norm": 1.1807959079742432, |
|
"learning_rate": 2.9546568141433006e-05, |
|
"loss": 1.0527, |
|
"step": 1972 |
|
}, |
|
{ |
|
"epoch": 1.3077432164129714, |
|
"grad_norm": 1.1804313659667969, |
|
"learning_rate": 2.9346993784550474e-05, |
|
"loss": 1.196, |
|
"step": 1976 |
|
}, |
|
{ |
|
"epoch": 1.3103904698874917, |
|
"grad_norm": 1.1646931171417236, |
|
"learning_rate": 2.9147815375998766e-05, |
|
"loss": 1.0773, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 1.3130377233620119, |
|
"grad_norm": 1.4130630493164062, |
|
"learning_rate": 2.8949036734319247e-05, |
|
"loss": 1.2183, |
|
"step": 1984 |
|
}, |
|
{ |
|
"epoch": 1.3156849768365322, |
|
"grad_norm": 1.1829743385314941, |
|
"learning_rate": 2.8750661670389135e-05, |
|
"loss": 1.1457, |
|
"step": 1988 |
|
}, |
|
{ |
|
"epoch": 1.3183322303110523, |
|
"grad_norm": 1.1480798721313477, |
|
"learning_rate": 2.8552693987348532e-05, |
|
"loss": 1.0502, |
|
"step": 1992 |
|
}, |
|
{ |
|
"epoch": 1.3209794837855724, |
|
"grad_norm": 1.1411528587341309, |
|
"learning_rate": 2.835513748052738e-05, |
|
"loss": 1.1938, |
|
"step": 1996 |
|
}, |
|
{ |
|
"epoch": 1.3236267372600927, |
|
"grad_norm": 1.1550084352493286, |
|
"learning_rate": 2.815799593737285e-05, |
|
"loss": 1.1577, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.3262739907346128, |
|
"grad_norm": 1.1829745769500732, |
|
"learning_rate": 2.7961273137376566e-05, |
|
"loss": 1.097, |
|
"step": 2004 |
|
}, |
|
{ |
|
"epoch": 1.3289212442091332, |
|
"grad_norm": 1.229865312576294, |
|
"learning_rate": 2.7764972852002323e-05, |
|
"loss": 1.0721, |
|
"step": 2008 |
|
}, |
|
{ |
|
"epoch": 1.3315684976836533, |
|
"grad_norm": 1.1786168813705444, |
|
"learning_rate": 2.7569098844613616e-05, |
|
"loss": 1.094, |
|
"step": 2012 |
|
}, |
|
{ |
|
"epoch": 1.3342157511581734, |
|
"grad_norm": 1.4941198825836182, |
|
"learning_rate": 2.7373654870401634e-05, |
|
"loss": 1.2017, |
|
"step": 2016 |
|
}, |
|
{ |
|
"epoch": 1.3368630046326935, |
|
"grad_norm": 1.1714154481887817, |
|
"learning_rate": 2.7178644676313143e-05, |
|
"loss": 0.9992, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 1.3395102581072138, |
|
"grad_norm": 1.2153651714324951, |
|
"learning_rate": 2.698407200097872e-05, |
|
"loss": 1.1801, |
|
"step": 2024 |
|
}, |
|
{ |
|
"epoch": 1.342157511581734, |
|
"grad_norm": 1.2198010683059692, |
|
"learning_rate": 2.6789940574641102e-05, |
|
"loss": 1.0585, |
|
"step": 2028 |
|
}, |
|
{ |
|
"epoch": 1.3448047650562542, |
|
"grad_norm": 1.2211023569107056, |
|
"learning_rate": 2.6596254119083656e-05, |
|
"loss": 1.111, |
|
"step": 2032 |
|
}, |
|
{ |
|
"epoch": 1.3474520185307743, |
|
"grad_norm": 1.2999107837677002, |
|
"learning_rate": 2.6403016347558894e-05, |
|
"loss": 1.1344, |
|
"step": 2036 |
|
}, |
|
{ |
|
"epoch": 1.3500992720052944, |
|
"grad_norm": 1.181583046913147, |
|
"learning_rate": 2.6210230964717513e-05, |
|
"loss": 1.0638, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 1.3527465254798146, |
|
"grad_norm": 1.1883265972137451, |
|
"learning_rate": 2.6017901666537216e-05, |
|
"loss": 1.0218, |
|
"step": 2044 |
|
}, |
|
{ |
|
"epoch": 1.3553937789543349, |
|
"grad_norm": 1.2537999153137207, |
|
"learning_rate": 2.5826032140251943e-05, |
|
"loss": 1.0679, |
|
"step": 2048 |
|
}, |
|
{ |
|
"epoch": 1.358041032428855, |
|
"grad_norm": 1.1566420793533325, |
|
"learning_rate": 2.563462606428101e-05, |
|
"loss": 1.116, |
|
"step": 2052 |
|
}, |
|
{ |
|
"epoch": 1.3606882859033753, |
|
"grad_norm": 1.1046433448791504, |
|
"learning_rate": 2.5443687108158836e-05, |
|
"loss": 1.0058, |
|
"step": 2056 |
|
}, |
|
{ |
|
"epoch": 1.3633355393778954, |
|
"grad_norm": 1.307966709136963, |
|
"learning_rate": 2.525321893246444e-05, |
|
"loss": 1.2426, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 1.3659827928524155, |
|
"grad_norm": 1.0436811447143555, |
|
"learning_rate": 2.5063225188751273e-05, |
|
"loss": 1.0737, |
|
"step": 2064 |
|
}, |
|
{ |
|
"epoch": 1.3686300463269359, |
|
"grad_norm": 1.0671106576919556, |
|
"learning_rate": 2.4873709519477202e-05, |
|
"loss": 1.083, |
|
"step": 2068 |
|
}, |
|
{ |
|
"epoch": 1.371277299801456, |
|
"grad_norm": 1.3584109544754028, |
|
"learning_rate": 2.4684675557934767e-05, |
|
"loss": 1.0333, |
|
"step": 2072 |
|
}, |
|
{ |
|
"epoch": 1.3739245532759763, |
|
"grad_norm": 1.180293321609497, |
|
"learning_rate": 2.4496126928181467e-05, |
|
"loss": 1.0714, |
|
"step": 2076 |
|
}, |
|
{ |
|
"epoch": 1.3765718067504964, |
|
"grad_norm": 1.102691888809204, |
|
"learning_rate": 2.4308067244970228e-05, |
|
"loss": 1.0386, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 1.3792190602250165, |
|
"grad_norm": 1.156723976135254, |
|
"learning_rate": 2.4120500113680177e-05, |
|
"loss": 1.0593, |
|
"step": 2084 |
|
}, |
|
{ |
|
"epoch": 1.3818663136995366, |
|
"grad_norm": 1.2727686166763306, |
|
"learning_rate": 2.3933429130247538e-05, |
|
"loss": 1.2251, |
|
"step": 2088 |
|
}, |
|
{ |
|
"epoch": 1.384513567174057, |
|
"grad_norm": 1.213897466659546, |
|
"learning_rate": 2.3746857881096584e-05, |
|
"loss": 1.0509, |
|
"step": 2092 |
|
}, |
|
{ |
|
"epoch": 1.387160820648577, |
|
"grad_norm": 1.1525429487228394, |
|
"learning_rate": 2.3560789943071033e-05, |
|
"loss": 1.0187, |
|
"step": 2096 |
|
}, |
|
{ |
|
"epoch": 1.3898080741230974, |
|
"grad_norm": 1.1950461864471436, |
|
"learning_rate": 2.3375228883365334e-05, |
|
"loss": 1.0912, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.3924553275976175, |
|
"grad_norm": 1.1531497240066528, |
|
"learning_rate": 2.319017825945633e-05, |
|
"loss": 1.128, |
|
"step": 2104 |
|
}, |
|
{ |
|
"epoch": 1.3951025810721376, |
|
"grad_norm": 1.2713518142700195, |
|
"learning_rate": 2.300564161903511e-05, |
|
"loss": 1.0656, |
|
"step": 2108 |
|
}, |
|
{ |
|
"epoch": 1.397749834546658, |
|
"grad_norm": 1.1415860652923584, |
|
"learning_rate": 2.282162249993895e-05, |
|
"loss": 1.1084, |
|
"step": 2112 |
|
}, |
|
{ |
|
"epoch": 1.400397088021178, |
|
"grad_norm": 1.114864468574524, |
|
"learning_rate": 2.263812443008343e-05, |
|
"loss": 1.0531, |
|
"step": 2116 |
|
}, |
|
{ |
|
"epoch": 1.4030443414956983, |
|
"grad_norm": 1.3787562847137451, |
|
"learning_rate": 2.245515092739488e-05, |
|
"loss": 1.072, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 1.4056915949702184, |
|
"grad_norm": 1.014003872871399, |
|
"learning_rate": 2.2272705499742925e-05, |
|
"loss": 1.0156, |
|
"step": 2124 |
|
}, |
|
{ |
|
"epoch": 1.4083388484447386, |
|
"grad_norm": 1.1538441181182861, |
|
"learning_rate": 2.209079164487323e-05, |
|
"loss": 1.0101, |
|
"step": 2128 |
|
}, |
|
{ |
|
"epoch": 1.4109861019192587, |
|
"grad_norm": 1.2096091508865356, |
|
"learning_rate": 2.1909412850340394e-05, |
|
"loss": 1.0201, |
|
"step": 2132 |
|
}, |
|
{ |
|
"epoch": 1.413633355393779, |
|
"grad_norm": 1.1149653196334839, |
|
"learning_rate": 2.1728572593441133e-05, |
|
"loss": 1.1124, |
|
"step": 2136 |
|
}, |
|
{ |
|
"epoch": 1.416280608868299, |
|
"grad_norm": 1.3355867862701416, |
|
"learning_rate": 2.154827434114765e-05, |
|
"loss": 1.1943, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 1.4189278623428194, |
|
"grad_norm": 1.2160899639129639, |
|
"learning_rate": 2.1368521550041066e-05, |
|
"loss": 1.1481, |
|
"step": 2144 |
|
}, |
|
{ |
|
"epoch": 1.4215751158173395, |
|
"grad_norm": 1.163010597229004, |
|
"learning_rate": 2.1189317666245285e-05, |
|
"loss": 1.0703, |
|
"step": 2148 |
|
}, |
|
{ |
|
"epoch": 1.4242223692918596, |
|
"grad_norm": 1.1877809762954712, |
|
"learning_rate": 2.1010666125360767e-05, |
|
"loss": 1.1211, |
|
"step": 2152 |
|
}, |
|
{ |
|
"epoch": 1.42686962276638, |
|
"grad_norm": 1.4443504810333252, |
|
"learning_rate": 2.083257035239885e-05, |
|
"loss": 1.2918, |
|
"step": 2156 |
|
}, |
|
{ |
|
"epoch": 1.4295168762409, |
|
"grad_norm": 1.2549368143081665, |
|
"learning_rate": 2.0655033761715897e-05, |
|
"loss": 1.1117, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 1.4321641297154202, |
|
"grad_norm": 1.2271883487701416, |
|
"learning_rate": 2.0478059756948002e-05, |
|
"loss": 1.1452, |
|
"step": 2164 |
|
}, |
|
{ |
|
"epoch": 1.4348113831899405, |
|
"grad_norm": 1.2357865571975708, |
|
"learning_rate": 2.0301651730945627e-05, |
|
"loss": 1.0594, |
|
"step": 2168 |
|
}, |
|
{ |
|
"epoch": 1.4374586366644606, |
|
"grad_norm": 1.08621346950531, |
|
"learning_rate": 2.0125813065708566e-05, |
|
"loss": 1.0332, |
|
"step": 2172 |
|
}, |
|
{ |
|
"epoch": 1.4401058901389807, |
|
"grad_norm": 1.1553773880004883, |
|
"learning_rate": 1.9950547132321183e-05, |
|
"loss": 1.0823, |
|
"step": 2176 |
|
}, |
|
{ |
|
"epoch": 1.442753143613501, |
|
"grad_norm": 1.2597051858901978, |
|
"learning_rate": 1.9775857290887757e-05, |
|
"loss": 1.0197, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 1.4454003970880211, |
|
"grad_norm": 1.2433415651321411, |
|
"learning_rate": 1.9601746890467965e-05, |
|
"loss": 1.0602, |
|
"step": 2184 |
|
}, |
|
{ |
|
"epoch": 1.4480476505625415, |
|
"grad_norm": 1.3405801057815552, |
|
"learning_rate": 1.942821926901279e-05, |
|
"loss": 1.1459, |
|
"step": 2188 |
|
}, |
|
{ |
|
"epoch": 1.4506949040370616, |
|
"grad_norm": 1.1183578968048096, |
|
"learning_rate": 1.9255277753300487e-05, |
|
"loss": 1.08, |
|
"step": 2192 |
|
}, |
|
{ |
|
"epoch": 1.4533421575115817, |
|
"grad_norm": 1.011930227279663, |
|
"learning_rate": 1.9082925658872853e-05, |
|
"loss": 1.0511, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 1.4559894109861018, |
|
"grad_norm": 1.1752732992172241, |
|
"learning_rate": 1.8911166289971545e-05, |
|
"loss": 1.0437, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.4586366644606221, |
|
"grad_norm": 1.1920056343078613, |
|
"learning_rate": 1.8740002939474822e-05, |
|
"loss": 1.0756, |
|
"step": 2204 |
|
}, |
|
{ |
|
"epoch": 1.4612839179351422, |
|
"grad_norm": 1.1798444986343384, |
|
"learning_rate": 1.856943888883444e-05, |
|
"loss": 1.0473, |
|
"step": 2208 |
|
}, |
|
{ |
|
"epoch": 1.4639311714096626, |
|
"grad_norm": 1.4702142477035522, |
|
"learning_rate": 1.8399477408012643e-05, |
|
"loss": 1.0968, |
|
"step": 2212 |
|
}, |
|
{ |
|
"epoch": 1.4665784248841827, |
|
"grad_norm": 1.2086206674575806, |
|
"learning_rate": 1.82301217554196e-05, |
|
"loss": 1.0752, |
|
"step": 2216 |
|
}, |
|
{ |
|
"epoch": 1.4692256783587028, |
|
"grad_norm": 1.2675915956497192, |
|
"learning_rate": 1.8061375177850774e-05, |
|
"loss": 1.1505, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 1.471872931833223, |
|
"grad_norm": 1.1746258735656738, |
|
"learning_rate": 1.7893240910424876e-05, |
|
"loss": 1.0708, |
|
"step": 2224 |
|
}, |
|
{ |
|
"epoch": 1.4745201853077432, |
|
"grad_norm": 1.2071187496185303, |
|
"learning_rate": 1.772572217652163e-05, |
|
"loss": 1.085, |
|
"step": 2228 |
|
}, |
|
{ |
|
"epoch": 1.4771674387822635, |
|
"grad_norm": 1.321071743965149, |
|
"learning_rate": 1.755882218772018e-05, |
|
"loss": 1.1952, |
|
"step": 2232 |
|
}, |
|
{ |
|
"epoch": 1.4798146922567836, |
|
"grad_norm": 1.1357455253601074, |
|
"learning_rate": 1.7392544143737355e-05, |
|
"loss": 0.9572, |
|
"step": 2236 |
|
}, |
|
{ |
|
"epoch": 1.4824619457313037, |
|
"grad_norm": 1.1780970096588135, |
|
"learning_rate": 1.7226891232366394e-05, |
|
"loss": 0.9885, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 1.4851091992058238, |
|
"grad_norm": 1.017472505569458, |
|
"learning_rate": 1.7061866629415862e-05, |
|
"loss": 1.0184, |
|
"step": 2244 |
|
}, |
|
{ |
|
"epoch": 1.4877564526803442, |
|
"grad_norm": 1.0961604118347168, |
|
"learning_rate": 1.6897473498648765e-05, |
|
"loss": 1.0232, |
|
"step": 2248 |
|
}, |
|
{ |
|
"epoch": 1.4904037061548643, |
|
"grad_norm": 1.187002182006836, |
|
"learning_rate": 1.673371499172174e-05, |
|
"loss": 0.9823, |
|
"step": 2252 |
|
}, |
|
{ |
|
"epoch": 1.4930509596293846, |
|
"grad_norm": 1.1367725133895874, |
|
"learning_rate": 1.6570594248124875e-05, |
|
"loss": 1.0288, |
|
"step": 2256 |
|
}, |
|
{ |
|
"epoch": 1.4956982131039047, |
|
"grad_norm": 1.129102110862732, |
|
"learning_rate": 1.640811439512136e-05, |
|
"loss": 1.0688, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 1.4983454665784248, |
|
"grad_norm": 1.1886552572250366, |
|
"learning_rate": 1.6246278547687604e-05, |
|
"loss": 1.0209, |
|
"step": 2264 |
|
}, |
|
{ |
|
"epoch": 1.500992720052945, |
|
"grad_norm": 1.2786222696304321, |
|
"learning_rate": 1.6085089808453408e-05, |
|
"loss": 1.1101, |
|
"step": 2268 |
|
}, |
|
{ |
|
"epoch": 1.5036399735274653, |
|
"grad_norm": 1.2403247356414795, |
|
"learning_rate": 1.592455126764264e-05, |
|
"loss": 1.0919, |
|
"step": 2272 |
|
}, |
|
{ |
|
"epoch": 1.5062872270019856, |
|
"grad_norm": 1.1364173889160156, |
|
"learning_rate": 1.5764666003013905e-05, |
|
"loss": 1.0854, |
|
"step": 2276 |
|
}, |
|
{ |
|
"epoch": 1.5089344804765057, |
|
"grad_norm": 1.0539426803588867, |
|
"learning_rate": 1.560543707980152e-05, |
|
"loss": 1.014, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 1.5115817339510258, |
|
"grad_norm": 1.2470543384552002, |
|
"learning_rate": 1.544686755065677e-05, |
|
"loss": 1.0845, |
|
"step": 2284 |
|
}, |
|
{ |
|
"epoch": 1.514228987425546, |
|
"grad_norm": 1.3111423254013062, |
|
"learning_rate": 1.5288960455589447e-05, |
|
"loss": 1.1363, |
|
"step": 2288 |
|
}, |
|
{ |
|
"epoch": 1.5168762409000662, |
|
"grad_norm": 1.076616883277893, |
|
"learning_rate": 1.5131718821909435e-05, |
|
"loss": 1.0104, |
|
"step": 2292 |
|
}, |
|
{ |
|
"epoch": 1.5195234943745863, |
|
"grad_norm": 1.082895040512085, |
|
"learning_rate": 1.4975145664168839e-05, |
|
"loss": 1.0468, |
|
"step": 2296 |
|
}, |
|
{ |
|
"epoch": 1.5221707478491067, |
|
"grad_norm": 1.2314468622207642, |
|
"learning_rate": 1.4819243984104015e-05, |
|
"loss": 1.0802, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.5248180013236268, |
|
"grad_norm": 1.7986695766448975, |
|
"learning_rate": 1.4664016770578182e-05, |
|
"loss": 1.0324, |
|
"step": 2304 |
|
}, |
|
{ |
|
"epoch": 1.5274652547981469, |
|
"grad_norm": 1.2059293985366821, |
|
"learning_rate": 1.4509466999523985e-05, |
|
"loss": 1.0119, |
|
"step": 2308 |
|
}, |
|
{ |
|
"epoch": 1.530112508272667, |
|
"grad_norm": 1.1547520160675049, |
|
"learning_rate": 1.4355597633886575e-05, |
|
"loss": 1.0348, |
|
"step": 2312 |
|
}, |
|
{ |
|
"epoch": 1.5327597617471873, |
|
"grad_norm": 1.1303229331970215, |
|
"learning_rate": 1.4202411623566685e-05, |
|
"loss": 0.9453, |
|
"step": 2316 |
|
}, |
|
{ |
|
"epoch": 1.5354070152217076, |
|
"grad_norm": 1.3329232931137085, |
|
"learning_rate": 1.4049911905364128e-05, |
|
"loss": 1.1958, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 1.5380542686962277, |
|
"grad_norm": 1.2855108976364136, |
|
"learning_rate": 1.3898101402921516e-05, |
|
"loss": 1.1197, |
|
"step": 2324 |
|
}, |
|
{ |
|
"epoch": 1.5407015221707479, |
|
"grad_norm": 1.1098500490188599, |
|
"learning_rate": 1.3746983026668198e-05, |
|
"loss": 1.0392, |
|
"step": 2328 |
|
}, |
|
{ |
|
"epoch": 1.543348775645268, |
|
"grad_norm": 1.232391119003296, |
|
"learning_rate": 1.359655967376442e-05, |
|
"loss": 1.0877, |
|
"step": 2332 |
|
}, |
|
{ |
|
"epoch": 1.545996029119788, |
|
"grad_norm": 1.2778176069259644, |
|
"learning_rate": 1.3446834228045812e-05, |
|
"loss": 1.0646, |
|
"step": 2336 |
|
}, |
|
{ |
|
"epoch": 1.5486432825943084, |
|
"grad_norm": 1.0760436058044434, |
|
"learning_rate": 1.3297809559968133e-05, |
|
"loss": 1.0476, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 1.5512905360688287, |
|
"grad_norm": 1.0470277070999146, |
|
"learning_rate": 1.3149488526552201e-05, |
|
"loss": 0.9706, |
|
"step": 2344 |
|
}, |
|
{ |
|
"epoch": 1.5539377895433488, |
|
"grad_norm": 1.3804305791854858, |
|
"learning_rate": 1.3001873971329121e-05, |
|
"loss": 1.0437, |
|
"step": 2348 |
|
}, |
|
{ |
|
"epoch": 1.556585043017869, |
|
"grad_norm": 1.1428264379501343, |
|
"learning_rate": 1.2854968724285754e-05, |
|
"loss": 1.0923, |
|
"step": 2352 |
|
}, |
|
{ |
|
"epoch": 1.559232296492389, |
|
"grad_norm": 1.1798884868621826, |
|
"learning_rate": 1.270877560181054e-05, |
|
"loss": 1.1306, |
|
"step": 2356 |
|
}, |
|
{ |
|
"epoch": 1.5618795499669094, |
|
"grad_norm": 1.1382559537887573, |
|
"learning_rate": 1.2563297406639395e-05, |
|
"loss": 1.1029, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 1.5645268034414295, |
|
"grad_norm": 1.0915166139602661, |
|
"learning_rate": 1.2418536927802094e-05, |
|
"loss": 0.9779, |
|
"step": 2364 |
|
}, |
|
{ |
|
"epoch": 1.5671740569159498, |
|
"grad_norm": 1.1595373153686523, |
|
"learning_rate": 1.2274496940568664e-05, |
|
"loss": 1.1744, |
|
"step": 2368 |
|
}, |
|
{ |
|
"epoch": 1.56982131039047, |
|
"grad_norm": 1.1752400398254395, |
|
"learning_rate": 1.213118020639633e-05, |
|
"loss": 1.0246, |
|
"step": 2372 |
|
}, |
|
{ |
|
"epoch": 1.57246856386499, |
|
"grad_norm": 1.064510464668274, |
|
"learning_rate": 1.1988589472876438e-05, |
|
"loss": 1.1571, |
|
"step": 2376 |
|
}, |
|
{ |
|
"epoch": 1.5751158173395101, |
|
"grad_norm": 1.2771798372268677, |
|
"learning_rate": 1.184672747368189e-05, |
|
"loss": 1.0656, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 1.5777630708140304, |
|
"grad_norm": 1.2218413352966309, |
|
"learning_rate": 1.1705596928514645e-05, |
|
"loss": 1.0626, |
|
"step": 2384 |
|
}, |
|
{ |
|
"epoch": 1.5804103242885508, |
|
"grad_norm": 1.0653800964355469, |
|
"learning_rate": 1.1565200543053623e-05, |
|
"loss": 1.0626, |
|
"step": 2388 |
|
}, |
|
{ |
|
"epoch": 1.5830575777630709, |
|
"grad_norm": 1.2271225452423096, |
|
"learning_rate": 1.1425541008902851e-05, |
|
"loss": 1.1017, |
|
"step": 2392 |
|
}, |
|
{ |
|
"epoch": 1.585704831237591, |
|
"grad_norm": 1.1287221908569336, |
|
"learning_rate": 1.128662100353985e-05, |
|
"loss": 0.9612, |
|
"step": 2396 |
|
}, |
|
{ |
|
"epoch": 1.588352084712111, |
|
"grad_norm": 1.1722044944763184, |
|
"learning_rate": 1.1148443190264246e-05, |
|
"loss": 0.9906, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.5909993381866314, |
|
"grad_norm": 1.3099933862686157, |
|
"learning_rate": 1.1011010218146777e-05, |
|
"loss": 1.0637, |
|
"step": 2404 |
|
}, |
|
{ |
|
"epoch": 1.5936465916611515, |
|
"grad_norm": 1.1737853288650513, |
|
"learning_rate": 1.0874324721978501e-05, |
|
"loss": 1.082, |
|
"step": 2408 |
|
}, |
|
{ |
|
"epoch": 1.5962938451356719, |
|
"grad_norm": 1.258298635482788, |
|
"learning_rate": 1.0738389322220276e-05, |
|
"loss": 1.0151, |
|
"step": 2412 |
|
}, |
|
{ |
|
"epoch": 1.598941098610192, |
|
"grad_norm": 1.198495864868164, |
|
"learning_rate": 1.0603206624952482e-05, |
|
"loss": 1.0566, |
|
"step": 2416 |
|
}, |
|
{ |
|
"epoch": 1.601588352084712, |
|
"grad_norm": 1.1976563930511475, |
|
"learning_rate": 1.0468779221825103e-05, |
|
"loss": 1.1149, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 1.6042356055592322, |
|
"grad_norm": 1.0899832248687744, |
|
"learning_rate": 1.0335109690008055e-05, |
|
"loss": 1.0187, |
|
"step": 2424 |
|
}, |
|
{ |
|
"epoch": 1.6068828590337525, |
|
"grad_norm": 1.3058562278747559, |
|
"learning_rate": 1.0202200592141703e-05, |
|
"loss": 1.1494, |
|
"step": 2428 |
|
}, |
|
{ |
|
"epoch": 1.6095301125082728, |
|
"grad_norm": 1.304995059967041, |
|
"learning_rate": 1.0070054476287849e-05, |
|
"loss": 1.1067, |
|
"step": 2432 |
|
}, |
|
{ |
|
"epoch": 1.612177365982793, |
|
"grad_norm": 1.2065619230270386, |
|
"learning_rate": 9.938673875880755e-06, |
|
"loss": 1.03, |
|
"step": 2436 |
|
}, |
|
{ |
|
"epoch": 1.614824619457313, |
|
"grad_norm": 1.3018181324005127, |
|
"learning_rate": 9.808061309678634e-06, |
|
"loss": 1.1286, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 1.6174718729318331, |
|
"grad_norm": 1.257094144821167, |
|
"learning_rate": 9.678219281715412e-06, |
|
"loss": 1.2452, |
|
"step": 2444 |
|
}, |
|
{ |
|
"epoch": 1.6201191264063532, |
|
"grad_norm": 1.1389868259429932, |
|
"learning_rate": 9.549150281252633e-06, |
|
"loss": 1.1589, |
|
"step": 2448 |
|
}, |
|
{ |
|
"epoch": 1.6227663798808736, |
|
"grad_norm": 1.2208179235458374, |
|
"learning_rate": 9.420856782731774e-06, |
|
"loss": 1.0969, |
|
"step": 2452 |
|
}, |
|
{ |
|
"epoch": 1.625413633355394, |
|
"grad_norm": 1.2272435426712036, |
|
"learning_rate": 9.293341245726794e-06, |
|
"loss": 0.9552, |
|
"step": 2456 |
|
}, |
|
{ |
|
"epoch": 1.628060886829914, |
|
"grad_norm": 1.1400785446166992, |
|
"learning_rate": 9.16660611489702e-06, |
|
"loss": 0.9583, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 1.6307081403044341, |
|
"grad_norm": 1.1277272701263428, |
|
"learning_rate": 9.040653819940259e-06, |
|
"loss": 1.0511, |
|
"step": 2464 |
|
}, |
|
{ |
|
"epoch": 1.6333553937789542, |
|
"grad_norm": 1.1486189365386963, |
|
"learning_rate": 8.915486775546173e-06, |
|
"loss": 0.9686, |
|
"step": 2468 |
|
}, |
|
{ |
|
"epoch": 1.6360026472534746, |
|
"grad_norm": 1.1076239347457886, |
|
"learning_rate": 8.791107381350027e-06, |
|
"loss": 0.9773, |
|
"step": 2472 |
|
}, |
|
{ |
|
"epoch": 1.6386499007279947, |
|
"grad_norm": 1.0638751983642578, |
|
"learning_rate": 8.6675180218867e-06, |
|
"loss": 1.0176, |
|
"step": 2476 |
|
}, |
|
{ |
|
"epoch": 1.641297154202515, |
|
"grad_norm": 1.201035499572754, |
|
"learning_rate": 8.544721066544964e-06, |
|
"loss": 1.0009, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 1.643944407677035, |
|
"grad_norm": 1.2673206329345703, |
|
"learning_rate": 8.422718869522006e-06, |
|
"loss": 1.1548, |
|
"step": 2484 |
|
}, |
|
{ |
|
"epoch": 1.6465916611515552, |
|
"grad_norm": 1.1903181076049805, |
|
"learning_rate": 8.30151376977834e-06, |
|
"loss": 1.0678, |
|
"step": 2488 |
|
}, |
|
{ |
|
"epoch": 1.6492389146260753, |
|
"grad_norm": 1.1597754955291748, |
|
"learning_rate": 8.181108090993001e-06, |
|
"loss": 1.0756, |
|
"step": 2492 |
|
}, |
|
{ |
|
"epoch": 1.6518861681005956, |
|
"grad_norm": 1.142747163772583, |
|
"learning_rate": 8.061504141518888e-06, |
|
"loss": 1.1026, |
|
"step": 2496 |
|
}, |
|
{ |
|
"epoch": 1.654533421575116, |
|
"grad_norm": 1.187888741493225, |
|
"learning_rate": 7.942704214338648e-06, |
|
"loss": 1.0138, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.657180675049636, |
|
"grad_norm": 1.1005282402038574, |
|
"learning_rate": 7.824710587020596e-06, |
|
"loss": 1.015, |
|
"step": 2504 |
|
}, |
|
{ |
|
"epoch": 1.6598279285241562, |
|
"grad_norm": 1.2265509366989136, |
|
"learning_rate": 7.707525521675097e-06, |
|
"loss": 1.3109, |
|
"step": 2508 |
|
}, |
|
{ |
|
"epoch": 1.6624751819986763, |
|
"grad_norm": 1.1046435832977295, |
|
"learning_rate": 7.591151264911239e-06, |
|
"loss": 1.0726, |
|
"step": 2512 |
|
}, |
|
{ |
|
"epoch": 1.6651224354731966, |
|
"grad_norm": 1.1124870777130127, |
|
"learning_rate": 7.475590047793712e-06, |
|
"loss": 1.0319, |
|
"step": 2516 |
|
}, |
|
{ |
|
"epoch": 1.6677696889477167, |
|
"grad_norm": 1.0768115520477295, |
|
"learning_rate": 7.360844085800023e-06, |
|
"loss": 0.9718, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 1.670416942422237, |
|
"grad_norm": 1.1033765077590942, |
|
"learning_rate": 7.246915578778046e-06, |
|
"loss": 0.9838, |
|
"step": 2524 |
|
}, |
|
{ |
|
"epoch": 1.6730641958967571, |
|
"grad_norm": 1.1125131845474243, |
|
"learning_rate": 7.133806710903884e-06, |
|
"loss": 0.9366, |
|
"step": 2528 |
|
}, |
|
{ |
|
"epoch": 1.6757114493712773, |
|
"grad_norm": 1.0644124746322632, |
|
"learning_rate": 7.0215196506399515e-06, |
|
"loss": 0.9442, |
|
"step": 2532 |
|
}, |
|
{ |
|
"epoch": 1.6783587028457974, |
|
"grad_norm": 1.4144614934921265, |
|
"learning_rate": 6.910056550693356e-06, |
|
"loss": 1.0511, |
|
"step": 2536 |
|
}, |
|
{ |
|
"epoch": 1.6810059563203177, |
|
"grad_norm": 1.1880645751953125, |
|
"learning_rate": 6.799419547974739e-06, |
|
"loss": 1.069, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 1.683653209794838, |
|
"grad_norm": 1.2131253480911255, |
|
"learning_rate": 6.6896107635572414e-06, |
|
"loss": 1.11, |
|
"step": 2544 |
|
}, |
|
{ |
|
"epoch": 1.6863004632693581, |
|
"grad_norm": 1.1012145280838013, |
|
"learning_rate": 6.580632302635831e-06, |
|
"loss": 1.0216, |
|
"step": 2548 |
|
}, |
|
{ |
|
"epoch": 1.6889477167438782, |
|
"grad_norm": 1.4158655405044556, |
|
"learning_rate": 6.472486254486954e-06, |
|
"loss": 0.989, |
|
"step": 2552 |
|
}, |
|
{ |
|
"epoch": 1.6915949702183983, |
|
"grad_norm": 1.168895959854126, |
|
"learning_rate": 6.36517469242851e-06, |
|
"loss": 1.1558, |
|
"step": 2556 |
|
}, |
|
{ |
|
"epoch": 1.6942422236929184, |
|
"grad_norm": 1.180389642715454, |
|
"learning_rate": 6.258699673780083e-06, |
|
"loss": 1.0815, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 1.6968894771674388, |
|
"grad_norm": 1.186112642288208, |
|
"learning_rate": 6.15306323982347e-06, |
|
"loss": 1.0766, |
|
"step": 2564 |
|
}, |
|
{ |
|
"epoch": 1.699536730641959, |
|
"grad_norm": 1.3972220420837402, |
|
"learning_rate": 6.04826741576357e-06, |
|
"loss": 0.933, |
|
"step": 2568 |
|
}, |
|
{ |
|
"epoch": 1.7021839841164792, |
|
"grad_norm": 1.0709800720214844, |
|
"learning_rate": 5.944314210689611e-06, |
|
"loss": 0.9295, |
|
"step": 2572 |
|
}, |
|
{ |
|
"epoch": 1.7048312375909993, |
|
"grad_norm": 1.131684422492981, |
|
"learning_rate": 5.841205617536516e-06, |
|
"loss": 1.0127, |
|
"step": 2576 |
|
}, |
|
{ |
|
"epoch": 1.7074784910655194, |
|
"grad_norm": 1.1289499998092651, |
|
"learning_rate": 5.738943613046821e-06, |
|
"loss": 1.0566, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 1.7101257445400397, |
|
"grad_norm": 1.0850427150726318, |
|
"learning_rate": 5.637530157732673e-06, |
|
"loss": 0.929, |
|
"step": 2584 |
|
}, |
|
{ |
|
"epoch": 1.7127729980145598, |
|
"grad_norm": 1.3074991703033447, |
|
"learning_rate": 5.536967195838333e-06, |
|
"loss": 1.1549, |
|
"step": 2588 |
|
}, |
|
{ |
|
"epoch": 1.7154202514890802, |
|
"grad_norm": 1.286634922027588, |
|
"learning_rate": 5.437256655302814e-06, |
|
"loss": 1.0361, |
|
"step": 2592 |
|
}, |
|
{ |
|
"epoch": 1.7180675049636003, |
|
"grad_norm": 1.098363995552063, |
|
"learning_rate": 5.338400447723008e-06, |
|
"loss": 1.0157, |
|
"step": 2596 |
|
}, |
|
{ |
|
"epoch": 1.7207147584381204, |
|
"grad_norm": 1.2663805484771729, |
|
"learning_rate": 5.240400468316975e-06, |
|
"loss": 1.0805, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.7233620119126405, |
|
"grad_norm": 1.2380725145339966, |
|
"learning_rate": 5.143258595887607e-06, |
|
"loss": 1.0504, |
|
"step": 2604 |
|
}, |
|
{ |
|
"epoch": 1.7260092653871608, |
|
"grad_norm": 1.534725546836853, |
|
"learning_rate": 5.046976692786665e-06, |
|
"loss": 1.0683, |
|
"step": 2608 |
|
}, |
|
{ |
|
"epoch": 1.7286565188616811, |
|
"grad_norm": 1.2903854846954346, |
|
"learning_rate": 4.951556604879048e-06, |
|
"loss": 1.1924, |
|
"step": 2612 |
|
}, |
|
{ |
|
"epoch": 1.7313037723362013, |
|
"grad_norm": 1.378965139389038, |
|
"learning_rate": 4.857000161507353e-06, |
|
"loss": 1.1261, |
|
"step": 2616 |
|
}, |
|
{ |
|
"epoch": 1.7339510258107214, |
|
"grad_norm": 1.3099424839019775, |
|
"learning_rate": 4.763309175456876e-06, |
|
"loss": 1.1385, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 1.7365982792852415, |
|
"grad_norm": 1.1315497159957886, |
|
"learning_rate": 4.67048544292083e-06, |
|
"loss": 1.0022, |
|
"step": 2624 |
|
}, |
|
{ |
|
"epoch": 1.7392455327597618, |
|
"grad_norm": 1.0618172883987427, |
|
"learning_rate": 4.5785307434659195e-06, |
|
"loss": 0.933, |
|
"step": 2628 |
|
}, |
|
{ |
|
"epoch": 1.741892786234282, |
|
"grad_norm": 1.1535784006118774, |
|
"learning_rate": 4.487446839998194e-06, |
|
"loss": 1.0693, |
|
"step": 2632 |
|
}, |
|
{ |
|
"epoch": 1.7445400397088022, |
|
"grad_norm": 1.208883285522461, |
|
"learning_rate": 4.397235478729262e-06, |
|
"loss": 1.0487, |
|
"step": 2636 |
|
}, |
|
{ |
|
"epoch": 1.7471872931833223, |
|
"grad_norm": 1.079362392425537, |
|
"learning_rate": 4.307898389142867e-06, |
|
"loss": 1.0225, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 1.7498345466578424, |
|
"grad_norm": 1.1642612218856812, |
|
"learning_rate": 4.21943728396163e-06, |
|
"loss": 1.0915, |
|
"step": 2644 |
|
}, |
|
{ |
|
"epoch": 1.7524818001323625, |
|
"grad_norm": 1.202144742012024, |
|
"learning_rate": 4.1318538591143204e-06, |
|
"loss": 0.9903, |
|
"step": 2648 |
|
}, |
|
{ |
|
"epoch": 1.7551290536068829, |
|
"grad_norm": 1.182325839996338, |
|
"learning_rate": 4.045149793703257e-06, |
|
"loss": 1.0321, |
|
"step": 2652 |
|
}, |
|
{ |
|
"epoch": 1.7577763070814032, |
|
"grad_norm": 1.1768420934677124, |
|
"learning_rate": 3.959326749972159e-06, |
|
"loss": 1.0065, |
|
"step": 2656 |
|
}, |
|
{ |
|
"epoch": 1.7604235605559233, |
|
"grad_norm": 1.1037213802337646, |
|
"learning_rate": 3.8743863732742855e-06, |
|
"loss": 1.0145, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 1.7630708140304434, |
|
"grad_norm": 1.0442618131637573, |
|
"learning_rate": 3.790330292040878e-06, |
|
"loss": 0.9401, |
|
"step": 2664 |
|
}, |
|
{ |
|
"epoch": 1.7657180675049635, |
|
"grad_norm": 1.2205618619918823, |
|
"learning_rate": 3.7071601177499193e-06, |
|
"loss": 1.0445, |
|
"step": 2668 |
|
}, |
|
{ |
|
"epoch": 1.7683653209794836, |
|
"grad_norm": 0.982466995716095, |
|
"learning_rate": 3.6248774448952695e-06, |
|
"loss": 0.9302, |
|
"step": 2672 |
|
}, |
|
{ |
|
"epoch": 1.771012574454004, |
|
"grad_norm": 1.2503985166549683, |
|
"learning_rate": 3.5434838509560974e-06, |
|
"loss": 0.9465, |
|
"step": 2676 |
|
}, |
|
{ |
|
"epoch": 1.7736598279285243, |
|
"grad_norm": 1.2538197040557861, |
|
"learning_rate": 3.4629808963666355e-06, |
|
"loss": 1.1634, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 1.7763070814030444, |
|
"grad_norm": 1.1053706407546997, |
|
"learning_rate": 3.3833701244862347e-06, |
|
"loss": 0.9964, |
|
"step": 2684 |
|
}, |
|
{ |
|
"epoch": 1.7789543348775645, |
|
"grad_norm": 1.2324868440628052, |
|
"learning_rate": 3.304653061569807e-06, |
|
"loss": 1.009, |
|
"step": 2688 |
|
}, |
|
{ |
|
"epoch": 1.7816015883520846, |
|
"grad_norm": 1.1064050197601318, |
|
"learning_rate": 3.226831216738568e-06, |
|
"loss": 0.9975, |
|
"step": 2692 |
|
}, |
|
{ |
|
"epoch": 1.784248841826605, |
|
"grad_norm": 1.1996777057647705, |
|
"learning_rate": 3.149906081951076e-06, |
|
"loss": 1.1181, |
|
"step": 2696 |
|
}, |
|
{ |
|
"epoch": 1.786896095301125, |
|
"grad_norm": 1.0701042413711548, |
|
"learning_rate": 3.0738791319746606e-06, |
|
"loss": 0.9735, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.7895433487756454, |
|
"grad_norm": 1.426613211631775, |
|
"learning_rate": 2.9987518243571266e-06, |
|
"loss": 1.0882, |
|
"step": 2704 |
|
}, |
|
{ |
|
"epoch": 1.7921906022501655, |
|
"grad_norm": 1.1900283098220825, |
|
"learning_rate": 2.924525599398831e-06, |
|
"loss": 1.0896, |
|
"step": 2708 |
|
}, |
|
{ |
|
"epoch": 1.7948378557246856, |
|
"grad_norm": 1.203924536705017, |
|
"learning_rate": 2.8512018801250428e-06, |
|
"loss": 1.0041, |
|
"step": 2712 |
|
}, |
|
{ |
|
"epoch": 1.7974851091992057, |
|
"grad_norm": 1.1849395036697388, |
|
"learning_rate": 2.7787820722586844e-06, |
|
"loss": 1.018, |
|
"step": 2716 |
|
}, |
|
{ |
|
"epoch": 1.800132362673726, |
|
"grad_norm": 1.3121761083602905, |
|
"learning_rate": 2.707267564193383e-06, |
|
"loss": 1.0887, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 1.8027796161482463, |
|
"grad_norm": 1.0863194465637207, |
|
"learning_rate": 2.636659726966817e-06, |
|
"loss": 0.9601, |
|
"step": 2724 |
|
}, |
|
{ |
|
"epoch": 1.8054268696227664, |
|
"grad_norm": 1.2052465677261353, |
|
"learning_rate": 2.5669599142344958e-06, |
|
"loss": 1.1252, |
|
"step": 2728 |
|
}, |
|
{ |
|
"epoch": 1.8080741230972865, |
|
"grad_norm": 1.2324072122573853, |
|
"learning_rate": 2.4981694622437545e-06, |
|
"loss": 1.0962, |
|
"step": 2732 |
|
}, |
|
{ |
|
"epoch": 1.8107213765718067, |
|
"grad_norm": 1.1981109380722046, |
|
"learning_rate": 2.4302896898081516e-06, |
|
"loss": 1.1382, |
|
"step": 2736 |
|
}, |
|
{ |
|
"epoch": 1.813368630046327, |
|
"grad_norm": 1.0790292024612427, |
|
"learning_rate": 2.3633218982821724e-06, |
|
"loss": 1.0246, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 1.816015883520847, |
|
"grad_norm": 1.188328504562378, |
|
"learning_rate": 2.2972673715363268e-06, |
|
"loss": 1.1037, |
|
"step": 2744 |
|
}, |
|
{ |
|
"epoch": 1.8186631369953674, |
|
"grad_norm": 2.650550365447998, |
|
"learning_rate": 2.232127375932491e-06, |
|
"loss": 0.9985, |
|
"step": 2748 |
|
}, |
|
{ |
|
"epoch": 1.8213103904698875, |
|
"grad_norm": 1.209547758102417, |
|
"learning_rate": 2.1679031602996168e-06, |
|
"loss": 1.0379, |
|
"step": 2752 |
|
}, |
|
{ |
|
"epoch": 1.8239576439444076, |
|
"grad_norm": 1.2373130321502686, |
|
"learning_rate": 2.104595955909844e-06, |
|
"loss": 1.1138, |
|
"step": 2756 |
|
}, |
|
{ |
|
"epoch": 1.8266048974189277, |
|
"grad_norm": 1.1303315162658691, |
|
"learning_rate": 2.042206976454869e-06, |
|
"loss": 1.0872, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 1.829252150893448, |
|
"grad_norm": 1.1631232500076294, |
|
"learning_rate": 1.980737418022649e-06, |
|
"loss": 0.9993, |
|
"step": 2764 |
|
}, |
|
{ |
|
"epoch": 1.8318994043679684, |
|
"grad_norm": 0.9920935034751892, |
|
"learning_rate": 1.9201884590745122e-06, |
|
"loss": 0.9902, |
|
"step": 2768 |
|
}, |
|
{ |
|
"epoch": 1.8345466578424885, |
|
"grad_norm": 1.1404036283493042, |
|
"learning_rate": 1.8605612604225387e-06, |
|
"loss": 0.9403, |
|
"step": 2772 |
|
}, |
|
{ |
|
"epoch": 1.8371939113170086, |
|
"grad_norm": 1.3009891510009766, |
|
"learning_rate": 1.8018569652073381e-06, |
|
"loss": 1.065, |
|
"step": 2776 |
|
}, |
|
{ |
|
"epoch": 1.8398411647915287, |
|
"grad_norm": 1.0856890678405762, |
|
"learning_rate": 1.7440766988760793e-06, |
|
"loss": 1.0082, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 1.8424884182660488, |
|
"grad_norm": 1.2409597635269165, |
|
"learning_rate": 1.6872215691609684e-06, |
|
"loss": 1.2227, |
|
"step": 2784 |
|
}, |
|
{ |
|
"epoch": 1.8451356717405691, |
|
"grad_norm": 1.229095458984375, |
|
"learning_rate": 1.631292666057982e-06, |
|
"loss": 1.1196, |
|
"step": 2788 |
|
}, |
|
{ |
|
"epoch": 1.8477829252150895, |
|
"grad_norm": 1.1981017589569092, |
|
"learning_rate": 1.5762910618059789e-06, |
|
"loss": 1.1182, |
|
"step": 2792 |
|
}, |
|
{ |
|
"epoch": 1.8504301786896096, |
|
"grad_norm": 1.2496317625045776, |
|
"learning_rate": 1.5222178108661444e-06, |
|
"loss": 1.011, |
|
"step": 2796 |
|
}, |
|
{ |
|
"epoch": 1.8530774321641297, |
|
"grad_norm": 1.3405871391296387, |
|
"learning_rate": 1.469073949901778e-06, |
|
"loss": 0.9571, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.8557246856386498, |
|
"grad_norm": 1.1392794847488403, |
|
"learning_rate": 1.4168604977583989e-06, |
|
"loss": 0.9235, |
|
"step": 2804 |
|
}, |
|
{ |
|
"epoch": 1.8583719391131701, |
|
"grad_norm": 1.3417925834655762, |
|
"learning_rate": 1.3655784554442385e-06, |
|
"loss": 0.9861, |
|
"step": 2808 |
|
}, |
|
{ |
|
"epoch": 1.8610191925876902, |
|
"grad_norm": 1.2177116870880127, |
|
"learning_rate": 1.3152288061110518e-06, |
|
"loss": 1.0414, |
|
"step": 2812 |
|
}, |
|
{ |
|
"epoch": 1.8636664460622105, |
|
"grad_norm": 1.18758225440979, |
|
"learning_rate": 1.2658125150352361e-06, |
|
"loss": 1.0958, |
|
"step": 2816 |
|
}, |
|
{ |
|
"epoch": 1.8663136995367307, |
|
"grad_norm": 1.068544864654541, |
|
"learning_rate": 1.2173305295993477e-06, |
|
"loss": 0.8817, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 1.8689609530112508, |
|
"grad_norm": 1.0975282192230225, |
|
"learning_rate": 1.169783779273953e-06, |
|
"loss": 0.9843, |
|
"step": 2824 |
|
}, |
|
{ |
|
"epoch": 1.8716082064857709, |
|
"grad_norm": 1.1519986391067505, |
|
"learning_rate": 1.1231731755997954e-06, |
|
"loss": 1.1748, |
|
"step": 2828 |
|
}, |
|
{ |
|
"epoch": 1.8742554599602912, |
|
"grad_norm": 1.3243839740753174, |
|
"learning_rate": 1.0774996121702908e-06, |
|
"loss": 1.0024, |
|
"step": 2832 |
|
}, |
|
{ |
|
"epoch": 1.8769027134348115, |
|
"grad_norm": 1.1130131483078003, |
|
"learning_rate": 1.0327639646144415e-06, |
|
"loss": 0.9669, |
|
"step": 2836 |
|
}, |
|
{ |
|
"epoch": 1.8795499669093316, |
|
"grad_norm": 1.2060186862945557, |
|
"learning_rate": 9.889670905800397e-07, |
|
"loss": 0.9385, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 1.8821972203838517, |
|
"grad_norm": 1.1549471616744995, |
|
"learning_rate": 9.461098297172011e-07, |
|
"loss": 0.9559, |
|
"step": 2844 |
|
}, |
|
{ |
|
"epoch": 1.8848444738583718, |
|
"grad_norm": 1.1581448316574097, |
|
"learning_rate": 9.041930036622903e-07, |
|
"loss": 1.069, |
|
"step": 2848 |
|
}, |
|
{ |
|
"epoch": 1.8874917273328922, |
|
"grad_norm": 1.1043188571929932, |
|
"learning_rate": 8.632174160221496e-07, |
|
"loss": 1.0042, |
|
"step": 2852 |
|
}, |
|
{ |
|
"epoch": 1.8901389808074123, |
|
"grad_norm": 1.1459840536117554, |
|
"learning_rate": 8.231838523587277e-07, |
|
"loss": 0.9267, |
|
"step": 2856 |
|
}, |
|
{ |
|
"epoch": 1.8927862342819326, |
|
"grad_norm": 1.2066096067428589, |
|
"learning_rate": 7.840930801739754e-07, |
|
"loss": 1.0465, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 1.8954334877564527, |
|
"grad_norm": 1.2505649328231812, |
|
"learning_rate": 7.459458488951632e-07, |
|
"loss": 1.0685, |
|
"step": 2864 |
|
}, |
|
{ |
|
"epoch": 1.8980807412309728, |
|
"grad_norm": 1.138899564743042, |
|
"learning_rate": 7.087428898604975e-07, |
|
"loss": 1.0052, |
|
"step": 2868 |
|
}, |
|
{ |
|
"epoch": 1.900727994705493, |
|
"grad_norm": 1.1179523468017578, |
|
"learning_rate": 6.724849163050995e-07, |
|
"loss": 0.9854, |
|
"step": 2872 |
|
}, |
|
{ |
|
"epoch": 1.9033752481800132, |
|
"grad_norm": 1.3499395847320557, |
|
"learning_rate": 6.37172623347354e-07, |
|
"loss": 1.0413, |
|
"step": 2876 |
|
}, |
|
{ |
|
"epoch": 1.9060225016545336, |
|
"grad_norm": 1.0739634037017822, |
|
"learning_rate": 6.02806687975549e-07, |
|
"loss": 1.1554, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 1.9086697551290537, |
|
"grad_norm": 1.0829598903656006, |
|
"learning_rate": 5.693877690349292e-07, |
|
"loss": 1.0416, |
|
"step": 2884 |
|
}, |
|
{ |
|
"epoch": 1.9113170086035738, |
|
"grad_norm": 1.0071786642074585, |
|
"learning_rate": 5.369165072150239e-07, |
|
"loss": 0.929, |
|
"step": 2888 |
|
}, |
|
{ |
|
"epoch": 1.913964262078094, |
|
"grad_norm": 1.1580030918121338, |
|
"learning_rate": 5.053935250374176e-07, |
|
"loss": 1.0629, |
|
"step": 2892 |
|
}, |
|
{ |
|
"epoch": 1.916611515552614, |
|
"grad_norm": 1.2572953701019287, |
|
"learning_rate": 4.7481942684378113e-07, |
|
"loss": 1.1105, |
|
"step": 2896 |
|
}, |
|
{ |
|
"epoch": 1.9192587690271343, |
|
"grad_norm": 1.1861546039581299, |
|
"learning_rate": 4.451947987842764e-07, |
|
"loss": 1.0511, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 1.9219060225016547, |
|
"grad_norm": 1.1360516548156738, |
|
"learning_rate": 4.165202088063425e-07, |
|
"loss": 1.0623, |
|
"step": 2904 |
|
}, |
|
{ |
|
"epoch": 1.9245532759761748, |
|
"grad_norm": 1.1186720132827759, |
|
"learning_rate": 3.8879620664381e-07, |
|
"loss": 0.9999, |
|
"step": 2908 |
|
}, |
|
{ |
|
"epoch": 1.9272005294506949, |
|
"grad_norm": 1.2490679025650024, |
|
"learning_rate": 3.620233238063375e-07, |
|
"loss": 1.0442, |
|
"step": 2912 |
|
}, |
|
{ |
|
"epoch": 1.929847782925215, |
|
"grad_norm": 1.309167504310608, |
|
"learning_rate": 3.362020735692417e-07, |
|
"loss": 1.1706, |
|
"step": 2916 |
|
}, |
|
{ |
|
"epoch": 1.9324950363997353, |
|
"grad_norm": 1.1864930391311646, |
|
"learning_rate": 3.1133295096364977e-07, |
|
"loss": 1.0731, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 1.9351422898742554, |
|
"grad_norm": 1.1746701002120972, |
|
"learning_rate": 2.87416432767007e-07, |
|
"loss": 1.0544, |
|
"step": 2924 |
|
}, |
|
{ |
|
"epoch": 1.9377895433487757, |
|
"grad_norm": 1.272407054901123, |
|
"learning_rate": 2.644529774939397e-07, |
|
"loss": 1.0909, |
|
"step": 2928 |
|
}, |
|
{ |
|
"epoch": 1.9404367968232958, |
|
"grad_norm": 1.1303869485855103, |
|
"learning_rate": 2.4244302538746766e-07, |
|
"loss": 0.9551, |
|
"step": 2932 |
|
}, |
|
{ |
|
"epoch": 1.943084050297816, |
|
"grad_norm": 1.0882586240768433, |
|
"learning_rate": 2.2138699841056655e-07, |
|
"loss": 0.9893, |
|
"step": 2936 |
|
}, |
|
{ |
|
"epoch": 1.945731303772336, |
|
"grad_norm": 1.2608906030654907, |
|
"learning_rate": 2.012853002380466e-07, |
|
"loss": 1.0569, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 1.9483785572468564, |
|
"grad_norm": 1.2106075286865234, |
|
"learning_rate": 1.8213831624887545e-07, |
|
"loss": 0.9922, |
|
"step": 2944 |
|
}, |
|
{ |
|
"epoch": 1.9510258107213767, |
|
"grad_norm": 1.1815046072006226, |
|
"learning_rate": 1.6394641351872297e-07, |
|
"loss": 1.0113, |
|
"step": 2948 |
|
}, |
|
{ |
|
"epoch": 1.9536730641958968, |
|
"grad_norm": 1.1953189373016357, |
|
"learning_rate": 1.4670994081297795e-07, |
|
"loss": 1.0361, |
|
"step": 2952 |
|
}, |
|
{ |
|
"epoch": 1.956320317670417, |
|
"grad_norm": 1.0204826593399048, |
|
"learning_rate": 1.3042922858002015e-07, |
|
"loss": 0.9583, |
|
"step": 2956 |
|
}, |
|
{ |
|
"epoch": 1.958967571144937, |
|
"grad_norm": 1.1778640747070312, |
|
"learning_rate": 1.1510458894490871e-07, |
|
"loss": 1.0795, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 1.9616148246194574, |
|
"grad_norm": 1.1050951480865479, |
|
"learning_rate": 1.0073631570340358e-07, |
|
"loss": 0.947, |
|
"step": 2964 |
|
}, |
|
{ |
|
"epoch": 1.9642620780939775, |
|
"grad_norm": 1.4342139959335327, |
|
"learning_rate": 8.732468431630892e-08, |
|
"loss": 0.9858, |
|
"step": 2968 |
|
}, |
|
{ |
|
"epoch": 1.9669093315684978, |
|
"grad_norm": 1.3275805711746216, |
|
"learning_rate": 7.486995190420509e-08, |
|
"loss": 1.0232, |
|
"step": 2972 |
|
}, |
|
{ |
|
"epoch": 1.969556585043018, |
|
"grad_norm": 1.862630844116211, |
|
"learning_rate": 6.337235724254154e-08, |
|
"loss": 1.1036, |
|
"step": 2976 |
|
}, |
|
{ |
|
"epoch": 1.972203838517538, |
|
"grad_norm": 1.1249923706054688, |
|
"learning_rate": 5.2832120757007054e-08, |
|
"loss": 1.1517, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 1.974851091992058, |
|
"grad_norm": 1.4025081396102905, |
|
"learning_rate": 4.324944451934987e-08, |
|
"loss": 1.1827, |
|
"step": 2984 |
|
}, |
|
{ |
|
"epoch": 1.9774983454665784, |
|
"grad_norm": 1.2881486415863037, |
|
"learning_rate": 3.4624512243497386e-08, |
|
"loss": 0.9921, |
|
"step": 2988 |
|
}, |
|
{ |
|
"epoch": 1.9801455989410988, |
|
"grad_norm": 1.256659746170044, |
|
"learning_rate": 2.6957489281997926e-08, |
|
"loss": 1.0058, |
|
"step": 2992 |
|
}, |
|
{ |
|
"epoch": 1.9827928524156189, |
|
"grad_norm": 1.2083126306533813, |
|
"learning_rate": 2.0248522622906552e-08, |
|
"loss": 1.0364, |
|
"step": 2996 |
|
}, |
|
{ |
|
"epoch": 1.985440105890139, |
|
"grad_norm": 1.2300423383712769, |
|
"learning_rate": 1.4497740886920685e-08, |
|
"loss": 1.056, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.988087359364659, |
|
"grad_norm": 1.1946439743041992, |
|
"learning_rate": 9.70525432493763e-09, |
|
"loss": 1.1, |
|
"step": 3004 |
|
}, |
|
{ |
|
"epoch": 1.9907346128391792, |
|
"grad_norm": 1.1879000663757324, |
|
"learning_rate": 5.8711548159229305e-09, |
|
"loss": 0.9764, |
|
"step": 3008 |
|
}, |
|
{ |
|
"epoch": 1.9933818663136995, |
|
"grad_norm": 1.7793687582015991, |
|
"learning_rate": 2.9955158651839845e-09, |
|
"loss": 1.0218, |
|
"step": 3012 |
|
}, |
|
{ |
|
"epoch": 1.9960291197882198, |
|
"grad_norm": 1.2599058151245117, |
|
"learning_rate": 1.0783926029211966e-09, |
|
"loss": 1.0414, |
|
"step": 3016 |
|
}, |
|
{ |
|
"epoch": 1.99867637326274, |
|
"grad_norm": 1.2598057985305786, |
|
"learning_rate": 1.1982178318437066e-10, |
|
"loss": 1.1198, |
|
"step": 3020 |
|
} |
|
], |
|
"logging_steps": 4, |
|
"max_steps": 3022, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 1511, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 5.115408240900833e+17, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|