|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 4955, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0020181634712411706, |
|
"grad_norm": 61.64529800415039, |
|
"learning_rate": 2.0161290322580646e-06, |
|
"loss": 7.4781, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.004036326942482341, |
|
"grad_norm": 12.924609184265137, |
|
"learning_rate": 4.032258064516129e-06, |
|
"loss": 5.3628, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.006054490413723511, |
|
"grad_norm": 8.111140251159668, |
|
"learning_rate": 6.048387096774194e-06, |
|
"loss": 4.5188, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.008072653884964682, |
|
"grad_norm": 8.318406105041504, |
|
"learning_rate": 8.064516129032258e-06, |
|
"loss": 4.2211, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.010090817356205853, |
|
"grad_norm": 4.871032238006592, |
|
"learning_rate": 1.0080645161290323e-05, |
|
"loss": 3.844, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.012108980827447022, |
|
"grad_norm": 4.057001113891602, |
|
"learning_rate": 1.2096774193548388e-05, |
|
"loss": 3.5377, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.014127144298688193, |
|
"grad_norm": 3.9526875019073486, |
|
"learning_rate": 1.4112903225806454e-05, |
|
"loss": 3.203, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.016145307769929364, |
|
"grad_norm": 3.171088695526123, |
|
"learning_rate": 1.6129032258064517e-05, |
|
"loss": 3.0832, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.018163471241170535, |
|
"grad_norm": 3.63295841217041, |
|
"learning_rate": 1.8145161290322583e-05, |
|
"loss": 2.9575, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.020181634712411706, |
|
"grad_norm": 2.8197014331817627, |
|
"learning_rate": 2.0161290322580645e-05, |
|
"loss": 2.831, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.022199798183652877, |
|
"grad_norm": 3.540992259979248, |
|
"learning_rate": 2.217741935483871e-05, |
|
"loss": 2.6876, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.024217961654894045, |
|
"grad_norm": 2.642745018005371, |
|
"learning_rate": 2.4193548387096777e-05, |
|
"loss": 2.6302, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.026236125126135216, |
|
"grad_norm": 3.0355093479156494, |
|
"learning_rate": 2.620967741935484e-05, |
|
"loss": 2.5268, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.028254288597376387, |
|
"grad_norm": 2.7031023502349854, |
|
"learning_rate": 2.822580645161291e-05, |
|
"loss": 2.5202, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.030272452068617558, |
|
"grad_norm": 3.2656476497650146, |
|
"learning_rate": 3.024193548387097e-05, |
|
"loss": 2.3902, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.03229061553985873, |
|
"grad_norm": 2.836116075515747, |
|
"learning_rate": 3.2258064516129034e-05, |
|
"loss": 2.4072, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.034308779011099896, |
|
"grad_norm": 2.732985496520996, |
|
"learning_rate": 3.427419354838709e-05, |
|
"loss": 2.3159, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.03632694248234107, |
|
"grad_norm": 2.5948660373687744, |
|
"learning_rate": 3.6290322580645165e-05, |
|
"loss": 2.2847, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.03834510595358224, |
|
"grad_norm": 2.968618869781494, |
|
"learning_rate": 3.8306451612903224e-05, |
|
"loss": 2.3511, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.04036326942482341, |
|
"grad_norm": 4.173515319824219, |
|
"learning_rate": 4.032258064516129e-05, |
|
"loss": 2.341, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.04238143289606458, |
|
"grad_norm": 2.294019937515259, |
|
"learning_rate": 4.2338709677419356e-05, |
|
"loss": 2.2497, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.044399596367305755, |
|
"grad_norm": 2.3338685035705566, |
|
"learning_rate": 4.435483870967742e-05, |
|
"loss": 2.2591, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.04641775983854692, |
|
"grad_norm": 2.389831066131592, |
|
"learning_rate": 4.637096774193548e-05, |
|
"loss": 2.2448, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.04843592330978809, |
|
"grad_norm": 2.5201873779296875, |
|
"learning_rate": 4.8387096774193554e-05, |
|
"loss": 2.272, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.050454086781029264, |
|
"grad_norm": 3.078508138656616, |
|
"learning_rate": 5.040322580645161e-05, |
|
"loss": 2.1645, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.05247225025227043, |
|
"grad_norm": 2.0750768184661865, |
|
"learning_rate": 5.241935483870968e-05, |
|
"loss": 2.2286, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.054490413723511606, |
|
"grad_norm": 2.5442333221435547, |
|
"learning_rate": 5.443548387096774e-05, |
|
"loss": 2.2015, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.056508577194752774, |
|
"grad_norm": 2.4685232639312744, |
|
"learning_rate": 5.645161290322582e-05, |
|
"loss": 2.1876, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.05852674066599395, |
|
"grad_norm": 2.6516835689544678, |
|
"learning_rate": 5.8467741935483876e-05, |
|
"loss": 2.0738, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.060544904137235116, |
|
"grad_norm": 2.234955310821533, |
|
"learning_rate": 6.048387096774194e-05, |
|
"loss": 2.1074, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.06256306760847628, |
|
"grad_norm": 2.285836935043335, |
|
"learning_rate": 6.25e-05, |
|
"loss": 2.1229, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.06458123107971746, |
|
"grad_norm": 2.150893449783325, |
|
"learning_rate": 6.451612903225807e-05, |
|
"loss": 2.0795, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.06659939455095863, |
|
"grad_norm": 2.7134313583374023, |
|
"learning_rate": 6.653225806451613e-05, |
|
"loss": 2.0569, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.06861755802219979, |
|
"grad_norm": 2.2563529014587402, |
|
"learning_rate": 6.854838709677419e-05, |
|
"loss": 2.054, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.07063572149344097, |
|
"grad_norm": 2.307612419128418, |
|
"learning_rate": 7.056451612903226e-05, |
|
"loss": 2.2254, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.07265388496468214, |
|
"grad_norm": 2.4022858142852783, |
|
"learning_rate": 7.258064516129033e-05, |
|
"loss": 2.0501, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.07467204843592332, |
|
"grad_norm": 1.9709093570709229, |
|
"learning_rate": 7.45967741935484e-05, |
|
"loss": 2.0284, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.07669021190716448, |
|
"grad_norm": 1.8165977001190186, |
|
"learning_rate": 7.661290322580645e-05, |
|
"loss": 2.0161, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.07870837537840565, |
|
"grad_norm": 2.2313830852508545, |
|
"learning_rate": 7.862903225806451e-05, |
|
"loss": 2.0064, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.08072653884964683, |
|
"grad_norm": 4.013941764831543, |
|
"learning_rate": 8.064516129032258e-05, |
|
"loss": 2.0056, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.08274470232088799, |
|
"grad_norm": 2.2855305671691895, |
|
"learning_rate": 8.266129032258066e-05, |
|
"loss": 2.0044, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.08476286579212916, |
|
"grad_norm": 1.9798551797866821, |
|
"learning_rate": 8.467741935483871e-05, |
|
"loss": 1.9908, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.08678102926337034, |
|
"grad_norm": 2.0610132217407227, |
|
"learning_rate": 8.669354838709678e-05, |
|
"loss": 1.9845, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.08879919273461151, |
|
"grad_norm": 2.354846477508545, |
|
"learning_rate": 8.870967741935484e-05, |
|
"loss": 2.0008, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.09081735620585267, |
|
"grad_norm": 2.430293560028076, |
|
"learning_rate": 9.072580645161291e-05, |
|
"loss": 1.9855, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.09283551967709384, |
|
"grad_norm": 2.4229743480682373, |
|
"learning_rate": 9.274193548387096e-05, |
|
"loss": 1.935, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.09485368314833502, |
|
"grad_norm": 1.9692800045013428, |
|
"learning_rate": 9.475806451612904e-05, |
|
"loss": 2.1015, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.09687184661957618, |
|
"grad_norm": 2.103461503982544, |
|
"learning_rate": 9.677419354838711e-05, |
|
"loss": 2.0086, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.09889001009081735, |
|
"grad_norm": 1.9599565267562866, |
|
"learning_rate": 9.879032258064517e-05, |
|
"loss": 2.0889, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.10090817356205853, |
|
"grad_norm": 2.226715087890625, |
|
"learning_rate": 9.999995545373623e-05, |
|
"loss": 1.9485, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.1029263370332997, |
|
"grad_norm": 2.138767719268799, |
|
"learning_rate": 9.999945430918042e-05, |
|
"loss": 1.8391, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.10494450050454086, |
|
"grad_norm": 1.9570167064666748, |
|
"learning_rate": 9.999839634283869e-05, |
|
"loss": 2.004, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.10696266397578204, |
|
"grad_norm": 2.2599446773529053, |
|
"learning_rate": 9.999678156649317e-05, |
|
"loss": 2.0093, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.10898082744702321, |
|
"grad_norm": 1.9920490980148315, |
|
"learning_rate": 9.999460999812691e-05, |
|
"loss": 1.8934, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.11099899091826437, |
|
"grad_norm": 1.9292041063308716, |
|
"learning_rate": 9.999188166192368e-05, |
|
"loss": 1.8937, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.11301715438950555, |
|
"grad_norm": 1.9816484451293945, |
|
"learning_rate": 9.998859658826777e-05, |
|
"loss": 1.9489, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.11503531786074672, |
|
"grad_norm": 2.503206729888916, |
|
"learning_rate": 9.998475481374358e-05, |
|
"loss": 1.9871, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.1170534813319879, |
|
"grad_norm": 1.8593742847442627, |
|
"learning_rate": 9.998035638113527e-05, |
|
"loss": 1.8784, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.11907164480322906, |
|
"grad_norm": 2.2534918785095215, |
|
"learning_rate": 9.997540133942624e-05, |
|
"loss": 1.7789, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.12108980827447023, |
|
"grad_norm": 1.7839148044586182, |
|
"learning_rate": 9.996988974379857e-05, |
|
"loss": 1.8833, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.1231079717457114, |
|
"grad_norm": 1.9832431077957153, |
|
"learning_rate": 9.996382165563247e-05, |
|
"loss": 1.8618, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.12512613521695257, |
|
"grad_norm": 1.8574587106704712, |
|
"learning_rate": 9.995719714250556e-05, |
|
"loss": 1.9742, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.12714429868819374, |
|
"grad_norm": 2.3903088569641113, |
|
"learning_rate": 9.995001627819211e-05, |
|
"loss": 1.8395, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.12916246215943492, |
|
"grad_norm": 1.8283036947250366, |
|
"learning_rate": 9.99422791426622e-05, |
|
"loss": 1.7997, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.1311806256306761, |
|
"grad_norm": 2.3620362281799316, |
|
"learning_rate": 9.993398582208093e-05, |
|
"loss": 1.8091, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.13319878910191726, |
|
"grad_norm": 1.9973320960998535, |
|
"learning_rate": 9.99251364088073e-05, |
|
"loss": 1.8977, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.13521695257315844, |
|
"grad_norm": 2.1321332454681396, |
|
"learning_rate": 9.991573100139334e-05, |
|
"loss": 1.8635, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.13723511604439959, |
|
"grad_norm": 2.079270362854004, |
|
"learning_rate": 9.990576970458285e-05, |
|
"loss": 1.8001, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.13925327951564076, |
|
"grad_norm": 2.42185640335083, |
|
"learning_rate": 9.989525262931045e-05, |
|
"loss": 1.8191, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.14127144298688193, |
|
"grad_norm": 1.972676396369934, |
|
"learning_rate": 9.988417989270011e-05, |
|
"loss": 1.8399, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.1432896064581231, |
|
"grad_norm": 1.9847174882888794, |
|
"learning_rate": 9.987255161806402e-05, |
|
"loss": 1.7614, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.14530776992936428, |
|
"grad_norm": 1.9572982788085938, |
|
"learning_rate": 9.986036793490112e-05, |
|
"loss": 1.8777, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.14732593340060546, |
|
"grad_norm": 1.9790279865264893, |
|
"learning_rate": 9.984762897889568e-05, |
|
"loss": 1.8118, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.14934409687184663, |
|
"grad_norm": 1.9155077934265137, |
|
"learning_rate": 9.983433489191581e-05, |
|
"loss": 1.8436, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.15136226034308778, |
|
"grad_norm": 1.9012539386749268, |
|
"learning_rate": 9.98204858220119e-05, |
|
"loss": 1.8109, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.15338042381432895, |
|
"grad_norm": 2.0491812229156494, |
|
"learning_rate": 9.980608192341488e-05, |
|
"loss": 1.8072, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.15539858728557013, |
|
"grad_norm": 1.941206932067871, |
|
"learning_rate": 9.979112335653462e-05, |
|
"loss": 1.7898, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.1574167507568113, |
|
"grad_norm": 2.0589675903320312, |
|
"learning_rate": 9.977561028795803e-05, |
|
"loss": 1.7524, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.15943491422805248, |
|
"grad_norm": 1.9348995685577393, |
|
"learning_rate": 9.97595428904473e-05, |
|
"loss": 1.7864, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.16145307769929365, |
|
"grad_norm": 1.8057056665420532, |
|
"learning_rate": 9.974292134293792e-05, |
|
"loss": 1.7851, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.16347124117053483, |
|
"grad_norm": 1.8641736507415771, |
|
"learning_rate": 9.97257458305367e-05, |
|
"loss": 1.8094, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.16548940464177597, |
|
"grad_norm": 1.713681936264038, |
|
"learning_rate": 9.970801654451973e-05, |
|
"loss": 1.7454, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.16750756811301715, |
|
"grad_norm": 1.8647186756134033, |
|
"learning_rate": 9.968973368233022e-05, |
|
"loss": 1.7415, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.16952573158425832, |
|
"grad_norm": 1.9415329694747925, |
|
"learning_rate": 9.96708974475763e-05, |
|
"loss": 1.7501, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.1715438950554995, |
|
"grad_norm": 1.8891992568969727, |
|
"learning_rate": 9.965150805002878e-05, |
|
"loss": 1.8218, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.17356205852674067, |
|
"grad_norm": 1.9533817768096924, |
|
"learning_rate": 9.963156570561878e-05, |
|
"loss": 1.6947, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.17558022199798184, |
|
"grad_norm": 2.1692941188812256, |
|
"learning_rate": 9.96110706364354e-05, |
|
"loss": 1.784, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.17759838546922302, |
|
"grad_norm": 2.229025363922119, |
|
"learning_rate": 9.959002307072312e-05, |
|
"loss": 1.7266, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.17961654894046417, |
|
"grad_norm": 3.380396604537964, |
|
"learning_rate": 9.956842324287936e-05, |
|
"loss": 1.9071, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.18163471241170534, |
|
"grad_norm": 1.9694714546203613, |
|
"learning_rate": 9.954627139345186e-05, |
|
"loss": 1.7442, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.18365287588294651, |
|
"grad_norm": 1.9261081218719482, |
|
"learning_rate": 9.952356776913594e-05, |
|
"loss": 1.8254, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.1856710393541877, |
|
"grad_norm": 1.6406139135360718, |
|
"learning_rate": 9.950031262277183e-05, |
|
"loss": 1.8027, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.18768920282542886, |
|
"grad_norm": 1.874161720275879, |
|
"learning_rate": 9.947650621334179e-05, |
|
"loss": 1.8027, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.18970736629667004, |
|
"grad_norm": 2.183828115463257, |
|
"learning_rate": 9.945214880596725e-05, |
|
"loss": 1.7398, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.1917255297679112, |
|
"grad_norm": 1.954749584197998, |
|
"learning_rate": 9.94272406719059e-05, |
|
"loss": 1.7675, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.19374369323915236, |
|
"grad_norm": 1.966210126876831, |
|
"learning_rate": 9.940178208854858e-05, |
|
"loss": 1.7841, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.19576185671039353, |
|
"grad_norm": 2.180774688720703, |
|
"learning_rate": 9.937577333941626e-05, |
|
"loss": 1.6492, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.1977800201816347, |
|
"grad_norm": 1.921655297279358, |
|
"learning_rate": 9.934921471415687e-05, |
|
"loss": 1.7661, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.19979818365287588, |
|
"grad_norm": 1.8591227531433105, |
|
"learning_rate": 9.932210650854205e-05, |
|
"loss": 1.7543, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.20181634712411706, |
|
"grad_norm": 1.6740055084228516, |
|
"learning_rate": 9.929444902446392e-05, |
|
"loss": 1.7578, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.20383451059535823, |
|
"grad_norm": 2.937347650527954, |
|
"learning_rate": 9.92662425699316e-05, |
|
"loss": 1.7566, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.2058526740665994, |
|
"grad_norm": 1.9954780340194702, |
|
"learning_rate": 9.923748745906789e-05, |
|
"loss": 1.8376, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.20787083753784055, |
|
"grad_norm": 2.204568386077881, |
|
"learning_rate": 9.920818401210574e-05, |
|
"loss": 1.7558, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.20988900100908173, |
|
"grad_norm": 2.1600749492645264, |
|
"learning_rate": 9.917833255538467e-05, |
|
"loss": 1.8082, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.2119071644803229, |
|
"grad_norm": 2.097487449645996, |
|
"learning_rate": 9.914793342134711e-05, |
|
"loss": 1.7874, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.21392532795156408, |
|
"grad_norm": 1.796174168586731, |
|
"learning_rate": 9.911698694853477e-05, |
|
"loss": 1.7015, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.21594349142280525, |
|
"grad_norm": 1.9651933908462524, |
|
"learning_rate": 9.908549348158485e-05, |
|
"loss": 1.8868, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.21796165489404642, |
|
"grad_norm": 2.083768367767334, |
|
"learning_rate": 9.905345337122609e-05, |
|
"loss": 1.8338, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.2199798183652876, |
|
"grad_norm": 1.953058123588562, |
|
"learning_rate": 9.902086697427504e-05, |
|
"loss": 1.792, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.22199798183652875, |
|
"grad_norm": 1.9650559425354004, |
|
"learning_rate": 9.8987734653632e-05, |
|
"loss": 1.6179, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.22401614530776992, |
|
"grad_norm": 1.7365285158157349, |
|
"learning_rate": 9.895405677827692e-05, |
|
"loss": 1.6512, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.2260343087790111, |
|
"grad_norm": 1.7468541860580444, |
|
"learning_rate": 9.89198337232654e-05, |
|
"loss": 1.6747, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.22805247225025227, |
|
"grad_norm": 1.967139720916748, |
|
"learning_rate": 9.888506586972446e-05, |
|
"loss": 1.7086, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.23007063572149344, |
|
"grad_norm": 1.824622631072998, |
|
"learning_rate": 9.884975360484827e-05, |
|
"loss": 1.887, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.23208879919273462, |
|
"grad_norm": 2.1145553588867188, |
|
"learning_rate": 9.881389732189392e-05, |
|
"loss": 1.7733, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.2341069626639758, |
|
"grad_norm": 1.6385475397109985, |
|
"learning_rate": 9.877749742017694e-05, |
|
"loss": 1.6961, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.23612512613521694, |
|
"grad_norm": 1.9020243883132935, |
|
"learning_rate": 9.874055430506691e-05, |
|
"loss": 1.6124, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.23814328960645811, |
|
"grad_norm": 1.817199945449829, |
|
"learning_rate": 9.870306838798297e-05, |
|
"loss": 1.6601, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.2401614530776993, |
|
"grad_norm": 2.001286268234253, |
|
"learning_rate": 9.866504008638917e-05, |
|
"loss": 1.7485, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.24217961654894046, |
|
"grad_norm": 1.9406825304031372, |
|
"learning_rate": 9.862646982378987e-05, |
|
"loss": 1.8185, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.24419778002018164, |
|
"grad_norm": 1.7781312465667725, |
|
"learning_rate": 9.8587358029725e-05, |
|
"loss": 1.751, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.2462159434914228, |
|
"grad_norm": 1.793482780456543, |
|
"learning_rate": 9.854770513976531e-05, |
|
"loss": 1.6768, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.248234106962664, |
|
"grad_norm": 1.9680509567260742, |
|
"learning_rate": 9.850751159550746e-05, |
|
"loss": 1.6726, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.25025227043390513, |
|
"grad_norm": 2.0250823497772217, |
|
"learning_rate": 9.846677784456918e-05, |
|
"loss": 1.7675, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.2522704339051463, |
|
"grad_norm": 1.7494395971298218, |
|
"learning_rate": 9.842550434058421e-05, |
|
"loss": 1.6584, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.2542885973763875, |
|
"grad_norm": 2.1690242290496826, |
|
"learning_rate": 9.838369154319728e-05, |
|
"loss": 1.802, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.25630676084762866, |
|
"grad_norm": 1.6883882284164429, |
|
"learning_rate": 9.8341339918059e-05, |
|
"loss": 1.5858, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.25832492431886983, |
|
"grad_norm": 1.9592852592468262, |
|
"learning_rate": 9.82984499368207e-05, |
|
"loss": 1.7107, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.260343087790111, |
|
"grad_norm": 1.9985136985778809, |
|
"learning_rate": 9.825502207712909e-05, |
|
"loss": 1.674, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.2623612512613522, |
|
"grad_norm": 1.6702767610549927, |
|
"learning_rate": 9.821105682262099e-05, |
|
"loss": 1.6821, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.26437941473259335, |
|
"grad_norm": 1.7378309965133667, |
|
"learning_rate": 9.816655466291803e-05, |
|
"loss": 1.7136, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.26639757820383453, |
|
"grad_norm": 1.7488417625427246, |
|
"learning_rate": 9.812151609362102e-05, |
|
"loss": 1.5944, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.2684157416750757, |
|
"grad_norm": 1.7831872701644897, |
|
"learning_rate": 9.807594161630458e-05, |
|
"loss": 1.575, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.2704339051463169, |
|
"grad_norm": 2.0005006790161133, |
|
"learning_rate": 9.802983173851149e-05, |
|
"loss": 1.6744, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.272452068617558, |
|
"grad_norm": 1.5925371646881104, |
|
"learning_rate": 9.798318697374702e-05, |
|
"loss": 1.6679, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.27447023208879917, |
|
"grad_norm": 1.6449369192123413, |
|
"learning_rate": 9.79360078414733e-05, |
|
"loss": 1.6395, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.27648839556004035, |
|
"grad_norm": 1.870029091835022, |
|
"learning_rate": 9.78882948671034e-05, |
|
"loss": 1.6746, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.2785065590312815, |
|
"grad_norm": 1.8943411111831665, |
|
"learning_rate": 9.784004858199563e-05, |
|
"loss": 1.7118, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.2805247225025227, |
|
"grad_norm": 1.9735866785049438, |
|
"learning_rate": 9.779126952344748e-05, |
|
"loss": 1.6215, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.28254288597376387, |
|
"grad_norm": 1.719292402267456, |
|
"learning_rate": 9.774195823468973e-05, |
|
"loss": 1.6028, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.28456104944500504, |
|
"grad_norm": 1.7490273714065552, |
|
"learning_rate": 9.769211526488038e-05, |
|
"loss": 1.5878, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.2865792129162462, |
|
"grad_norm": 1.621436357498169, |
|
"learning_rate": 9.764174116909852e-05, |
|
"loss": 1.7188, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.2885973763874874, |
|
"grad_norm": 1.7977508306503296, |
|
"learning_rate": 9.759083650833815e-05, |
|
"loss": 1.6966, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.29061553985872857, |
|
"grad_norm": 1.6182868480682373, |
|
"learning_rate": 9.753940184950192e-05, |
|
"loss": 1.6072, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.29263370332996974, |
|
"grad_norm": 2.094125747680664, |
|
"learning_rate": 9.748743776539488e-05, |
|
"loss": 1.7654, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.2946518668012109, |
|
"grad_norm": 2.0340657234191895, |
|
"learning_rate": 9.743494483471801e-05, |
|
"loss": 1.5987, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.2966700302724521, |
|
"grad_norm": 1.8374106884002686, |
|
"learning_rate": 9.738192364206185e-05, |
|
"loss": 1.6468, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.29868819374369326, |
|
"grad_norm": 1.8509396314620972, |
|
"learning_rate": 9.732837477789993e-05, |
|
"loss": 1.6514, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.3007063572149344, |
|
"grad_norm": 2.0533978939056396, |
|
"learning_rate": 9.727429883858227e-05, |
|
"loss": 1.6375, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.30272452068617556, |
|
"grad_norm": 1.7004354000091553, |
|
"learning_rate": 9.721969642632865e-05, |
|
"loss": 1.5852, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.30474268415741673, |
|
"grad_norm": 1.8703371286392212, |
|
"learning_rate": 9.716456814922196e-05, |
|
"loss": 1.5644, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.3067608476286579, |
|
"grad_norm": 1.6673510074615479, |
|
"learning_rate": 9.710891462120141e-05, |
|
"loss": 1.6704, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.3087790110998991, |
|
"grad_norm": 1.8909664154052734, |
|
"learning_rate": 9.70527364620557e-05, |
|
"loss": 1.7009, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.31079717457114026, |
|
"grad_norm": 1.6285436153411865, |
|
"learning_rate": 9.699603429741615e-05, |
|
"loss": 1.6874, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.31281533804238143, |
|
"grad_norm": 1.7128878831863403, |
|
"learning_rate": 9.693880875874961e-05, |
|
"loss": 1.8054, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.3148335015136226, |
|
"grad_norm": 2.0219228267669678, |
|
"learning_rate": 9.68810604833516e-05, |
|
"loss": 1.5689, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.3168516649848638, |
|
"grad_norm": 1.5529290437698364, |
|
"learning_rate": 9.682279011433908e-05, |
|
"loss": 1.7903, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.31886982845610495, |
|
"grad_norm": 1.7940829992294312, |
|
"learning_rate": 9.676399830064339e-05, |
|
"loss": 1.5716, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.32088799192734613, |
|
"grad_norm": 1.6057904958724976, |
|
"learning_rate": 9.670468569700288e-05, |
|
"loss": 1.6821, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.3229061553985873, |
|
"grad_norm": 1.8908942937850952, |
|
"learning_rate": 9.664485296395578e-05, |
|
"loss": 1.541, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.3249243188698285, |
|
"grad_norm": 1.8195271492004395, |
|
"learning_rate": 9.658450076783274e-05, |
|
"loss": 1.65, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.32694248234106965, |
|
"grad_norm": 6.518181800842285, |
|
"learning_rate": 9.652362978074947e-05, |
|
"loss": 1.6047, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.32896064581231077, |
|
"grad_norm": 1.5660922527313232, |
|
"learning_rate": 9.646224068059917e-05, |
|
"loss": 1.7102, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.33097880928355194, |
|
"grad_norm": 1.6389318704605103, |
|
"learning_rate": 9.640033415104508e-05, |
|
"loss": 1.6255, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.3329969727547931, |
|
"grad_norm": 1.6905416250228882, |
|
"learning_rate": 9.633791088151283e-05, |
|
"loss": 1.5718, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.3350151362260343, |
|
"grad_norm": 1.7866181135177612, |
|
"learning_rate": 9.627497156718271e-05, |
|
"loss": 1.7042, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.33703329969727547, |
|
"grad_norm": 1.7515119314193726, |
|
"learning_rate": 9.621151690898203e-05, |
|
"loss": 1.6239, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.33905146316851664, |
|
"grad_norm": 1.9881833791732788, |
|
"learning_rate": 9.614754761357718e-05, |
|
"loss": 1.7982, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.3410696266397578, |
|
"grad_norm": 1.8311055898666382, |
|
"learning_rate": 9.608306439336592e-05, |
|
"loss": 1.7399, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.343087790110999, |
|
"grad_norm": 1.688659906387329, |
|
"learning_rate": 9.60180679664693e-05, |
|
"loss": 1.6666, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.34510595358224017, |
|
"grad_norm": 1.8084384202957153, |
|
"learning_rate": 9.595255905672377e-05, |
|
"loss": 1.5487, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.34712411705348134, |
|
"grad_norm": 1.7272626161575317, |
|
"learning_rate": 9.588653839367302e-05, |
|
"loss": 1.551, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.3491422805247225, |
|
"grad_norm": 1.7780117988586426, |
|
"learning_rate": 9.582000671256e-05, |
|
"loss": 1.6598, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.3511604439959637, |
|
"grad_norm": 2.405978202819824, |
|
"learning_rate": 9.575296475431855e-05, |
|
"loss": 1.6297, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.35317860746720486, |
|
"grad_norm": 1.6499069929122925, |
|
"learning_rate": 9.568541326556527e-05, |
|
"loss": 1.5609, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.35519677093844604, |
|
"grad_norm": 1.7063804864883423, |
|
"learning_rate": 9.56173529985912e-05, |
|
"loss": 1.6281, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.35721493440968716, |
|
"grad_norm": 2.427915573120117, |
|
"learning_rate": 9.554878471135339e-05, |
|
"loss": 1.6166, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.35923309788092833, |
|
"grad_norm": 1.7606953382492065, |
|
"learning_rate": 9.547970916746649e-05, |
|
"loss": 1.6152, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.3612512613521695, |
|
"grad_norm": 1.7059513330459595, |
|
"learning_rate": 9.541012713619428e-05, |
|
"loss": 1.5078, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.3632694248234107, |
|
"grad_norm": 1.4295872449874878, |
|
"learning_rate": 9.5340039392441e-05, |
|
"loss": 1.5864, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.36528758829465185, |
|
"grad_norm": 1.8844209909439087, |
|
"learning_rate": 9.526944671674286e-05, |
|
"loss": 1.5401, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.36730575176589303, |
|
"grad_norm": 1.6048489809036255, |
|
"learning_rate": 9.51983498952592e-05, |
|
"loss": 1.7246, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.3693239152371342, |
|
"grad_norm": 2.0559234619140625, |
|
"learning_rate": 9.512674971976385e-05, |
|
"loss": 1.4579, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.3713420787083754, |
|
"grad_norm": 1.7094260454177856, |
|
"learning_rate": 9.505464698763629e-05, |
|
"loss": 1.5574, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.37336024217961655, |
|
"grad_norm": 1.5393606424331665, |
|
"learning_rate": 9.49820425018527e-05, |
|
"loss": 1.5389, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.3753784056508577, |
|
"grad_norm": 2.1417794227600098, |
|
"learning_rate": 9.49089370709771e-05, |
|
"loss": 1.6108, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.3773965691220989, |
|
"grad_norm": 1.6064629554748535, |
|
"learning_rate": 9.483533150915229e-05, |
|
"loss": 1.6211, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.3794147325933401, |
|
"grad_norm": 2.044330596923828, |
|
"learning_rate": 9.476122663609086e-05, |
|
"loss": 1.6693, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.38143289606458125, |
|
"grad_norm": 1.7574785947799683, |
|
"learning_rate": 9.468662327706594e-05, |
|
"loss": 1.5502, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.3834510595358224, |
|
"grad_norm": 1.8084129095077515, |
|
"learning_rate": 9.461152226290212e-05, |
|
"loss": 1.6606, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.3854692230070636, |
|
"grad_norm": 1.4581211805343628, |
|
"learning_rate": 9.453592442996614e-05, |
|
"loss": 1.5317, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.3874873864783047, |
|
"grad_norm": 1.8625178337097168, |
|
"learning_rate": 9.445983062015761e-05, |
|
"loss": 1.4944, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.3895055499495459, |
|
"grad_norm": 1.7608696222305298, |
|
"learning_rate": 9.43832416808996e-05, |
|
"loss": 1.5913, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.39152371342078707, |
|
"grad_norm": 1.9058195352554321, |
|
"learning_rate": 9.430615846512923e-05, |
|
"loss": 1.6255, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.39354187689202824, |
|
"grad_norm": 1.6042306423187256, |
|
"learning_rate": 9.422858183128808e-05, |
|
"loss": 1.6525, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.3955600403632694, |
|
"grad_norm": 1.6958731412887573, |
|
"learning_rate": 9.415051264331285e-05, |
|
"loss": 1.4745, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.3975782038345106, |
|
"grad_norm": 1.7777879238128662, |
|
"learning_rate": 9.407195177062549e-05, |
|
"loss": 1.5836, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.39959636730575177, |
|
"grad_norm": 1.7249339818954468, |
|
"learning_rate": 9.399290008812365e-05, |
|
"loss": 1.4844, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.40161453077699294, |
|
"grad_norm": 1.854981541633606, |
|
"learning_rate": 9.391335847617093e-05, |
|
"loss": 1.6211, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.4036326942482341, |
|
"grad_norm": 1.4041742086410522, |
|
"learning_rate": 9.383332782058705e-05, |
|
"loss": 1.6664, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.4056508577194753, |
|
"grad_norm": 1.605724573135376, |
|
"learning_rate": 9.375280901263796e-05, |
|
"loss": 1.5706, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.40766902119071646, |
|
"grad_norm": 1.7073047161102295, |
|
"learning_rate": 9.367180294902603e-05, |
|
"loss": 1.6047, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.40968718466195764, |
|
"grad_norm": 1.5172196626663208, |
|
"learning_rate": 9.359031053187988e-05, |
|
"loss": 1.5207, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.4117053481331988, |
|
"grad_norm": 1.9370439052581787, |
|
"learning_rate": 9.350833266874451e-05, |
|
"loss": 1.5746, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.41372351160444, |
|
"grad_norm": 2.1937825679779053, |
|
"learning_rate": 9.342587027257104e-05, |
|
"loss": 1.7112, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.4157416750756811, |
|
"grad_norm": 1.7659974098205566, |
|
"learning_rate": 9.334292426170672e-05, |
|
"loss": 1.6329, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.4177598385469223, |
|
"grad_norm": 1.5360724925994873, |
|
"learning_rate": 9.325949555988452e-05, |
|
"loss": 1.6289, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.41977800201816345, |
|
"grad_norm": 1.4664136171340942, |
|
"learning_rate": 9.317558509621296e-05, |
|
"loss": 1.6237, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.42179616548940463, |
|
"grad_norm": 1.5282961130142212, |
|
"learning_rate": 9.309119380516573e-05, |
|
"loss": 1.5247, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.4238143289606458, |
|
"grad_norm": 1.7818903923034668, |
|
"learning_rate": 9.300632262657128e-05, |
|
"loss": 1.6479, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.425832492431887, |
|
"grad_norm": 1.8099353313446045, |
|
"learning_rate": 9.292097250560232e-05, |
|
"loss": 1.692, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.42785065590312815, |
|
"grad_norm": 1.6518584489822388, |
|
"learning_rate": 9.283514439276539e-05, |
|
"loss": 1.5806, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.4298688193743693, |
|
"grad_norm": 1.6262339353561401, |
|
"learning_rate": 9.274883924389018e-05, |
|
"loss": 1.6018, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.4318869828456105, |
|
"grad_norm": 1.858011245727539, |
|
"learning_rate": 9.266205802011892e-05, |
|
"loss": 1.6162, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.4339051463168517, |
|
"grad_norm": 1.5540958642959595, |
|
"learning_rate": 9.257480168789565e-05, |
|
"loss": 1.5558, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.43592330978809285, |
|
"grad_norm": 1.5216200351715088, |
|
"learning_rate": 9.248707121895555e-05, |
|
"loss": 1.6317, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.437941473259334, |
|
"grad_norm": 2.030132532119751, |
|
"learning_rate": 9.239886759031398e-05, |
|
"loss": 1.457, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.4399596367305752, |
|
"grad_norm": 1.6100101470947266, |
|
"learning_rate": 9.231019178425573e-05, |
|
"loss": 1.6372, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.4419778002018164, |
|
"grad_norm": 1.6417200565338135, |
|
"learning_rate": 9.222104478832398e-05, |
|
"loss": 1.5867, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.4439959636730575, |
|
"grad_norm": 1.5133955478668213, |
|
"learning_rate": 9.213142759530936e-05, |
|
"loss": 1.5338, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.44601412714429867, |
|
"grad_norm": 1.543249249458313, |
|
"learning_rate": 9.204134120323883e-05, |
|
"loss": 1.6463, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.44803229061553984, |
|
"grad_norm": 2.0193662643432617, |
|
"learning_rate": 9.195078661536471e-05, |
|
"loss": 1.5299, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.450050454086781, |
|
"grad_norm": 1.4934704303741455, |
|
"learning_rate": 9.185976484015333e-05, |
|
"loss": 1.5422, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.4520686175580222, |
|
"grad_norm": 1.6684024333953857, |
|
"learning_rate": 9.176827689127389e-05, |
|
"loss": 1.62, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.45408678102926336, |
|
"grad_norm": 1.7710318565368652, |
|
"learning_rate": 9.167632378758719e-05, |
|
"loss": 1.4557, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.45610494450050454, |
|
"grad_norm": 1.717943549156189, |
|
"learning_rate": 9.158390655313422e-05, |
|
"loss": 1.601, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.4581231079717457, |
|
"grad_norm": 1.751428246498108, |
|
"learning_rate": 9.149102621712482e-05, |
|
"loss": 1.5032, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.4601412714429869, |
|
"grad_norm": 1.8777353763580322, |
|
"learning_rate": 9.139768381392616e-05, |
|
"loss": 1.6255, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.46215943491422806, |
|
"grad_norm": 1.7452517747879028, |
|
"learning_rate": 9.130388038305127e-05, |
|
"loss": 1.6209, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.46417759838546924, |
|
"grad_norm": 1.7831089496612549, |
|
"learning_rate": 9.12096169691474e-05, |
|
"loss": 1.6401, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.4661957618567104, |
|
"grad_norm": 1.591808795928955, |
|
"learning_rate": 9.111489462198448e-05, |
|
"loss": 1.5767, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.4682139253279516, |
|
"grad_norm": 1.6915345191955566, |
|
"learning_rate": 9.101971439644335e-05, |
|
"loss": 1.556, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.47023208879919276, |
|
"grad_norm": 1.8185473680496216, |
|
"learning_rate": 9.092407735250404e-05, |
|
"loss": 1.6477, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.4722502522704339, |
|
"grad_norm": 1.7397855520248413, |
|
"learning_rate": 9.082798455523396e-05, |
|
"loss": 1.4885, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.47426841574167505, |
|
"grad_norm": 1.7972854375839233, |
|
"learning_rate": 9.073143707477607e-05, |
|
"loss": 1.6802, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.47628657921291623, |
|
"grad_norm": 1.7079814672470093, |
|
"learning_rate": 9.063443598633688e-05, |
|
"loss": 1.5201, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.4783047426841574, |
|
"grad_norm": 1.7126438617706299, |
|
"learning_rate": 9.053698237017459e-05, |
|
"loss": 1.5861, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.4803229061553986, |
|
"grad_norm": 1.7915846109390259, |
|
"learning_rate": 9.043907731158699e-05, |
|
"loss": 1.5139, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.48234106962663975, |
|
"grad_norm": 1.5365029573440552, |
|
"learning_rate": 9.034072190089932e-05, |
|
"loss": 1.5428, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.4843592330978809, |
|
"grad_norm": 1.720226526260376, |
|
"learning_rate": 9.02419172334523e-05, |
|
"loss": 1.4767, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.4863773965691221, |
|
"grad_norm": 1.9600703716278076, |
|
"learning_rate": 9.014266440958974e-05, |
|
"loss": 1.6188, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.4883955600403633, |
|
"grad_norm": 1.8720204830169678, |
|
"learning_rate": 9.004296453464638e-05, |
|
"loss": 1.5432, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.49041372351160445, |
|
"grad_norm": 1.6241135597229004, |
|
"learning_rate": 8.994281871893562e-05, |
|
"loss": 1.5496, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.4924318869828456, |
|
"grad_norm": 1.8902565240859985, |
|
"learning_rate": 8.984222807773706e-05, |
|
"loss": 1.6235, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.4944500504540868, |
|
"grad_norm": 1.925911784172058, |
|
"learning_rate": 8.974119373128411e-05, |
|
"loss": 1.5734, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.496468213925328, |
|
"grad_norm": 2.0046586990356445, |
|
"learning_rate": 8.963971680475161e-05, |
|
"loss": 1.5009, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.49848637739656915, |
|
"grad_norm": 1.6803854703903198, |
|
"learning_rate": 8.95377984282431e-05, |
|
"loss": 1.5605, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.5005045408678103, |
|
"grad_norm": 1.5540610551834106, |
|
"learning_rate": 8.943543973677846e-05, |
|
"loss": 1.5961, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.5025227043390514, |
|
"grad_norm": 1.625119686126709, |
|
"learning_rate": 8.933264187028109e-05, |
|
"loss": 1.604, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.5045408678102926, |
|
"grad_norm": 1.7552522420883179, |
|
"learning_rate": 8.922940597356532e-05, |
|
"loss": 1.4812, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.5065590312815338, |
|
"grad_norm": 1.7819663286209106, |
|
"learning_rate": 8.912573319632367e-05, |
|
"loss": 1.6794, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.508577194752775, |
|
"grad_norm": 1.715986728668213, |
|
"learning_rate": 8.90216246931139e-05, |
|
"loss": 1.5846, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.5105953582240161, |
|
"grad_norm": 1.6441289186477661, |
|
"learning_rate": 8.891708162334635e-05, |
|
"loss": 1.5525, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.5126135216952573, |
|
"grad_norm": 1.7777501344680786, |
|
"learning_rate": 8.88121051512709e-05, |
|
"loss": 1.5136, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.5146316851664985, |
|
"grad_norm": 1.7654690742492676, |
|
"learning_rate": 8.870669644596402e-05, |
|
"loss": 1.574, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.5166498486377397, |
|
"grad_norm": 1.329132080078125, |
|
"learning_rate": 8.860085668131582e-05, |
|
"loss": 1.4299, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.5186680121089808, |
|
"grad_norm": 1.7770514488220215, |
|
"learning_rate": 8.84945870360169e-05, |
|
"loss": 1.6012, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.520686175580222, |
|
"grad_norm": 1.421560287475586, |
|
"learning_rate": 8.838788869354522e-05, |
|
"loss": 1.5075, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.5227043390514632, |
|
"grad_norm": 2.0325329303741455, |
|
"learning_rate": 8.828076284215301e-05, |
|
"loss": 1.4582, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.5247225025227044, |
|
"grad_norm": 2.303136110305786, |
|
"learning_rate": 8.817321067485343e-05, |
|
"loss": 1.6037, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.5267406659939455, |
|
"grad_norm": 1.5532690286636353, |
|
"learning_rate": 8.806523338940736e-05, |
|
"loss": 1.6264, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.5287588294651867, |
|
"grad_norm": 1.9267081022262573, |
|
"learning_rate": 8.795683218831001e-05, |
|
"loss": 1.6513, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.5307769929364279, |
|
"grad_norm": 1.4832985401153564, |
|
"learning_rate": 8.78480082787776e-05, |
|
"loss": 1.5968, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.5327951564076691, |
|
"grad_norm": 1.6407015323638916, |
|
"learning_rate": 8.773876287273377e-05, |
|
"loss": 1.6084, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.5348133198789102, |
|
"grad_norm": 1.6609562635421753, |
|
"learning_rate": 8.762909718679629e-05, |
|
"loss": 1.5557, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.5368314833501514, |
|
"grad_norm": 1.6559169292449951, |
|
"learning_rate": 8.751901244226332e-05, |
|
"loss": 1.623, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.5388496468213926, |
|
"grad_norm": 1.5652551651000977, |
|
"learning_rate": 8.740850986509994e-05, |
|
"loss": 1.4157, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.5408678102926338, |
|
"grad_norm": 1.9073171615600586, |
|
"learning_rate": 8.729759068592442e-05, |
|
"loss": 1.5152, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.5428859737638748, |
|
"grad_norm": 1.72683846950531, |
|
"learning_rate": 8.718625613999457e-05, |
|
"loss": 1.6011, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.544904137235116, |
|
"grad_norm": 1.3139292001724243, |
|
"learning_rate": 8.70745074671939e-05, |
|
"loss": 1.5194, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.5469223007063572, |
|
"grad_norm": 1.6831001043319702, |
|
"learning_rate": 8.696234591201793e-05, |
|
"loss": 1.5078, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.5489404641775983, |
|
"grad_norm": 1.5346101522445679, |
|
"learning_rate": 8.684977272356024e-05, |
|
"loss": 1.4988, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.5509586276488395, |
|
"grad_norm": 1.6062910556793213, |
|
"learning_rate": 8.673678915549855e-05, |
|
"loss": 1.6626, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.5529767911200807, |
|
"grad_norm": 1.7970744371414185, |
|
"learning_rate": 8.662339646608089e-05, |
|
"loss": 1.5251, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.5549949545913219, |
|
"grad_norm": 1.687828540802002, |
|
"learning_rate": 8.650959591811141e-05, |
|
"loss": 1.5361, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.557013118062563, |
|
"grad_norm": 1.8278907537460327, |
|
"learning_rate": 8.639538877893644e-05, |
|
"loss": 1.4754, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.5590312815338042, |
|
"grad_norm": 1.5675766468048096, |
|
"learning_rate": 8.628077632043032e-05, |
|
"loss": 1.5059, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.5610494450050454, |
|
"grad_norm": 1.5290266275405884, |
|
"learning_rate": 8.616575981898125e-05, |
|
"loss": 1.4684, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.5630676084762866, |
|
"grad_norm": 1.3652130365371704, |
|
"learning_rate": 8.605034055547709e-05, |
|
"loss": 1.4736, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.5650857719475277, |
|
"grad_norm": 1.4992094039916992, |
|
"learning_rate": 8.593451981529108e-05, |
|
"loss": 1.5559, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.5671039354187689, |
|
"grad_norm": 1.5727661848068237, |
|
"learning_rate": 8.581829888826754e-05, |
|
"loss": 1.5884, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.5691220988900101, |
|
"grad_norm": 1.8026494979858398, |
|
"learning_rate": 8.570167906870745e-05, |
|
"loss": 1.5782, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.5711402623612513, |
|
"grad_norm": 1.701768159866333, |
|
"learning_rate": 8.558466165535411e-05, |
|
"loss": 1.598, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.5731584258324924, |
|
"grad_norm": 1.6463897228240967, |
|
"learning_rate": 8.546724795137865e-05, |
|
"loss": 1.4889, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.5751765893037336, |
|
"grad_norm": 1.5795738697052002, |
|
"learning_rate": 8.534943926436554e-05, |
|
"loss": 1.5306, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.5771947527749748, |
|
"grad_norm": 1.5358326435089111, |
|
"learning_rate": 8.523123690629791e-05, |
|
"loss": 1.5801, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.579212916246216, |
|
"grad_norm": 1.566599726676941, |
|
"learning_rate": 8.511264219354313e-05, |
|
"loss": 1.4904, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.5812310797174571, |
|
"grad_norm": 1.3994381427764893, |
|
"learning_rate": 8.4993656446838e-05, |
|
"loss": 1.5097, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.5832492431886983, |
|
"grad_norm": 1.5844892263412476, |
|
"learning_rate": 8.48742809912741e-05, |
|
"loss": 1.561, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.5852674066599395, |
|
"grad_norm": 1.592240333557129, |
|
"learning_rate": 8.475451715628302e-05, |
|
"loss": 1.4621, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.5872855701311807, |
|
"grad_norm": 1.835154414176941, |
|
"learning_rate": 8.463436627562158e-05, |
|
"loss": 1.5978, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.5893037336024218, |
|
"grad_norm": 1.5988234281539917, |
|
"learning_rate": 8.451382968735693e-05, |
|
"loss": 1.4611, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.591321897073663, |
|
"grad_norm": 1.551164984703064, |
|
"learning_rate": 8.43929087338517e-05, |
|
"loss": 1.4682, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.5933400605449042, |
|
"grad_norm": 1.537986159324646, |
|
"learning_rate": 8.4271604761749e-05, |
|
"loss": 1.437, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.5953582240161454, |
|
"grad_norm": 1.5694961547851562, |
|
"learning_rate": 8.414991912195747e-05, |
|
"loss": 1.5569, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.5973763874873865, |
|
"grad_norm": 1.6652250289916992, |
|
"learning_rate": 8.402785316963618e-05, |
|
"loss": 1.4936, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.5993945509586277, |
|
"grad_norm": 1.690019965171814, |
|
"learning_rate": 8.390540826417964e-05, |
|
"loss": 1.4642, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.6014127144298688, |
|
"grad_norm": 1.532211422920227, |
|
"learning_rate": 8.378258576920253e-05, |
|
"loss": 1.5379, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.6034308779011099, |
|
"grad_norm": 1.6568650007247925, |
|
"learning_rate": 8.365938705252459e-05, |
|
"loss": 1.4731, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.6054490413723511, |
|
"grad_norm": 1.3985533714294434, |
|
"learning_rate": 8.353581348615538e-05, |
|
"loss": 1.5571, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.6074672048435923, |
|
"grad_norm": 1.7050758600234985, |
|
"learning_rate": 8.341186644627901e-05, |
|
"loss": 1.6194, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.6094853683148335, |
|
"grad_norm": 1.656028389930725, |
|
"learning_rate": 8.32875473132388e-05, |
|
"loss": 1.4723, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.6115035317860746, |
|
"grad_norm": 1.4241358041763306, |
|
"learning_rate": 8.316285747152189e-05, |
|
"loss": 1.4154, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.6135216952573158, |
|
"grad_norm": 1.6242072582244873, |
|
"learning_rate": 8.30377983097438e-05, |
|
"loss": 1.4525, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.615539858728557, |
|
"grad_norm": 1.6200363636016846, |
|
"learning_rate": 8.291237122063309e-05, |
|
"loss": 1.5451, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.6175580221997982, |
|
"grad_norm": 1.2775624990463257, |
|
"learning_rate": 8.27865776010157e-05, |
|
"loss": 1.454, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.6195761856710393, |
|
"grad_norm": 1.5615248680114746, |
|
"learning_rate": 8.266041885179949e-05, |
|
"loss": 1.4296, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.6215943491422805, |
|
"grad_norm": 1.7214854955673218, |
|
"learning_rate": 8.253389637795858e-05, |
|
"loss": 1.4538, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.6236125126135217, |
|
"grad_norm": 1.4547313451766968, |
|
"learning_rate": 8.240701158851778e-05, |
|
"loss": 1.4107, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.6256306760847629, |
|
"grad_norm": 1.4714813232421875, |
|
"learning_rate": 8.227976589653676e-05, |
|
"loss": 1.3942, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.627648839556004, |
|
"grad_norm": 1.4874024391174316, |
|
"learning_rate": 8.215216071909448e-05, |
|
"loss": 1.5679, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.6296670030272452, |
|
"grad_norm": 1.5745984315872192, |
|
"learning_rate": 8.202419747727333e-05, |
|
"loss": 1.4826, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.6316851664984864, |
|
"grad_norm": 1.5688875913619995, |
|
"learning_rate": 8.189587759614325e-05, |
|
"loss": 1.4611, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.6337033299697276, |
|
"grad_norm": 1.5299983024597168, |
|
"learning_rate": 8.176720250474594e-05, |
|
"loss": 1.4565, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.6357214934409687, |
|
"grad_norm": 1.5063962936401367, |
|
"learning_rate": 8.163817363607894e-05, |
|
"loss": 1.5174, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.6377396569122099, |
|
"grad_norm": 1.7101162672042847, |
|
"learning_rate": 8.150879242707962e-05, |
|
"loss": 1.4651, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.6397578203834511, |
|
"grad_norm": 1.6039749383926392, |
|
"learning_rate": 8.137906031860925e-05, |
|
"loss": 1.5918, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.6417759838546923, |
|
"grad_norm": 1.54131019115448, |
|
"learning_rate": 8.124897875543684e-05, |
|
"loss": 1.4304, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.6437941473259334, |
|
"grad_norm": 1.422153353691101, |
|
"learning_rate": 8.111854918622321e-05, |
|
"loss": 1.6108, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.6458123107971746, |
|
"grad_norm": 1.6443060636520386, |
|
"learning_rate": 8.098777306350469e-05, |
|
"loss": 1.4497, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.6478304742684158, |
|
"grad_norm": 1.5704275369644165, |
|
"learning_rate": 8.08566518436771e-05, |
|
"loss": 1.5172, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.649848637739657, |
|
"grad_norm": 1.7546700239181519, |
|
"learning_rate": 8.072518698697938e-05, |
|
"loss": 1.4498, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.6518668012108981, |
|
"grad_norm": 1.650565266609192, |
|
"learning_rate": 8.059337995747743e-05, |
|
"loss": 1.4536, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.6538849646821393, |
|
"grad_norm": 1.516605019569397, |
|
"learning_rate": 8.046123222304781e-05, |
|
"loss": 1.5499, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.6559031281533805, |
|
"grad_norm": 1.4544646739959717, |
|
"learning_rate": 8.032874525536131e-05, |
|
"loss": 1.4791, |
|
"step": 3250 |
|
}, |
{
"epoch": 0.6579212916246215,
"grad_norm": 1.590570092201233,
"learning_rate": 8.019592052986665e-05,
"loss": 1.3705,
"step": 3260
},
{
"epoch": 0.6599394550958627,
"grad_norm": 1.3931959867477417,
"learning_rate": 8.006275952577397e-05,
"loss": 1.5409,
"step": 3270
},
{
"epoch": 0.6619576185671039,
"grad_norm": 1.676604986190796,
"learning_rate": 7.992926372603842e-05,
"loss": 1.4835,
"step": 3280
},
{
"epoch": 0.6639757820383451,
"grad_norm": 1.832785725593567,
"learning_rate": 7.979543461734362e-05,
"loss": 1.4715,
"step": 3290
},
{
"epoch": 0.6659939455095862,
"grad_norm": 1.3548346757888794,
"learning_rate": 7.966127369008512e-05,
"loss": 1.4553,
"step": 3300
},
{
"epoch": 0.6680121089808274,
"grad_norm": 1.397743821144104,
"learning_rate": 7.952678243835376e-05,
"loss": 1.4793,
"step": 3310
},
{
"epoch": 0.6700302724520686,
"grad_norm": 1.5748425722122192,
"learning_rate": 7.939196235991904e-05,
"loss": 1.4791,
"step": 3320
},
{
"epoch": 0.6720484359233098,
"grad_norm": 1.672951340675354,
"learning_rate": 7.925681495621253e-05,
"loss": 1.5467,
"step": 3330
},
{
"epoch": 0.6740665993945509,
"grad_norm": 1.3464049100875854,
"learning_rate": 7.912134173231098e-05,
"loss": 1.4887,
"step": 3340
},
{
"epoch": 0.6760847628657921,
"grad_norm": 1.761104941368103,
"learning_rate": 7.898554419691974e-05,
"loss": 1.4937,
"step": 3350
},
{
"epoch": 0.6781029263370333,
"grad_norm": 1.5034103393554688,
"learning_rate": 7.884942386235582e-05,
"loss": 1.3636,
"step": 3360
},
{
"epoch": 0.6801210898082745,
"grad_norm": 1.463498592376709,
"learning_rate": 7.871298224453113e-05,
"loss": 1.3987,
"step": 3370
},
{
"epoch": 0.6821392532795156,
"grad_norm": 1.4649510383605957,
"learning_rate": 7.857622086293557e-05,
"loss": 1.5976,
"step": 3380
},
{
"epoch": 0.6841574167507568,
"grad_norm": 1.6214101314544678,
"learning_rate": 7.843914124062006e-05,
"loss": 1.4602,
"step": 3390
},
{
"epoch": 0.686175580221998,
"grad_norm": 1.752223253250122,
"learning_rate": 7.830174490417972e-05,
"loss": 1.4979,
"step": 3400
},
{
"epoch": 0.6881937436932392,
"grad_norm": 1.4902223348617554,
"learning_rate": 7.816403338373666e-05,
"loss": 1.4157,
"step": 3410
},
{
"epoch": 0.6902119071644803,
"grad_norm": 1.4046521186828613,
"learning_rate": 7.802600821292314e-05,
"loss": 1.3817,
"step": 3420
},
{
"epoch": 0.6922300706357215,
"grad_norm": 1.5104862451553345,
"learning_rate": 7.78876709288644e-05,
"loss": 1.4425,
"step": 3430
},
{
"epoch": 0.6942482341069627,
"grad_norm": 1.6517716646194458,
"learning_rate": 7.774902307216148e-05,
"loss": 1.5526,
"step": 3440
},
{
"epoch": 0.6962663975782039,
"grad_norm": 1.43543541431427,
"learning_rate": 7.76100661868742e-05,
"loss": 1.5054,
"step": 3450
},
{
"epoch": 0.698284561049445,
"grad_norm": 1.665265679359436,
"learning_rate": 7.747080182050388e-05,
"loss": 1.4123,
"step": 3460
},
{
"epoch": 0.7003027245206862,
"grad_norm": 1.5892359018325806,
"learning_rate": 7.733123152397609e-05,
"loss": 1.4904,
"step": 3470
},
{
"epoch": 0.7023208879919274,
"grad_norm": 1.5370794534683228,
"learning_rate": 7.719135685162342e-05,
"loss": 1.3999,
"step": 3480
},
{
"epoch": 0.7043390514631686,
"grad_norm": 1.509031891822815,
"learning_rate": 7.705117936116822e-05,
"loss": 1.5462,
"step": 3490
},
{
"epoch": 0.7063572149344097,
"grad_norm": 1.753881812095642,
"learning_rate": 7.691070061370507e-05,
"loss": 1.5522,
"step": 3500
},
{
"epoch": 0.7083753784056509,
"grad_norm": 1.5018798112869263,
"learning_rate": 7.676992217368364e-05,
"loss": 1.4837,
"step": 3510
},
{
"epoch": 0.7103935418768921,
"grad_norm": 1.5720365047454834,
"learning_rate": 7.662884560889105e-05,
"loss": 1.3675,
"step": 3520
},
{
"epoch": 0.7124117053481333,
"grad_norm": 1.3165663480758667,
"learning_rate": 7.648747249043457e-05,
"loss": 1.5472,
"step": 3530
},
{
"epoch": 0.7144298688193743,
"grad_norm": 1.523558497428894,
"learning_rate": 7.634580439272401e-05,
"loss": 1.5398,
"step": 3540
},
{
"epoch": 0.7164480322906155,
"grad_norm": 1.283341884613037,
"learning_rate": 7.620384289345425e-05,
"loss": 1.5009,
"step": 3550
},
{
"epoch": 0.7184661957618567,
"grad_norm": 1.6135417222976685,
"learning_rate": 7.606158957358769e-05,
"loss": 1.4926,
"step": 3560
},
{
"epoch": 0.7204843592330978,
"grad_norm": 1.5714365243911743,
"learning_rate": 7.591904601733655e-05,
"loss": 1.5098,
"step": 3570
},
{
"epoch": 0.722502522704339,
"grad_norm": 1.4282561540603638,
"learning_rate": 7.577621381214529e-05,
"loss": 1.4814,
"step": 3580
},
{
"epoch": 0.7245206861755802,
"grad_norm": 1.384590744972229,
"learning_rate": 7.563309454867295e-05,
"loss": 1.4716,
"step": 3590
},
{
"epoch": 0.7265388496468214,
"grad_norm": 1.3100378513336182,
"learning_rate": 7.548968982077542e-05,
"loss": 1.3972,
"step": 3600
},
{
"epoch": 0.7285570131180625,
"grad_norm": 1.2014504671096802,
"learning_rate": 7.534600122548765e-05,
"loss": 1.392,
"step": 3610
},
{
"epoch": 0.7305751765893037,
"grad_norm": 1.5689890384674072,
"learning_rate": 7.520203036300588e-05,
"loss": 1.4531,
"step": 3620
},
{
"epoch": 0.7325933400605449,
"grad_norm": 1.7837287187576294,
"learning_rate": 7.505777883666993e-05,
"loss": 1.6061,
"step": 3630
},
{
"epoch": 0.7346115035317861,
"grad_norm": 1.3635213375091553,
"learning_rate": 7.491324825294514e-05,
"loss": 1.4351,
"step": 3640
},
{
"epoch": 0.7366296670030272,
"grad_norm": 1.3929572105407715,
"learning_rate": 7.476844022140464e-05,
"loss": 1.4991,
"step": 3650
},
{
"epoch": 0.7386478304742684,
"grad_norm": 1.3147705793380737,
"learning_rate": 7.462335635471136e-05,
"loss": 1.4049,
"step": 3660
},
{
"epoch": 0.7406659939455096,
"grad_norm": 1.5074125528335571,
"learning_rate": 7.44779982686001e-05,
"loss": 1.4351,
"step": 3670
},
{
"epoch": 0.7426841574167508,
"grad_norm": 1.4625370502471924,
"learning_rate": 7.43323675818595e-05,
"loss": 1.3535,
"step": 3680
},
{
"epoch": 0.7447023208879919,
"grad_norm": 1.6273128986358643,
"learning_rate": 7.418646591631404e-05,
"loss": 1.3886,
"step": 3690
},
{
"epoch": 0.7467204843592331,
"grad_norm": 1.5632721185684204,
"learning_rate": 7.404029489680598e-05,
"loss": 1.4134,
"step": 3700
},
{
"epoch": 0.7487386478304743,
"grad_norm": 1.491721510887146,
"learning_rate": 7.389385615117723e-05,
"loss": 1.4279,
"step": 3710
},
{
"epoch": 0.7507568113017155,
"grad_norm": 1.5549407005310059,
"learning_rate": 7.37471513102513e-05,
"loss": 1.3888,
"step": 3720
},
{
"epoch": 0.7527749747729566,
"grad_norm": 1.4470081329345703,
"learning_rate": 7.360018200781502e-05,
"loss": 1.4272,
"step": 3730
},
{
"epoch": 0.7547931382441978,
"grad_norm": 1.686854600906372,
"learning_rate": 7.345294988060046e-05,
"loss": 1.5853,
"step": 3740
},
{
"epoch": 0.756811301715439,
"grad_norm": 1.5734236240386963,
"learning_rate": 7.330545656826662e-05,
"loss": 1.44,
"step": 3750
},
{
"epoch": 0.7588294651866802,
"grad_norm": 1.3938251733779907,
"learning_rate": 7.315770371338126e-05,
"loss": 1.3882,
"step": 3760
},
{
"epoch": 0.7608476286579213,
"grad_norm": 1.5192281007766724,
"learning_rate": 7.300969296140244e-05,
"loss": 1.5221,
"step": 3770
},
{
"epoch": 0.7628657921291625,
"grad_norm": 1.7236813306808472,
"learning_rate": 7.286142596066044e-05,
"loss": 1.4553,
"step": 3780
},
{
"epoch": 0.7648839556004037,
"grad_norm": 1.402596116065979,
"learning_rate": 7.271290436233916e-05,
"loss": 1.4925,
"step": 3790
},
{
"epoch": 0.7669021190716448,
"grad_norm": 1.5744677782058716,
"learning_rate": 7.25641298204579e-05,
"loss": 1.4484,
"step": 3800
},
{
"epoch": 0.768920282542886,
"grad_norm": 1.640576720237732,
"learning_rate": 7.241510399185287e-05,
"loss": 1.4277,
"step": 3810
},
{
"epoch": 0.7709384460141272,
"grad_norm": 1.6124114990234375,
"learning_rate": 7.226582853615874e-05,
"loss": 1.3545,
"step": 3820
},
{
"epoch": 0.7729566094853683,
"grad_norm": 1.7142858505249023,
"learning_rate": 7.211630511579015e-05,
"loss": 1.4184,
"step": 3830
},
{
"epoch": 0.7749747729566094,
"grad_norm": 1.3614004850387573,
"learning_rate": 7.196653539592326e-05,
"loss": 1.4101,
"step": 3840
},
{
"epoch": 0.7769929364278506,
"grad_norm": 1.3301514387130737,
"learning_rate": 7.181652104447711e-05,
"loss": 1.4297,
"step": 3850
},
{
"epoch": 0.7790110998990918,
"grad_norm": 1.441169023513794,
"learning_rate": 7.166626373209514e-05,
"loss": 1.4615,
"step": 3860
},
{
"epoch": 0.781029263370333,
"grad_norm": 1.4002233743667603,
"learning_rate": 7.15157651321265e-05,
"loss": 1.5348,
"step": 3870
},
{
"epoch": 0.7830474268415741,
"grad_norm": 1.4642239809036255,
"learning_rate": 7.136502692060746e-05,
"loss": 1.5266,
"step": 3880
},
{
"epoch": 0.7850655903128153,
"grad_norm": 1.4477249383926392,
"learning_rate": 7.121405077624276e-05,
"loss": 1.468,
"step": 3890
},
{
"epoch": 0.7870837537840565,
"grad_norm": 1.3571120500564575,
"learning_rate": 7.106283838038685e-05,
"loss": 1.472,
"step": 3900
},
{
"epoch": 0.7891019172552977,
"grad_norm": 1.3332570791244507,
"learning_rate": 7.091139141702527e-05,
"loss": 1.3955,
"step": 3910
},
{
"epoch": 0.7911200807265388,
"grad_norm": 1.5350067615509033,
"learning_rate": 7.075971157275575e-05,
"loss": 1.4683,
"step": 3920
},
{
"epoch": 0.79313824419778,
"grad_norm": 1.4552041292190552,
"learning_rate": 7.06078005367696e-05,
"loss": 1.4189,
"step": 3930
},
{
"epoch": 0.7951564076690212,
"grad_norm": 1.4249471426010132,
"learning_rate": 7.045566000083278e-05,
"loss": 1.4861,
"step": 3940
},
{
"epoch": 0.7971745711402624,
"grad_norm": 1.3417631387710571,
"learning_rate": 7.030329165926706e-05,
"loss": 1.4106,
"step": 3950
},
{
"epoch": 0.7991927346115035,
"grad_norm": 1.616866111755371,
"learning_rate": 7.01506972089312e-05,
"loss": 1.5251,
"step": 3960
},
{
"epoch": 0.8012108980827447,
"grad_norm": 1.5517117977142334,
"learning_rate": 6.999787834920202e-05,
"loss": 1.388,
"step": 3970
},
{
"epoch": 0.8032290615539859,
"grad_norm": 1.3729262351989746,
"learning_rate": 6.984483678195553e-05,
"loss": 1.4466,
"step": 3980
},
{
"epoch": 0.805247225025227,
"grad_norm": 1.5104920864105225,
"learning_rate": 6.969157421154789e-05,
"loss": 1.488,
"step": 3990
},
{
"epoch": 0.8072653884964682,
"grad_norm": 1.644098162651062,
"learning_rate": 6.95380923447965e-05,
"loss": 1.3932,
"step": 4000
},
{
"epoch": 0.8092835519677094,
"grad_norm": 1.4966418743133545,
"learning_rate": 6.938439289096095e-05,
"loss": 1.4094,
"step": 4010
},
{
"epoch": 0.8113017154389506,
"grad_norm": 1.6125532388687134,
"learning_rate": 6.923047756172401e-05,
"loss": 1.4399,
"step": 4020
},
{
"epoch": 0.8133198789101918,
"grad_norm": 1.3540160655975342,
"learning_rate": 6.907634807117257e-05,
"loss": 1.3183,
"step": 4030
},
{
"epoch": 0.8153380423814329,
"grad_norm": 1.390666127204895,
"learning_rate": 6.892200613577852e-05,
"loss": 1.5065,
"step": 4040
},
{
"epoch": 0.8173562058526741,
"grad_norm": 1.4623992443084717,
"learning_rate": 6.876745347437964e-05,
"loss": 1.4661,
"step": 4050
},
{
"epoch": 0.8193743693239153,
"grad_norm": 1.251976728439331,
"learning_rate": 6.861269180816052e-05,
"loss": 1.3724,
"step": 4060
},
{
"epoch": 0.8213925327951564,
"grad_norm": 1.4126683473587036,
"learning_rate": 6.845772286063332e-05,
"loss": 1.4495,
"step": 4070
},
{
"epoch": 0.8234106962663976,
"grad_norm": 1.3776429891586304,
"learning_rate": 6.830254835761856e-05,
"loss": 1.4128,
"step": 4080
},
{
"epoch": 0.8254288597376388,
"grad_norm": 1.5709989070892334,
"learning_rate": 6.814717002722602e-05,
"loss": 1.5365,
"step": 4090
},
{
"epoch": 0.82744702320888,
"grad_norm": 1.326343059539795,
"learning_rate": 6.799158959983536e-05,
"loss": 1.4633,
"step": 4100
},
{
"epoch": 0.829465186680121,
"grad_norm": 1.4777864217758179,
"learning_rate": 6.78358088080769e-05,
"loss": 1.5106,
"step": 4110
},
{
"epoch": 0.8314833501513622,
"grad_norm": 1.595955729484558,
"learning_rate": 6.767982938681239e-05,
"loss": 1.5467,
"step": 4120
},
{
"epoch": 0.8335015136226034,
"grad_norm": 1.611190915107727,
"learning_rate": 6.752365307311556e-05,
"loss": 1.4587,
"step": 4130
},
{
"epoch": 0.8355196770938446,
"grad_norm": 1.2045243978500366,
"learning_rate": 6.736728160625284e-05,
"loss": 1.5403,
"step": 4140
},
{
"epoch": 0.8375378405650857,
"grad_norm": 1.5161255598068237,
"learning_rate": 6.721071672766406e-05,
"loss": 1.4287,
"step": 4150
},
{
"epoch": 0.8395560040363269,
"grad_norm": 1.5501888990402222,
"learning_rate": 6.705396018094297e-05,
"loss": 1.4729,
"step": 4160
},
{
"epoch": 0.8415741675075681,
"grad_norm": 1.350813388824463,
"learning_rate": 6.689701371181781e-05,
"loss": 1.3742,
"step": 4170
},
{
"epoch": 0.8435923309788093,
"grad_norm": 1.5314749479293823,
"learning_rate": 6.673987906813191e-05,
"loss": 1.355,
"step": 4180
},
{
"epoch": 0.8456104944500504,
"grad_norm": 1.5933685302734375,
"learning_rate": 6.658255799982424e-05,
"loss": 1.4609,
"step": 4190
},
{
"epoch": 0.8476286579212916,
"grad_norm": 1.429506778717041,
"learning_rate": 6.642505225890987e-05,
"loss": 1.3823,
"step": 4200
},
{
"epoch": 0.8496468213925328,
"grad_norm": 1.5170999765396118,
"learning_rate": 6.626736359946052e-05,
"loss": 1.4732,
"step": 4210
},
{
"epoch": 0.851664984863774,
"grad_norm": 1.3810912370681763,
"learning_rate": 6.610949377758497e-05,
"loss": 1.4792,
"step": 4220
},
{
"epoch": 0.8536831483350151,
"grad_norm": 1.5815091133117676,
"learning_rate": 6.595144455140952e-05,
"loss": 1.4539,
"step": 4230
},
{
"epoch": 0.8557013118062563,
"grad_norm": 1.4099366664886475,
"learning_rate": 6.579321768105845e-05,
"loss": 1.459,
"step": 4240
},
{
"epoch": 0.8577194752774975,
"grad_norm": 1.529607892036438,
"learning_rate": 6.563481492863436e-05,
"loss": 1.434,
"step": 4250
},
{
"epoch": 0.8597376387487387,
"grad_norm": 1.4503751993179321,
"learning_rate": 6.547623805819854e-05,
"loss": 1.3988,
"step": 4260
},
{
"epoch": 0.8617558022199798,
"grad_norm": 1.3069409132003784,
"learning_rate": 6.531748883575143e-05,
"loss": 1.4464,
"step": 4270
},
{
"epoch": 0.863773965691221,
"grad_norm": 1.5749943256378174,
"learning_rate": 6.51585690292128e-05,
"loss": 1.3765,
"step": 4280
},
{
"epoch": 0.8657921291624622,
"grad_norm": 1.7546286582946777,
"learning_rate": 6.499948040840219e-05,
"loss": 1.4531,
"step": 4290
},
{
"epoch": 0.8678102926337034,
"grad_norm": 1.4653708934783936,
"learning_rate": 6.484022474501914e-05,
"loss": 1.421,
"step": 4300
},
{
"epoch": 0.8698284561049445,
"grad_norm": 1.4347306489944458,
"learning_rate": 6.468080381262347e-05,
"loss": 1.4107,
"step": 4310
},
{
"epoch": 0.8718466195761857,
"grad_norm": 1.6876280307769775,
"learning_rate": 6.45212193866155e-05,
"loss": 1.4524,
"step": 4320
},
{
"epoch": 0.8738647830474269,
"grad_norm": 1.4129562377929688,
"learning_rate": 6.436147324421635e-05,
"loss": 1.3881,
"step": 4330
},
{
"epoch": 0.875882946518668,
"grad_norm": 1.5428842306137085,
"learning_rate": 6.420156716444805e-05,
"loss": 1.3926,
"step": 4340
},
{
"epoch": 0.8779011099899092,
"grad_norm": 1.4280976057052612,
"learning_rate": 6.404150292811386e-05,
"loss": 1.3836,
"step": 4350
},
{
"epoch": 0.8799192734611504,
"grad_norm": 1.5553336143493652,
"learning_rate": 6.388128231777828e-05,
"loss": 1.5083,
"step": 4360
},
{
"epoch": 0.8819374369323916,
"grad_norm": 1.3947172164916992,
"learning_rate": 6.372090711774732e-05,
"loss": 1.3669,
"step": 4370
},
{
"epoch": 0.8839556004036327,
"grad_norm": 1.413853645324707,
"learning_rate": 6.356037911404858e-05,
"loss": 1.4119,
"step": 4380
},
{
"epoch": 0.8859737638748738,
"grad_norm": 1.3096851110458374,
"learning_rate": 6.339970009441137e-05,
"loss": 1.391,
"step": 4390
},
{
"epoch": 0.887991927346115,
"grad_norm": 1.2649918794631958,
"learning_rate": 6.323887184824678e-05,
"loss": 1.3865,
"step": 4400
},
{
"epoch": 0.8900100908173562,
"grad_norm": 1.4351869821548462,
"learning_rate": 6.307789616662778e-05,
"loss": 1.3554,
"step": 4410
},
{
"epoch": 0.8920282542885973,
"grad_norm": 1.4802148342132568,
"learning_rate": 6.291677484226929e-05,
"loss": 1.5499,
"step": 4420
},
{
"epoch": 0.8940464177598385,
"grad_norm": 1.6230851411819458,
"learning_rate": 6.275550966950814e-05,
"loss": 1.4822,
"step": 4430
},
{
"epoch": 0.8960645812310797,
"grad_norm": 1.442018985748291,
"learning_rate": 6.259410244428318e-05,
"loss": 1.383,
"step": 4440
},
{
"epoch": 0.8980827447023209,
"grad_norm": 1.493252158164978,
"learning_rate": 6.243255496411519e-05,
"loss": 1.4048,
"step": 4450
},
{
"epoch": 0.900100908173562,
"grad_norm": 1.5293428897857666,
"learning_rate": 6.227086902808697e-05,
"loss": 1.5437,
"step": 4460
},
{
"epoch": 0.9021190716448032,
"grad_norm": 1.3244189023971558,
"learning_rate": 6.210904643682318e-05,
"loss": 1.4652,
"step": 4470
},
{
"epoch": 0.9041372351160444,
"grad_norm": 1.4349359273910522,
"learning_rate": 6.194708899247037e-05,
"loss": 1.4024,
"step": 4480
},
{
"epoch": 0.9061553985872856,
"grad_norm": 1.5854321718215942,
"learning_rate": 6.178499849867689e-05,
"loss": 1.4496,
"step": 4490
},
{
"epoch": 0.9081735620585267,
"grad_norm": 1.4568113088607788,
"learning_rate": 6.162277676057284e-05,
"loss": 1.4378,
"step": 4500
},
{
"epoch": 0.9101917255297679,
"grad_norm": 1.3261117935180664,
"learning_rate": 6.146042558474987e-05,
"loss": 1.4257,
"step": 4510
},
{
"epoch": 0.9122098890010091,
"grad_norm": 1.4419591426849365,
"learning_rate": 6.129794677924113e-05,
"loss": 1.4045,
"step": 4520
},
{
"epoch": 0.9142280524722503,
"grad_norm": 1.3974449634552002,
"learning_rate": 6.113534215350116e-05,
"loss": 1.4281,
"step": 4530
},
{
"epoch": 0.9162462159434914,
"grad_norm": 1.3392093181610107,
"learning_rate": 6.097261351838569e-05,
"loss": 1.5913,
"step": 4540
},
{
"epoch": 0.9182643794147326,
"grad_norm": 1.3982704877853394,
"learning_rate": 6.0809762686131474e-05,
"loss": 1.391,
"step": 4550
},
{
"epoch": 0.9202825428859738,
"grad_norm": 1.2605746984481812,
"learning_rate": 6.064679147033614e-05,
"loss": 1.4911,
"step": 4560
},
{
"epoch": 0.922300706357215,
"grad_norm": 1.5178440809249878,
"learning_rate": 6.0483701685937954e-05,
"loss": 1.4428,
"step": 4570
},
{
"epoch": 0.9243188698284561,
"grad_norm": 1.522623062133789,
"learning_rate": 6.0320495149195644e-05,
"loss": 1.395,
"step": 4580
},
{
"epoch": 0.9263370332996973,
"grad_norm": 1.7341537475585938,
"learning_rate": 6.015717367766815e-05,
"loss": 1.4924,
"step": 4590
},
{
"epoch": 0.9283551967709385,
"grad_norm": 1.4069305658340454,
"learning_rate": 5.999373909019437e-05,
"loss": 1.4476,
"step": 4600
},
{
"epoch": 0.9303733602421796,
"grad_norm": 1.2393527030944824,
"learning_rate": 5.9830193206872974e-05,
"loss": 1.4227,
"step": 4610
},
{
"epoch": 0.9323915237134208,
"grad_norm": 1.2959623336791992,
"learning_rate": 5.966653784904207e-05,
"loss": 1.4123,
"step": 4620
},
{
"epoch": 0.934409687184662,
"grad_norm": 1.6350857019424438,
"learning_rate": 5.950277483925889e-05,
"loss": 1.4116,
"step": 4630
},
{
"epoch": 0.9364278506559032,
"grad_norm": 1.1435925960540771,
"learning_rate": 5.933890600127958e-05,
"loss": 1.4417,
"step": 4640
},
{
"epoch": 0.9384460141271443,
"grad_norm": 1.4868639707565308,
"learning_rate": 5.917493316003884e-05,
"loss": 1.4769,
"step": 4650
},
{
"epoch": 0.9404641775983855,
"grad_norm": 1.4463169574737549,
"learning_rate": 5.90108581416296e-05,
"loss": 1.4507,
"step": 4660
},
{
"epoch": 0.9424823410696267,
"grad_norm": 1.3405494689941406,
"learning_rate": 5.8846682773282694e-05,
"loss": 1.4446,
"step": 4670
},
{
"epoch": 0.9445005045408678,
"grad_norm": 1.4846607446670532,
"learning_rate": 5.868240888334653e-05,
"loss": 1.439,
"step": 4680
},
{
"epoch": 0.9465186680121089,
"grad_norm": 1.2763575315475464,
"learning_rate": 5.851803830126666e-05,
"loss": 1.5239,
"step": 4690
},
{
"epoch": 0.9485368314833501,
"grad_norm": 1.274949550628662,
"learning_rate": 5.835357285756552e-05,
"loss": 1.374,
"step": 4700
},
{
"epoch": 0.9505549949545913,
"grad_norm": 1.277209758758545,
"learning_rate": 5.8189014383821914e-05,
"loss": 1.3033,
"step": 4710
},
{
"epoch": 0.9525731584258325,
"grad_norm": 1.395630955696106,
"learning_rate": 5.8024364712650724e-05,
"loss": 1.3979,
"step": 4720
},
{
"epoch": 0.9545913218970736,
"grad_norm": 1.2895914316177368,
"learning_rate": 5.785962567768243e-05,
"loss": 1.376,
"step": 4730
},
{
"epoch": 0.9566094853683148,
"grad_norm": 1.2383575439453125,
"learning_rate": 5.769479911354273e-05,
"loss": 1.4396,
"step": 4740
},
{
"epoch": 0.958627648839556,
"grad_norm": 1.3582751750946045,
"learning_rate": 5.7529886855832096e-05,
"loss": 1.497,
"step": 4750
},
{
"epoch": 0.9606458123107972,
"grad_norm": 1.4777917861938477,
"learning_rate": 5.736489074110533e-05,
"loss": 1.2924,
"step": 4760
},
{
"epoch": 0.9626639757820383,
"grad_norm": 1.448093056678772,
"learning_rate": 5.71998126068511e-05,
"loss": 1.3533,
"step": 4770
},
{
"epoch": 0.9646821392532795,
"grad_norm": 1.336315393447876,
"learning_rate": 5.7034654291471524e-05,
"loss": 1.4275,
"step": 4780
},
{
"epoch": 0.9667003027245207,
"grad_norm": 1.2323698997497559,
"learning_rate": 5.686941763426161e-05,
"loss": 1.4587,
"step": 4790
},
{
"epoch": 0.9687184661957619,
"grad_norm": 1.4090189933776855,
"learning_rate": 5.670410447538889e-05,
"loss": 1.3976,
"step": 4800
},
{
"epoch": 0.970736629667003,
"grad_norm": 1.471298336982727,
"learning_rate": 5.653871665587278e-05,
"loss": 1.4002,
"step": 4810
},
{
"epoch": 0.9727547931382442,
"grad_norm": 1.2523037195205688,
"learning_rate": 5.6373256017564215e-05,
"loss": 1.3906,
"step": 4820
},
{
"epoch": 0.9747729566094854,
"grad_norm": 1.441237211227417,
"learning_rate": 5.620772440312508e-05,
"loss": 1.3976,
"step": 4830
},
{
"epoch": 0.9767911200807265,
"grad_norm": 1.7172768115997314,
"learning_rate": 5.6042123656007685e-05,
"loss": 1.4364,
"step": 4840
},
{
"epoch": 0.9788092835519677,
"grad_norm": 1.4941967725753784,
"learning_rate": 5.587645562043422e-05,
"loss": 1.4107,
"step": 4850
},
{
"epoch": 0.9808274470232089,
"grad_norm": 1.4575670957565308,
"learning_rate": 5.5710722141376245e-05,
"loss": 1.342,
"step": 4860
},
{
"epoch": 0.9828456104944501,
"grad_norm": 1.235521674156189,
"learning_rate": 5.5544925064534145e-05,
"loss": 1.2921,
"step": 4870
},
{
"epoch": 0.9848637739656912,
"grad_norm": 1.5723048448562622,
"learning_rate": 5.537906623631657e-05,
"loss": 1.5273,
"step": 4880
},
{
"epoch": 0.9868819374369324,
"grad_norm": 1.4097591638565063,
"learning_rate": 5.521314750381983e-05,
"loss": 1.3714,
"step": 4890
},
{
"epoch": 0.9889001009081736,
"grad_norm": 1.4014616012573242,
"learning_rate": 5.5047170714807406e-05,
"loss": 1.3598,
"step": 4900
},
{
"epoch": 0.9909182643794148,
"grad_norm": 1.2477079629898071,
"learning_rate": 5.4881137717689315e-05,
"loss": 1.3501,
"step": 4910
},
{
"epoch": 0.992936427850656,
"grad_norm": 1.3461527824401855,
"learning_rate": 5.471505036150154e-05,
"loss": 1.3813,
"step": 4920
},
{
"epoch": 0.9949545913218971,
"grad_norm": 1.6173443794250488,
"learning_rate": 5.454891049588544e-05,
"loss": 1.5266,
"step": 4930
},
{
"epoch": 0.9969727547931383,
"grad_norm": 1.351027011871338,
"learning_rate": 5.438271997106712e-05,
"loss": 1.395,
"step": 4940
},
{
"epoch": 0.9989909182643795,
"grad_norm": 1.210114598274231,
"learning_rate": 5.421648063783689e-05,
"loss": 1.284,
"step": 4950
}
],
"logging_steps": 10,
"max_steps": 9910,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 4955,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 8.387441374475059e+17,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}