|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.1942794016660199, |
|
"eval_steps": 1000, |
|
"global_step": 40000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 2.9856985041650495e-05, |
|
"grad_norm": 8.064935684204102, |
|
"learning_rate": 0.0, |
|
"loss": 3.4849, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0014928492520825246, |
|
"grad_norm": 3.3433420658111572, |
|
"learning_rate": 0.00019998742849959144, |
|
"loss": 1.9038, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0029856985041650493, |
|
"grad_norm": 2.907883405685425, |
|
"learning_rate": 0.0001999731427036726, |
|
"loss": 1.608, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.004478547756247574, |
|
"grad_norm": 3.2614288330078125, |
|
"learning_rate": 0.00019995885690775376, |
|
"loss": 1.5505, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.005971397008330099, |
|
"grad_norm": 3.8400654792785645, |
|
"learning_rate": 0.00019994457111183493, |
|
"loss": 1.5737, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.007464246260412624, |
|
"grad_norm": 3.3358442783355713, |
|
"learning_rate": 0.00019993028531591612, |
|
"loss": 1.567, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.008957095512495149, |
|
"grad_norm": 2.131911277770996, |
|
"learning_rate": 0.00019991599951999726, |
|
"loss": 1.5208, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.010449944764577673, |
|
"grad_norm": 3.180992364883423, |
|
"learning_rate": 0.00019990171372407845, |
|
"loss": 1.5586, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.011942794016660197, |
|
"grad_norm": 3.024989128112793, |
|
"learning_rate": 0.0001998874279281596, |
|
"loss": 1.5267, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.013435643268742723, |
|
"grad_norm": 3.4935102462768555, |
|
"learning_rate": 0.00019987314213224078, |
|
"loss": 1.5609, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.014928492520825247, |
|
"grad_norm": 3.565504550933838, |
|
"learning_rate": 0.00019985885633632194, |
|
"loss": 1.5112, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.01642134177290777, |
|
"grad_norm": 2.0692882537841797, |
|
"learning_rate": 0.00019984457054040308, |
|
"loss": 1.5087, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.017914191024990297, |
|
"grad_norm": 4.110323905944824, |
|
"learning_rate": 0.00019983028474448427, |
|
"loss": 1.5073, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.01940704027707282, |
|
"grad_norm": 2.8632736206054688, |
|
"learning_rate": 0.0001998159989485654, |
|
"loss": 1.472, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.020899889529155346, |
|
"grad_norm": 2.629347324371338, |
|
"learning_rate": 0.0001998017131526466, |
|
"loss": 1.5236, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.022392738781237872, |
|
"grad_norm": 3.696873188018799, |
|
"learning_rate": 0.00019978742735672774, |
|
"loss": 1.5216, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.023885588033320394, |
|
"grad_norm": 3.253305435180664, |
|
"learning_rate": 0.00019977314156080893, |
|
"loss": 1.5915, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.02537843728540292, |
|
"grad_norm": 2.9587886333465576, |
|
"learning_rate": 0.0001997588557648901, |
|
"loss": 1.4862, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.026871286537485446, |
|
"grad_norm": 3.3311073780059814, |
|
"learning_rate": 0.00019974456996897126, |
|
"loss": 1.4703, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.02836413578956797, |
|
"grad_norm": 2.5960264205932617, |
|
"learning_rate": 0.00019973028417305243, |
|
"loss": 1.4818, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.029856985041650495, |
|
"grad_norm": 3.3142144680023193, |
|
"learning_rate": 0.0001997159983771336, |
|
"loss": 1.4746, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.03134983429373302, |
|
"grad_norm": 3.5049827098846436, |
|
"learning_rate": 0.00019970171258121476, |
|
"loss": 1.442, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.03284268354581554, |
|
"grad_norm": 3.0218605995178223, |
|
"learning_rate": 0.00019968742678529592, |
|
"loss": 1.5265, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.03433553279789807, |
|
"grad_norm": 2.936182975769043, |
|
"learning_rate": 0.00019967314098937709, |
|
"loss": 1.5174, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.035828382049980595, |
|
"grad_norm": 2.877253293991089, |
|
"learning_rate": 0.00019965885519345825, |
|
"loss": 1.4499, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.03732123130206312, |
|
"grad_norm": 6.07016658782959, |
|
"learning_rate": 0.00019964456939753941, |
|
"loss": 1.4542, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.03881408055414564, |
|
"grad_norm": 2.1618189811706543, |
|
"learning_rate": 0.0001996302836016206, |
|
"loss": 1.4343, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.040306929806228166, |
|
"grad_norm": 2.8267719745635986, |
|
"learning_rate": 0.00019961599780570174, |
|
"loss": 1.463, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.04179977905831069, |
|
"grad_norm": 2.6036462783813477, |
|
"learning_rate": 0.00019960171200978294, |
|
"loss": 1.4557, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.04329262831039322, |
|
"grad_norm": 3.0187127590179443, |
|
"learning_rate": 0.00019958742621386407, |
|
"loss": 1.4472, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.044785477562475744, |
|
"grad_norm": 3.9822633266448975, |
|
"learning_rate": 0.00019957314041794526, |
|
"loss": 1.4384, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.04627832681455826, |
|
"grad_norm": 2.919654607772827, |
|
"learning_rate": 0.0001995588546220264, |
|
"loss": 1.4969, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.04777117606664079, |
|
"grad_norm": 2.922963857650757, |
|
"learning_rate": 0.0001995445688261076, |
|
"loss": 1.4987, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.049264025318723315, |
|
"grad_norm": 2.9638512134552, |
|
"learning_rate": 0.00019953028303018876, |
|
"loss": 1.4722, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.05075687457080584, |
|
"grad_norm": 3.408391237258911, |
|
"learning_rate": 0.00019951599723426992, |
|
"loss": 1.4723, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.052249723822888366, |
|
"grad_norm": 3.023597240447998, |
|
"learning_rate": 0.0001995017114383511, |
|
"loss": 1.4478, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.05374257307497089, |
|
"grad_norm": 2.1655213832855225, |
|
"learning_rate": 0.00019948742564243225, |
|
"loss": 1.4071, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.05523542232705341, |
|
"grad_norm": 3.796663999557495, |
|
"learning_rate": 0.00019947313984651342, |
|
"loss": 1.446, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.05672827157913594, |
|
"grad_norm": 3.0415594577789307, |
|
"learning_rate": 0.00019945885405059458, |
|
"loss": 1.4324, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.05822112083121846, |
|
"grad_norm": 2.524627685546875, |
|
"learning_rate": 0.00019944456825467575, |
|
"loss": 1.3963, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.05971397008330099, |
|
"grad_norm": 3.2881991863250732, |
|
"learning_rate": 0.0001994302824587569, |
|
"loss": 1.4522, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.061206819335383515, |
|
"grad_norm": 3.392430067062378, |
|
"learning_rate": 0.00019941599666283808, |
|
"loss": 1.4329, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.06269966858746603, |
|
"grad_norm": 3.9426393508911133, |
|
"learning_rate": 0.00019940171086691927, |
|
"loss": 1.5203, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.06419251783954856, |
|
"grad_norm": 3.3737235069274902, |
|
"learning_rate": 0.0001993874250710004, |
|
"loss": 1.3674, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.06568536709163109, |
|
"grad_norm": 3.783085346221924, |
|
"learning_rate": 0.0001993731392750816, |
|
"loss": 1.4339, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.06717821634371361, |
|
"grad_norm": 3.4819202423095703, |
|
"learning_rate": 0.00019935885347916273, |
|
"loss": 1.4436, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.06867106559579614, |
|
"grad_norm": 3.141775608062744, |
|
"learning_rate": 0.00019934456768324393, |
|
"loss": 1.4683, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.07016391484787866, |
|
"grad_norm": 3.2881035804748535, |
|
"learning_rate": 0.00019933028188732506, |
|
"loss": 1.4395, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.07165676409996119, |
|
"grad_norm": 3.718122959136963, |
|
"learning_rate": 0.00019931599609140626, |
|
"loss": 1.4396, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.07314961335204372, |
|
"grad_norm": 4.3829474449157715, |
|
"learning_rate": 0.00019930171029548742, |
|
"loss": 1.4477, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.07464246260412624, |
|
"grad_norm": 3.3698525428771973, |
|
"learning_rate": 0.00019928742449956858, |
|
"loss": 1.3529, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.07613531185620875, |
|
"grad_norm": 3.7569565773010254, |
|
"learning_rate": 0.00019927313870364975, |
|
"loss": 1.4246, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.07762816110829128, |
|
"grad_norm": 3.1486406326293945, |
|
"learning_rate": 0.00019925885290773091, |
|
"loss": 1.3813, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.0791210103603738, |
|
"grad_norm": 4.0635480880737305, |
|
"learning_rate": 0.00019924456711181208, |
|
"loss": 1.4259, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.08061385961245633, |
|
"grad_norm": 3.2710611820220947, |
|
"learning_rate": 0.00019923028131589324, |
|
"loss": 1.3747, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.08210670886453886, |
|
"grad_norm": 3.4968345165252686, |
|
"learning_rate": 0.0001992159955199744, |
|
"loss": 1.4721, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.08359955811662138, |
|
"grad_norm": 4.274214267730713, |
|
"learning_rate": 0.00019920170972405557, |
|
"loss": 1.437, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.08509240736870391, |
|
"grad_norm": 2.970602512359619, |
|
"learning_rate": 0.00019918742392813674, |
|
"loss": 1.3336, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.08658525662078644, |
|
"grad_norm": 4.143342971801758, |
|
"learning_rate": 0.00019917313813221793, |
|
"loss": 1.4264, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.08807810587286896, |
|
"grad_norm": 3.7546920776367188, |
|
"learning_rate": 0.00019915885233629907, |
|
"loss": 1.441, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.08957095512495149, |
|
"grad_norm": 3.9160516262054443, |
|
"learning_rate": 0.00019914456654038026, |
|
"loss": 1.4261, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.09106380437703401, |
|
"grad_norm": 3.842073917388916, |
|
"learning_rate": 0.0001991302807444614, |
|
"loss": 1.4076, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.09255665362911653, |
|
"grad_norm": 4.392395496368408, |
|
"learning_rate": 0.0001991159949485426, |
|
"loss": 1.3789, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.09404950288119905, |
|
"grad_norm": 3.822425603866577, |
|
"learning_rate": 0.00019910170915262373, |
|
"loss": 1.3877, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.09554235213328158, |
|
"grad_norm": 3.1348562240600586, |
|
"learning_rate": 0.0001990874233567049, |
|
"loss": 1.4081, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.0970352013853641, |
|
"grad_norm": 3.453887939453125, |
|
"learning_rate": 0.00019907313756078608, |
|
"loss": 1.4143, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.09852805063744663, |
|
"grad_norm": 3.5057384967803955, |
|
"learning_rate": 0.00019905885176486722, |
|
"loss": 1.4264, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.10002089988952916, |
|
"grad_norm": 3.145796060562134, |
|
"learning_rate": 0.0001990445659689484, |
|
"loss": 1.4368, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.10151374914161168, |
|
"grad_norm": 3.4077043533325195, |
|
"learning_rate": 0.00019903028017302955, |
|
"loss": 1.388, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.10300659839369421, |
|
"grad_norm": 3.65567946434021, |
|
"learning_rate": 0.00019901599437711074, |
|
"loss": 1.42, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.10449944764577673, |
|
"grad_norm": 4.460702419281006, |
|
"learning_rate": 0.0001990017085811919, |
|
"loss": 1.3991, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.10599229689785926, |
|
"grad_norm": 4.155653476715088, |
|
"learning_rate": 0.00019898742278527307, |
|
"loss": 1.371, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.10748514614994178, |
|
"grad_norm": 3.8904318809509277, |
|
"learning_rate": 0.00019897313698935423, |
|
"loss": 1.4378, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.1089779954020243, |
|
"grad_norm": 4.0509233474731445, |
|
"learning_rate": 0.0001989588511934354, |
|
"loss": 1.3945, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.11047084465410682, |
|
"grad_norm": 3.785123109817505, |
|
"learning_rate": 0.00019894456539751656, |
|
"loss": 1.436, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.11196369390618935, |
|
"grad_norm": 3.4556167125701904, |
|
"learning_rate": 0.00019893027960159773, |
|
"loss": 1.3794, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.11345654315827187, |
|
"grad_norm": 4.0479559898376465, |
|
"learning_rate": 0.0001989159938056789, |
|
"loss": 1.4734, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.1149493924103544, |
|
"grad_norm": 3.890805721282959, |
|
"learning_rate": 0.00019890170800976006, |
|
"loss": 1.4341, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.11644224166243693, |
|
"grad_norm": 3.8178727626800537, |
|
"learning_rate": 0.00019888742221384122, |
|
"loss": 1.4754, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.11793509091451945, |
|
"grad_norm": 2.456165075302124, |
|
"learning_rate": 0.00019887313641792241, |
|
"loss": 1.3887, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.11942794016660198, |
|
"grad_norm": 3.5763051509857178, |
|
"learning_rate": 0.00019885885062200355, |
|
"loss": 1.3901, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.1209207894186845, |
|
"grad_norm": 3.885662317276001, |
|
"learning_rate": 0.00019884456482608474, |
|
"loss": 1.3856, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 0.12241363867076703, |
|
"grad_norm": 3.6095409393310547, |
|
"learning_rate": 0.00019883027903016588, |
|
"loss": 1.448, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.12390648792284956, |
|
"grad_norm": 3.7112534046173096, |
|
"learning_rate": 0.00019881599323424707, |
|
"loss": 1.3537, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 0.12539933717493207, |
|
"grad_norm": 3.3566672801971436, |
|
"learning_rate": 0.0001988017074383282, |
|
"loss": 1.4389, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.1268921864270146, |
|
"grad_norm": 4.570401191711426, |
|
"learning_rate": 0.0001987874216424094, |
|
"loss": 1.4191, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 0.12838503567909712, |
|
"grad_norm": 4.455029010772705, |
|
"learning_rate": 0.00019877313584649057, |
|
"loss": 1.3677, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.12987788493117966, |
|
"grad_norm": 3.0861828327178955, |
|
"learning_rate": 0.00019875885005057173, |
|
"loss": 1.3677, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 0.13137073418326217, |
|
"grad_norm": 4.419896602630615, |
|
"learning_rate": 0.0001987445642546529, |
|
"loss": 1.4524, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.1328635834353447, |
|
"grad_norm": 5.187576770782471, |
|
"learning_rate": 0.00019873027845873406, |
|
"loss": 1.3868, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 0.13435643268742722, |
|
"grad_norm": 5.111696243286133, |
|
"learning_rate": 0.00019871599266281523, |
|
"loss": 1.4458, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.13584928193950974, |
|
"grad_norm": 3.2652997970581055, |
|
"learning_rate": 0.0001987017068668964, |
|
"loss": 1.4529, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 0.13734213119159228, |
|
"grad_norm": 4.190273761749268, |
|
"learning_rate": 0.00019868742107097755, |
|
"loss": 1.3991, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.1388349804436748, |
|
"grad_norm": 4.85620641708374, |
|
"learning_rate": 0.00019867313527505872, |
|
"loss": 1.3916, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 0.14032782969575733, |
|
"grad_norm": 3.030954360961914, |
|
"learning_rate": 0.00019865884947913988, |
|
"loss": 1.3805, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.14182067894783984, |
|
"grad_norm": 3.264406681060791, |
|
"learning_rate": 0.00019864456368322108, |
|
"loss": 1.4048, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 0.14331352819992238, |
|
"grad_norm": 3.2138588428497314, |
|
"learning_rate": 0.0001986302778873022, |
|
"loss": 1.4092, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.1448063774520049, |
|
"grad_norm": 3.847222328186035, |
|
"learning_rate": 0.0001986159920913834, |
|
"loss": 1.3871, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 0.14629922670408743, |
|
"grad_norm": 4.004987716674805, |
|
"learning_rate": 0.00019860170629546454, |
|
"loss": 1.3845, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.14779207595616994, |
|
"grad_norm": 3.5088725090026855, |
|
"learning_rate": 0.00019858742049954573, |
|
"loss": 1.379, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 0.14928492520825248, |
|
"grad_norm": 3.275099277496338, |
|
"learning_rate": 0.00019857313470362687, |
|
"loss": 1.3628, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.150777774460335, |
|
"grad_norm": 3.7903060913085938, |
|
"learning_rate": 0.00019855884890770806, |
|
"loss": 1.3804, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 0.1522706237124175, |
|
"grad_norm": 4.294798374176025, |
|
"learning_rate": 0.00019854456311178923, |
|
"loss": 1.3988, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.15376347296450005, |
|
"grad_norm": 3.2719295024871826, |
|
"learning_rate": 0.0001985302773158704, |
|
"loss": 1.387, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 0.15525632221658256, |
|
"grad_norm": 4.143224239349365, |
|
"learning_rate": 0.00019851599151995156, |
|
"loss": 1.393, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.1567491714686651, |
|
"grad_norm": 3.404754638671875, |
|
"learning_rate": 0.00019850170572403272, |
|
"loss": 1.4205, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 0.1582420207207476, |
|
"grad_norm": 3.607126474380493, |
|
"learning_rate": 0.0001984874199281139, |
|
"loss": 1.4239, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.15973486997283015, |
|
"grad_norm": 4.140823841094971, |
|
"learning_rate": 0.00019847313413219505, |
|
"loss": 1.4204, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 0.16122771922491266, |
|
"grad_norm": 3.893251419067383, |
|
"learning_rate": 0.00019845884833627622, |
|
"loss": 1.392, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.1627205684769952, |
|
"grad_norm": 4.304211139678955, |
|
"learning_rate": 0.00019844456254035738, |
|
"loss": 1.44, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 0.16421341772907772, |
|
"grad_norm": 5.273501873016357, |
|
"learning_rate": 0.00019843027674443855, |
|
"loss": 1.445, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.16570626698116026, |
|
"grad_norm": 4.787700176239014, |
|
"learning_rate": 0.00019841599094851974, |
|
"loss": 1.3668, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 0.16719911623324277, |
|
"grad_norm": 3.7984108924865723, |
|
"learning_rate": 0.00019840170515260087, |
|
"loss": 1.3523, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.16869196548532528, |
|
"grad_norm": 3.885608673095703, |
|
"learning_rate": 0.00019838741935668207, |
|
"loss": 1.3917, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 0.17018481473740782, |
|
"grad_norm": 3.459803342819214, |
|
"learning_rate": 0.0001983731335607632, |
|
"loss": 1.3833, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.17167766398949033, |
|
"grad_norm": 3.7103006839752197, |
|
"learning_rate": 0.0001983588477648444, |
|
"loss": 1.4473, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 0.17317051324157287, |
|
"grad_norm": 6.645928382873535, |
|
"learning_rate": 0.00019834456196892553, |
|
"loss": 1.3706, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.17466336249365538, |
|
"grad_norm": 3.7201037406921387, |
|
"learning_rate": 0.0001983302761730067, |
|
"loss": 1.3733, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 0.17615621174573792, |
|
"grad_norm": 4.050106048583984, |
|
"learning_rate": 0.0001983159903770879, |
|
"loss": 1.4096, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.17764906099782043, |
|
"grad_norm": 4.190842628479004, |
|
"learning_rate": 0.00019830170458116903, |
|
"loss": 1.4404, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 0.17914191024990297, |
|
"grad_norm": 4.393162727355957, |
|
"learning_rate": 0.00019828741878525022, |
|
"loss": 1.4443, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.1806347595019855, |
|
"grad_norm": 3.597520351409912, |
|
"learning_rate": 0.00019827313298933136, |
|
"loss": 1.4063, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 0.18212760875406803, |
|
"grad_norm": 3.608085870742798, |
|
"learning_rate": 0.00019825884719341255, |
|
"loss": 1.3857, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.18362045800615054, |
|
"grad_norm": 3.7055492401123047, |
|
"learning_rate": 0.0001982445613974937, |
|
"loss": 1.3997, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 0.18511330725823305, |
|
"grad_norm": 3.875457763671875, |
|
"learning_rate": 0.00019823027560157488, |
|
"loss": 1.4296, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.1866061565103156, |
|
"grad_norm": 5.074592590332031, |
|
"learning_rate": 0.00019821598980565604, |
|
"loss": 1.3785, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 0.1880990057623981, |
|
"grad_norm": 6.013392448425293, |
|
"learning_rate": 0.0001982017040097372, |
|
"loss": 1.4391, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.18959185501448064, |
|
"grad_norm": 5.679958820343018, |
|
"learning_rate": 0.00019818741821381837, |
|
"loss": 1.367, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 0.19108470426656315, |
|
"grad_norm": 3.6182546615600586, |
|
"learning_rate": 0.00019817313241789954, |
|
"loss": 1.4508, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.1925775535186457, |
|
"grad_norm": 5.209213733673096, |
|
"learning_rate": 0.0001981588466219807, |
|
"loss": 1.3878, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 0.1940704027707282, |
|
"grad_norm": 3.0043230056762695, |
|
"learning_rate": 0.00019814456082606187, |
|
"loss": 1.4248, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.19556325202281075, |
|
"grad_norm": 3.157851219177246, |
|
"learning_rate": 0.00019813027503014303, |
|
"loss": 1.3725, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 0.19705610127489326, |
|
"grad_norm": 3.5292418003082275, |
|
"learning_rate": 0.0001981159892342242, |
|
"loss": 1.3932, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.1985489505269758, |
|
"grad_norm": 3.2819600105285645, |
|
"learning_rate": 0.00019810170343830536, |
|
"loss": 1.3495, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 0.2000417997790583, |
|
"grad_norm": 3.0243399143218994, |
|
"learning_rate": 0.00019808741764238655, |
|
"loss": 1.3689, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 0.20153464903114082, |
|
"grad_norm": 3.4495368003845215, |
|
"learning_rate": 0.0001980731318464677, |
|
"loss": 1.3725, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 0.20302749828322336, |
|
"grad_norm": 3.538259744644165, |
|
"learning_rate": 0.00019805884605054888, |
|
"loss": 1.3905, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.20452034753530587, |
|
"grad_norm": 4.162181377410889, |
|
"learning_rate": 0.00019804456025463002, |
|
"loss": 1.4129, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 0.20601319678738841, |
|
"grad_norm": 4.592432022094727, |
|
"learning_rate": 0.0001980302744587112, |
|
"loss": 1.3634, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 0.20750604603947093, |
|
"grad_norm": 3.45967960357666, |
|
"learning_rate": 0.00019801598866279237, |
|
"loss": 1.416, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 0.20899889529155347, |
|
"grad_norm": 4.221930503845215, |
|
"learning_rate": 0.00019800170286687354, |
|
"loss": 1.4051, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.21049174454363598, |
|
"grad_norm": 4.144239902496338, |
|
"learning_rate": 0.0001979874170709547, |
|
"loss": 1.4219, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 0.21198459379571852, |
|
"grad_norm": 4.7492570877075195, |
|
"learning_rate": 0.00019797313127503587, |
|
"loss": 1.4028, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 0.21347744304780103, |
|
"grad_norm": 3.5841355323791504, |
|
"learning_rate": 0.00019795884547911703, |
|
"loss": 1.4361, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 0.21497029229988357, |
|
"grad_norm": 4.662593364715576, |
|
"learning_rate": 0.0001979445596831982, |
|
"loss": 1.3816, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.21646314155196608, |
|
"grad_norm": 4.700701713562012, |
|
"learning_rate": 0.00019793027388727936, |
|
"loss": 1.4226, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 0.2179559908040486, |
|
"grad_norm": 4.025181293487549, |
|
"learning_rate": 0.00019791598809136053, |
|
"loss": 1.4291, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 0.21944884005613113, |
|
"grad_norm": 3.064573049545288, |
|
"learning_rate": 0.0001979017022954417, |
|
"loss": 1.4293, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 0.22094168930821365, |
|
"grad_norm": 6.342152118682861, |
|
"learning_rate": 0.00019788741649952288, |
|
"loss": 1.4173, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.22243453856029619, |
|
"grad_norm": 5.89996337890625, |
|
"learning_rate": 0.00019787313070360402, |
|
"loss": 1.396, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 0.2239273878123787, |
|
"grad_norm": 4.462945938110352, |
|
"learning_rate": 0.0001978588449076852, |
|
"loss": 1.3868, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.22542023706446124, |
|
"grad_norm": 3.6449055671691895, |
|
"learning_rate": 0.00019784455911176635, |
|
"loss": 1.396, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 0.22691308631654375, |
|
"grad_norm": 4.674243927001953, |
|
"learning_rate": 0.00019783027331584754, |
|
"loss": 1.395, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.2284059355686263, |
|
"grad_norm": 3.6160385608673096, |
|
"learning_rate": 0.00019781598751992868, |
|
"loss": 1.3918, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 0.2298987848207088, |
|
"grad_norm": 4.326193332672119, |
|
"learning_rate": 0.00019780170172400987, |
|
"loss": 1.3947, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 0.23139163407279134, |
|
"grad_norm": 5.4003777503967285, |
|
"learning_rate": 0.00019778741592809104, |
|
"loss": 1.344, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 0.23288448332487385, |
|
"grad_norm": 4.711580753326416, |
|
"learning_rate": 0.0001977731301321722, |
|
"loss": 1.3959, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.23437733257695637, |
|
"grad_norm": 3.4752814769744873, |
|
"learning_rate": 0.00019775884433625337, |
|
"loss": 1.3722, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 0.2358701818290389, |
|
"grad_norm": 4.028527736663818, |
|
"learning_rate": 0.00019774455854033453, |
|
"loss": 1.3683, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 0.23736303108112142, |
|
"grad_norm": 4.094334602355957, |
|
"learning_rate": 0.0001977302727444157, |
|
"loss": 1.3607, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 0.23885588033320396, |
|
"grad_norm": 5.232580661773682, |
|
"learning_rate": 0.00019771598694849686, |
|
"loss": 1.4354, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.24034872958528647, |
|
"grad_norm": 4.269852161407471, |
|
"learning_rate": 0.00019770170115257802, |
|
"loss": 1.4372, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 0.241841578837369, |
|
"grad_norm": 3.312541961669922, |
|
"learning_rate": 0.0001976874153566592, |
|
"loss": 1.3826, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 0.24333442808945152, |
|
"grad_norm": 3.8900692462921143, |
|
"learning_rate": 0.00019767312956074035, |
|
"loss": 1.4189, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 0.24482727734153406, |
|
"grad_norm": 3.894512414932251, |
|
"learning_rate": 0.00019765884376482155, |
|
"loss": 1.3365, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.24632012659361657, |
|
"grad_norm": 4.644411563873291, |
|
"learning_rate": 0.00019764455796890268, |
|
"loss": 1.4311, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 0.2478129758456991, |
|
"grad_norm": 8.174029350280762, |
|
"learning_rate": 0.00019763027217298387, |
|
"loss": 1.361, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 0.24930582509778162, |
|
"grad_norm": 4.615732192993164, |
|
"learning_rate": 0.000197615986377065, |
|
"loss": 1.4552, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 0.25079867434986414, |
|
"grad_norm": 4.421249866485596, |
|
"learning_rate": 0.0001976017005811462, |
|
"loss": 1.3463, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.2522915236019467, |
|
"grad_norm": 2.8386716842651367, |
|
"learning_rate": 0.00019758741478522734, |
|
"loss": 1.348, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 0.2537843728540292, |
|
"grad_norm": 4.3141703605651855, |
|
"learning_rate": 0.0001975731289893085, |
|
"loss": 1.4306, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.2552772221061117, |
|
"grad_norm": 3.947331428527832, |
|
"learning_rate": 0.0001975588431933897, |
|
"loss": 1.3823, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 0.25677007135819424, |
|
"grad_norm": 3.2268636226654053, |
|
"learning_rate": 0.00019754455739747084, |
|
"loss": 1.4199, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 0.2582629206102768, |
|
"grad_norm": 4.0353102684021, |
|
"learning_rate": 0.00019753027160155203, |
|
"loss": 1.3927, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 0.2597557698623593, |
|
"grad_norm": 3.490560293197632, |
|
"learning_rate": 0.00019751598580563316, |
|
"loss": 1.401, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 0.2612486191144418, |
|
"grad_norm": 5.577207088470459, |
|
"learning_rate": 0.00019750170000971436, |
|
"loss": 1.3586, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 0.26274146836652434, |
|
"grad_norm": 4.168467998504639, |
|
"learning_rate": 0.0001974874142137955, |
|
"loss": 1.3303, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 0.2642343176186069, |
|
"grad_norm": 3.812627077102661, |
|
"learning_rate": 0.00019747312841787669, |
|
"loss": 1.3717, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 0.2657271668706894, |
|
"grad_norm": 4.875237464904785, |
|
"learning_rate": 0.00019745884262195785, |
|
"loss": 1.3873, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 0.2672200161227719, |
|
"grad_norm": 4.048189163208008, |
|
"learning_rate": 0.00019744455682603902, |
|
"loss": 1.3775, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 0.26871286537485445, |
|
"grad_norm": 3.9090261459350586, |
|
"learning_rate": 0.00019743027103012018, |
|
"loss": 1.4296, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.270205714626937, |
|
"grad_norm": 2.8476953506469727, |
|
"learning_rate": 0.00019741598523420134, |
|
"loss": 1.4175, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 0.2716985638790195, |
|
"grad_norm": 5.782102584838867, |
|
"learning_rate": 0.0001974016994382825, |
|
"loss": 1.3835, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 0.273191413131102, |
|
"grad_norm": 4.640264987945557, |
|
"learning_rate": 0.00019738741364236367, |
|
"loss": 1.4524, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 0.27468426238318455, |
|
"grad_norm": 4.81790828704834, |
|
"learning_rate": 0.00019737312784644484, |
|
"loss": 1.3183, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 0.2761771116352671, |
|
"grad_norm": 2.685009717941284, |
|
"learning_rate": 0.000197358842050526, |
|
"loss": 1.3243, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 0.2776699608873496, |
|
"grad_norm": 5.321321487426758, |
|
"learning_rate": 0.00019734455625460717, |
|
"loss": 1.4086, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 0.2791628101394321, |
|
"grad_norm": 3.065791368484497, |
|
"learning_rate": 0.00019733027045868836, |
|
"loss": 1.337, |
|
"step": 9350 |
|
}, |
|
{ |
|
"epoch": 0.28065565939151466, |
|
"grad_norm": 4.3569817543029785, |
|
"learning_rate": 0.0001973159846627695, |
|
"loss": 1.4082, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 0.2821485086435972, |
|
"grad_norm": 4.67582368850708, |
|
"learning_rate": 0.0001973016988668507, |
|
"loss": 1.3832, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 0.2836413578956797, |
|
"grad_norm": 4.942144870758057, |
|
"learning_rate": 0.00019728741307093183, |
|
"loss": 1.3734, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.2851342071477622, |
|
"grad_norm": 4.853246688842773, |
|
"learning_rate": 0.00019727312727501302, |
|
"loss": 1.4111, |
|
"step": 9550 |
|
}, |
|
{ |
|
"epoch": 0.28662705639984476, |
|
"grad_norm": 3.071237325668335, |
|
"learning_rate": 0.00019725884147909418, |
|
"loss": 1.3746, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 0.28811990565192724, |
|
"grad_norm": 4.844615459442139, |
|
"learning_rate": 0.00019724455568317535, |
|
"loss": 1.3051, |
|
"step": 9650 |
|
}, |
|
{ |
|
"epoch": 0.2896127549040098, |
|
"grad_norm": 5.954223155975342, |
|
"learning_rate": 0.0001972302698872565, |
|
"loss": 1.4131, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 0.2911056041560923, |
|
"grad_norm": 3.6717801094055176, |
|
"learning_rate": 0.00019721598409133768, |
|
"loss": 1.4166, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 0.29259845340817486, |
|
"grad_norm": 3.6257095336914062, |
|
"learning_rate": 0.00019720169829541884, |
|
"loss": 1.3679, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 0.29409130266025735, |
|
"grad_norm": 4.245635032653809, |
|
"learning_rate": 0.0001971874124995, |
|
"loss": 1.3171, |
|
"step": 9850 |
|
}, |
|
{ |
|
"epoch": 0.2955841519123399, |
|
"grad_norm": 5.362602710723877, |
|
"learning_rate": 0.00019717312670358117, |
|
"loss": 1.3932, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 0.2970770011644224, |
|
"grad_norm": 4.6283721923828125, |
|
"learning_rate": 0.00019715884090766234, |
|
"loss": 1.3757, |
|
"step": 9950 |
|
}, |
|
{ |
|
"epoch": 0.29856985041650497, |
|
"grad_norm": 4.299574851989746, |
|
"learning_rate": 0.0001971445551117435, |
|
"loss": 1.4018, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.30006269966858745, |
|
"grad_norm": 4.7913641929626465, |
|
"learning_rate": 0.0001971302693158247, |
|
"loss": 1.4228, |
|
"step": 10050 |
|
}, |
|
{ |
|
"epoch": 0.30155554892067, |
|
"grad_norm": 5.312823295593262, |
|
"learning_rate": 0.00019711598351990583, |
|
"loss": 1.3866, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 0.30304839817275253, |
|
"grad_norm": 4.099662780761719, |
|
"learning_rate": 0.00019710169772398702, |
|
"loss": 1.454, |
|
"step": 10150 |
|
}, |
|
{ |
|
"epoch": 0.304541247424835, |
|
"grad_norm": 4.254878520965576, |
|
"learning_rate": 0.00019708741192806816, |
|
"loss": 1.3526, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 0.30603409667691756, |
|
"grad_norm": 4.056606292724609, |
|
"learning_rate": 0.00019707312613214935, |
|
"loss": 1.4167, |
|
"step": 10250 |
|
}, |
|
{ |
|
"epoch": 0.3075269459290001, |
|
"grad_norm": 3.790809154510498, |
|
"learning_rate": 0.0001970588403362305, |
|
"loss": 1.3536, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 0.30901979518108263, |
|
"grad_norm": 4.46298360824585, |
|
"learning_rate": 0.00019704455454031168, |
|
"loss": 1.3613, |
|
"step": 10350 |
|
}, |
|
{ |
|
"epoch": 0.3105126444331651, |
|
"grad_norm": 4.52452278137207, |
|
"learning_rate": 0.00019703026874439284, |
|
"loss": 1.4591, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 0.31200549368524766, |
|
"grad_norm": 4.735177040100098, |
|
"learning_rate": 0.000197015982948474, |
|
"loss": 1.4617, |
|
"step": 10450 |
|
}, |
|
{ |
|
"epoch": 0.3134983429373302, |
|
"grad_norm": 4.48261833190918, |
|
"learning_rate": 0.00019700169715255517, |
|
"loss": 1.4072, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.31499119218941274, |
|
"grad_norm": 3.3441503047943115, |
|
"learning_rate": 0.00019698741135663634, |
|
"loss": 1.4082, |
|
"step": 10550 |
|
}, |
|
{ |
|
"epoch": 0.3164840414414952, |
|
"grad_norm": 3.9771218299865723, |
|
"learning_rate": 0.0001969731255607175, |
|
"loss": 1.4184, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 0.31797689069357776, |
|
"grad_norm": 6.366194725036621, |
|
"learning_rate": 0.00019695883976479867, |
|
"loss": 1.387, |
|
"step": 10650 |
|
}, |
|
{ |
|
"epoch": 0.3194697399456603, |
|
"grad_norm": 5.072678089141846, |
|
"learning_rate": 0.00019694455396887983, |
|
"loss": 1.3996, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 0.3209625891977428, |
|
"grad_norm": 3.7204978466033936, |
|
"learning_rate": 0.000196930268172961, |
|
"loss": 1.3774, |
|
"step": 10750 |
|
}, |
|
{ |
|
"epoch": 0.3224554384498253, |
|
"grad_norm": 4.47731351852417, |
|
"learning_rate": 0.00019691598237704216, |
|
"loss": 1.3552, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 0.32394828770190787, |
|
"grad_norm": 3.4569220542907715, |
|
"learning_rate": 0.00019690169658112335, |
|
"loss": 1.3794, |
|
"step": 10850 |
|
}, |
|
{ |
|
"epoch": 0.3254411369539904, |
|
"grad_norm": 4.344145774841309, |
|
"learning_rate": 0.0001968874107852045, |
|
"loss": 1.4419, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 0.3269339862060729, |
|
"grad_norm": 4.089848041534424, |
|
"learning_rate": 0.00019687312498928568, |
|
"loss": 1.4116, |
|
"step": 10950 |
|
}, |
|
{ |
|
"epoch": 0.32842683545815543, |
|
"grad_norm": 3.995945930480957, |
|
"learning_rate": 0.00019685883919336682, |
|
"loss": 1.3532, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.32991968471023797, |
|
"grad_norm": 3.8309378623962402, |
|
"learning_rate": 0.00019684455339744798, |
|
"loss": 1.3957, |
|
"step": 11050 |
|
}, |
|
{ |
|
"epoch": 0.3314125339623205, |
|
"grad_norm": 4.386235237121582, |
|
"learning_rate": 0.00019683026760152915, |
|
"loss": 1.3616, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 0.332905383214403, |
|
"grad_norm": 5.133239269256592, |
|
"learning_rate": 0.00019681598180561031, |
|
"loss": 1.3959, |
|
"step": 11150 |
|
}, |
|
{ |
|
"epoch": 0.33439823246648553, |
|
"grad_norm": 4.216183662414551, |
|
"learning_rate": 0.0001968016960096915, |
|
"loss": 1.3446, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 0.3358910817185681, |
|
"grad_norm": 3.631131172180176, |
|
"learning_rate": 0.00019678741021377264, |
|
"loss": 1.3779, |
|
"step": 11250 |
|
}, |
|
{ |
|
"epoch": 0.33738393097065056, |
|
"grad_norm": 4.603448390960693, |
|
"learning_rate": 0.00019677312441785384, |
|
"loss": 1.3962, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 0.3388767802227331, |
|
"grad_norm": 3.6482913494110107, |
|
"learning_rate": 0.00019675883862193497, |
|
"loss": 1.3931, |
|
"step": 11350 |
|
}, |
|
{ |
|
"epoch": 0.34036962947481564, |
|
"grad_norm": 5.040388107299805, |
|
"learning_rate": 0.00019674455282601616, |
|
"loss": 1.3465, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 0.3418624787268982, |
|
"grad_norm": 5.762825012207031, |
|
"learning_rate": 0.0001967302670300973, |
|
"loss": 1.3577, |
|
"step": 11450 |
|
}, |
|
{ |
|
"epoch": 0.34335532797898066, |
|
"grad_norm": 4.941501617431641, |
|
"learning_rate": 0.0001967159812341785, |
|
"loss": 1.3676, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.3448481772310632, |
|
"grad_norm": 5.368370532989502, |
|
"learning_rate": 0.00019670169543825966, |
|
"loss": 1.4265, |
|
"step": 11550 |
|
}, |
|
{ |
|
"epoch": 0.34634102648314574, |
|
"grad_norm": 4.931522369384766, |
|
"learning_rate": 0.00019668740964234082, |
|
"loss": 1.3551, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 0.3478338757352283, |
|
"grad_norm": 3.9685990810394287, |
|
"learning_rate": 0.000196673123846422, |
|
"loss": 1.402, |
|
"step": 11650 |
|
}, |
|
{ |
|
"epoch": 0.34932672498731077, |
|
"grad_norm": 5.771200656890869, |
|
"learning_rate": 0.00019665883805050315, |
|
"loss": 1.3596, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 0.3508195742393933, |
|
"grad_norm": 5.142852306365967, |
|
"learning_rate": 0.00019664455225458432, |
|
"loss": 1.385, |
|
"step": 11750 |
|
}, |
|
{ |
|
"epoch": 0.35231242349147585, |
|
"grad_norm": 3.295628786087036, |
|
"learning_rate": 0.00019663026645866548, |
|
"loss": 1.3454, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 0.35380527274355833, |
|
"grad_norm": 4.34658145904541, |
|
"learning_rate": 0.00019661598066274665, |
|
"loss": 1.3976, |
|
"step": 11850 |
|
}, |
|
{ |
|
"epoch": 0.35529812199564087, |
|
"grad_norm": 4.032591819763184, |
|
"learning_rate": 0.0001966016948668278, |
|
"loss": 1.3571, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 0.3567909712477234, |
|
"grad_norm": 3.9286158084869385, |
|
"learning_rate": 0.00019658740907090898, |
|
"loss": 1.398, |
|
"step": 11950 |
|
}, |
|
{ |
|
"epoch": 0.35828382049980595, |
|
"grad_norm": 5.184597492218018, |
|
"learning_rate": 0.00019657312327499017, |
|
"loss": 1.3827, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.35977666975188843, |
|
"grad_norm": 4.4749226570129395, |
|
"learning_rate": 0.0001965588374790713, |
|
"loss": 1.373, |
|
"step": 12050 |
|
}, |
|
{ |
|
"epoch": 0.361269519003971, |
|
"grad_norm": 3.5633764266967773, |
|
"learning_rate": 0.0001965445516831525, |
|
"loss": 1.3704, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 0.3627623682560535, |
|
"grad_norm": 7.570897102355957, |
|
"learning_rate": 0.00019653026588723363, |
|
"loss": 1.3515, |
|
"step": 12150 |
|
}, |
|
{ |
|
"epoch": 0.36425521750813605, |
|
"grad_norm": 4.239411354064941, |
|
"learning_rate": 0.00019651598009131483, |
|
"loss": 1.3813, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 0.36574806676021854, |
|
"grad_norm": 3.8941049575805664, |
|
"learning_rate": 0.00019650169429539596, |
|
"loss": 1.3954, |
|
"step": 12250 |
|
}, |
|
{ |
|
"epoch": 0.3672409160123011, |
|
"grad_norm": 4.8694586753845215, |
|
"learning_rate": 0.00019648740849947716, |
|
"loss": 1.3531, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 0.3687337652643836, |
|
"grad_norm": 3.914964437484741, |
|
"learning_rate": 0.00019647312270355832, |
|
"loss": 1.3954, |
|
"step": 12350 |
|
}, |
|
{ |
|
"epoch": 0.3702266145164661, |
|
"grad_norm": 3.4050538539886475, |
|
"learning_rate": 0.00019645883690763948, |
|
"loss": 1.3498, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 0.37171946376854864, |
|
"grad_norm": 4.436797618865967, |
|
"learning_rate": 0.00019644455111172065, |
|
"loss": 1.3889, |
|
"step": 12450 |
|
}, |
|
{ |
|
"epoch": 0.3732123130206312, |
|
"grad_norm": 2.7660670280456543, |
|
"learning_rate": 0.00019643026531580181, |
|
"loss": 1.3392, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.3747051622727137, |
|
"grad_norm": 5.364072799682617, |
|
"learning_rate": 0.00019641597951988298, |
|
"loss": 1.3157, |
|
"step": 12550 |
|
}, |
|
{ |
|
"epoch": 0.3761980115247962, |
|
"grad_norm": 5.123339653015137, |
|
"learning_rate": 0.00019640169372396414, |
|
"loss": 1.3523, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 0.37769086077687875, |
|
"grad_norm": 3.4495439529418945, |
|
"learning_rate": 0.0001963874079280453, |
|
"loss": 1.3331, |
|
"step": 12650 |
|
}, |
|
{ |
|
"epoch": 0.3791837100289613, |
|
"grad_norm": 4.613680362701416, |
|
"learning_rate": 0.00019637312213212647, |
|
"loss": 1.3707, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 0.3806765592810438, |
|
"grad_norm": 3.5819404125213623, |
|
"learning_rate": 0.00019635883633620764, |
|
"loss": 1.4023, |
|
"step": 12750 |
|
}, |
|
{ |
|
"epoch": 0.3821694085331263, |
|
"grad_norm": 3.5075576305389404, |
|
"learning_rate": 0.00019634455054028883, |
|
"loss": 1.3815, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 0.38366225778520885, |
|
"grad_norm": 4.425256729125977, |
|
"learning_rate": 0.00019633026474436997, |
|
"loss": 1.3801, |
|
"step": 12850 |
|
}, |
|
{ |
|
"epoch": 0.3851551070372914, |
|
"grad_norm": 3.5711112022399902, |
|
"learning_rate": 0.00019631597894845116, |
|
"loss": 1.3375, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 0.3866479562893739, |
|
"grad_norm": 5.728016376495361, |
|
"learning_rate": 0.0001963016931525323, |
|
"loss": 1.3914, |
|
"step": 12950 |
|
}, |
|
{ |
|
"epoch": 0.3881408055414564, |
|
"grad_norm": 4.294504642486572, |
|
"learning_rate": 0.0001962874073566135, |
|
"loss": 1.4561, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.38963365479353895, |
|
"grad_norm": 5.329941749572754, |
|
"learning_rate": 0.00019627312156069465, |
|
"loss": 1.4018, |
|
"step": 13050 |
|
}, |
|
{ |
|
"epoch": 0.3911265040456215, |
|
"grad_norm": 4.166362762451172, |
|
"learning_rate": 0.00019625883576477582, |
|
"loss": 1.4334, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 0.392619353297704, |
|
"grad_norm": 3.3922691345214844, |
|
"learning_rate": 0.00019624454996885698, |
|
"loss": 1.3956, |
|
"step": 13150 |
|
}, |
|
{ |
|
"epoch": 0.3941122025497865, |
|
"grad_norm": 4.340898036956787, |
|
"learning_rate": 0.00019623026417293815, |
|
"loss": 1.3643, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 0.39560505180186906, |
|
"grad_norm": 6.623823165893555, |
|
"learning_rate": 0.0001962159783770193, |
|
"loss": 1.362, |
|
"step": 13250 |
|
}, |
|
{ |
|
"epoch": 0.3970979010539516, |
|
"grad_norm": 4.490639686584473, |
|
"learning_rate": 0.00019620169258110048, |
|
"loss": 1.3742, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 0.3985907503060341, |
|
"grad_norm": 4.179808139801025, |
|
"learning_rate": 0.00019618740678518164, |
|
"loss": 1.4191, |
|
"step": 13350 |
|
}, |
|
{ |
|
"epoch": 0.4000835995581166, |
|
"grad_norm": 5.623187065124512, |
|
"learning_rate": 0.0001961731209892628, |
|
"loss": 1.3793, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 0.40157644881019916, |
|
"grad_norm": 3.9650678634643555, |
|
"learning_rate": 0.00019615883519334397, |
|
"loss": 1.3917, |
|
"step": 13450 |
|
}, |
|
{ |
|
"epoch": 0.40306929806228164, |
|
"grad_norm": 5.047702312469482, |
|
"learning_rate": 0.00019614454939742516, |
|
"loss": 1.4501, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.4045621473143642, |
|
"grad_norm": 3.46647310256958, |
|
"learning_rate": 0.0001961302636015063, |
|
"loss": 1.4091, |
|
"step": 13550 |
|
}, |
|
{ |
|
"epoch": 0.4060549965664467, |
|
"grad_norm": 3.2203481197357178, |
|
"learning_rate": 0.0001961159778055875, |
|
"loss": 1.4003, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 0.40754784581852926, |
|
"grad_norm": 3.727679967880249, |
|
"learning_rate": 0.00019610169200966863, |
|
"loss": 1.4004, |
|
"step": 13650 |
|
}, |
|
{ |
|
"epoch": 0.40904069507061175, |
|
"grad_norm": 4.469257831573486, |
|
"learning_rate": 0.0001960874062137498, |
|
"loss": 1.4061, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 0.4105335443226943, |
|
"grad_norm": 4.041538715362549, |
|
"learning_rate": 0.00019607312041783096, |
|
"loss": 1.411, |
|
"step": 13750 |
|
}, |
|
{ |
|
"epoch": 0.41202639357477683, |
|
"grad_norm": 5.2691779136657715, |
|
"learning_rate": 0.00019605883462191212, |
|
"loss": 1.3396, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 0.41351924282685937, |
|
"grad_norm": 6.236726760864258, |
|
"learning_rate": 0.00019604454882599331, |
|
"loss": 1.4295, |
|
"step": 13850 |
|
}, |
|
{ |
|
"epoch": 0.41501209207894185, |
|
"grad_norm": 6.703745365142822, |
|
"learning_rate": 0.00019603026303007445, |
|
"loss": 1.4105, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 0.4165049413310244, |
|
"grad_norm": 3.997664451599121, |
|
"learning_rate": 0.00019601597723415564, |
|
"loss": 1.347, |
|
"step": 13950 |
|
}, |
|
{ |
|
"epoch": 0.41799779058310693, |
|
"grad_norm": 5.311407566070557, |
|
"learning_rate": 0.00019600169143823678, |
|
"loss": 1.4546, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.4194906398351894, |
|
"grad_norm": 4.0283098220825195, |
|
"learning_rate": 0.00019598740564231797, |
|
"loss": 1.4513, |
|
"step": 14050 |
|
}, |
|
{ |
|
"epoch": 0.42098348908727196, |
|
"grad_norm": 7.345764636993408, |
|
"learning_rate": 0.0001959731198463991, |
|
"loss": 1.3832, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 0.4224763383393545, |
|
"grad_norm": 4.324542045593262, |
|
"learning_rate": 0.0001959588340504803, |
|
"loss": 1.3751, |
|
"step": 14150 |
|
}, |
|
{ |
|
"epoch": 0.42396918759143704, |
|
"grad_norm": 3.8322675228118896, |
|
"learning_rate": 0.00019594454825456147, |
|
"loss": 1.344, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 0.4254620368435195, |
|
"grad_norm": 4.62548303604126, |
|
"learning_rate": 0.00019593026245864263, |
|
"loss": 1.4346, |
|
"step": 14250 |
|
}, |
|
{ |
|
"epoch": 0.42695488609560206, |
|
"grad_norm": 4.585489273071289, |
|
"learning_rate": 0.0001959159766627238, |
|
"loss": 1.4145, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 0.4284477353476846, |
|
"grad_norm": 3.64227557182312, |
|
"learning_rate": 0.00019590169086680496, |
|
"loss": 1.367, |
|
"step": 14350 |
|
}, |
|
{ |
|
"epoch": 0.42994058459976714, |
|
"grad_norm": 4.730580806732178, |
|
"learning_rate": 0.00019588740507088613, |
|
"loss": 1.3653, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 0.4314334338518496, |
|
"grad_norm": 4.4075398445129395, |
|
"learning_rate": 0.0001958731192749673, |
|
"loss": 1.4081, |
|
"step": 14450 |
|
}, |
|
{ |
|
"epoch": 0.43292628310393216, |
|
"grad_norm": 3.0498785972595215, |
|
"learning_rate": 0.00019585883347904845, |
|
"loss": 1.343, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.4344191323560147, |
|
"grad_norm": 4.179199695587158, |
|
"learning_rate": 0.00019584454768312962, |
|
"loss": 1.3662, |
|
"step": 14550 |
|
}, |
|
{ |
|
"epoch": 0.4359119816080972, |
|
"grad_norm": 4.1148786544799805, |
|
"learning_rate": 0.00019583026188721078, |
|
"loss": 1.3785, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 0.4374048308601797, |
|
"grad_norm": 4.012060165405273, |
|
"learning_rate": 0.00019581597609129198, |
|
"loss": 1.4056, |
|
"step": 14650 |
|
}, |
|
{ |
|
"epoch": 0.43889768011226227, |
|
"grad_norm": 7.186342716217041, |
|
"learning_rate": 0.0001958016902953731, |
|
"loss": 1.3943, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 0.4403905293643448, |
|
"grad_norm": 3.951267957687378, |
|
"learning_rate": 0.0001957874044994543, |
|
"loss": 1.3976, |
|
"step": 14750 |
|
}, |
|
{ |
|
"epoch": 0.4418833786164273, |
|
"grad_norm": 5.276801586151123, |
|
"learning_rate": 0.00019577311870353544, |
|
"loss": 1.4137, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 0.44337622786850983, |
|
"grad_norm": 4.107429504394531, |
|
"learning_rate": 0.00019575883290761663, |
|
"loss": 1.388, |
|
"step": 14850 |
|
}, |
|
{ |
|
"epoch": 0.44486907712059237, |
|
"grad_norm": 4.274941444396973, |
|
"learning_rate": 0.00019574454711169777, |
|
"loss": 1.3446, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 0.4463619263726749, |
|
"grad_norm": 4.174200534820557, |
|
"learning_rate": 0.00019573026131577896, |
|
"loss": 1.3703, |
|
"step": 14950 |
|
}, |
|
{ |
|
"epoch": 0.4478547756247574, |
|
"grad_norm": 3.867125988006592, |
|
"learning_rate": 0.00019571597551986013, |
|
"loss": 1.3745, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.44934762487683994, |
|
"grad_norm": 6.454402923583984, |
|
"learning_rate": 0.0001957016897239413, |
|
"loss": 1.3081, |
|
"step": 15050 |
|
}, |
|
{ |
|
"epoch": 0.4508404741289225, |
|
"grad_norm": 3.8726885318756104, |
|
"learning_rate": 0.00019568740392802246, |
|
"loss": 1.3527, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 0.45233332338100496, |
|
"grad_norm": 3.6218361854553223, |
|
"learning_rate": 0.00019567311813210362, |
|
"loss": 1.4549, |
|
"step": 15150 |
|
}, |
|
{ |
|
"epoch": 0.4538261726330875, |
|
"grad_norm": 5.2475361824035645, |
|
"learning_rate": 0.0001956588323361848, |
|
"loss": 1.361, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 0.45531902188517004, |
|
"grad_norm": 4.388748645782471, |
|
"learning_rate": 0.00019564454654026595, |
|
"loss": 1.3164, |
|
"step": 15250 |
|
}, |
|
{ |
|
"epoch": 0.4568118711372526, |
|
"grad_norm": 4.97973108291626, |
|
"learning_rate": 0.00019563026074434712, |
|
"loss": 1.3755, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 0.45830472038933506, |
|
"grad_norm": 4.538138389587402, |
|
"learning_rate": 0.00019561597494842828, |
|
"loss": 1.4339, |
|
"step": 15350 |
|
}, |
|
{ |
|
"epoch": 0.4597975696414176, |
|
"grad_norm": 4.389719009399414, |
|
"learning_rate": 0.00019560168915250945, |
|
"loss": 1.4163, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 0.46129041889350014, |
|
"grad_norm": 4.347919464111328, |
|
"learning_rate": 0.00019558740335659064, |
|
"loss": 1.3663, |
|
"step": 15450 |
|
}, |
|
{ |
|
"epoch": 0.4627832681455827, |
|
"grad_norm": 4.820595741271973, |
|
"learning_rate": 0.00019557311756067177, |
|
"loss": 1.3441, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.46427611739766517, |
|
"grad_norm": 2.6965413093566895, |
|
"learning_rate": 0.00019555883176475297, |
|
"loss": 1.3657, |
|
"step": 15550 |
|
}, |
|
{ |
|
"epoch": 0.4657689666497477, |
|
"grad_norm": 4.741116523742676, |
|
"learning_rate": 0.0001955445459688341, |
|
"loss": 1.4414, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 0.46726181590183025, |
|
"grad_norm": 3.9512829780578613, |
|
"learning_rate": 0.0001955302601729153, |
|
"loss": 1.354, |
|
"step": 15650 |
|
}, |
|
{ |
|
"epoch": 0.46875466515391273, |
|
"grad_norm": 7.704863548278809, |
|
"learning_rate": 0.00019551597437699646, |
|
"loss": 1.4107, |
|
"step": 15700 |
|
}, |
|
{ |
|
"epoch": 0.47024751440599527, |
|
"grad_norm": 3.502988338470459, |
|
"learning_rate": 0.00019550168858107763, |
|
"loss": 1.415, |
|
"step": 15750 |
|
}, |
|
{ |
|
"epoch": 0.4717403636580778, |
|
"grad_norm": 4.246065139770508, |
|
"learning_rate": 0.0001954874027851588, |
|
"loss": 1.3414, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 0.47323321291016035, |
|
"grad_norm": 3.4039735794067383, |
|
"learning_rate": 0.00019547311698923995, |
|
"loss": 1.3303, |
|
"step": 15850 |
|
}, |
|
{ |
|
"epoch": 0.47472606216224283, |
|
"grad_norm": 3.279521942138672, |
|
"learning_rate": 0.00019545883119332112, |
|
"loss": 1.3447, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 0.4762189114143254, |
|
"grad_norm": 2.9335134029388428, |
|
"learning_rate": 0.00019544454539740228, |
|
"loss": 1.3998, |
|
"step": 15950 |
|
}, |
|
{ |
|
"epoch": 0.4777117606664079, |
|
"grad_norm": 3.317011833190918, |
|
"learning_rate": 0.00019543025960148345, |
|
"loss": 1.3506, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.47920460991849045, |
|
"grad_norm": 3.5980935096740723, |
|
"learning_rate": 0.0001954159738055646, |
|
"loss": 1.3622, |
|
"step": 16050 |
|
}, |
|
{ |
|
"epoch": 0.48069745917057294, |
|
"grad_norm": 4.726743698120117, |
|
"learning_rate": 0.00019540168800964578, |
|
"loss": 1.3414, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 0.4821903084226555, |
|
"grad_norm": 5.129758358001709, |
|
"learning_rate": 0.00019538740221372694, |
|
"loss": 1.3873, |
|
"step": 16150 |
|
}, |
|
{ |
|
"epoch": 0.483683157674738, |
|
"grad_norm": 5.122271537780762, |
|
"learning_rate": 0.0001953731164178081, |
|
"loss": 1.428, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 0.4851760069268205, |
|
"grad_norm": 3.359868049621582, |
|
"learning_rate": 0.0001953588306218893, |
|
"loss": 1.3111, |
|
"step": 16250 |
|
}, |
|
{ |
|
"epoch": 0.48666885617890304, |
|
"grad_norm": 5.066514492034912, |
|
"learning_rate": 0.00019534454482597044, |
|
"loss": 1.2961, |
|
"step": 16300 |
|
}, |
|
{ |
|
"epoch": 0.4881617054309856, |
|
"grad_norm": 4.902595520019531, |
|
"learning_rate": 0.0001953302590300516, |
|
"loss": 1.3682, |
|
"step": 16350 |
|
}, |
|
{ |
|
"epoch": 0.4896545546830681, |
|
"grad_norm": 5.0537028312683105, |
|
"learning_rate": 0.00019531597323413277, |
|
"loss": 1.315, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 0.4911474039351506, |
|
"grad_norm": 3.7002792358398438, |
|
"learning_rate": 0.00019530168743821393, |
|
"loss": 1.3441, |
|
"step": 16450 |
|
}, |
|
{ |
|
"epoch": 0.49264025318723315, |
|
"grad_norm": 4.845950603485107, |
|
"learning_rate": 0.00019528740164229512, |
|
"loss": 1.3887, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.4941331024393157, |
|
"grad_norm": 4.933434963226318, |
|
"learning_rate": 0.00019527311584637626, |
|
"loss": 1.3865, |
|
"step": 16550 |
|
}, |
|
{ |
|
"epoch": 0.4956259516913982, |
|
"grad_norm": 3.8103625774383545, |
|
"learning_rate": 0.00019525883005045745, |
|
"loss": 1.3757, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 0.4971188009434807, |
|
"grad_norm": 4.501999855041504, |
|
"learning_rate": 0.0001952445442545386, |
|
"loss": 1.4482, |
|
"step": 16650 |
|
}, |
|
{ |
|
"epoch": 0.49861165019556325, |
|
"grad_norm": 5.600002765655518, |
|
"learning_rate": 0.00019523025845861978, |
|
"loss": 1.4209, |
|
"step": 16700 |
|
}, |
|
{ |
|
"epoch": 0.5001044994476458, |
|
"grad_norm": 5.138682842254639, |
|
"learning_rate": 0.00019521597266270092, |
|
"loss": 1.4287, |
|
"step": 16750 |
|
}, |
|
{ |
|
"epoch": 0.5015973486997283, |
|
"grad_norm": 5.575449466705322, |
|
"learning_rate": 0.0001952016868667821, |
|
"loss": 1.3992, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 0.5030901979518109, |
|
"grad_norm": 3.6443893909454346, |
|
"learning_rate": 0.00019518740107086327, |
|
"loss": 1.3661, |
|
"step": 16850 |
|
}, |
|
{ |
|
"epoch": 0.5045830472038934, |
|
"grad_norm": 3.904905319213867, |
|
"learning_rate": 0.00019517311527494444, |
|
"loss": 1.4448, |
|
"step": 16900 |
|
}, |
|
{ |
|
"epoch": 0.5060758964559758, |
|
"grad_norm": 4.380904197692871, |
|
"learning_rate": 0.0001951588294790256, |
|
"loss": 1.4076, |
|
"step": 16950 |
|
}, |
|
{ |
|
"epoch": 0.5075687457080584, |
|
"grad_norm": 3.5924415588378906, |
|
"learning_rate": 0.00019514454368310677, |
|
"loss": 1.3833, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.5090615949601409, |
|
"grad_norm": 3.7007193565368652, |
|
"learning_rate": 0.00019513025788718793, |
|
"loss": 1.391, |
|
"step": 17050 |
|
}, |
|
{ |
|
"epoch": 0.5105544442122234, |
|
"grad_norm": 3.9879095554351807, |
|
"learning_rate": 0.0001951159720912691, |
|
"loss": 1.3419, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 0.512047293464306, |
|
"grad_norm": 5.663998126983643, |
|
"learning_rate": 0.00019510168629535026, |
|
"loss": 1.3481, |
|
"step": 17150 |
|
}, |
|
{ |
|
"epoch": 0.5135401427163885, |
|
"grad_norm": 3.9803707599639893, |
|
"learning_rate": 0.00019508740049943143, |
|
"loss": 1.4034, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 0.5150329919684711, |
|
"grad_norm": 3.718477725982666, |
|
"learning_rate": 0.0001950731147035126, |
|
"loss": 1.406, |
|
"step": 17250 |
|
}, |
|
{ |
|
"epoch": 0.5165258412205536, |
|
"grad_norm": 4.864751815795898, |
|
"learning_rate": 0.00019505882890759378, |
|
"loss": 1.375, |
|
"step": 17300 |
|
}, |
|
{ |
|
"epoch": 0.518018690472636, |
|
"grad_norm": 3.697645664215088, |
|
"learning_rate": 0.00019504454311167492, |
|
"loss": 1.4283, |
|
"step": 17350 |
|
}, |
|
{ |
|
"epoch": 0.5195115397247186, |
|
"grad_norm": 4.063074111938477, |
|
"learning_rate": 0.0001950302573157561, |
|
"loss": 1.378, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 0.5210043889768011, |
|
"grad_norm": 4.223004341125488, |
|
"learning_rate": 0.00019501597151983725, |
|
"loss": 1.3789, |
|
"step": 17450 |
|
}, |
|
{ |
|
"epoch": 0.5224972382288836, |
|
"grad_norm": 3.329366683959961, |
|
"learning_rate": 0.00019500168572391844, |
|
"loss": 1.346, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.5239900874809662, |
|
"grad_norm": 4.774710178375244, |
|
"learning_rate": 0.00019498739992799958, |
|
"loss": 1.3895, |
|
"step": 17550 |
|
}, |
|
{ |
|
"epoch": 0.5254829367330487, |
|
"grad_norm": 6.2145490646362305, |
|
"learning_rate": 0.00019497311413208077, |
|
"loss": 1.3715, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 0.5269757859851312, |
|
"grad_norm": 3.9069626331329346, |
|
"learning_rate": 0.00019495882833616194, |
|
"loss": 1.4572, |
|
"step": 17650 |
|
}, |
|
{ |
|
"epoch": 0.5284686352372138, |
|
"grad_norm": 3.347576141357422, |
|
"learning_rate": 0.0001949445425402431, |
|
"loss": 1.359, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 0.5299614844892963, |
|
"grad_norm": 5.305202484130859, |
|
"learning_rate": 0.00019493025674432427, |
|
"loss": 1.4038, |
|
"step": 17750 |
|
}, |
|
{ |
|
"epoch": 0.5314543337413788, |
|
"grad_norm": 3.865619659423828, |
|
"learning_rate": 0.00019491597094840543, |
|
"loss": 1.3913, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 0.5329471829934613, |
|
"grad_norm": 4.791336536407471, |
|
"learning_rate": 0.0001949016851524866, |
|
"loss": 1.3852, |
|
"step": 17850 |
|
}, |
|
{ |
|
"epoch": 0.5344400322455438, |
|
"grad_norm": 3.7827060222625732, |
|
"learning_rate": 0.00019488739935656776, |
|
"loss": 1.3108, |
|
"step": 17900 |
|
}, |
|
{ |
|
"epoch": 0.5359328814976264, |
|
"grad_norm": 4.945117473602295, |
|
"learning_rate": 0.00019487311356064892, |
|
"loss": 1.346, |
|
"step": 17950 |
|
}, |
|
{ |
|
"epoch": 0.5374257307497089, |
|
"grad_norm": 4.561169147491455, |
|
"learning_rate": 0.0001948588277647301, |
|
"loss": 1.3904, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.5389185800017914, |
|
"grad_norm": 4.608798027038574, |
|
"learning_rate": 0.00019484454196881125, |
|
"loss": 1.4133, |
|
"step": 18050 |
|
}, |
|
{ |
|
"epoch": 0.540411429253874, |
|
"grad_norm": 4.303143501281738, |
|
"learning_rate": 0.00019483025617289245, |
|
"loss": 1.3994, |
|
"step": 18100 |
|
}, |
|
{ |
|
"epoch": 0.5419042785059565, |
|
"grad_norm": 5.815835952758789, |
|
"learning_rate": 0.00019481597037697358, |
|
"loss": 1.392, |
|
"step": 18150 |
|
}, |
|
{ |
|
"epoch": 0.543397127758039, |
|
"grad_norm": 5.349491596221924, |
|
"learning_rate": 0.00019480168458105477, |
|
"loss": 1.418, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 0.5448899770101215, |
|
"grad_norm": 3.7407824993133545, |
|
"learning_rate": 0.0001947873987851359, |
|
"loss": 1.3747, |
|
"step": 18250 |
|
}, |
|
{ |
|
"epoch": 0.546382826262204, |
|
"grad_norm": 5.2810163497924805, |
|
"learning_rate": 0.0001947731129892171, |
|
"loss": 1.4023, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 0.5478756755142866, |
|
"grad_norm": 4.417948246002197, |
|
"learning_rate": 0.00019475882719329824, |
|
"loss": 1.4005, |
|
"step": 18350 |
|
}, |
|
{ |
|
"epoch": 0.5493685247663691, |
|
"grad_norm": 5.287749290466309, |
|
"learning_rate": 0.00019474454139737943, |
|
"loss": 1.4211, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 0.5508613740184516, |
|
"grad_norm": 4.0996809005737305, |
|
"learning_rate": 0.0001947302556014606, |
|
"loss": 1.3559, |
|
"step": 18450 |
|
}, |
|
{ |
|
"epoch": 0.5523542232705342, |
|
"grad_norm": 5.229327201843262, |
|
"learning_rate": 0.00019471596980554176, |
|
"loss": 1.4549, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.5538470725226167, |
|
"grad_norm": 4.409546852111816, |
|
"learning_rate": 0.00019470168400962293, |
|
"loss": 1.3683, |
|
"step": 18550 |
|
}, |
|
{ |
|
"epoch": 0.5553399217746992, |
|
"grad_norm": 5.4077229499816895, |
|
"learning_rate": 0.0001946873982137041, |
|
"loss": 1.398, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 0.5568327710267817, |
|
"grad_norm": 5.208966255187988, |
|
"learning_rate": 0.00019467311241778526, |
|
"loss": 1.3276, |
|
"step": 18650 |
|
}, |
|
{ |
|
"epoch": 0.5583256202788642, |
|
"grad_norm": 4.8162617683410645, |
|
"learning_rate": 0.00019465882662186642, |
|
"loss": 1.3314, |
|
"step": 18700 |
|
}, |
|
{ |
|
"epoch": 0.5598184695309467, |
|
"grad_norm": 5.04697322845459, |
|
"learning_rate": 0.00019464454082594759, |
|
"loss": 1.3883, |
|
"step": 18750 |
|
}, |
|
{ |
|
"epoch": 0.5613113187830293, |
|
"grad_norm": 4.038108825683594, |
|
"learning_rate": 0.00019463025503002875, |
|
"loss": 1.4377, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 0.5628041680351118, |
|
"grad_norm": 4.910576820373535, |
|
"learning_rate": 0.00019461596923410992, |
|
"loss": 1.3713, |
|
"step": 18850 |
|
}, |
|
{ |
|
"epoch": 0.5642970172871944, |
|
"grad_norm": 5.3433756828308105, |
|
"learning_rate": 0.00019460168343819108, |
|
"loss": 1.4359, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 0.5657898665392769, |
|
"grad_norm": 3.9515552520751953, |
|
"learning_rate": 0.00019458739764227224, |
|
"loss": 1.3608, |
|
"step": 18950 |
|
}, |
|
{ |
|
"epoch": 0.5672827157913594, |
|
"grad_norm": 4.515705108642578, |
|
"learning_rate": 0.0001945731118463534, |
|
"loss": 1.3274, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.568775565043442, |
|
"grad_norm": 4.434077262878418, |
|
"learning_rate": 0.00019455882605043457, |
|
"loss": 1.3681, |
|
"step": 19050 |
|
}, |
|
{ |
|
"epoch": 0.5702684142955244, |
|
"grad_norm": 4.534008979797363, |
|
"learning_rate": 0.00019454454025451574, |
|
"loss": 1.3863, |
|
"step": 19100 |
|
}, |
|
{ |
|
"epoch": 0.5717612635476069, |
|
"grad_norm": 4.200322151184082, |
|
"learning_rate": 0.00019453025445859693, |
|
"loss": 1.3743, |
|
"step": 19150 |
|
}, |
|
{ |
|
"epoch": 0.5732541127996895, |
|
"grad_norm": 5.686845779418945, |
|
"learning_rate": 0.00019451596866267807, |
|
"loss": 1.3225, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 0.574746962051772, |
|
"grad_norm": 7.821211814880371, |
|
"learning_rate": 0.00019450168286675926, |
|
"loss": 1.3775, |
|
"step": 19250 |
|
}, |
|
{ |
|
"epoch": 0.5762398113038545, |
|
"grad_norm": 5.200834274291992, |
|
"learning_rate": 0.0001944873970708404, |
|
"loss": 1.3423, |
|
"step": 19300 |
|
}, |
|
{ |
|
"epoch": 0.5777326605559371, |
|
"grad_norm": 5.26302433013916, |
|
"learning_rate": 0.0001944731112749216, |
|
"loss": 1.3813, |
|
"step": 19350 |
|
}, |
|
{ |
|
"epoch": 0.5792255098080196, |
|
"grad_norm": 3.3207719326019287, |
|
"learning_rate": 0.00019445882547900273, |
|
"loss": 1.3922, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 0.5807183590601022, |
|
"grad_norm": 4.619020938873291, |
|
"learning_rate": 0.00019444453968308392, |
|
"loss": 1.3533, |
|
"step": 19450 |
|
}, |
|
{ |
|
"epoch": 0.5822112083121846, |
|
"grad_norm": 5.780002593994141, |
|
"learning_rate": 0.00019443025388716508, |
|
"loss": 1.4035, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.5837040575642671, |
|
"grad_norm": 4.961215496063232, |
|
"learning_rate": 0.00019441596809124625, |
|
"loss": 1.3687, |
|
"step": 19550 |
|
}, |
|
{ |
|
"epoch": 0.5851969068163497, |
|
"grad_norm": 4.50115442276001, |
|
"learning_rate": 0.0001944016822953274, |
|
"loss": 1.342, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 0.5866897560684322, |
|
"grad_norm": 3.9477944374084473, |
|
"learning_rate": 0.00019438739649940858, |
|
"loss": 1.394, |
|
"step": 19650 |
|
}, |
|
{ |
|
"epoch": 0.5881826053205147, |
|
"grad_norm": 3.7466814517974854, |
|
"learning_rate": 0.00019437311070348974, |
|
"loss": 1.3414, |
|
"step": 19700 |
|
}, |
|
{ |
|
"epoch": 0.5896754545725973, |
|
"grad_norm": 4.382058143615723, |
|
"learning_rate": 0.0001943588249075709, |
|
"loss": 1.3669, |
|
"step": 19750 |
|
}, |
|
{ |
|
"epoch": 0.5911683038246798, |
|
"grad_norm": 3.7016665935516357, |
|
"learning_rate": 0.00019434453911165207, |
|
"loss": 1.4548, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 0.5926611530767623, |
|
"grad_norm": 4.4738030433654785, |
|
"learning_rate": 0.00019433025331573324, |
|
"loss": 1.4273, |
|
"step": 19850 |
|
}, |
|
{ |
|
"epoch": 0.5941540023288449, |
|
"grad_norm": 5.2445454597473145, |
|
"learning_rate": 0.0001943159675198144, |
|
"loss": 1.3746, |
|
"step": 19900 |
|
}, |
|
{ |
|
"epoch": 0.5956468515809273, |
|
"grad_norm": 3.766219139099121, |
|
"learning_rate": 0.0001943016817238956, |
|
"loss": 1.4391, |
|
"step": 19950 |
|
}, |
|
{ |
|
"epoch": 0.5971397008330099, |
|
"grad_norm": 6.310808181762695, |
|
"learning_rate": 0.00019428739592797673, |
|
"loss": 1.3316, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.5986325500850924, |
|
"grad_norm": 4.055521488189697, |
|
"learning_rate": 0.00019427311013205792, |
|
"loss": 1.3784, |
|
"step": 20050 |
|
}, |
|
{ |
|
"epoch": 0.6001253993371749, |
|
"grad_norm": 4.933177471160889, |
|
"learning_rate": 0.00019425882433613906, |
|
"loss": 1.3352, |
|
"step": 20100 |
|
}, |
|
{ |
|
"epoch": 0.6016182485892575, |
|
"grad_norm": 3.8867061138153076, |
|
"learning_rate": 0.00019424453854022025, |
|
"loss": 1.4169, |
|
"step": 20150 |
|
}, |
|
{ |
|
"epoch": 0.60311109784134, |
|
"grad_norm": 3.364475727081299, |
|
"learning_rate": 0.0001942302527443014, |
|
"loss": 1.3767, |
|
"step": 20200 |
|
}, |
|
{ |
|
"epoch": 0.6046039470934225, |
|
"grad_norm": 3.48152232170105, |
|
"learning_rate": 0.00019421596694838258, |
|
"loss": 1.3659, |
|
"step": 20250 |
|
}, |
|
{ |
|
"epoch": 0.6060967963455051, |
|
"grad_norm": 3.3658649921417236, |
|
"learning_rate": 0.00019420168115246374, |
|
"loss": 1.3745, |
|
"step": 20300 |
|
}, |
|
{ |
|
"epoch": 0.6075896455975875, |
|
"grad_norm": 4.441917896270752, |
|
"learning_rate": 0.0001941873953565449, |
|
"loss": 1.3827, |
|
"step": 20350 |
|
}, |
|
{ |
|
"epoch": 0.60908249484967, |
|
"grad_norm": 5.014800548553467, |
|
"learning_rate": 0.00019417310956062607, |
|
"loss": 1.3792, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 0.6105753441017526, |
|
"grad_norm": 5.472316265106201, |
|
"learning_rate": 0.00019415882376470724, |
|
"loss": 1.394, |
|
"step": 20450 |
|
}, |
|
{ |
|
"epoch": 0.6120681933538351, |
|
"grad_norm": 5.35073184967041, |
|
"learning_rate": 0.0001941445379687884, |
|
"loss": 1.401, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.6135610426059177, |
|
"grad_norm": 4.284445762634277, |
|
"learning_rate": 0.00019413025217286957, |
|
"loss": 1.4354, |
|
"step": 20550 |
|
}, |
|
{ |
|
"epoch": 0.6150538918580002, |
|
"grad_norm": 3.561774492263794, |
|
"learning_rate": 0.00019411596637695073, |
|
"loss": 1.3295, |
|
"step": 20600 |
|
}, |
|
{ |
|
"epoch": 0.6165467411100827, |
|
"grad_norm": 4.479186534881592, |
|
"learning_rate": 0.0001941016805810319, |
|
"loss": 1.3629, |
|
"step": 20650 |
|
}, |
|
{ |
|
"epoch": 0.6180395903621653, |
|
"grad_norm": 4.186618804931641, |
|
"learning_rate": 0.00019408739478511306, |
|
"loss": 1.3399, |
|
"step": 20700 |
|
}, |
|
{ |
|
"epoch": 0.6195324396142478, |
|
"grad_norm": 3.589655637741089, |
|
"learning_rate": 0.00019407310898919425, |
|
"loss": 1.3516, |
|
"step": 20750 |
|
}, |
|
{ |
|
"epoch": 0.6210252888663302, |
|
"grad_norm": 4.330646514892578, |
|
"learning_rate": 0.0001940588231932754, |
|
"loss": 1.3148, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 0.6225181381184128, |
|
"grad_norm": 6.324933052062988, |
|
"learning_rate": 0.00019404453739735658, |
|
"loss": 1.331, |
|
"step": 20850 |
|
}, |
|
{ |
|
"epoch": 0.6240109873704953, |
|
"grad_norm": 4.652800559997559, |
|
"learning_rate": 0.00019403025160143772, |
|
"loss": 1.3604, |
|
"step": 20900 |
|
}, |
|
{ |
|
"epoch": 0.6255038366225778, |
|
"grad_norm": 4.831106662750244, |
|
"learning_rate": 0.0001940159658055189, |
|
"loss": 1.3322, |
|
"step": 20950 |
|
}, |
|
{ |
|
"epoch": 0.6269966858746604, |
|
"grad_norm": 6.0938920974731445, |
|
"learning_rate": 0.00019400168000960005, |
|
"loss": 1.4106, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.6284895351267429, |
|
"grad_norm": 4.424108028411865, |
|
"learning_rate": 0.00019398739421368124, |
|
"loss": 1.4001, |
|
"step": 21050 |
|
}, |
|
{ |
|
"epoch": 0.6299823843788255, |
|
"grad_norm": 4.329803466796875, |
|
"learning_rate": 0.0001939731084177624, |
|
"loss": 1.4048, |
|
"step": 21100 |
|
}, |
|
{ |
|
"epoch": 0.631475233630908, |
|
"grad_norm": 3.915818929672241, |
|
"learning_rate": 0.00019395882262184357, |
|
"loss": 1.3619, |
|
"step": 21150 |
|
}, |
|
{ |
|
"epoch": 0.6329680828829904, |
|
"grad_norm": 3.9562571048736572, |
|
"learning_rate": 0.00019394453682592474, |
|
"loss": 1.3636, |
|
"step": 21200 |
|
}, |
|
{ |
|
"epoch": 0.634460932135073, |
|
"grad_norm": 5.229249954223633, |
|
"learning_rate": 0.0001939302510300059, |
|
"loss": 1.3502, |
|
"step": 21250 |
|
}, |
|
{ |
|
"epoch": 0.6359537813871555, |
|
"grad_norm": 4.149145126342773, |
|
"learning_rate": 0.00019391596523408706, |
|
"loss": 1.3603, |
|
"step": 21300 |
|
}, |
|
{ |
|
"epoch": 0.637446630639238, |
|
"grad_norm": 4.068868637084961, |
|
"learning_rate": 0.00019390167943816823, |
|
"loss": 1.3945, |
|
"step": 21350 |
|
}, |
|
{ |
|
"epoch": 0.6389394798913206, |
|
"grad_norm": 3.6808931827545166, |
|
"learning_rate": 0.0001938873936422494, |
|
"loss": 1.4015, |
|
"step": 21400 |
|
}, |
|
{ |
|
"epoch": 0.6404323291434031, |
|
"grad_norm": 4.391795635223389, |
|
"learning_rate": 0.00019387310784633056, |
|
"loss": 1.3428, |
|
"step": 21450 |
|
}, |
|
{ |
|
"epoch": 0.6419251783954856, |
|
"grad_norm": 5.109655380249023, |
|
"learning_rate": 0.00019385882205041172, |
|
"loss": 1.3874, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 0.6434180276475682, |
|
"grad_norm": 4.560513496398926, |
|
"learning_rate": 0.0001938445362544929, |
|
"loss": 1.3833, |
|
"step": 21550 |
|
}, |
|
{ |
|
"epoch": 0.6449108768996507, |
|
"grad_norm": 3.6108620166778564, |
|
"learning_rate": 0.00019383025045857405, |
|
"loss": 1.3981, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 0.6464037261517332, |
|
"grad_norm": 4.100367546081543, |
|
"learning_rate": 0.00019381596466265522, |
|
"loss": 1.3139, |
|
"step": 21650 |
|
}, |
|
{ |
|
"epoch": 0.6478965754038157, |
|
"grad_norm": 3.1513540744781494, |
|
"learning_rate": 0.00019380167886673638, |
|
"loss": 1.3359, |
|
"step": 21700 |
|
}, |
|
{ |
|
"epoch": 0.6493894246558982, |
|
"grad_norm": 4.793807506561279, |
|
"learning_rate": 0.00019378739307081755, |
|
"loss": 1.4025, |
|
"step": 21750 |
|
}, |
|
{ |
|
"epoch": 0.6508822739079808, |
|
"grad_norm": 4.403114318847656, |
|
"learning_rate": 0.0001937731072748987, |
|
"loss": 1.349, |
|
"step": 21800 |
|
}, |
|
{ |
|
"epoch": 0.6523751231600633, |
|
"grad_norm": 4.445423603057861, |
|
"learning_rate": 0.00019375882147897988, |
|
"loss": 1.4641, |
|
"step": 21850 |
|
}, |
|
{ |
|
"epoch": 0.6538679724121458, |
|
"grad_norm": 5.721547603607178, |
|
"learning_rate": 0.00019374453568306107, |
|
"loss": 1.3451, |
|
"step": 21900 |
|
}, |
|
{ |
|
"epoch": 0.6553608216642284, |
|
"grad_norm": 4.411465644836426, |
|
"learning_rate": 0.0001937302498871422, |
|
"loss": 1.3893, |
|
"step": 21950 |
|
}, |
|
{ |
|
"epoch": 0.6568536709163109, |
|
"grad_norm": 3.181412696838379, |
|
"learning_rate": 0.0001937159640912234, |
|
"loss": 1.385, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.6583465201683933, |
|
"grad_norm": 7.374556541442871, |
|
"learning_rate": 0.00019370167829530453, |
|
"loss": 1.4051, |
|
"step": 22050 |
|
}, |
|
{ |
|
"epoch": 0.6598393694204759, |
|
"grad_norm": 4.452084064483643, |
|
"learning_rate": 0.00019368739249938573, |
|
"loss": 1.3389, |
|
"step": 22100 |
|
}, |
|
{ |
|
"epoch": 0.6613322186725584, |
|
"grad_norm": 5.60634708404541, |
|
"learning_rate": 0.0001936731067034669, |
|
"loss": 1.3924, |
|
"step": 22150 |
|
}, |
|
{ |
|
"epoch": 0.662825067924641, |
|
"grad_norm": 4.35741662979126, |
|
"learning_rate": 0.00019365882090754806, |
|
"loss": 1.3957, |
|
"step": 22200 |
|
}, |
|
{ |
|
"epoch": 0.6643179171767235, |
|
"grad_norm": 4.173916816711426, |
|
"learning_rate": 0.00019364453511162922, |
|
"loss": 1.3669, |
|
"step": 22250 |
|
}, |
|
{ |
|
"epoch": 0.665810766428806, |
|
"grad_norm": 3.8707377910614014, |
|
"learning_rate": 0.00019363024931571038, |
|
"loss": 1.3478, |
|
"step": 22300 |
|
}, |
|
{ |
|
"epoch": 0.6673036156808886, |
|
"grad_norm": 4.765937328338623, |
|
"learning_rate": 0.00019361596351979155, |
|
"loss": 1.4007, |
|
"step": 22350 |
|
}, |
|
{ |
|
"epoch": 0.6687964649329711, |
|
"grad_norm": 4.315809726715088, |
|
"learning_rate": 0.00019360167772387271, |
|
"loss": 1.3622, |
|
"step": 22400 |
|
}, |
|
{ |
|
"epoch": 0.6702893141850536, |
|
"grad_norm": 5.977219104766846, |
|
"learning_rate": 0.00019358739192795388, |
|
"loss": 1.3836, |
|
"step": 22450 |
|
}, |
|
{ |
|
"epoch": 0.6717821634371361, |
|
"grad_norm": 4.420149803161621, |
|
"learning_rate": 0.00019357310613203504, |
|
"loss": 1.3488, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.6732750126892186, |
|
"grad_norm": 3.9537293910980225, |
|
"learning_rate": 0.0001935588203361162, |
|
"loss": 1.3892, |
|
"step": 22550 |
|
}, |
|
{ |
|
"epoch": 0.6747678619413011, |
|
"grad_norm": 5.468355655670166, |
|
"learning_rate": 0.0001935445345401974, |
|
"loss": 1.4475, |
|
"step": 22600 |
|
}, |
|
{ |
|
"epoch": 0.6762607111933837, |
|
"grad_norm": 4.3148674964904785, |
|
"learning_rate": 0.00019353024874427854, |
|
"loss": 1.3866, |
|
"step": 22650 |
|
}, |
|
{ |
|
"epoch": 0.6777535604454662, |
|
"grad_norm": 4.004809379577637, |
|
"learning_rate": 0.00019351596294835973, |
|
"loss": 1.3811, |
|
"step": 22700 |
|
}, |
|
{ |
|
"epoch": 0.6792464096975488, |
|
"grad_norm": 4.404988765716553, |
|
"learning_rate": 0.00019350167715244087, |
|
"loss": 1.3648, |
|
"step": 22750 |
|
}, |
|
{ |
|
"epoch": 0.6807392589496313, |
|
"grad_norm": 5.115052223205566, |
|
"learning_rate": 0.00019348739135652206, |
|
"loss": 1.3523, |
|
"step": 22800 |
|
}, |
|
{ |
|
"epoch": 0.6822321082017138, |
|
"grad_norm": 5.083719730377197, |
|
"learning_rate": 0.0001934731055606032, |
|
"loss": 1.3978, |
|
"step": 22850 |
|
}, |
|
{ |
|
"epoch": 0.6837249574537964, |
|
"grad_norm": 4.038282871246338, |
|
"learning_rate": 0.0001934588197646844, |
|
"loss": 1.2788, |
|
"step": 22900 |
|
}, |
|
{ |
|
"epoch": 0.6852178067058788, |
|
"grad_norm": 5.438407897949219, |
|
"learning_rate": 0.00019344453396876555, |
|
"loss": 1.4181, |
|
"step": 22950 |
|
}, |
|
{ |
|
"epoch": 0.6867106559579613, |
|
"grad_norm": 5.381191730499268, |
|
"learning_rate": 0.00019343024817284672, |
|
"loss": 1.4145, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.6882035052100439, |
|
"grad_norm": 3.183706283569336, |
|
"learning_rate": 0.00019341596237692788, |
|
"loss": 1.3271, |
|
"step": 23050 |
|
}, |
|
{ |
|
"epoch": 0.6896963544621264, |
|
"grad_norm": 4.063404083251953, |
|
"learning_rate": 0.00019340167658100905, |
|
"loss": 1.3717, |
|
"step": 23100 |
|
}, |
|
{ |
|
"epoch": 0.6911892037142089, |
|
"grad_norm": 5.677481651306152, |
|
"learning_rate": 0.0001933873907850902, |
|
"loss": 1.344, |
|
"step": 23150 |
|
}, |
|
{ |
|
"epoch": 0.6926820529662915, |
|
"grad_norm": 5.376470565795898, |
|
"learning_rate": 0.00019337310498917138, |
|
"loss": 1.295, |
|
"step": 23200 |
|
}, |
|
{ |
|
"epoch": 0.694174902218374, |
|
"grad_norm": 3.8844990730285645, |
|
"learning_rate": 0.00019335881919325254, |
|
"loss": 1.381, |
|
"step": 23250 |
|
}, |
|
{ |
|
"epoch": 0.6956677514704566, |
|
"grad_norm": 5.285017013549805, |
|
"learning_rate": 0.0001933445333973337, |
|
"loss": 1.3505, |
|
"step": 23300 |
|
}, |
|
{ |
|
"epoch": 0.697160600722539, |
|
"grad_norm": 4.133642196655273, |
|
"learning_rate": 0.00019333024760141487, |
|
"loss": 1.3604, |
|
"step": 23350 |
|
}, |
|
{ |
|
"epoch": 0.6986534499746215, |
|
"grad_norm": 5.037447929382324, |
|
"learning_rate": 0.00019331596180549606, |
|
"loss": 1.3209, |
|
"step": 23400 |
|
}, |
|
{ |
|
"epoch": 0.7001462992267041, |
|
"grad_norm": 4.916257858276367, |
|
"learning_rate": 0.0001933016760095772, |
|
"loss": 1.4084, |
|
"step": 23450 |
|
}, |
|
{ |
|
"epoch": 0.7016391484787866, |
|
"grad_norm": 3.469505786895752, |
|
"learning_rate": 0.0001932873902136584, |
|
"loss": 1.3705, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 0.7031319977308691, |
|
"grad_norm": 3.624896287918091, |
|
"learning_rate": 0.00019327310441773953, |
|
"loss": 1.3507, |
|
"step": 23550 |
|
}, |
|
{ |
|
"epoch": 0.7046248469829517, |
|
"grad_norm": 4.352174758911133, |
|
"learning_rate": 0.00019325881862182072, |
|
"loss": 1.3184, |
|
"step": 23600 |
|
}, |
|
{ |
|
"epoch": 0.7061176962350342, |
|
"grad_norm": 6.4549150466918945, |
|
"learning_rate": 0.00019324453282590186, |
|
"loss": 1.4161, |
|
"step": 23650 |
|
}, |
|
{ |
|
"epoch": 0.7076105454871167, |
|
"grad_norm": 6.221735000610352, |
|
"learning_rate": 0.00019323024702998305, |
|
"loss": 1.3425, |
|
"step": 23700 |
|
}, |
|
{ |
|
"epoch": 0.7091033947391993, |
|
"grad_norm": 4.285811901092529, |
|
"learning_rate": 0.00019321596123406421, |
|
"loss": 1.3418, |
|
"step": 23750 |
|
}, |
|
{ |
|
"epoch": 0.7105962439912817, |
|
"grad_norm": 4.533527374267578, |
|
"learning_rate": 0.00019320167543814538, |
|
"loss": 1.3723, |
|
"step": 23800 |
|
}, |
|
{ |
|
"epoch": 0.7120890932433643, |
|
"grad_norm": 3.838109016418457, |
|
"learning_rate": 0.00019318738964222654, |
|
"loss": 1.3624, |
|
"step": 23850 |
|
}, |
|
{ |
|
"epoch": 0.7135819424954468, |
|
"grad_norm": 3.8401126861572266, |
|
"learning_rate": 0.0001931731038463077, |
|
"loss": 1.4239, |
|
"step": 23900 |
|
}, |
|
{ |
|
"epoch": 0.7150747917475293, |
|
"grad_norm": 5.9049787521362305, |
|
"learning_rate": 0.00019315881805038887, |
|
"loss": 1.3642, |
|
"step": 23950 |
|
}, |
|
{ |
|
"epoch": 0.7165676409996119, |
|
"grad_norm": 5.397033214569092, |
|
"learning_rate": 0.00019314453225447004, |
|
"loss": 1.3298, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.7180604902516944, |
|
"grad_norm": 5.649387359619141, |
|
"learning_rate": 0.0001931302464585512, |
|
"loss": 1.4102, |
|
"step": 24050 |
|
}, |
|
{ |
|
"epoch": 0.7195533395037769, |
|
"grad_norm": 5.697938919067383, |
|
"learning_rate": 0.00019311596066263237, |
|
"loss": 1.3765, |
|
"step": 24100 |
|
}, |
|
{ |
|
"epoch": 0.7210461887558595, |
|
"grad_norm": 4.244998455047607, |
|
"learning_rate": 0.00019310167486671353, |
|
"loss": 1.3471, |
|
"step": 24150 |
|
}, |
|
{ |
|
"epoch": 0.722539038007942, |
|
"grad_norm": 4.579226493835449, |
|
"learning_rate": 0.0001930873890707947, |
|
"loss": 1.3897, |
|
"step": 24200 |
|
}, |
|
{ |
|
"epoch": 0.7240318872600244, |
|
"grad_norm": 8.80657958984375, |
|
"learning_rate": 0.00019307310327487586, |
|
"loss": 1.3628, |
|
"step": 24250 |
|
}, |
|
{ |
|
"epoch": 0.725524736512107, |
|
"grad_norm": 5.42709493637085, |
|
"learning_rate": 0.00019305881747895703, |
|
"loss": 1.3587, |
|
"step": 24300 |
|
}, |
|
{ |
|
"epoch": 0.7270175857641895, |
|
"grad_norm": 4.2680840492248535, |
|
"learning_rate": 0.0001930445316830382, |
|
"loss": 1.3414, |
|
"step": 24350 |
|
}, |
|
{ |
|
"epoch": 0.7285104350162721, |
|
"grad_norm": 5.641107559204102, |
|
"learning_rate": 0.00019303024588711935, |
|
"loss": 1.4095, |
|
"step": 24400 |
|
}, |
|
{ |
|
"epoch": 0.7300032842683546, |
|
"grad_norm": 5.29530143737793, |
|
"learning_rate": 0.00019301596009120052, |
|
"loss": 1.389, |
|
"step": 24450 |
|
}, |
|
{ |
|
"epoch": 0.7314961335204371, |
|
"grad_norm": 4.110136032104492, |
|
"learning_rate": 0.00019300167429528168, |
|
"loss": 1.4641, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 0.7329889827725197, |
|
"grad_norm": 4.390302658081055, |
|
"learning_rate": 0.00019298738849936288, |
|
"loss": 1.4028, |
|
"step": 24550 |
|
}, |
|
{ |
|
"epoch": 0.7344818320246022, |
|
"grad_norm": 3.717409372329712, |
|
"learning_rate": 0.000192973102703444, |
|
"loss": 1.436, |
|
"step": 24600 |
|
}, |
|
{ |
|
"epoch": 0.7359746812766846, |
|
"grad_norm": 5.399808406829834, |
|
"learning_rate": 0.0001929588169075252, |
|
"loss": 1.2887, |
|
"step": 24650 |
|
}, |
|
{ |
|
"epoch": 0.7374675305287672, |
|
"grad_norm": 5.027936935424805, |
|
"learning_rate": 0.00019294453111160634, |
|
"loss": 1.3591, |
|
"step": 24700 |
|
}, |
|
{ |
|
"epoch": 0.7389603797808497, |
|
"grad_norm": 5.12235689163208, |
|
"learning_rate": 0.00019293024531568753, |
|
"loss": 1.4305, |
|
"step": 24750 |
|
}, |
|
{ |
|
"epoch": 0.7404532290329322, |
|
"grad_norm": 4.445746898651123, |
|
"learning_rate": 0.0001929159595197687, |
|
"loss": 1.3609, |
|
"step": 24800 |
|
}, |
|
{ |
|
"epoch": 0.7419460782850148, |
|
"grad_norm": 4.590288162231445, |
|
"learning_rate": 0.00019290167372384986, |
|
"loss": 1.3811, |
|
"step": 24850 |
|
}, |
|
{ |
|
"epoch": 0.7434389275370973, |
|
"grad_norm": 3.841573476791382, |
|
"learning_rate": 0.00019288738792793103, |
|
"loss": 1.3632, |
|
"step": 24900 |
|
}, |
|
{ |
|
"epoch": 0.7449317767891799, |
|
"grad_norm": 5.2155327796936035, |
|
"learning_rate": 0.0001928731021320122, |
|
"loss": 1.4422, |
|
"step": 24950 |
|
}, |
|
{ |
|
"epoch": 0.7464246260412624, |
|
"grad_norm": 4.942720413208008, |
|
"learning_rate": 0.00019285881633609336, |
|
"loss": 1.3916, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.7479174752933448, |
|
"grad_norm": 5.292357921600342, |
|
"learning_rate": 0.00019284453054017452, |
|
"loss": 1.3048, |
|
"step": 25050 |
|
}, |
|
{ |
|
"epoch": 0.7494103245454274, |
|
"grad_norm": 6.690375804901123, |
|
"learning_rate": 0.0001928302447442557, |
|
"loss": 1.3967, |
|
"step": 25100 |
|
}, |
|
{ |
|
"epoch": 0.7509031737975099, |
|
"grad_norm": 4.590394496917725, |
|
"learning_rate": 0.00019281595894833685, |
|
"loss": 1.3925, |
|
"step": 25150 |
|
}, |
|
{ |
|
"epoch": 0.7523960230495924, |
|
"grad_norm": 4.220889091491699, |
|
"learning_rate": 0.00019280167315241802, |
|
"loss": 1.3263, |
|
"step": 25200 |
|
}, |
|
{ |
|
"epoch": 0.753888872301675, |
|
"grad_norm": 4.847371578216553, |
|
"learning_rate": 0.00019278738735649918, |
|
"loss": 1.4141, |
|
"step": 25250 |
|
}, |
|
{ |
|
"epoch": 0.7553817215537575, |
|
"grad_norm": 5.392793655395508, |
|
"learning_rate": 0.00019277310156058035, |
|
"loss": 1.3147, |
|
"step": 25300 |
|
}, |
|
{ |
|
"epoch": 0.75687457080584, |
|
"grad_norm": 4.261468887329102, |
|
"learning_rate": 0.00019275881576466154, |
|
"loss": 1.3102, |
|
"step": 25350 |
|
}, |
|
{ |
|
"epoch": 0.7583674200579226, |
|
"grad_norm": 4.498802661895752, |
|
"learning_rate": 0.00019274452996874267, |
|
"loss": 1.3912, |
|
"step": 25400 |
|
}, |
|
{ |
|
"epoch": 0.759860269310005, |
|
"grad_norm": 5.2235283851623535, |
|
"learning_rate": 0.00019273024417282387, |
|
"loss": 1.3375, |
|
"step": 25450 |
|
}, |
|
{ |
|
"epoch": 0.7613531185620876, |
|
"grad_norm": 6.409016132354736, |
|
"learning_rate": 0.000192715958376905, |
|
"loss": 1.3339, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 0.7628459678141701, |
|
"grad_norm": 4.4392805099487305, |
|
"learning_rate": 0.0001927016725809862, |
|
"loss": 1.3599, |
|
"step": 25550 |
|
}, |
|
{ |
|
"epoch": 0.7643388170662526, |
|
"grad_norm": 5.580776691436768, |
|
"learning_rate": 0.00019268738678506736, |
|
"loss": 1.3812, |
|
"step": 25600 |
|
}, |
|
{ |
|
"epoch": 0.7658316663183352, |
|
"grad_norm": 5.473046779632568, |
|
"learning_rate": 0.00019267310098914852, |
|
"loss": 1.3582, |
|
"step": 25650 |
|
}, |
|
{ |
|
"epoch": 0.7673245155704177, |
|
"grad_norm": 6.353076934814453, |
|
"learning_rate": 0.0001926588151932297, |
|
"loss": 1.3388, |
|
"step": 25700 |
|
}, |
|
{ |
|
"epoch": 0.7688173648225002, |
|
"grad_norm": 4.247453689575195, |
|
"learning_rate": 0.00019264452939731085, |
|
"loss": 1.3745, |
|
"step": 25750 |
|
}, |
|
{ |
|
"epoch": 0.7703102140745828, |
|
"grad_norm": 5.048892498016357, |
|
"learning_rate": 0.00019263024360139202, |
|
"loss": 1.405, |
|
"step": 25800 |
|
}, |
|
{ |
|
"epoch": 0.7718030633266653, |
|
"grad_norm": 4.883440017700195, |
|
"learning_rate": 0.00019261595780547318, |
|
"loss": 1.4082, |
|
"step": 25850 |
|
}, |
|
{ |
|
"epoch": 0.7732959125787477, |
|
"grad_norm": 4.221151828765869, |
|
"learning_rate": 0.00019260167200955435, |
|
"loss": 1.3766, |
|
"step": 25900 |
|
}, |
|
{ |
|
"epoch": 0.7747887618308303, |
|
"grad_norm": 8.68738079071045, |
|
"learning_rate": 0.0001925873862136355, |
|
"loss": 1.4046, |
|
"step": 25950 |
|
}, |
|
{ |
|
"epoch": 0.7762816110829128, |
|
"grad_norm": 4.479017734527588, |
|
"learning_rate": 0.00019257310041771668, |
|
"loss": 1.3452, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.7777744603349954, |
|
"grad_norm": 4.299473285675049, |
|
"learning_rate": 0.00019255881462179787, |
|
"loss": 1.3702, |
|
"step": 26050 |
|
}, |
|
{ |
|
"epoch": 0.7792673095870779, |
|
"grad_norm": 5.272589683532715, |
|
"learning_rate": 0.000192544528825879, |
|
"loss": 1.3725, |
|
"step": 26100 |
|
}, |
|
{ |
|
"epoch": 0.7807601588391604, |
|
"grad_norm": 4.644061088562012, |
|
"learning_rate": 0.0001925302430299602, |
|
"loss": 1.4143, |
|
"step": 26150 |
|
}, |
|
{ |
|
"epoch": 0.782253008091243, |
|
"grad_norm": 4.7033185958862305, |
|
"learning_rate": 0.00019251595723404134, |
|
"loss": 1.3666, |
|
"step": 26200 |
|
}, |
|
{ |
|
"epoch": 0.7837458573433255, |
|
"grad_norm": 3.5262560844421387, |
|
"learning_rate": 0.00019250167143812253, |
|
"loss": 1.4135, |
|
"step": 26250 |
|
}, |
|
{ |
|
"epoch": 0.785238706595408, |
|
"grad_norm": 3.8599159717559814, |
|
"learning_rate": 0.00019248738564220367, |
|
"loss": 1.3258, |
|
"step": 26300 |
|
}, |
|
{ |
|
"epoch": 0.7867315558474905, |
|
"grad_norm": 5.743364334106445, |
|
"learning_rate": 0.00019247309984628486, |
|
"loss": 1.3946, |
|
"step": 26350 |
|
}, |
|
{ |
|
"epoch": 0.788224405099573, |
|
"grad_norm": 5.478078365325928, |
|
"learning_rate": 0.00019245881405036602, |
|
"loss": 1.3881, |
|
"step": 26400 |
|
}, |
|
{ |
|
"epoch": 0.7897172543516555, |
|
"grad_norm": 5.912649631500244, |
|
"learning_rate": 0.0001924445282544472, |
|
"loss": 1.4139, |
|
"step": 26450 |
|
}, |
|
{ |
|
"epoch": 0.7912101036037381, |
|
"grad_norm": 3.753570079803467, |
|
"learning_rate": 0.00019243024245852835, |
|
"loss": 1.3267, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 0.7927029528558206, |
|
"grad_norm": 4.155190944671631, |
|
"learning_rate": 0.00019241595666260952, |
|
"loss": 1.3539, |
|
"step": 26550 |
|
}, |
|
{ |
|
"epoch": 0.7941958021079032, |
|
"grad_norm": 4.314638614654541, |
|
"learning_rate": 0.00019240167086669068, |
|
"loss": 1.3635, |
|
"step": 26600 |
|
}, |
|
{ |
|
"epoch": 0.7956886513599857, |
|
"grad_norm": 4.177329063415527, |
|
"learning_rate": 0.00019238738507077185, |
|
"loss": 1.3642, |
|
"step": 26650 |
|
}, |
|
{ |
|
"epoch": 0.7971815006120682, |
|
"grad_norm": 4.3052144050598145, |
|
"learning_rate": 0.000192373099274853, |
|
"loss": 1.3011, |
|
"step": 26700 |
|
}, |
|
{ |
|
"epoch": 0.7986743498641508, |
|
"grad_norm": 5.800657272338867, |
|
"learning_rate": 0.00019235881347893417, |
|
"loss": 1.3247, |
|
"step": 26750 |
|
}, |
|
{ |
|
"epoch": 0.8001671991162332, |
|
"grad_norm": 4.211179733276367, |
|
"learning_rate": 0.00019234452768301534, |
|
"loss": 1.3399, |
|
"step": 26800 |
|
}, |
|
{ |
|
"epoch": 0.8016600483683157, |
|
"grad_norm": 4.8080315589904785, |
|
"learning_rate": 0.0001923302418870965, |
|
"loss": 1.3439, |
|
"step": 26850 |
|
}, |
|
{ |
|
"epoch": 0.8031528976203983, |
|
"grad_norm": 5.03045129776001, |
|
"learning_rate": 0.00019231595609117767, |
|
"loss": 1.4009, |
|
"step": 26900 |
|
}, |
|
{ |
|
"epoch": 0.8046457468724808, |
|
"grad_norm": 8.932162284851074, |
|
"learning_rate": 0.00019230167029525883, |
|
"loss": 1.3298, |
|
"step": 26950 |
|
}, |
|
{ |
|
"epoch": 0.8061385961245633, |
|
"grad_norm": 5.651429176330566, |
|
"learning_rate": 0.00019228738449934, |
|
"loss": 1.4044, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.8076314453766459, |
|
"grad_norm": 4.428074836730957, |
|
"learning_rate": 0.00019227309870342116, |
|
"loss": 1.3988, |
|
"step": 27050 |
|
}, |
|
{ |
|
"epoch": 0.8091242946287284, |
|
"grad_norm": 6.512781620025635, |
|
"learning_rate": 0.00019225881290750233, |
|
"loss": 1.3529, |
|
"step": 27100 |
|
}, |
|
{ |
|
"epoch": 0.810617143880811, |
|
"grad_norm": 5.393406867980957, |
|
"learning_rate": 0.0001922445271115835, |
|
"loss": 1.3266, |
|
"step": 27150 |
|
}, |
|
{ |
|
"epoch": 0.8121099931328934, |
|
"grad_norm": 3.74702525138855, |
|
"learning_rate": 0.00019223024131566468, |
|
"loss": 1.416, |
|
"step": 27200 |
|
}, |
|
{ |
|
"epoch": 0.8136028423849759, |
|
"grad_norm": 3.856290817260742, |
|
"learning_rate": 0.00019221595551974582, |
|
"loss": 1.4082, |
|
"step": 27250 |
|
}, |
|
{ |
|
"epoch": 0.8150956916370585, |
|
"grad_norm": 4.597848415374756, |
|
"learning_rate": 0.000192201669723827, |
|
"loss": 1.3751, |
|
"step": 27300 |
|
}, |
|
{ |
|
"epoch": 0.816588540889141, |
|
"grad_norm": 3.875593662261963, |
|
"learning_rate": 0.00019218738392790815, |
|
"loss": 1.4024, |
|
"step": 27350 |
|
}, |
|
{ |
|
"epoch": 0.8180813901412235, |
|
"grad_norm": 5.470495700836182, |
|
"learning_rate": 0.00019217309813198934, |
|
"loss": 1.394, |
|
"step": 27400 |
|
}, |
|
{ |
|
"epoch": 0.8195742393933061, |
|
"grad_norm": 4.562834739685059, |
|
"learning_rate": 0.00019215881233607048, |
|
"loss": 1.3161, |
|
"step": 27450 |
|
}, |
|
{ |
|
"epoch": 0.8210670886453886, |
|
"grad_norm": 4.183352470397949, |
|
"learning_rate": 0.00019214452654015167, |
|
"loss": 1.3427, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 0.8225599378974711, |
|
"grad_norm": 5.345236301422119, |
|
"learning_rate": 0.00019213024074423284, |
|
"loss": 1.3551, |
|
"step": 27550 |
|
}, |
|
{ |
|
"epoch": 0.8240527871495537, |
|
"grad_norm": 5.217405319213867, |
|
"learning_rate": 0.000192115954948314, |
|
"loss": 1.3174, |
|
"step": 27600 |
|
}, |
|
{ |
|
"epoch": 0.8255456364016361, |
|
"grad_norm": 4.486965179443359, |
|
"learning_rate": 0.00019210166915239517, |
|
"loss": 1.3931, |
|
"step": 27650 |
|
}, |
|
{ |
|
"epoch": 0.8270384856537187, |
|
"grad_norm": 4.183075904846191, |
|
"learning_rate": 0.00019208738335647633, |
|
"loss": 1.3672, |
|
"step": 27700 |
|
}, |
|
{ |
|
"epoch": 0.8285313349058012, |
|
"grad_norm": 4.916398525238037, |
|
"learning_rate": 0.0001920730975605575, |
|
"loss": 1.3993, |
|
"step": 27750 |
|
}, |
|
{ |
|
"epoch": 0.8300241841578837, |
|
"grad_norm": 3.99765682220459, |
|
"learning_rate": 0.00019205881176463866, |
|
"loss": 1.362, |
|
"step": 27800 |
|
}, |
|
{ |
|
"epoch": 0.8315170334099663, |
|
"grad_norm": 3.948981523513794, |
|
"learning_rate": 0.00019204452596871982, |
|
"loss": 1.3912, |
|
"step": 27850 |
|
}, |
|
{ |
|
"epoch": 0.8330098826620488, |
|
"grad_norm": 6.524683475494385, |
|
"learning_rate": 0.000192030240172801, |
|
"loss": 1.3845, |
|
"step": 27900 |
|
}, |
|
{ |
|
"epoch": 0.8345027319141313, |
|
"grad_norm": 4.598319053649902, |
|
"learning_rate": 0.00019201595437688215, |
|
"loss": 1.38, |
|
"step": 27950 |
|
}, |
|
{ |
|
"epoch": 0.8359955811662139, |
|
"grad_norm": 4.287365436553955, |
|
"learning_rate": 0.00019200166858096335, |
|
"loss": 1.3687, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.8374884304182963, |
|
"grad_norm": 5.497170448303223, |
|
"learning_rate": 0.00019198738278504448, |
|
"loss": 1.4324, |
|
"step": 28050 |
|
}, |
|
{ |
|
"epoch": 0.8389812796703788, |
|
"grad_norm": 3.9373042583465576, |
|
"learning_rate": 0.00019197309698912567, |
|
"loss": 1.3892, |
|
"step": 28100 |
|
}, |
|
{ |
|
"epoch": 0.8404741289224614, |
|
"grad_norm": 5.157922267913818, |
|
"learning_rate": 0.0001919588111932068, |
|
"loss": 1.3764, |
|
"step": 28150 |
|
}, |
|
{ |
|
"epoch": 0.8419669781745439, |
|
"grad_norm": 4.0024895668029785, |
|
"learning_rate": 0.000191944525397288, |
|
"loss": 1.3571, |
|
"step": 28200 |
|
}, |
|
{ |
|
"epoch": 0.8434598274266265, |
|
"grad_norm": 4.894957542419434, |
|
"learning_rate": 0.00019193023960136917, |
|
"loss": 1.3997, |
|
"step": 28250 |
|
}, |
|
{ |
|
"epoch": 0.844952676678709, |
|
"grad_norm": 4.829247951507568, |
|
"learning_rate": 0.00019191595380545033, |
|
"loss": 1.3751, |
|
"step": 28300 |
|
}, |
|
{ |
|
"epoch": 0.8464455259307915, |
|
"grad_norm": 4.123593330383301, |
|
"learning_rate": 0.0001919016680095315, |
|
"loss": 1.4183, |
|
"step": 28350 |
|
}, |
|
{ |
|
"epoch": 0.8479383751828741, |
|
"grad_norm": 4.012912273406982, |
|
"learning_rate": 0.00019188738221361266, |
|
"loss": 1.3757, |
|
"step": 28400 |
|
}, |
|
{ |
|
"epoch": 0.8494312244349566, |
|
"grad_norm": 5.17979097366333, |
|
"learning_rate": 0.00019187309641769383, |
|
"loss": 1.3975, |
|
"step": 28450 |
|
}, |
|
{ |
|
"epoch": 0.850924073687039, |
|
"grad_norm": 4.665642261505127, |
|
"learning_rate": 0.000191858810621775, |
|
"loss": 1.4539, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 0.8524169229391216, |
|
"grad_norm": 4.333798408508301, |
|
"learning_rate": 0.00019184452482585616, |
|
"loss": 1.3711, |
|
"step": 28550 |
|
}, |
|
{ |
|
"epoch": 0.8539097721912041, |
|
"grad_norm": 4.485865116119385, |
|
"learning_rate": 0.00019183023902993732, |
|
"loss": 1.4299, |
|
"step": 28600 |
|
}, |
|
{ |
|
"epoch": 0.8554026214432866, |
|
"grad_norm": 6.3318328857421875, |
|
"learning_rate": 0.00019181595323401849, |
|
"loss": 1.3621, |
|
"step": 28650 |
|
}, |
|
{ |
|
"epoch": 0.8568954706953692, |
|
"grad_norm": 3.915356159210205, |
|
"learning_rate": 0.00019180166743809965, |
|
"loss": 1.4561, |
|
"step": 28700 |
|
}, |
|
{ |
|
"epoch": 0.8583883199474517, |
|
"grad_norm": 4.613978862762451, |
|
"learning_rate": 0.00019178738164218081, |
|
"loss": 1.3267, |
|
"step": 28750 |
|
}, |
|
{ |
|
"epoch": 0.8598811691995343, |
|
"grad_norm": 4.935427188873291, |
|
"learning_rate": 0.000191773095846262, |
|
"loss": 1.3962, |
|
"step": 28800 |
|
}, |
|
{ |
|
"epoch": 0.8613740184516168, |
|
"grad_norm": 4.204285144805908, |
|
"learning_rate": 0.00019175881005034314, |
|
"loss": 1.3644, |
|
"step": 28850 |
|
}, |
|
{ |
|
"epoch": 0.8628668677036992, |
|
"grad_norm": 3.901266098022461, |
|
"learning_rate": 0.00019174452425442434, |
|
"loss": 1.344, |
|
"step": 28900 |
|
}, |
|
{ |
|
"epoch": 0.8643597169557818, |
|
"grad_norm": 5.1740922927856445, |
|
"learning_rate": 0.00019173023845850547, |
|
"loss": 1.3852, |
|
"step": 28950 |
|
}, |
|
{ |
|
"epoch": 0.8658525662078643, |
|
"grad_norm": 4.984529972076416, |
|
"learning_rate": 0.00019171595266258667, |
|
"loss": 1.3853, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 0.8673454154599468, |
|
"grad_norm": 4.719232082366943, |
|
"learning_rate": 0.00019170166686666783, |
|
"loss": 1.3962, |
|
"step": 29050 |
|
}, |
|
{ |
|
"epoch": 0.8688382647120294, |
|
"grad_norm": 6.084499359130859, |
|
"learning_rate": 0.000191687381070749, |
|
"loss": 1.3377, |
|
"step": 29100 |
|
}, |
|
{ |
|
"epoch": 0.8703311139641119, |
|
"grad_norm": 5.261173248291016, |
|
"learning_rate": 0.00019167309527483016, |
|
"loss": 1.3673, |
|
"step": 29150 |
|
}, |
|
{ |
|
"epoch": 0.8718239632161944, |
|
"grad_norm": 5.18539571762085, |
|
"learning_rate": 0.00019165880947891132, |
|
"loss": 1.358, |
|
"step": 29200 |
|
}, |
|
{ |
|
"epoch": 0.873316812468277, |
|
"grad_norm": 4.789880275726318, |
|
"learning_rate": 0.0001916445236829925, |
|
"loss": 1.3426, |
|
"step": 29250 |
|
}, |
|
{ |
|
"epoch": 0.8748096617203595, |
|
"grad_norm": 3.2945287227630615, |
|
"learning_rate": 0.00019163023788707365, |
|
"loss": 1.3308, |
|
"step": 29300 |
|
}, |
|
{ |
|
"epoch": 0.876302510972442, |
|
"grad_norm": 5.060371398925781, |
|
"learning_rate": 0.00019161595209115482, |
|
"loss": 1.3497, |
|
"step": 29350 |
|
}, |
|
{ |
|
"epoch": 0.8777953602245245, |
|
"grad_norm": 4.771635055541992, |
|
"learning_rate": 0.00019160166629523598, |
|
"loss": 1.3517, |
|
"step": 29400 |
|
}, |
|
{ |
|
"epoch": 0.879288209476607, |
|
"grad_norm": 4.408042907714844, |
|
"learning_rate": 0.00019158738049931715, |
|
"loss": 1.3636, |
|
"step": 29450 |
|
}, |
|
{ |
|
"epoch": 0.8807810587286896, |
|
"grad_norm": 5.183899879455566, |
|
"learning_rate": 0.0001915730947033983, |
|
"loss": 1.3569, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 0.8822739079807721, |
|
"grad_norm": 4.176016807556152, |
|
"learning_rate": 0.00019155880890747948, |
|
"loss": 1.3659, |
|
"step": 29550 |
|
}, |
|
{ |
|
"epoch": 0.8837667572328546, |
|
"grad_norm": 4.783830165863037, |
|
"learning_rate": 0.00019154452311156064, |
|
"loss": 1.368, |
|
"step": 29600 |
|
}, |
|
{ |
|
"epoch": 0.8852596064849372, |
|
"grad_norm": 3.7985739707946777, |
|
"learning_rate": 0.0001915302373156418, |
|
"loss": 1.4091, |
|
"step": 29650 |
|
}, |
|
{ |
|
"epoch": 0.8867524557370197, |
|
"grad_norm": 4.401581287384033, |
|
"learning_rate": 0.00019151595151972297, |
|
"loss": 1.3613, |
|
"step": 29700 |
|
}, |
|
{ |
|
"epoch": 0.8882453049891021, |
|
"grad_norm": 6.214754581451416, |
|
"learning_rate": 0.00019150166572380414, |
|
"loss": 1.4077, |
|
"step": 29750 |
|
}, |
|
{ |
|
"epoch": 0.8897381542411847, |
|
"grad_norm": 4.197826862335205, |
|
"learning_rate": 0.0001914873799278853, |
|
"loss": 1.347, |
|
"step": 29800 |
|
}, |
|
{ |
|
"epoch": 0.8912310034932672, |
|
"grad_norm": 3.1535027027130127, |
|
"learning_rate": 0.0001914730941319665, |
|
"loss": 1.3452, |
|
"step": 29850 |
|
}, |
|
{ |
|
"epoch": 0.8927238527453498, |
|
"grad_norm": 4.618538856506348, |
|
"learning_rate": 0.00019145880833604763, |
|
"loss": 1.3417, |
|
"step": 29900 |
|
}, |
|
{ |
|
"epoch": 0.8942167019974323, |
|
"grad_norm": 3.913506031036377, |
|
"learning_rate": 0.00019144452254012882, |
|
"loss": 1.3503, |
|
"step": 29950 |
|
}, |
|
{ |
|
"epoch": 0.8957095512495148, |
|
"grad_norm": 4.013761043548584, |
|
"learning_rate": 0.00019143023674420996, |
|
"loss": 1.4109, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.8972024005015974, |
|
"grad_norm": 4.6257758140563965, |
|
"learning_rate": 0.00019141595094829115, |
|
"loss": 1.3679, |
|
"step": 30050 |
|
}, |
|
{ |
|
"epoch": 0.8986952497536799, |
|
"grad_norm": 4.422520637512207, |
|
"learning_rate": 0.0001914016651523723, |
|
"loss": 1.3893, |
|
"step": 30100 |
|
}, |
|
{ |
|
"epoch": 0.9001880990057624, |
|
"grad_norm": 4.911538124084473, |
|
"learning_rate": 0.00019138737935645348, |
|
"loss": 1.3346, |
|
"step": 30150 |
|
}, |
|
{ |
|
"epoch": 0.901680948257845, |
|
"grad_norm": 6.410045623779297, |
|
"learning_rate": 0.00019137309356053464, |
|
"loss": 1.3651, |
|
"step": 30200 |
|
}, |
|
{ |
|
"epoch": 0.9031737975099274, |
|
"grad_norm": 3.8565444946289062, |
|
"learning_rate": 0.0001913588077646158, |
|
"loss": 1.3436, |
|
"step": 30250 |
|
}, |
|
{ |
|
"epoch": 0.9046666467620099, |
|
"grad_norm": 4.632288455963135, |
|
"learning_rate": 0.00019134452196869697, |
|
"loss": 1.3858, |
|
"step": 30300 |
|
}, |
|
{ |
|
"epoch": 0.9061594960140925, |
|
"grad_norm": 5.382070064544678, |
|
"learning_rate": 0.00019133023617277814, |
|
"loss": 1.3872, |
|
"step": 30350 |
|
}, |
|
{ |
|
"epoch": 0.907652345266175, |
|
"grad_norm": 4.425014019012451, |
|
"learning_rate": 0.0001913159503768593, |
|
"loss": 1.4488, |
|
"step": 30400 |
|
}, |
|
{ |
|
"epoch": 0.9091451945182576, |
|
"grad_norm": 3.7266223430633545, |
|
"learning_rate": 0.00019130166458094047, |
|
"loss": 1.376, |
|
"step": 30450 |
|
}, |
|
{ |
|
"epoch": 0.9106380437703401, |
|
"grad_norm": 7.25547981262207, |
|
"learning_rate": 0.00019128737878502163, |
|
"loss": 1.3488, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 0.9121308930224226, |
|
"grad_norm": 5.824578285217285, |
|
"learning_rate": 0.0001912730929891028, |
|
"loss": 1.4001, |
|
"step": 30550 |
|
}, |
|
{ |
|
"epoch": 0.9136237422745052, |
|
"grad_norm": 5.808544635772705, |
|
"learning_rate": 0.00019125880719318396, |
|
"loss": 1.3132, |
|
"step": 30600 |
|
}, |
|
{ |
|
"epoch": 0.9151165915265876, |
|
"grad_norm": 4.343785285949707, |
|
"learning_rate": 0.00019124452139726515, |
|
"loss": 1.3592, |
|
"step": 30650 |
|
}, |
|
{ |
|
"epoch": 0.9166094407786701, |
|
"grad_norm": 5.0626325607299805, |
|
"learning_rate": 0.0001912302356013463, |
|
"loss": 1.4418, |
|
"step": 30700 |
|
}, |
|
{ |
|
"epoch": 0.9181022900307527, |
|
"grad_norm": 4.336055755615234, |
|
"learning_rate": 0.00019121594980542748, |
|
"loss": 1.3624, |
|
"step": 30750 |
|
}, |
|
{ |
|
"epoch": 0.9195951392828352, |
|
"grad_norm": 6.215260982513428, |
|
"learning_rate": 0.00019120166400950862, |
|
"loss": 1.4053, |
|
"step": 30800 |
|
}, |
|
{ |
|
"epoch": 0.9210879885349177, |
|
"grad_norm": 4.496364116668701, |
|
"learning_rate": 0.0001911873782135898, |
|
"loss": 1.4117, |
|
"step": 30850 |
|
}, |
|
{ |
|
"epoch": 0.9225808377870003, |
|
"grad_norm": 4.023138046264648, |
|
"learning_rate": 0.00019117309241767095, |
|
"loss": 1.3783, |
|
"step": 30900 |
|
}, |
|
{ |
|
"epoch": 0.9240736870390828, |
|
"grad_norm": 3.8177783489227295, |
|
"learning_rate": 0.00019115880662175214, |
|
"loss": 1.3127, |
|
"step": 30950 |
|
}, |
|
{ |
|
"epoch": 0.9255665362911654, |
|
"grad_norm": 3.893087387084961, |
|
"learning_rate": 0.0001911445208258333, |
|
"loss": 1.3359, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 0.9270593855432478, |
|
"grad_norm": 3.9967384338378906, |
|
"learning_rate": 0.00019113023502991447, |
|
"loss": 1.3897, |
|
"step": 31050 |
|
}, |
|
{ |
|
"epoch": 0.9285522347953303, |
|
"grad_norm": 3.2351789474487305, |
|
"learning_rate": 0.00019111594923399563, |
|
"loss": 1.3406, |
|
"step": 31100 |
|
}, |
|
{ |
|
"epoch": 0.9300450840474129, |
|
"grad_norm": 4.571500778198242, |
|
"learning_rate": 0.0001911016634380768, |
|
"loss": 1.3783, |
|
"step": 31150 |
|
}, |
|
{ |
|
"epoch": 0.9315379332994954, |
|
"grad_norm": 6.241518020629883, |
|
"learning_rate": 0.00019108737764215796, |
|
"loss": 1.3659, |
|
"step": 31200 |
|
}, |
|
{ |
|
"epoch": 0.9330307825515779, |
|
"grad_norm": 3.2547826766967773, |
|
"learning_rate": 0.00019107309184623913, |
|
"loss": 1.3137, |
|
"step": 31250 |
|
}, |
|
{ |
|
"epoch": 0.9345236318036605, |
|
"grad_norm": 5.386812686920166, |
|
"learning_rate": 0.0001910588060503203, |
|
"loss": 1.3974, |
|
"step": 31300 |
|
}, |
|
{ |
|
"epoch": 0.936016481055743, |
|
"grad_norm": 4.239005088806152, |
|
"learning_rate": 0.00019104452025440146, |
|
"loss": 1.3789, |
|
"step": 31350 |
|
}, |
|
{ |
|
"epoch": 0.9375093303078255, |
|
"grad_norm": 3.7252326011657715, |
|
"learning_rate": 0.00019103023445848262, |
|
"loss": 1.3622, |
|
"step": 31400 |
|
}, |
|
{ |
|
"epoch": 0.9390021795599081, |
|
"grad_norm": 5.890969276428223, |
|
"learning_rate": 0.00019101594866256381, |
|
"loss": 1.3905, |
|
"step": 31450 |
|
}, |
|
{ |
|
"epoch": 0.9404950288119905, |
|
"grad_norm": 5.796853542327881, |
|
"learning_rate": 0.00019100166286664495, |
|
"loss": 1.3797, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 0.9419878780640731, |
|
"grad_norm": 4.130026817321777, |
|
"learning_rate": 0.00019098737707072614, |
|
"loss": 1.3365, |
|
"step": 31550 |
|
}, |
|
{ |
|
"epoch": 0.9434807273161556, |
|
"grad_norm": 3.65081524848938, |
|
"learning_rate": 0.00019097309127480728, |
|
"loss": 1.4347, |
|
"step": 31600 |
|
}, |
|
{ |
|
"epoch": 0.9449735765682381, |
|
"grad_norm": 4.91404390335083, |
|
"learning_rate": 0.00019095880547888847, |
|
"loss": 1.4056, |
|
"step": 31650 |
|
}, |
|
{ |
|
"epoch": 0.9464664258203207, |
|
"grad_norm": 4.7272114753723145, |
|
"learning_rate": 0.00019094451968296964, |
|
"loss": 1.2828, |
|
"step": 31700 |
|
}, |
|
{ |
|
"epoch": 0.9479592750724032, |
|
"grad_norm": 5.111660957336426, |
|
"learning_rate": 0.0001909302338870508, |
|
"loss": 1.3486, |
|
"step": 31750 |
|
}, |
|
{ |
|
"epoch": 0.9494521243244857, |
|
"grad_norm": 3.1206741333007812, |
|
"learning_rate": 0.00019091594809113197, |
|
"loss": 1.3416, |
|
"step": 31800 |
|
}, |
|
{ |
|
"epoch": 0.9509449735765683, |
|
"grad_norm": 4.359163761138916, |
|
"learning_rate": 0.00019090166229521313, |
|
"loss": 1.3471, |
|
"step": 31850 |
|
}, |
|
{ |
|
"epoch": 0.9524378228286507, |
|
"grad_norm": 4.667807102203369, |
|
"learning_rate": 0.0001908873764992943, |
|
"loss": 1.4042, |
|
"step": 31900 |
|
}, |
|
{ |
|
"epoch": 0.9539306720807332, |
|
"grad_norm": 4.410369873046875, |
|
"learning_rate": 0.00019087309070337546, |
|
"loss": 1.3909, |
|
"step": 31950 |
|
}, |
|
{ |
|
"epoch": 0.9554235213328158, |
|
"grad_norm": 4.987631320953369, |
|
"learning_rate": 0.00019085880490745663, |
|
"loss": 1.3548, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.9569163705848983, |
|
"grad_norm": 4.66121768951416, |
|
"learning_rate": 0.0001908445191115378, |
|
"loss": 1.3753, |
|
"step": 32050 |
|
}, |
|
{ |
|
"epoch": 0.9584092198369809, |
|
"grad_norm": 6.010061740875244, |
|
"learning_rate": 0.00019083023331561896, |
|
"loss": 1.4128, |
|
"step": 32100 |
|
}, |
|
{ |
|
"epoch": 0.9599020690890634, |
|
"grad_norm": 3.118246078491211, |
|
"learning_rate": 0.00019081594751970012, |
|
"loss": 1.3625, |
|
"step": 32150 |
|
}, |
|
{ |
|
"epoch": 0.9613949183411459, |
|
"grad_norm": 3.821732759475708, |
|
"learning_rate": 0.00019080166172378128, |
|
"loss": 1.3691, |
|
"step": 32200 |
|
}, |
|
{ |
|
"epoch": 0.9628877675932285, |
|
"grad_norm": 5.321105480194092, |
|
"learning_rate": 0.00019078737592786245, |
|
"loss": 1.3499, |
|
"step": 32250 |
|
}, |
|
{ |
|
"epoch": 0.964380616845311, |
|
"grad_norm": 5.963992595672607, |
|
"learning_rate": 0.00019077309013194361, |
|
"loss": 1.3145, |
|
"step": 32300 |
|
}, |
|
{ |
|
"epoch": 0.9658734660973934, |
|
"grad_norm": 4.071386337280273, |
|
"learning_rate": 0.00019075880433602478, |
|
"loss": 1.3215, |
|
"step": 32350 |
|
}, |
|
{ |
|
"epoch": 0.967366315349476, |
|
"grad_norm": 3.7641048431396484, |
|
"learning_rate": 0.00019074451854010594, |
|
"loss": 1.3846, |
|
"step": 32400 |
|
}, |
|
{ |
|
"epoch": 0.9688591646015585, |
|
"grad_norm": 5.985085487365723, |
|
"learning_rate": 0.0001907302327441871, |
|
"loss": 1.3383, |
|
"step": 32450 |
|
}, |
|
{ |
|
"epoch": 0.970352013853641, |
|
"grad_norm": 6.602528095245361, |
|
"learning_rate": 0.0001907159469482683, |
|
"loss": 1.4195, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 0.9718448631057236, |
|
"grad_norm": 5.370469570159912, |
|
"learning_rate": 0.00019070166115234944, |
|
"loss": 1.3485, |
|
"step": 32550 |
|
}, |
|
{ |
|
"epoch": 0.9733377123578061, |
|
"grad_norm": 4.588457107543945, |
|
"learning_rate": 0.00019068737535643063, |
|
"loss": 1.3896, |
|
"step": 32600 |
|
}, |
|
{ |
|
"epoch": 0.9748305616098887, |
|
"grad_norm": 6.389341354370117, |
|
"learning_rate": 0.00019067308956051177, |
|
"loss": 1.3118, |
|
"step": 32650 |
|
}, |
|
{ |
|
"epoch": 0.9763234108619712, |
|
"grad_norm": 4.051975250244141, |
|
"learning_rate": 0.00019065880376459296, |
|
"loss": 1.4065, |
|
"step": 32700 |
|
}, |
|
{ |
|
"epoch": 0.9778162601140536, |
|
"grad_norm": 5.474813938140869, |
|
"learning_rate": 0.0001906445179686741, |
|
"loss": 1.3967, |
|
"step": 32750 |
|
}, |
|
{ |
|
"epoch": 0.9793091093661362, |
|
"grad_norm": 4.033454895019531, |
|
"learning_rate": 0.0001906302321727553, |
|
"loss": 1.3212, |
|
"step": 32800 |
|
}, |
|
{ |
|
"epoch": 0.9808019586182187, |
|
"grad_norm": 4.46946382522583, |
|
"learning_rate": 0.00019061594637683645, |
|
"loss": 1.3917, |
|
"step": 32850 |
|
}, |
|
{ |
|
"epoch": 0.9822948078703012, |
|
"grad_norm": 4.98955774307251, |
|
"learning_rate": 0.00019060166058091762, |
|
"loss": 1.3814, |
|
"step": 32900 |
|
}, |
|
{ |
|
"epoch": 0.9837876571223838, |
|
"grad_norm": 4.879435062408447, |
|
"learning_rate": 0.00019058737478499878, |
|
"loss": 1.3486, |
|
"step": 32950 |
|
}, |
|
{ |
|
"epoch": 0.9852805063744663, |
|
"grad_norm": 4.0941925048828125, |
|
"learning_rate": 0.00019057308898907995, |
|
"loss": 1.3289, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 0.9867733556265488, |
|
"grad_norm": 5.2992706298828125, |
|
"learning_rate": 0.0001905588031931611, |
|
"loss": 1.3458, |
|
"step": 33050 |
|
}, |
|
{ |
|
"epoch": 0.9882662048786314, |
|
"grad_norm": 4.284411430358887, |
|
"learning_rate": 0.00019054451739724228, |
|
"loss": 1.3932, |
|
"step": 33100 |
|
}, |
|
{ |
|
"epoch": 0.9897590541307139, |
|
"grad_norm": 4.969634532928467, |
|
"learning_rate": 0.00019053023160132344, |
|
"loss": 1.4044, |
|
"step": 33150 |
|
}, |
|
{ |
|
"epoch": 0.9912519033827965, |
|
"grad_norm": 5.532953262329102, |
|
"learning_rate": 0.0001905159458054046, |
|
"loss": 1.3658, |
|
"step": 33200 |
|
}, |
|
{ |
|
"epoch": 0.9927447526348789, |
|
"grad_norm": 4.402670383453369, |
|
"learning_rate": 0.00019050166000948577, |
|
"loss": 1.343, |
|
"step": 33250 |
|
}, |
|
{ |
|
"epoch": 0.9942376018869614, |
|
"grad_norm": 4.458900451660156, |
|
"learning_rate": 0.00019048737421356696, |
|
"loss": 1.4002, |
|
"step": 33300 |
|
}, |
|
{ |
|
"epoch": 0.995730451139044, |
|
"grad_norm": 3.934340238571167, |
|
"learning_rate": 0.0001904730884176481, |
|
"loss": 1.413, |
|
"step": 33350 |
|
}, |
|
{ |
|
"epoch": 0.9972233003911265, |
|
"grad_norm": 4.274466514587402, |
|
"learning_rate": 0.0001904588026217293, |
|
"loss": 1.3552, |
|
"step": 33400 |
|
}, |
|
{ |
|
"epoch": 0.998716149643209, |
|
"grad_norm": 4.774006366729736, |
|
"learning_rate": 0.00019044451682581043, |
|
"loss": 1.3825, |
|
"step": 33450 |
|
}, |
|
{ |
|
"epoch": 1.0002089988952916, |
|
"grad_norm": 3.870762825012207, |
|
"learning_rate": 0.00019043023102989162, |
|
"loss": 1.3715, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 1.001701848147374, |
|
"grad_norm": 3.595686912536621, |
|
"learning_rate": 0.00019041594523397276, |
|
"loss": 1.3352, |
|
"step": 33550 |
|
}, |
|
{ |
|
"epoch": 1.0031946973994565, |
|
"grad_norm": 5.492071151733398, |
|
"learning_rate": 0.00019040165943805395, |
|
"loss": 1.2927, |
|
"step": 33600 |
|
}, |
|
{ |
|
"epoch": 1.004687546651539, |
|
"grad_norm": 5.381584167480469, |
|
"learning_rate": 0.00019038737364213511, |
|
"loss": 1.2545, |
|
"step": 33650 |
|
}, |
|
{ |
|
"epoch": 1.0061803959036217, |
|
"grad_norm": 5.983656883239746, |
|
"learning_rate": 0.00019037308784621628, |
|
"loss": 1.2393, |
|
"step": 33700 |
|
}, |
|
{ |
|
"epoch": 1.0076732451557042, |
|
"grad_norm": 4.956115245819092, |
|
"learning_rate": 0.00019035880205029744, |
|
"loss": 1.2931, |
|
"step": 33750 |
|
}, |
|
{ |
|
"epoch": 1.0091660944077867, |
|
"grad_norm": 4.604572296142578, |
|
"learning_rate": 0.0001903445162543786, |
|
"loss": 1.2786, |
|
"step": 33800 |
|
}, |
|
{ |
|
"epoch": 1.0106589436598692, |
|
"grad_norm": 5.043067932128906, |
|
"learning_rate": 0.00019033023045845977, |
|
"loss": 1.2933, |
|
"step": 33850 |
|
}, |
|
{ |
|
"epoch": 1.0121517929119517, |
|
"grad_norm": 4.53336238861084, |
|
"learning_rate": 0.00019031594466254094, |
|
"loss": 1.3114, |
|
"step": 33900 |
|
}, |
|
{ |
|
"epoch": 1.0136446421640344, |
|
"grad_norm": 5.106605052947998, |
|
"learning_rate": 0.0001903016588666221, |
|
"loss": 1.2994, |
|
"step": 33950 |
|
}, |
|
{ |
|
"epoch": 1.0151374914161169, |
|
"grad_norm": 5.796351909637451, |
|
"learning_rate": 0.00019028737307070327, |
|
"loss": 1.2555, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 1.0166303406681994, |
|
"grad_norm": 4.970977306365967, |
|
"learning_rate": 0.00019027308727478443, |
|
"loss": 1.3343, |
|
"step": 34050 |
|
}, |
|
{ |
|
"epoch": 1.0181231899202818, |
|
"grad_norm": 4.233397960662842, |
|
"learning_rate": 0.00019025880147886562, |
|
"loss": 1.2485, |
|
"step": 34100 |
|
}, |
|
{ |
|
"epoch": 1.0196160391723643, |
|
"grad_norm": 4.012045383453369, |
|
"learning_rate": 0.00019024451568294676, |
|
"loss": 1.2816, |
|
"step": 34150 |
|
}, |
|
{ |
|
"epoch": 1.0211088884244468, |
|
"grad_norm": 4.715073585510254, |
|
"learning_rate": 0.00019023022988702795, |
|
"loss": 1.2541, |
|
"step": 34200 |
|
}, |
|
{ |
|
"epoch": 1.0226017376765295, |
|
"grad_norm": 3.7285947799682617, |
|
"learning_rate": 0.0001902159440911091, |
|
"loss": 1.3094, |
|
"step": 34250 |
|
}, |
|
{ |
|
"epoch": 1.024094586928612, |
|
"grad_norm": 4.012038707733154, |
|
"learning_rate": 0.00019020165829519028, |
|
"loss": 1.2791, |
|
"step": 34300 |
|
}, |
|
{ |
|
"epoch": 1.0255874361806945, |
|
"grad_norm": 4.04899263381958, |
|
"learning_rate": 0.00019018737249927142, |
|
"loss": 1.3085, |
|
"step": 34350 |
|
}, |
|
{ |
|
"epoch": 1.027080285432777, |
|
"grad_norm": 4.380834102630615, |
|
"learning_rate": 0.0001901730867033526, |
|
"loss": 1.3212, |
|
"step": 34400 |
|
}, |
|
{ |
|
"epoch": 1.0285731346848594, |
|
"grad_norm": 5.275148868560791, |
|
"learning_rate": 0.00019015880090743378, |
|
"loss": 1.2749, |
|
"step": 34450 |
|
}, |
|
{ |
|
"epoch": 1.0300659839369422, |
|
"grad_norm": 4.162272930145264, |
|
"learning_rate": 0.00019014451511151494, |
|
"loss": 1.296, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 1.0315588331890246, |
|
"grad_norm": 4.128006935119629, |
|
"learning_rate": 0.0001901302293155961, |
|
"loss": 1.2547, |
|
"step": 34550 |
|
}, |
|
{ |
|
"epoch": 1.0330516824411071, |
|
"grad_norm": 3.930121421813965, |
|
"learning_rate": 0.00019011594351967727, |
|
"loss": 1.2831, |
|
"step": 34600 |
|
}, |
|
{ |
|
"epoch": 1.0345445316931896, |
|
"grad_norm": 4.515873908996582, |
|
"learning_rate": 0.00019010165772375843, |
|
"loss": 1.2747, |
|
"step": 34650 |
|
}, |
|
{ |
|
"epoch": 1.036037380945272, |
|
"grad_norm": 4.484467506408691, |
|
"learning_rate": 0.0001900873719278396, |
|
"loss": 1.289, |
|
"step": 34700 |
|
}, |
|
{ |
|
"epoch": 1.0375302301973546, |
|
"grad_norm": 3.8937742710113525, |
|
"learning_rate": 0.00019007308613192076, |
|
"loss": 1.2939, |
|
"step": 34750 |
|
}, |
|
{ |
|
"epoch": 1.0390230794494373, |
|
"grad_norm": 4.367059230804443, |
|
"learning_rate": 0.00019005880033600193, |
|
"loss": 1.2522, |
|
"step": 34800 |
|
}, |
|
{ |
|
"epoch": 1.0405159287015198, |
|
"grad_norm": 5.143396854400635, |
|
"learning_rate": 0.0001900445145400831, |
|
"loss": 1.2996, |
|
"step": 34850 |
|
}, |
|
{ |
|
"epoch": 1.0420087779536022, |
|
"grad_norm": 5.060800552368164, |
|
"learning_rate": 0.00019003022874416426, |
|
"loss": 1.3122, |
|
"step": 34900 |
|
}, |
|
{ |
|
"epoch": 1.0435016272056847, |
|
"grad_norm": 4.775914192199707, |
|
"learning_rate": 0.00019001594294824542, |
|
"loss": 1.2595, |
|
"step": 34950 |
|
}, |
|
{ |
|
"epoch": 1.0449944764577672, |
|
"grad_norm": 5.256932258605957, |
|
"learning_rate": 0.0001900016571523266, |
|
"loss": 1.3148, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 1.04648732570985, |
|
"grad_norm": 4.253088474273682, |
|
"learning_rate": 0.00018998737135640775, |
|
"loss": 1.2575, |
|
"step": 35050 |
|
}, |
|
{ |
|
"epoch": 1.0479801749619324, |
|
"grad_norm": 4.784812927246094, |
|
"learning_rate": 0.00018997308556048892, |
|
"loss": 1.2911, |
|
"step": 35100 |
|
}, |
|
{ |
|
"epoch": 1.049473024214015, |
|
"grad_norm": 3.9015183448791504, |
|
"learning_rate": 0.0001899587997645701, |
|
"loss": 1.2647, |
|
"step": 35150 |
|
}, |
|
{ |
|
"epoch": 1.0509658734660974, |
|
"grad_norm": 4.729675769805908, |
|
"learning_rate": 0.00018994451396865125, |
|
"loss": 1.2876, |
|
"step": 35200 |
|
}, |
|
{ |
|
"epoch": 1.0524587227181799, |
|
"grad_norm": 4.921074867248535, |
|
"learning_rate": 0.00018993022817273244, |
|
"loss": 1.2956, |
|
"step": 35250 |
|
}, |
|
{ |
|
"epoch": 1.0539515719702623, |
|
"grad_norm": 4.466576099395752, |
|
"learning_rate": 0.00018991594237681357, |
|
"loss": 1.2938, |
|
"step": 35300 |
|
}, |
|
{ |
|
"epoch": 1.055444421222345, |
|
"grad_norm": 4.142183780670166, |
|
"learning_rate": 0.00018990165658089477, |
|
"loss": 1.282, |
|
"step": 35350 |
|
}, |
|
{ |
|
"epoch": 1.0569372704744275, |
|
"grad_norm": 4.372234344482422, |
|
"learning_rate": 0.0001898873707849759, |
|
"loss": 1.2793, |
|
"step": 35400 |
|
}, |
|
{ |
|
"epoch": 1.05843011972651, |
|
"grad_norm": 5.349823474884033, |
|
"learning_rate": 0.0001898730849890571, |
|
"loss": 1.3678, |
|
"step": 35450 |
|
}, |
|
{ |
|
"epoch": 1.0599229689785925, |
|
"grad_norm": 4.57612419128418, |
|
"learning_rate": 0.00018985879919313826, |
|
"loss": 1.3256, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 1.061415818230675, |
|
"grad_norm": 5.3342156410217285, |
|
"learning_rate": 0.00018984451339721942, |
|
"loss": 1.2897, |
|
"step": 35550 |
|
}, |
|
{ |
|
"epoch": 1.0629086674827577, |
|
"grad_norm": 5.361871242523193, |
|
"learning_rate": 0.0001898302276013006, |
|
"loss": 1.2933, |
|
"step": 35600 |
|
}, |
|
{ |
|
"epoch": 1.0644015167348402, |
|
"grad_norm": 3.6657068729400635, |
|
"learning_rate": 0.00018981594180538175, |
|
"loss": 1.3359, |
|
"step": 35650 |
|
}, |
|
{ |
|
"epoch": 1.0658943659869227, |
|
"grad_norm": 4.428374290466309, |
|
"learning_rate": 0.00018980165600946292, |
|
"loss": 1.3278, |
|
"step": 35700 |
|
}, |
|
{ |
|
"epoch": 1.0673872152390051, |
|
"grad_norm": 5.079537391662598, |
|
"learning_rate": 0.00018978737021354408, |
|
"loss": 1.2699, |
|
"step": 35750 |
|
}, |
|
{ |
|
"epoch": 1.0688800644910876, |
|
"grad_norm": 5.303153038024902, |
|
"learning_rate": 0.00018977308441762525, |
|
"loss": 1.2985, |
|
"step": 35800 |
|
}, |
|
{ |
|
"epoch": 1.0703729137431701, |
|
"grad_norm": 4.319901943206787, |
|
"learning_rate": 0.0001897587986217064, |
|
"loss": 1.2661, |
|
"step": 35850 |
|
}, |
|
{ |
|
"epoch": 1.0718657629952528, |
|
"grad_norm": 6.0582451820373535, |
|
"learning_rate": 0.00018974451282578758, |
|
"loss": 1.2837, |
|
"step": 35900 |
|
}, |
|
{ |
|
"epoch": 1.0733586122473353, |
|
"grad_norm": 5.738245964050293, |
|
"learning_rate": 0.00018973022702986877, |
|
"loss": 1.3075, |
|
"step": 35950 |
|
}, |
|
{ |
|
"epoch": 1.0748514614994178, |
|
"grad_norm": 4.602933883666992, |
|
"learning_rate": 0.0001897159412339499, |
|
"loss": 1.2585, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 1.0763443107515003, |
|
"grad_norm": 4.273153305053711, |
|
"learning_rate": 0.0001897016554380311, |
|
"loss": 1.3382, |
|
"step": 36050 |
|
}, |
|
{ |
|
"epoch": 1.0778371600035828, |
|
"grad_norm": 4.225873947143555, |
|
"learning_rate": 0.00018968736964211224, |
|
"loss": 1.2964, |
|
"step": 36100 |
|
}, |
|
{ |
|
"epoch": 1.0793300092556652, |
|
"grad_norm": 4.828727722167969, |
|
"learning_rate": 0.00018967308384619343, |
|
"loss": 1.3734, |
|
"step": 36150 |
|
}, |
|
{ |
|
"epoch": 1.080822858507748, |
|
"grad_norm": 3.7251532077789307, |
|
"learning_rate": 0.00018965879805027457, |
|
"loss": 1.2706, |
|
"step": 36200 |
|
}, |
|
{ |
|
"epoch": 1.0823157077598304, |
|
"grad_norm": 4.92055606842041, |
|
"learning_rate": 0.00018964451225435576, |
|
"loss": 1.3187, |
|
"step": 36250 |
|
}, |
|
{ |
|
"epoch": 1.083808557011913, |
|
"grad_norm": 4.827970027923584, |
|
"learning_rate": 0.00018963022645843692, |
|
"loss": 1.2574, |
|
"step": 36300 |
|
}, |
|
{ |
|
"epoch": 1.0853014062639954, |
|
"grad_norm": 4.742151737213135, |
|
"learning_rate": 0.00018961594066251809, |
|
"loss": 1.3076, |
|
"step": 36350 |
|
}, |
|
{ |
|
"epoch": 1.086794255516078, |
|
"grad_norm": 3.70794939994812, |
|
"learning_rate": 0.00018960165486659925, |
|
"loss": 1.3075, |
|
"step": 36400 |
|
}, |
|
{ |
|
"epoch": 1.0882871047681606, |
|
"grad_norm": 4.062809944152832, |
|
"learning_rate": 0.00018958736907068042, |
|
"loss": 1.2628, |
|
"step": 36450 |
|
}, |
|
{ |
|
"epoch": 1.089779954020243, |
|
"grad_norm": 5.072466850280762, |
|
"learning_rate": 0.00018957308327476158, |
|
"loss": 1.3278, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 1.0912728032723256, |
|
"grad_norm": 4.530171871185303, |
|
"learning_rate": 0.00018955879747884274, |
|
"loss": 1.3233, |
|
"step": 36550 |
|
}, |
|
{ |
|
"epoch": 1.092765652524408, |
|
"grad_norm": 3.9391825199127197, |
|
"learning_rate": 0.0001895445116829239, |
|
"loss": 1.2973, |
|
"step": 36600 |
|
}, |
|
{ |
|
"epoch": 1.0942585017764905, |
|
"grad_norm": 6.774008274078369, |
|
"learning_rate": 0.00018953022588700507, |
|
"loss": 1.2801, |
|
"step": 36650 |
|
}, |
|
{ |
|
"epoch": 1.0957513510285732, |
|
"grad_norm": 4.565464496612549, |
|
"learning_rate": 0.00018951594009108624, |
|
"loss": 1.3718, |
|
"step": 36700 |
|
}, |
|
{ |
|
"epoch": 1.0972442002806557, |
|
"grad_norm": 5.68241548538208, |
|
"learning_rate": 0.00018950165429516743, |
|
"loss": 1.2928, |
|
"step": 36750 |
|
}, |
|
{ |
|
"epoch": 1.0987370495327382, |
|
"grad_norm": 4.235448360443115, |
|
"learning_rate": 0.00018948736849924857, |
|
"loss": 1.3097, |
|
"step": 36800 |
|
}, |
|
{ |
|
"epoch": 1.1002298987848207, |
|
"grad_norm": 4.672369003295898, |
|
"learning_rate": 0.00018947308270332976, |
|
"loss": 1.2875, |
|
"step": 36850 |
|
}, |
|
{ |
|
"epoch": 1.1017227480369032, |
|
"grad_norm": 6.449750900268555, |
|
"learning_rate": 0.0001894587969074109, |
|
"loss": 1.2823, |
|
"step": 36900 |
|
}, |
|
{ |
|
"epoch": 1.1032155972889857, |
|
"grad_norm": 5.093149662017822, |
|
"learning_rate": 0.0001894445111114921, |
|
"loss": 1.315, |
|
"step": 36950 |
|
}, |
|
{ |
|
"epoch": 1.1047084465410684, |
|
"grad_norm": 5.433828353881836, |
|
"learning_rate": 0.00018943022531557323, |
|
"loss": 1.2781, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 1.1062012957931509, |
|
"grad_norm": 5.176681995391846, |
|
"learning_rate": 0.00018941593951965442, |
|
"loss": 1.2652, |
|
"step": 37050 |
|
}, |
|
{ |
|
"epoch": 1.1076941450452333, |
|
"grad_norm": 4.09026575088501, |
|
"learning_rate": 0.00018940165372373558, |
|
"loss": 1.3045, |
|
"step": 37100 |
|
}, |
|
{ |
|
"epoch": 1.1091869942973158, |
|
"grad_norm": 5.223085880279541, |
|
"learning_rate": 0.00018938736792781675, |
|
"loss": 1.3009, |
|
"step": 37150 |
|
}, |
|
{ |
|
"epoch": 1.1106798435493983, |
|
"grad_norm": 5.105752944946289, |
|
"learning_rate": 0.0001893730821318979, |
|
"loss": 1.2915, |
|
"step": 37200 |
|
}, |
|
{ |
|
"epoch": 1.1121726928014808, |
|
"grad_norm": 3.945962905883789, |
|
"learning_rate": 0.00018935879633597908, |
|
"loss": 1.341, |
|
"step": 37250 |
|
}, |
|
{ |
|
"epoch": 1.1136655420535635, |
|
"grad_norm": 4.856802463531494, |
|
"learning_rate": 0.00018934451054006024, |
|
"loss": 1.3318, |
|
"step": 37300 |
|
}, |
|
{ |
|
"epoch": 1.115158391305646, |
|
"grad_norm": 4.858597755432129, |
|
"learning_rate": 0.0001893302247441414, |
|
"loss": 1.2756, |
|
"step": 37350 |
|
}, |
|
{ |
|
"epoch": 1.1166512405577285, |
|
"grad_norm": 4.594838619232178, |
|
"learning_rate": 0.00018931593894822257, |
|
"loss": 1.2712, |
|
"step": 37400 |
|
}, |
|
{ |
|
"epoch": 1.118144089809811, |
|
"grad_norm": 4.745405673980713, |
|
"learning_rate": 0.00018930165315230374, |
|
"loss": 1.2878, |
|
"step": 37450 |
|
}, |
|
{ |
|
"epoch": 1.1196369390618934, |
|
"grad_norm": 4.112718105316162, |
|
"learning_rate": 0.0001892873673563849, |
|
"loss": 1.3337, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 1.1211297883139761, |
|
"grad_norm": 4.297769069671631, |
|
"learning_rate": 0.00018927308156046607, |
|
"loss": 1.3324, |
|
"step": 37550 |
|
}, |
|
{ |
|
"epoch": 1.1226226375660586, |
|
"grad_norm": 5.46763801574707, |
|
"learning_rate": 0.00018925879576454723, |
|
"loss": 1.276, |
|
"step": 37600 |
|
}, |
|
{ |
|
"epoch": 1.124115486818141, |
|
"grad_norm": 5.648976802825928, |
|
"learning_rate": 0.0001892445099686284, |
|
"loss": 1.3238, |
|
"step": 37650 |
|
}, |
|
{ |
|
"epoch": 1.1256083360702236, |
|
"grad_norm": 4.185100078582764, |
|
"learning_rate": 0.00018923022417270956, |
|
"loss": 1.2848, |
|
"step": 37700 |
|
}, |
|
{ |
|
"epoch": 1.127101185322306, |
|
"grad_norm": 4.676313877105713, |
|
"learning_rate": 0.00018921593837679072, |
|
"loss": 1.3038, |
|
"step": 37750 |
|
}, |
|
{ |
|
"epoch": 1.1285940345743888, |
|
"grad_norm": 4.426568508148193, |
|
"learning_rate": 0.00018920165258087192, |
|
"loss": 1.3343, |
|
"step": 37800 |
|
}, |
|
{ |
|
"epoch": 1.1300868838264713, |
|
"grad_norm": 4.887205600738525, |
|
"learning_rate": 0.00018918736678495305, |
|
"loss": 1.3456, |
|
"step": 37850 |
|
}, |
|
{ |
|
"epoch": 1.1315797330785538, |
|
"grad_norm": 5.455615043640137, |
|
"learning_rate": 0.00018917308098903424, |
|
"loss": 1.3258, |
|
"step": 37900 |
|
}, |
|
{ |
|
"epoch": 1.1330725823306362, |
|
"grad_norm": 5.676678657531738, |
|
"learning_rate": 0.00018915879519311538, |
|
"loss": 1.3702, |
|
"step": 37950 |
|
}, |
|
{ |
|
"epoch": 1.1345654315827187, |
|
"grad_norm": 3.9528415203094482, |
|
"learning_rate": 0.00018914450939719657, |
|
"loss": 1.3097, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 1.1360582808348014, |
|
"grad_norm": 4.538026332855225, |
|
"learning_rate": 0.0001891302236012777, |
|
"loss": 1.316, |
|
"step": 38050 |
|
}, |
|
{ |
|
"epoch": 1.137551130086884, |
|
"grad_norm": 4.46422815322876, |
|
"learning_rate": 0.0001891159378053589, |
|
"loss": 1.2479, |
|
"step": 38100 |
|
}, |
|
{ |
|
"epoch": 1.1390439793389664, |
|
"grad_norm": 4.823958396911621, |
|
"learning_rate": 0.00018910165200944007, |
|
"loss": 1.2616, |
|
"step": 38150 |
|
}, |
|
{ |
|
"epoch": 1.1405368285910489, |
|
"grad_norm": 5.7128167152404785, |
|
"learning_rate": 0.00018908736621352123, |
|
"loss": 1.2853, |
|
"step": 38200 |
|
}, |
|
{ |
|
"epoch": 1.1420296778431314, |
|
"grad_norm": 3.5420877933502197, |
|
"learning_rate": 0.0001890730804176024, |
|
"loss": 1.3008, |
|
"step": 38250 |
|
}, |
|
{ |
|
"epoch": 1.1435225270952138, |
|
"grad_norm": 4.617981910705566, |
|
"learning_rate": 0.00018905879462168356, |
|
"loss": 1.2888, |
|
"step": 38300 |
|
}, |
|
{ |
|
"epoch": 1.1450153763472963, |
|
"grad_norm": 4.542544364929199, |
|
"learning_rate": 0.00018904450882576473, |
|
"loss": 1.3009, |
|
"step": 38350 |
|
}, |
|
{ |
|
"epoch": 1.146508225599379, |
|
"grad_norm": 4.301428318023682, |
|
"learning_rate": 0.0001890302230298459, |
|
"loss": 1.2834, |
|
"step": 38400 |
|
}, |
|
{ |
|
"epoch": 1.1480010748514615, |
|
"grad_norm": 4.136764049530029, |
|
"learning_rate": 0.00018901593723392706, |
|
"loss": 1.3895, |
|
"step": 38450 |
|
}, |
|
{ |
|
"epoch": 1.149493924103544, |
|
"grad_norm": 5.59256649017334, |
|
"learning_rate": 0.00018900165143800822, |
|
"loss": 1.295, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 1.1509867733556265, |
|
"grad_norm": 3.4925365447998047, |
|
"learning_rate": 0.00018898736564208939, |
|
"loss": 1.3385, |
|
"step": 38550 |
|
}, |
|
{ |
|
"epoch": 1.152479622607709, |
|
"grad_norm": 4.884555816650391, |
|
"learning_rate": 0.00018897307984617058, |
|
"loss": 1.2776, |
|
"step": 38600 |
|
}, |
|
{ |
|
"epoch": 1.1539724718597917, |
|
"grad_norm": 3.866908550262451, |
|
"learning_rate": 0.00018895879405025171, |
|
"loss": 1.3173, |
|
"step": 38650 |
|
}, |
|
{ |
|
"epoch": 1.1554653211118742, |
|
"grad_norm": 4.8104939460754395, |
|
"learning_rate": 0.0001889445082543329, |
|
"loss": 1.2988, |
|
"step": 38700 |
|
}, |
|
{ |
|
"epoch": 1.1569581703639567, |
|
"grad_norm": 3.814675807952881, |
|
"learning_rate": 0.00018893022245841404, |
|
"loss": 1.348, |
|
"step": 38750 |
|
}, |
|
{ |
|
"epoch": 1.1584510196160391, |
|
"grad_norm": 5.720306873321533, |
|
"learning_rate": 0.00018891593666249524, |
|
"loss": 1.2844, |
|
"step": 38800 |
|
}, |
|
{ |
|
"epoch": 1.1599438688681216, |
|
"grad_norm": 4.06850528717041, |
|
"learning_rate": 0.00018890165086657637, |
|
"loss": 1.3514, |
|
"step": 38850 |
|
}, |
|
{ |
|
"epoch": 1.1614367181202043, |
|
"grad_norm": 6.193358898162842, |
|
"learning_rate": 0.00018888736507065757, |
|
"loss": 1.3587, |
|
"step": 38900 |
|
}, |
|
{ |
|
"epoch": 1.1629295673722868, |
|
"grad_norm": 4.8998212814331055, |
|
"learning_rate": 0.00018887307927473873, |
|
"loss": 1.317, |
|
"step": 38950 |
|
}, |
|
{ |
|
"epoch": 1.1644224166243693, |
|
"grad_norm": 4.4293107986450195, |
|
"learning_rate": 0.0001888587934788199, |
|
"loss": 1.3473, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 1.1659152658764518, |
|
"grad_norm": 6.795536994934082, |
|
"learning_rate": 0.00018884450768290106, |
|
"loss": 1.2793, |
|
"step": 39050 |
|
}, |
|
{ |
|
"epoch": 1.1674081151285343, |
|
"grad_norm": 4.158294200897217, |
|
"learning_rate": 0.00018883022188698222, |
|
"loss": 1.3175, |
|
"step": 39100 |
|
}, |
|
{ |
|
"epoch": 1.168900964380617, |
|
"grad_norm": 5.839204788208008, |
|
"learning_rate": 0.0001888159360910634, |
|
"loss": 1.2931, |
|
"step": 39150 |
|
}, |
|
{ |
|
"epoch": 1.1703938136326995, |
|
"grad_norm": 6.633917331695557, |
|
"learning_rate": 0.00018880165029514455, |
|
"loss": 1.3053, |
|
"step": 39200 |
|
}, |
|
{ |
|
"epoch": 1.171886662884782, |
|
"grad_norm": 4.409125328063965, |
|
"learning_rate": 0.00018878736449922572, |
|
"loss": 1.3074, |
|
"step": 39250 |
|
}, |
|
{ |
|
"epoch": 1.1733795121368644, |
|
"grad_norm": 4.820318698883057, |
|
"learning_rate": 0.00018877307870330688, |
|
"loss": 1.329, |
|
"step": 39300 |
|
}, |
|
{ |
|
"epoch": 1.174872361388947, |
|
"grad_norm": 5.104337215423584, |
|
"learning_rate": 0.00018875879290738805, |
|
"loss": 1.2628, |
|
"step": 39350 |
|
}, |
|
{ |
|
"epoch": 1.1763652106410294, |
|
"grad_norm": 5.449405670166016, |
|
"learning_rate": 0.00018874450711146924, |
|
"loss": 1.2672, |
|
"step": 39400 |
|
}, |
|
{ |
|
"epoch": 1.1778580598931119, |
|
"grad_norm": 5.3521504402160645, |
|
"learning_rate": 0.00018873022131555038, |
|
"loss": 1.3236, |
|
"step": 39450 |
|
}, |
|
{ |
|
"epoch": 1.1793509091451946, |
|
"grad_norm": 5.748175144195557, |
|
"learning_rate": 0.00018871593551963157, |
|
"loss": 1.284, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 1.180843758397277, |
|
"grad_norm": 4.418118953704834, |
|
"learning_rate": 0.0001887016497237127, |
|
"loss": 1.299, |
|
"step": 39550 |
|
}, |
|
{ |
|
"epoch": 1.1823366076493596, |
|
"grad_norm": 4.267107009887695, |
|
"learning_rate": 0.0001886873639277939, |
|
"loss": 1.3287, |
|
"step": 39600 |
|
}, |
|
{ |
|
"epoch": 1.183829456901442, |
|
"grad_norm": 4.607259273529053, |
|
"learning_rate": 0.00018867307813187503, |
|
"loss": 1.3087, |
|
"step": 39650 |
|
}, |
|
{ |
|
"epoch": 1.1853223061535245, |
|
"grad_norm": 4.254667282104492, |
|
"learning_rate": 0.00018865879233595623, |
|
"loss": 1.3015, |
|
"step": 39700 |
|
}, |
|
{ |
|
"epoch": 1.1868151554056072, |
|
"grad_norm": 5.307114124298096, |
|
"learning_rate": 0.0001886445065400374, |
|
"loss": 1.2802, |
|
"step": 39750 |
|
}, |
|
{ |
|
"epoch": 1.1883080046576897, |
|
"grad_norm": 3.9305639266967773, |
|
"learning_rate": 0.00018863022074411856, |
|
"loss": 1.2975, |
|
"step": 39800 |
|
}, |
|
{ |
|
"epoch": 1.1898008539097722, |
|
"grad_norm": 4.650544166564941, |
|
"learning_rate": 0.00018861593494819972, |
|
"loss": 1.3239, |
|
"step": 39850 |
|
}, |
|
{ |
|
"epoch": 1.1912937031618547, |
|
"grad_norm": 4.182717800140381, |
|
"learning_rate": 0.00018860164915228086, |
|
"loss": 1.3948, |
|
"step": 39900 |
|
}, |
|
{ |
|
"epoch": 1.1927865524139372, |
|
"grad_norm": 5.322524547576904, |
|
"learning_rate": 0.00018858736335636205, |
|
"loss": 1.3411, |
|
"step": 39950 |
|
}, |
|
{ |
|
"epoch": 1.1942794016660199, |
|
"grad_norm": 5.221969127655029, |
|
"learning_rate": 0.0001885730775604432, |
|
"loss": 1.3047, |
|
"step": 40000 |
|
} |
|
], |
|
"logging_steps": 50, |
|
"max_steps": 700001, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 21, |
|
"save_steps": 10000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.011899217623384e+18, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|