|
{ |
|
"best_metric": 3.1822056770324707, |
|
"best_model_checkpoint": "swardify-t5-base/checkpoint-777", |
|
"epoch": 3.0, |
|
"eval_steps": 500, |
|
"global_step": 777, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.023166023166023165, |
|
"grad_norm": NaN, |
|
"learning_rate": 0.0, |
|
"loss": 9.0761, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.04633204633204633, |
|
"grad_norm": 51.163658142089844, |
|
"learning_rate": 3.846153846153847e-06, |
|
"loss": 8.493, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.0694980694980695, |
|
"grad_norm": 34.79372787475586, |
|
"learning_rate": 7.692307692307694e-06, |
|
"loss": 9.1207, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.09266409266409266, |
|
"grad_norm": 29.063369750976562, |
|
"learning_rate": 1.153846153846154e-05, |
|
"loss": 6.2012, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.11583011583011583, |
|
"grad_norm": 108.47474670410156, |
|
"learning_rate": 1.5384615384615387e-05, |
|
"loss": 8.0014, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.138996138996139, |
|
"grad_norm": 40.11521911621094, |
|
"learning_rate": 1.923076923076923e-05, |
|
"loss": 6.7531, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.16216216216216217, |
|
"grad_norm": 21.81500244140625, |
|
"learning_rate": 2.307692307692308e-05, |
|
"loss": 6.1148, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.18532818532818532, |
|
"grad_norm": 118.32696533203125, |
|
"learning_rate": 2.6923076923076923e-05, |
|
"loss": 5.8517, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.2084942084942085, |
|
"grad_norm": 31.352462768554688, |
|
"learning_rate": 3.0769230769230774e-05, |
|
"loss": 6.7338, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.23166023166023167, |
|
"grad_norm": 50.72134017944336, |
|
"learning_rate": 3.461538461538462e-05, |
|
"loss": 6.1796, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.2548262548262548, |
|
"grad_norm": 12.974916458129883, |
|
"learning_rate": 3.846153846153846e-05, |
|
"loss": 5.5243, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.277992277992278, |
|
"grad_norm": 63.90497970581055, |
|
"learning_rate": 4.230769230769231e-05, |
|
"loss": 6.7863, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.30115830115830117, |
|
"grad_norm": 13.123330116271973, |
|
"learning_rate": 4.5512820512820516e-05, |
|
"loss": 4.2131, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.32432432432432434, |
|
"grad_norm": 27.849990844726562, |
|
"learning_rate": 4.935897435897436e-05, |
|
"loss": 6.5816, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.3474903474903475, |
|
"grad_norm": 69.52152252197266, |
|
"learning_rate": 4.964234620886982e-05, |
|
"loss": 6.1139, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.37065637065637064, |
|
"grad_norm": 30.392309188842773, |
|
"learning_rate": 4.921316165951359e-05, |
|
"loss": 5.971, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.3938223938223938, |
|
"grad_norm": 20.638032913208008, |
|
"learning_rate": 4.878397711015737e-05, |
|
"loss": 4.9646, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.416988416988417, |
|
"grad_norm": 21.338809967041016, |
|
"learning_rate": 4.8354792560801146e-05, |
|
"loss": 5.8206, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.44015444015444016, |
|
"grad_norm": 18.209985733032227, |
|
"learning_rate": 4.792560801144492e-05, |
|
"loss": 4.6264, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.46332046332046334, |
|
"grad_norm": 18.156341552734375, |
|
"learning_rate": 4.74964234620887e-05, |
|
"loss": 3.7616, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.4864864864864865, |
|
"grad_norm": 9.134263038635254, |
|
"learning_rate": 4.7067238912732475e-05, |
|
"loss": 3.9017, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.5096525096525096, |
|
"grad_norm": 11.322493553161621, |
|
"learning_rate": 4.663805436337626e-05, |
|
"loss": 4.5921, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.5328185328185329, |
|
"grad_norm": 20.7261905670166, |
|
"learning_rate": 4.620886981402003e-05, |
|
"loss": 4.4158, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.555984555984556, |
|
"grad_norm": 11.311534881591797, |
|
"learning_rate": 4.577968526466381e-05, |
|
"loss": 4.0813, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.5791505791505791, |
|
"grad_norm": 16.623924255371094, |
|
"learning_rate": 4.5350500715307586e-05, |
|
"loss": 4.7313, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.6023166023166023, |
|
"grad_norm": 15.561365127563477, |
|
"learning_rate": 4.492131616595136e-05, |
|
"loss": 3.9911, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.6254826254826255, |
|
"grad_norm": 15.34063720703125, |
|
"learning_rate": 4.449213161659514e-05, |
|
"loss": 4.8839, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.6486486486486487, |
|
"grad_norm": 17.363418579101562, |
|
"learning_rate": 4.4062947067238915e-05, |
|
"loss": 4.4482, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.6718146718146718, |
|
"grad_norm": 10.494095802307129, |
|
"learning_rate": 4.363376251788269e-05, |
|
"loss": 3.4289, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.694980694980695, |
|
"grad_norm": 13.092146873474121, |
|
"learning_rate": 4.320457796852647e-05, |
|
"loss": 3.826, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.7181467181467182, |
|
"grad_norm": 9.929197311401367, |
|
"learning_rate": 4.2775393419170244e-05, |
|
"loss": 4.997, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.7413127413127413, |
|
"grad_norm": 8.76804256439209, |
|
"learning_rate": 4.234620886981403e-05, |
|
"loss": 4.3643, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.7644787644787645, |
|
"grad_norm": 10.548382759094238, |
|
"learning_rate": 4.1917024320457796e-05, |
|
"loss": 3.9267, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.7876447876447876, |
|
"grad_norm": 12.139382362365723, |
|
"learning_rate": 4.148783977110158e-05, |
|
"loss": 3.9444, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.8108108108108109, |
|
"grad_norm": 8.579113960266113, |
|
"learning_rate": 4.105865522174535e-05, |
|
"loss": 3.578, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.833976833976834, |
|
"grad_norm": 11.02019214630127, |
|
"learning_rate": 4.062947067238913e-05, |
|
"loss": 4.0247, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.8571428571428571, |
|
"grad_norm": 29.3343505859375, |
|
"learning_rate": 4.02002861230329e-05, |
|
"loss": 3.4999, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.8803088803088803, |
|
"grad_norm": 8.917579650878906, |
|
"learning_rate": 3.9771101573676684e-05, |
|
"loss": 3.8837, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.9034749034749034, |
|
"grad_norm": 7.548088550567627, |
|
"learning_rate": 3.9341917024320454e-05, |
|
"loss": 3.847, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.9266409266409267, |
|
"grad_norm": 7.723937034606934, |
|
"learning_rate": 3.891273247496424e-05, |
|
"loss": 2.783, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.9498069498069498, |
|
"grad_norm": 19.019685745239258, |
|
"learning_rate": 3.848354792560801e-05, |
|
"loss": 3.9959, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.972972972972973, |
|
"grad_norm": 11.181546211242676, |
|
"learning_rate": 3.805436337625179e-05, |
|
"loss": 3.4081, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.9961389961389961, |
|
"grad_norm": 9.951409339904785, |
|
"learning_rate": 3.7625178826895566e-05, |
|
"loss": 3.4448, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_gen_len": 6.6154, |
|
"eval_loss": 3.645592212677002, |
|
"eval_rouge1": 9.7015, |
|
"eval_rouge2": 5.6335, |
|
"eval_rougeL": 9.3547, |
|
"eval_rougeLsum": 9.5155, |
|
"eval_runtime": 4.9627, |
|
"eval_samples_per_second": 13.098, |
|
"eval_steps_per_second": 3.426, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 1.0193050193050193, |
|
"grad_norm": 9.180315971374512, |
|
"learning_rate": 3.719599427753934e-05, |
|
"loss": 3.8576, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 1.0424710424710424, |
|
"grad_norm": 9.768823623657227, |
|
"learning_rate": 3.676680972818312e-05, |
|
"loss": 3.5363, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.0656370656370657, |
|
"grad_norm": 9.186162948608398, |
|
"learning_rate": 3.6337625178826894e-05, |
|
"loss": 2.9161, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 1.0888030888030888, |
|
"grad_norm": 5.980693817138672, |
|
"learning_rate": 3.590844062947068e-05, |
|
"loss": 3.8179, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 1.111969111969112, |
|
"grad_norm": 20.829463958740234, |
|
"learning_rate": 3.5479256080114454e-05, |
|
"loss": 2.6779, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 1.135135135135135, |
|
"grad_norm": 15.735648155212402, |
|
"learning_rate": 3.505007153075823e-05, |
|
"loss": 3.7684, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 1.1583011583011582, |
|
"grad_norm": 5.63276481628418, |
|
"learning_rate": 3.4620886981402006e-05, |
|
"loss": 2.7927, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.1814671814671815, |
|
"grad_norm": 5.006065845489502, |
|
"learning_rate": 3.419170243204578e-05, |
|
"loss": 2.8692, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 1.2046332046332047, |
|
"grad_norm": 10.745657920837402, |
|
"learning_rate": 3.376251788268956e-05, |
|
"loss": 3.1925, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 1.2277992277992278, |
|
"grad_norm": 11.546165466308594, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 3.9131, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 1.250965250965251, |
|
"grad_norm": 7.276663780212402, |
|
"learning_rate": 3.290414878397711e-05, |
|
"loss": 3.6568, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 1.2741312741312742, |
|
"grad_norm": 4.786994457244873, |
|
"learning_rate": 3.247496423462089e-05, |
|
"loss": 3.5466, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.2972972972972974, |
|
"grad_norm": 11.583497047424316, |
|
"learning_rate": 3.2045779685264664e-05, |
|
"loss": 3.1339, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 1.3204633204633205, |
|
"grad_norm": 11.542532920837402, |
|
"learning_rate": 3.161659513590845e-05, |
|
"loss": 3.8298, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 1.3436293436293436, |
|
"grad_norm": 12.93191146850586, |
|
"learning_rate": 3.1187410586552216e-05, |
|
"loss": 2.9133, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 1.3667953667953667, |
|
"grad_norm": 10.973305702209473, |
|
"learning_rate": 3.0758226037196e-05, |
|
"loss": 3.9245, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 1.3899613899613898, |
|
"grad_norm": 8.375068664550781, |
|
"learning_rate": 3.0329041487839772e-05, |
|
"loss": 2.5798, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.4131274131274132, |
|
"grad_norm": 11.476892471313477, |
|
"learning_rate": 2.9899856938483552e-05, |
|
"loss": 2.5942, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 1.4362934362934363, |
|
"grad_norm": 11.129337310791016, |
|
"learning_rate": 2.9470672389127325e-05, |
|
"loss": 3.0223, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 1.4594594594594594, |
|
"grad_norm": 14.000101089477539, |
|
"learning_rate": 2.9041487839771104e-05, |
|
"loss": 3.0297, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 1.4826254826254825, |
|
"grad_norm": 15.382340431213379, |
|
"learning_rate": 2.8612303290414884e-05, |
|
"loss": 3.7875, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 1.505791505791506, |
|
"grad_norm": 12.228459358215332, |
|
"learning_rate": 2.8183118741058657e-05, |
|
"loss": 3.8071, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.528957528957529, |
|
"grad_norm": 12.940893173217773, |
|
"learning_rate": 2.7753934191702436e-05, |
|
"loss": 2.9065, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 1.5521235521235521, |
|
"grad_norm": 16.97939682006836, |
|
"learning_rate": 2.732474964234621e-05, |
|
"loss": 3.2808, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 1.5752895752895753, |
|
"grad_norm": 17.717403411865234, |
|
"learning_rate": 2.689556509298999e-05, |
|
"loss": 3.4982, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 1.5984555984555984, |
|
"grad_norm": 8.225591659545898, |
|
"learning_rate": 2.6466380543633762e-05, |
|
"loss": 3.6047, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 1.6216216216216215, |
|
"grad_norm": 11.827160835266113, |
|
"learning_rate": 2.603719599427754e-05, |
|
"loss": 3.219, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.6447876447876448, |
|
"grad_norm": 14.637051582336426, |
|
"learning_rate": 2.5608011444921314e-05, |
|
"loss": 3.7435, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 1.667953667953668, |
|
"grad_norm": 7.794189453125, |
|
"learning_rate": 2.5178826895565094e-05, |
|
"loss": 3.4536, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 1.691119691119691, |
|
"grad_norm": 14.05357837677002, |
|
"learning_rate": 2.474964234620887e-05, |
|
"loss": 3.2552, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 1.7142857142857144, |
|
"grad_norm": 9.823927879333496, |
|
"learning_rate": 2.4320457796852646e-05, |
|
"loss": 3.4115, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 1.7374517374517375, |
|
"grad_norm": 45.474586486816406, |
|
"learning_rate": 2.3891273247496423e-05, |
|
"loss": 3.3538, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.7606177606177607, |
|
"grad_norm": 23.240049362182617, |
|
"learning_rate": 2.3462088698140202e-05, |
|
"loss": 3.5271, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 1.7837837837837838, |
|
"grad_norm": 13.07448959350586, |
|
"learning_rate": 2.303290414878398e-05, |
|
"loss": 3.6199, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 1.806949806949807, |
|
"grad_norm": 14.969829559326172, |
|
"learning_rate": 2.2603719599427755e-05, |
|
"loss": 4.2248, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 1.83011583011583, |
|
"grad_norm": 11.915005683898926, |
|
"learning_rate": 2.217453505007153e-05, |
|
"loss": 3.6249, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 1.8532818532818531, |
|
"grad_norm": 14.839444160461426, |
|
"learning_rate": 2.1745350500715307e-05, |
|
"loss": 3.214, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.8764478764478765, |
|
"grad_norm": 12.58840274810791, |
|
"learning_rate": 2.1316165951359084e-05, |
|
"loss": 3.0665, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 1.8996138996138996, |
|
"grad_norm": 14.892538070678711, |
|
"learning_rate": 2.0886981402002863e-05, |
|
"loss": 3.6376, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 1.922779922779923, |
|
"grad_norm": 8.315190315246582, |
|
"learning_rate": 2.045779685264664e-05, |
|
"loss": 2.5824, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 1.945945945945946, |
|
"grad_norm": 12.847142219543457, |
|
"learning_rate": 2.0028612303290416e-05, |
|
"loss": 2.5557, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 1.9691119691119692, |
|
"grad_norm": 11.103631019592285, |
|
"learning_rate": 1.9599427753934195e-05, |
|
"loss": 3.4455, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.9922779922779923, |
|
"grad_norm": 11.917842864990234, |
|
"learning_rate": 1.917024320457797e-05, |
|
"loss": 3.6309, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_gen_len": 6.1077, |
|
"eval_loss": 3.2486257553100586, |
|
"eval_rouge1": 9.7128, |
|
"eval_rouge2": 5.6141, |
|
"eval_rougeL": 9.3222, |
|
"eval_rougeLsum": 9.4984, |
|
"eval_runtime": 4.6056, |
|
"eval_samples_per_second": 14.113, |
|
"eval_steps_per_second": 3.691, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 2.0154440154440154, |
|
"grad_norm": 5.787186622619629, |
|
"learning_rate": 1.8741058655221748e-05, |
|
"loss": 3.2913, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 2.0386100386100385, |
|
"grad_norm": 10.957201957702637, |
|
"learning_rate": 1.8311874105865524e-05, |
|
"loss": 2.195, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 2.0617760617760617, |
|
"grad_norm": 11.480974197387695, |
|
"learning_rate": 1.78826895565093e-05, |
|
"loss": 2.8584, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 2.0849420849420848, |
|
"grad_norm": 9.375560760498047, |
|
"learning_rate": 1.7453505007153077e-05, |
|
"loss": 3.2412, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 2.108108108108108, |
|
"grad_norm": 11.552406311035156, |
|
"learning_rate": 1.7024320457796853e-05, |
|
"loss": 2.9892, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 2.1312741312741315, |
|
"grad_norm": 8.1609468460083, |
|
"learning_rate": 1.6595135908440632e-05, |
|
"loss": 3.3593, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 2.1544401544401546, |
|
"grad_norm": 11.221341133117676, |
|
"learning_rate": 1.616595135908441e-05, |
|
"loss": 2.9685, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 2.1776061776061777, |
|
"grad_norm": 10.078996658325195, |
|
"learning_rate": 1.5736766809728185e-05, |
|
"loss": 2.7808, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 2.200772200772201, |
|
"grad_norm": 10.252344131469727, |
|
"learning_rate": 1.530758226037196e-05, |
|
"loss": 3.3327, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 2.223938223938224, |
|
"grad_norm": 13.17142391204834, |
|
"learning_rate": 1.4878397711015737e-05, |
|
"loss": 3.4719, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 2.247104247104247, |
|
"grad_norm": 13.214423179626465, |
|
"learning_rate": 1.4449213161659514e-05, |
|
"loss": 3.1222, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 2.27027027027027, |
|
"grad_norm": 10.087789535522461, |
|
"learning_rate": 1.402002861230329e-05, |
|
"loss": 2.5103, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 2.2934362934362933, |
|
"grad_norm": 12.04149055480957, |
|
"learning_rate": 1.3590844062947066e-05, |
|
"loss": 3.4174, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 2.3166023166023164, |
|
"grad_norm": 12.798707962036133, |
|
"learning_rate": 1.3161659513590846e-05, |
|
"loss": 3.4742, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.33976833976834, |
|
"grad_norm": 13.191904067993164, |
|
"learning_rate": 1.2732474964234622e-05, |
|
"loss": 3.3361, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 2.362934362934363, |
|
"grad_norm": 11.446030616760254, |
|
"learning_rate": 1.2303290414878398e-05, |
|
"loss": 2.8801, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 2.386100386100386, |
|
"grad_norm": 9.028064727783203, |
|
"learning_rate": 1.1874105865522175e-05, |
|
"loss": 2.5872, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 2.4092664092664093, |
|
"grad_norm": 11.909852981567383, |
|
"learning_rate": 1.1444921316165953e-05, |
|
"loss": 2.9751, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 2.4324324324324325, |
|
"grad_norm": 11.196946144104004, |
|
"learning_rate": 1.1015736766809729e-05, |
|
"loss": 2.6607, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 2.4555984555984556, |
|
"grad_norm": 6.7401442527771, |
|
"learning_rate": 1.0586552217453507e-05, |
|
"loss": 2.9422, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 2.4787644787644787, |
|
"grad_norm": 9.36950969696045, |
|
"learning_rate": 1.0157367668097283e-05, |
|
"loss": 3.5546, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 2.501930501930502, |
|
"grad_norm": 16.527103424072266, |
|
"learning_rate": 9.72818311874106e-06, |
|
"loss": 2.3793, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 2.525096525096525, |
|
"grad_norm": 7.8193135261535645, |
|
"learning_rate": 9.298998569384835e-06, |
|
"loss": 2.8579, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 2.5482625482625485, |
|
"grad_norm": 12.883255004882812, |
|
"learning_rate": 8.869814020028613e-06, |
|
"loss": 2.817, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 2.571428571428571, |
|
"grad_norm": 10.683404922485352, |
|
"learning_rate": 8.44062947067239e-06, |
|
"loss": 2.8759, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 2.5945945945945947, |
|
"grad_norm": 8.476865768432617, |
|
"learning_rate": 8.011444921316166e-06, |
|
"loss": 3.5438, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 2.617760617760618, |
|
"grad_norm": 16.314342498779297, |
|
"learning_rate": 7.582260371959943e-06, |
|
"loss": 3.4169, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 2.640926640926641, |
|
"grad_norm": 14.487051963806152, |
|
"learning_rate": 7.153075822603721e-06, |
|
"loss": 3.4511, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 2.664092664092664, |
|
"grad_norm": 13.450141906738281, |
|
"learning_rate": 6.723891273247497e-06, |
|
"loss": 2.8801, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 2.687258687258687, |
|
"grad_norm": 14.612557411193848, |
|
"learning_rate": 6.2947067238912735e-06, |
|
"loss": 2.6347, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 2.7104247104247103, |
|
"grad_norm": 11.321943283081055, |
|
"learning_rate": 5.8655221745350506e-06, |
|
"loss": 2.8296, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 2.7335907335907335, |
|
"grad_norm": 18.955806732177734, |
|
"learning_rate": 5.436337625178827e-06, |
|
"loss": 3.0904, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 2.756756756756757, |
|
"grad_norm": 6.251610279083252, |
|
"learning_rate": 5.007153075822604e-06, |
|
"loss": 2.2731, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 2.7799227799227797, |
|
"grad_norm": 8.317915916442871, |
|
"learning_rate": 4.577968526466381e-06, |
|
"loss": 2.6778, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 2.8030888030888033, |
|
"grad_norm": 12.207304000854492, |
|
"learning_rate": 4.148783977110158e-06, |
|
"loss": 3.3065, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 2.8262548262548264, |
|
"grad_norm": 16.136972427368164, |
|
"learning_rate": 3.7195994277539344e-06, |
|
"loss": 3.9406, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 2.8494208494208495, |
|
"grad_norm": 13.954407691955566, |
|
"learning_rate": 3.2904148783977115e-06, |
|
"loss": 2.9646, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 2.8725868725868726, |
|
"grad_norm": 12.064271926879883, |
|
"learning_rate": 2.861230329041488e-06, |
|
"loss": 3.1461, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 2.8957528957528957, |
|
"grad_norm": 11.247983932495117, |
|
"learning_rate": 2.432045779685265e-06, |
|
"loss": 3.4405, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 2.918918918918919, |
|
"grad_norm": 8.756580352783203, |
|
"learning_rate": 2.0028612303290415e-06, |
|
"loss": 3.6948, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 2.942084942084942, |
|
"grad_norm": 10.008057594299316, |
|
"learning_rate": 1.5736766809728184e-06, |
|
"loss": 3.2626, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 2.965250965250965, |
|
"grad_norm": 5.3783650398254395, |
|
"learning_rate": 1.1444921316165953e-06, |
|
"loss": 2.7083, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 2.988416988416988, |
|
"grad_norm": 8.535452842712402, |
|
"learning_rate": 7.15307582260372e-07, |
|
"loss": 3.0117, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_gen_len": 6.0615, |
|
"eval_loss": 3.1822056770324707, |
|
"eval_rouge1": 9.5428, |
|
"eval_rouge2": 5.4799, |
|
"eval_rougeL": 9.1926, |
|
"eval_rougeLsum": 9.3965, |
|
"eval_runtime": 4.675, |
|
"eval_samples_per_second": 13.904, |
|
"eval_steps_per_second": 3.636, |
|
"step": 777 |
|
} |
|
], |
|
"logging_steps": 6, |
|
"max_steps": 777, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"total_flos": 18382916321280.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|