{ |
|
"best_metric": 28.696460806301626, |
|
"best_model_checkpoint": "./whisper-tiny-hi/checkpoint-45000", |
|
"epoch": 14.46952886316128, |
|
"eval_steps": 5000, |
|
"global_step": 45000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.06431902235086026, |
|
"grad_norm": 4.596249103546143, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"loss": 1.7191, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.12863804470172052, |
|
"grad_norm": 3.4318807125091553, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 0.9245, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.1929570670525808, |
|
"grad_norm": 3.58982253074646, |
|
"learning_rate": 6e-06, |
|
"loss": 0.6853, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.25727608940344104, |
|
"grad_norm": 3.402555227279663, |
|
"learning_rate": 8.000000000000001e-06, |
|
"loss": 0.5814, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.32159511175430133, |
|
"grad_norm": 3.178177833557129, |
|
"learning_rate": 1e-05, |
|
"loss": 0.5202, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.3859141341051616, |
|
"grad_norm": 3.04317569732666, |
|
"learning_rate": 9.96730957829356e-06, |
|
"loss": 0.4719, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.45023315645602185, |
|
"grad_norm": 3.090500831604004, |
|
"learning_rate": 9.93461915658712e-06, |
|
"loss": 0.4444, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.5145521788068821, |
|
"grad_norm": 3.2675209045410156, |
|
"learning_rate": 9.90192873488068e-06, |
|
"loss": 0.4275, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.5788712011577424, |
|
"grad_norm": 2.859715223312378, |
|
"learning_rate": 9.869238313174241e-06, |
|
"loss": 0.4076, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.6431902235086027, |
|
"grad_norm": 2.6303207874298096, |
|
"learning_rate": 9.836547891467801e-06, |
|
"loss": 0.3917, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.707509245859463, |
|
"grad_norm": 3.0497164726257324, |
|
"learning_rate": 9.804020921869893e-06, |
|
"loss": 0.381, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.7718282682103232, |
|
"grad_norm": 2.6169981956481934, |
|
"learning_rate": 9.771330500163453e-06, |
|
"loss": 0.3719, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.8361472905611834, |
|
"grad_norm": 2.708853006362915, |
|
"learning_rate": 9.738640078457014e-06, |
|
"loss": 0.3635, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.9004663129120437, |
|
"grad_norm": 2.8617489337921143, |
|
"learning_rate": 9.705949656750572e-06, |
|
"loss": 0.354, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.964785335262904, |
|
"grad_norm": 2.758882999420166, |
|
"learning_rate": 9.673259235044132e-06, |
|
"loss": 0.3474, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.0289435600578871, |
|
"grad_norm": 2.680941343307495, |
|
"learning_rate": 9.640568813337693e-06, |
|
"loss": 0.3395, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 1.0932625824087474, |
|
"grad_norm": 2.918921709060669, |
|
"learning_rate": 9.607878391631253e-06, |
|
"loss": 0.3299, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 1.1575816047596077, |
|
"grad_norm": 2.6285266876220703, |
|
"learning_rate": 9.575187969924813e-06, |
|
"loss": 0.3231, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 1.221900627110468, |
|
"grad_norm": 2.9504473209381104, |
|
"learning_rate": 9.542497548218372e-06, |
|
"loss": 0.3217, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 1.2862196494613283, |
|
"grad_norm": 2.8353495597839355, |
|
"learning_rate": 9.509807126511932e-06, |
|
"loss": 0.32, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.3505386718121883, |
|
"grad_norm": 2.4936184883117676, |
|
"learning_rate": 9.477280156914024e-06, |
|
"loss": 0.3141, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 1.4148576941630489, |
|
"grad_norm": 2.6884140968322754, |
|
"learning_rate": 9.444589735207586e-06, |
|
"loss": 0.3086, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 1.479176716513909, |
|
"grad_norm": 2.8961310386657715, |
|
"learning_rate": 9.411899313501145e-06, |
|
"loss": 0.308, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 1.5434957388647692, |
|
"grad_norm": 2.5737783908843994, |
|
"learning_rate": 9.379208891794705e-06, |
|
"loss": 0.3019, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 1.6078147612156295, |
|
"grad_norm": 2.5709033012390137, |
|
"learning_rate": 9.346518470088265e-06, |
|
"loss": 0.2974, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.6078147612156295, |
|
"eval_loss": 0.2771373987197876, |
|
"eval_runtime": 5695.1238, |
|
"eval_samples_per_second": 5.115, |
|
"eval_steps_per_second": 0.639, |
|
"eval_wer": 36.94923502637938, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.6721337835664898, |
|
"grad_norm": 2.3421154022216797, |
|
"learning_rate": 9.313828048381824e-06, |
|
"loss": 0.2959, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 1.73645280591735, |
|
"grad_norm": 2.662564277648926, |
|
"learning_rate": 9.281137626675386e-06, |
|
"loss": 0.2941, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 1.8007718282682104, |
|
"grad_norm": 2.4682416915893555, |
|
"learning_rate": 9.248447204968944e-06, |
|
"loss": 0.2932, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 1.8650908506190707, |
|
"grad_norm": 2.6210873126983643, |
|
"learning_rate": 9.215756783262505e-06, |
|
"loss": 0.2895, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 1.9294098729699307, |
|
"grad_norm": 2.6296238899230957, |
|
"learning_rate": 9.183066361556065e-06, |
|
"loss": 0.286, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.9937288953207912, |
|
"grad_norm": 2.486929178237915, |
|
"learning_rate": 9.150539391958157e-06, |
|
"loss": 0.2846, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 2.0578871201157742, |
|
"grad_norm": 2.202758550643921, |
|
"learning_rate": 9.117848970251717e-06, |
|
"loss": 0.2754, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 2.1222061424666343, |
|
"grad_norm": 2.4207451343536377, |
|
"learning_rate": 9.085158548545276e-06, |
|
"loss": 0.2745, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 2.186525164817495, |
|
"grad_norm": 2.1694822311401367, |
|
"learning_rate": 9.052468126838838e-06, |
|
"loss": 0.2732, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 2.250844187168355, |
|
"grad_norm": 2.7656798362731934, |
|
"learning_rate": 9.019777705132396e-06, |
|
"loss": 0.2709, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 2.3151632095192154, |
|
"grad_norm": 2.2421224117279053, |
|
"learning_rate": 8.987087283425957e-06, |
|
"loss": 0.2691, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 2.3794822318700755, |
|
"grad_norm": 2.469956874847412, |
|
"learning_rate": 8.954396861719517e-06, |
|
"loss": 0.268, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 2.443801254220936, |
|
"grad_norm": 2.4373421669006348, |
|
"learning_rate": 8.921706440013077e-06, |
|
"loss": 0.2656, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 2.508120276571796, |
|
"grad_norm": 2.5110926628112793, |
|
"learning_rate": 8.889016018306636e-06, |
|
"loss": 0.2668, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 2.5724392989226565, |
|
"grad_norm": 2.559202194213867, |
|
"learning_rate": 8.856325596600198e-06, |
|
"loss": 0.2623, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 2.6367583212735166, |
|
"grad_norm": 2.6701242923736572, |
|
"learning_rate": 8.82379862700229e-06, |
|
"loss": 0.2642, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 2.7010773436243767, |
|
"grad_norm": 2.3247900009155273, |
|
"learning_rate": 8.791108205295848e-06, |
|
"loss": 0.2621, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 2.765396365975237, |
|
"grad_norm": 2.2806427478790283, |
|
"learning_rate": 8.75841778358941e-06, |
|
"loss": 0.2601, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 2.8297153883260977, |
|
"grad_norm": 2.487159013748169, |
|
"learning_rate": 8.725727361882969e-06, |
|
"loss": 0.2596, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 2.8940344106769578, |
|
"grad_norm": 2.2094194889068604, |
|
"learning_rate": 8.693036940176529e-06, |
|
"loss": 0.2587, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 2.958353433027818, |
|
"grad_norm": 2.349698781967163, |
|
"learning_rate": 8.660346518470088e-06, |
|
"loss": 0.2569, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 3.0225116578228013, |
|
"grad_norm": 2.1951253414154053, |
|
"learning_rate": 8.62765609676365e-06, |
|
"loss": 0.2507, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 3.0868306801736614, |
|
"grad_norm": 2.2560884952545166, |
|
"learning_rate": 8.594965675057208e-06, |
|
"loss": 0.2469, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 3.1511497025245214, |
|
"grad_norm": 2.3260977268218994, |
|
"learning_rate": 8.562275253350769e-06, |
|
"loss": 0.2457, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 3.215468724875382, |
|
"grad_norm": 2.318385601043701, |
|
"learning_rate": 8.529584831644329e-06, |
|
"loss": 0.2463, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 3.215468724875382, |
|
"eval_loss": 0.23996804654598236, |
|
"eval_runtime": 5706.3154, |
|
"eval_samples_per_second": 5.105, |
|
"eval_steps_per_second": 0.638, |
|
"eval_wer": 33.16859977100777, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 3.279787747226242, |
|
"grad_norm": 2.278331756591797, |
|
"learning_rate": 8.496894409937889e-06, |
|
"loss": 0.2471, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 3.3441067695771025, |
|
"grad_norm": 2.1796886920928955, |
|
"learning_rate": 8.46436744033998e-06, |
|
"loss": 0.244, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 3.4084257919279626, |
|
"grad_norm": 2.3623690605163574, |
|
"learning_rate": 8.43167701863354e-06, |
|
"loss": 0.2453, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 3.472744814278823, |
|
"grad_norm": 2.242501974105835, |
|
"learning_rate": 8.398986596927102e-06, |
|
"loss": 0.2436, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 3.537063836629683, |
|
"grad_norm": 2.4463303089141846, |
|
"learning_rate": 8.366296175220661e-06, |
|
"loss": 0.244, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 3.6013828589805437, |
|
"grad_norm": 2.387897253036499, |
|
"learning_rate": 8.333605753514221e-06, |
|
"loss": 0.2436, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 3.6657018813314037, |
|
"grad_norm": 2.386038303375244, |
|
"learning_rate": 8.30091533180778e-06, |
|
"loss": 0.2431, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 3.730020903682264, |
|
"grad_norm": 2.2631173133850098, |
|
"learning_rate": 8.26822491010134e-06, |
|
"loss": 0.2399, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 3.7943399260331243, |
|
"grad_norm": 2.458087921142578, |
|
"learning_rate": 8.2355344883949e-06, |
|
"loss": 0.2409, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 3.858658948383985, |
|
"grad_norm": 2.400782585144043, |
|
"learning_rate": 8.202844066688462e-06, |
|
"loss": 0.2396, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 3.922977970734845, |
|
"grad_norm": 2.1437551975250244, |
|
"learning_rate": 8.170153644982021e-06, |
|
"loss": 0.2375, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 3.987296993085705, |
|
"grad_norm": 2.8162059783935547, |
|
"learning_rate": 8.137626675384113e-06, |
|
"loss": 0.2385, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 4.051455217880688, |
|
"grad_norm": 2.505800485610962, |
|
"learning_rate": 8.104936253677673e-06, |
|
"loss": 0.2317, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 4.1157742402315485, |
|
"grad_norm": 2.2435059547424316, |
|
"learning_rate": 8.072245831971233e-06, |
|
"loss": 0.2301, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 4.1800932625824085, |
|
"grad_norm": 2.208909034729004, |
|
"learning_rate": 8.039555410264793e-06, |
|
"loss": 0.2305, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 4.244412284933269, |
|
"grad_norm": 2.0333073139190674, |
|
"learning_rate": 8.006864988558352e-06, |
|
"loss": 0.2283, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 4.30873130728413, |
|
"grad_norm": 2.437255620956421, |
|
"learning_rate": 7.974174566851914e-06, |
|
"loss": 0.2285, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 4.37305032963499, |
|
"grad_norm": 2.1313934326171875, |
|
"learning_rate": 7.941484145145473e-06, |
|
"loss": 0.2268, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 4.43736935198585, |
|
"grad_norm": 2.3695502281188965, |
|
"learning_rate": 7.908793723439033e-06, |
|
"loss": 0.2281, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 4.50168837433671, |
|
"grad_norm": 2.3714799880981445, |
|
"learning_rate": 7.876103301732593e-06, |
|
"loss": 0.2285, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 4.566007396687571, |
|
"grad_norm": 2.148329019546509, |
|
"learning_rate": 7.843412880026154e-06, |
|
"loss": 0.2285, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 4.630326419038431, |
|
"grad_norm": 2.6452717781066895, |
|
"learning_rate": 7.810885910428245e-06, |
|
"loss": 0.2286, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 4.694645441389291, |
|
"grad_norm": 2.203331470489502, |
|
"learning_rate": 7.778195488721804e-06, |
|
"loss": 0.2262, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 4.758964463740151, |
|
"grad_norm": 2.239518642425537, |
|
"learning_rate": 7.745505067015366e-06, |
|
"loss": 0.2255, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 4.823283486091011, |
|
"grad_norm": 2.2322895526885986, |
|
"learning_rate": 7.712814645308925e-06, |
|
"loss": 0.2284, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 4.823283486091011, |
|
"eval_loss": 0.22340841591358185, |
|
"eval_runtime": 6283.1507, |
|
"eval_samples_per_second": 4.636, |
|
"eval_steps_per_second": 0.58, |
|
"eval_wer": 31.49793737232363, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 4.887602508441872, |
|
"grad_norm": 2.171865224838257, |
|
"learning_rate": 7.680124223602485e-06, |
|
"loss": 0.2286, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 4.951921530792732, |
|
"grad_norm": 2.2217211723327637, |
|
"learning_rate": 7.647433801896045e-06, |
|
"loss": 0.2237, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 5.0160797555877155, |
|
"grad_norm": 2.2957966327667236, |
|
"learning_rate": 7.614743380189605e-06, |
|
"loss": 0.2247, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 5.0803987779385755, |
|
"grad_norm": 2.2841222286224365, |
|
"learning_rate": 7.582052958483165e-06, |
|
"loss": 0.2184, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 5.144717800289436, |
|
"grad_norm": 2.9461894035339355, |
|
"learning_rate": 7.5493625367767255e-06, |
|
"loss": 0.2176, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 5.209036822640296, |
|
"grad_norm": 2.2747585773468018, |
|
"learning_rate": 7.516672115070285e-06, |
|
"loss": 0.2171, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 5.273355844991156, |
|
"grad_norm": 2.220026969909668, |
|
"learning_rate": 7.4841451454723765e-06, |
|
"loss": 0.217, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 5.337674867342017, |
|
"grad_norm": 2.335299253463745, |
|
"learning_rate": 7.451454723765937e-06, |
|
"loss": 0.218, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 5.401993889692877, |
|
"grad_norm": 2.2069528102874756, |
|
"learning_rate": 7.418764302059497e-06, |
|
"loss": 0.2172, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 5.466312912043737, |
|
"grad_norm": 2.500458002090454, |
|
"learning_rate": 7.386073880353057e-06, |
|
"loss": 0.2174, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 5.530631934394597, |
|
"grad_norm": 2.2738373279571533, |
|
"learning_rate": 7.353383458646617e-06, |
|
"loss": 0.2186, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 5.594950956745458, |
|
"grad_norm": 2.0394363403320312, |
|
"learning_rate": 7.3206930369401776e-06, |
|
"loss": 0.2168, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 5.659269979096318, |
|
"grad_norm": 2.0548949241638184, |
|
"learning_rate": 7.288002615233737e-06, |
|
"loss": 0.2167, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 5.723589001447178, |
|
"grad_norm": 2.0610129833221436, |
|
"learning_rate": 7.255312193527298e-06, |
|
"loss": 0.2154, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 5.787908023798038, |
|
"grad_norm": 2.0888965129852295, |
|
"learning_rate": 7.222621771820857e-06, |
|
"loss": 0.2157, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 5.852227046148899, |
|
"grad_norm": 2.3282668590545654, |
|
"learning_rate": 7.189931350114417e-06, |
|
"loss": 0.2162, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 5.916546068499759, |
|
"grad_norm": 2.2571957111358643, |
|
"learning_rate": 7.157404380516509e-06, |
|
"loss": 0.2174, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 5.980865090850619, |
|
"grad_norm": 2.201342821121216, |
|
"learning_rate": 7.124713958810069e-06, |
|
"loss": 0.2137, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 6.045023315645603, |
|
"grad_norm": 2.3466155529022217, |
|
"learning_rate": 7.09202353710363e-06, |
|
"loss": 0.2098, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 6.109342337996463, |
|
"grad_norm": 2.0573270320892334, |
|
"learning_rate": 7.059333115397189e-06, |
|
"loss": 0.2089, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 6.173661360347323, |
|
"grad_norm": 2.026381492614746, |
|
"learning_rate": 7.02664269369075e-06, |
|
"loss": 0.2085, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 6.237980382698183, |
|
"grad_norm": 2.2646751403808594, |
|
"learning_rate": 6.993952271984309e-06, |
|
"loss": 0.2074, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 6.302299405049043, |
|
"grad_norm": 2.0940823554992676, |
|
"learning_rate": 6.961261850277869e-06, |
|
"loss": 0.2086, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 6.366618427399904, |
|
"grad_norm": 2.2303521633148193, |
|
"learning_rate": 6.928571428571429e-06, |
|
"loss": 0.2096, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 6.430937449750764, |
|
"grad_norm": 2.2108261585235596, |
|
"learning_rate": 6.8958810068649895e-06, |
|
"loss": 0.2089, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 6.430937449750764, |
|
"eval_loss": 0.2148449569940567, |
|
"eval_runtime": 6212.1239, |
|
"eval_samples_per_second": 4.689, |
|
"eval_steps_per_second": 0.586, |
|
"eval_wer": 30.543488717322866, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 6.495256472101624, |
|
"grad_norm": 2.3403568267822266, |
|
"learning_rate": 6.863190585158549e-06, |
|
"loss": 0.2096, |
|
"step": 20200 |
|
}, |
|
{ |
|
"epoch": 6.559575494452484, |
|
"grad_norm": 2.071211576461792, |
|
"learning_rate": 6.830663615560641e-06, |
|
"loss": 0.2076, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 6.623894516803345, |
|
"grad_norm": 2.031785488128662, |
|
"learning_rate": 6.797973193854202e-06, |
|
"loss": 0.2075, |
|
"step": 20600 |
|
}, |
|
{ |
|
"epoch": 6.688213539154205, |
|
"grad_norm": 2.1697540283203125, |
|
"learning_rate": 6.765282772147761e-06, |
|
"loss": 0.2086, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 6.752532561505065, |
|
"grad_norm": 2.1642816066741943, |
|
"learning_rate": 6.732592350441321e-06, |
|
"loss": 0.2094, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 6.816851583855925, |
|
"grad_norm": 2.093594789505005, |
|
"learning_rate": 6.699901928734881e-06, |
|
"loss": 0.2065, |
|
"step": 21200 |
|
}, |
|
{ |
|
"epoch": 6.881170606206785, |
|
"grad_norm": 2.0487377643585205, |
|
"learning_rate": 6.6672115070284415e-06, |
|
"loss": 0.2074, |
|
"step": 21400 |
|
}, |
|
{ |
|
"epoch": 6.945489628557646, |
|
"grad_norm": 2.0511105060577393, |
|
"learning_rate": 6.634521085322001e-06, |
|
"loss": 0.2072, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 7.009647853352629, |
|
"grad_norm": 2.1460208892822266, |
|
"learning_rate": 6.601830663615562e-06, |
|
"loss": 0.204, |
|
"step": 21800 |
|
}, |
|
{ |
|
"epoch": 7.07396687570349, |
|
"grad_norm": 2.093196392059326, |
|
"learning_rate": 6.569140241909121e-06, |
|
"loss": 0.2013, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 7.13828589805435, |
|
"grad_norm": 2.342313766479492, |
|
"learning_rate": 6.536449820202682e-06, |
|
"loss": 0.1994, |
|
"step": 22200 |
|
}, |
|
{ |
|
"epoch": 7.20260492040521, |
|
"grad_norm": 2.05419921875, |
|
"learning_rate": 6.503759398496241e-06, |
|
"loss": 0.2023, |
|
"step": 22400 |
|
}, |
|
{ |
|
"epoch": 7.26692394275607, |
|
"grad_norm": 2.248352289199829, |
|
"learning_rate": 6.471232428898333e-06, |
|
"loss": 0.2024, |
|
"step": 22600 |
|
}, |
|
{ |
|
"epoch": 7.33124296510693, |
|
"grad_norm": 2.308692216873169, |
|
"learning_rate": 6.4385420071918935e-06, |
|
"loss": 0.2007, |
|
"step": 22800 |
|
}, |
|
{ |
|
"epoch": 7.395561987457791, |
|
"grad_norm": 2.0985047817230225, |
|
"learning_rate": 6.405851585485453e-06, |
|
"loss": 0.1989, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 7.459881009808651, |
|
"grad_norm": 2.1786727905273438, |
|
"learning_rate": 6.373161163779014e-06, |
|
"loss": 0.2009, |
|
"step": 23200 |
|
}, |
|
{ |
|
"epoch": 7.524200032159511, |
|
"grad_norm": 2.358017921447754, |
|
"learning_rate": 6.3404707420725734e-06, |
|
"loss": 0.2011, |
|
"step": 23400 |
|
}, |
|
{ |
|
"epoch": 7.588519054510371, |
|
"grad_norm": 2.221282482147217, |
|
"learning_rate": 6.307780320366134e-06, |
|
"loss": 0.2, |
|
"step": 23600 |
|
}, |
|
{ |
|
"epoch": 7.652838076861232, |
|
"grad_norm": 2.1816442012786865, |
|
"learning_rate": 6.275089898659693e-06, |
|
"loss": 0.1995, |
|
"step": 23800 |
|
}, |
|
{ |
|
"epoch": 7.717157099212092, |
|
"grad_norm": 2.1074419021606445, |
|
"learning_rate": 6.242399476953253e-06, |
|
"loss": 0.2008, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 7.781476121562952, |
|
"grad_norm": 2.12418532371521, |
|
"learning_rate": 6.209709055246813e-06, |
|
"loss": 0.2029, |
|
"step": 24200 |
|
}, |
|
{ |
|
"epoch": 7.845795143913812, |
|
"grad_norm": 2.0285849571228027, |
|
"learning_rate": 6.177018633540374e-06, |
|
"loss": 0.2005, |
|
"step": 24400 |
|
}, |
|
{ |
|
"epoch": 7.910114166264673, |
|
"grad_norm": 2.1275014877319336, |
|
"learning_rate": 6.144491663942466e-06, |
|
"loss": 0.2014, |
|
"step": 24600 |
|
}, |
|
{ |
|
"epoch": 7.974433188615533, |
|
"grad_norm": 2.047292709350586, |
|
"learning_rate": 6.1118012422360254e-06, |
|
"loss": 0.1999, |
|
"step": 24800 |
|
}, |
|
{ |
|
"epoch": 8.038591413410517, |
|
"grad_norm": 2.2886574268341064, |
|
"learning_rate": 6.079110820529586e-06, |
|
"loss": 0.197, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 8.038591413410517, |
|
"eval_loss": 0.20965221524238586, |
|
"eval_runtime": 6307.1939, |
|
"eval_samples_per_second": 4.619, |
|
"eval_steps_per_second": 0.577, |
|
"eval_wer": 29.90140507668599, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 8.102910435761377, |
|
"grad_norm": 2.158328056335449, |
|
"learning_rate": 6.046420398823145e-06, |
|
"loss": 0.1942, |
|
"step": 25200 |
|
}, |
|
{ |
|
"epoch": 8.167229458112237, |
|
"grad_norm": 2.0762622356414795, |
|
"learning_rate": 6.013729977116705e-06, |
|
"loss": 0.1941, |
|
"step": 25400 |
|
}, |
|
{ |
|
"epoch": 8.231548480463097, |
|
"grad_norm": 2.2431387901306152, |
|
"learning_rate": 5.981039555410265e-06, |
|
"loss": 0.1951, |
|
"step": 25600 |
|
}, |
|
{ |
|
"epoch": 8.295867502813957, |
|
"grad_norm": 1.9715090990066528, |
|
"learning_rate": 5.948349133703826e-06, |
|
"loss": 0.1946, |
|
"step": 25800 |
|
}, |
|
{ |
|
"epoch": 8.360186525164817, |
|
"grad_norm": 2.1715543270111084, |
|
"learning_rate": 5.915658711997385e-06, |
|
"loss": 0.1949, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 8.424505547515677, |
|
"grad_norm": 2.1786696910858154, |
|
"learning_rate": 5.882968290290946e-06, |
|
"loss": 0.1954, |
|
"step": 26200 |
|
}, |
|
{ |
|
"epoch": 8.488824569866537, |
|
"grad_norm": 2.2478559017181396, |
|
"learning_rate": 5.8502778685845056e-06, |
|
"loss": 0.1943, |
|
"step": 26400 |
|
}, |
|
{ |
|
"epoch": 8.553143592217399, |
|
"grad_norm": 2.0193216800689697, |
|
"learning_rate": 5.817750898986597e-06, |
|
"loss": 0.1931, |
|
"step": 26600 |
|
}, |
|
{ |
|
"epoch": 8.61746261456826, |
|
"grad_norm": 2.024120330810547, |
|
"learning_rate": 5.785060477280157e-06, |
|
"loss": 0.1944, |
|
"step": 26800 |
|
}, |
|
{ |
|
"epoch": 8.68178163691912, |
|
"grad_norm": 1.9868948459625244, |
|
"learning_rate": 5.752370055573717e-06, |
|
"loss": 0.1967, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 8.74610065926998, |
|
"grad_norm": 2.0132243633270264, |
|
"learning_rate": 5.719679633867278e-06, |
|
"loss": 0.194, |
|
"step": 27200 |
|
}, |
|
{ |
|
"epoch": 8.81041968162084, |
|
"grad_norm": 2.1960537433624268, |
|
"learning_rate": 5.686989212160837e-06, |
|
"loss": 0.1945, |
|
"step": 27400 |
|
}, |
|
{ |
|
"epoch": 8.8747387039717, |
|
"grad_norm": 2.118748903274536, |
|
"learning_rate": 5.654298790454398e-06, |
|
"loss": 0.1943, |
|
"step": 27600 |
|
}, |
|
{ |
|
"epoch": 8.93905772632256, |
|
"grad_norm": 2.0620296001434326, |
|
"learning_rate": 5.621608368747958e-06, |
|
"loss": 0.1948, |
|
"step": 27800 |
|
}, |
|
{ |
|
"epoch": 9.003215951117543, |
|
"grad_norm": 2.0099258422851562, |
|
"learning_rate": 5.588917947041518e-06, |
|
"loss": 0.1953, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 9.067534973468403, |
|
"grad_norm": 2.1706490516662598, |
|
"learning_rate": 5.556227525335077e-06, |
|
"loss": 0.187, |
|
"step": 28200 |
|
}, |
|
{ |
|
"epoch": 9.131853995819263, |
|
"grad_norm": 1.989235758781433, |
|
"learning_rate": 5.523537103628637e-06, |
|
"loss": 0.1896, |
|
"step": 28400 |
|
}, |
|
{ |
|
"epoch": 9.196173018170123, |
|
"grad_norm": 2.4199020862579346, |
|
"learning_rate": 5.49101013403073e-06, |
|
"loss": 0.1889, |
|
"step": 28600 |
|
}, |
|
{ |
|
"epoch": 9.260492040520983, |
|
"grad_norm": 2.318398952484131, |
|
"learning_rate": 5.458319712324289e-06, |
|
"loss": 0.1899, |
|
"step": 28800 |
|
}, |
|
{ |
|
"epoch": 9.324811062871845, |
|
"grad_norm": 2.014559745788574, |
|
"learning_rate": 5.42562929061785e-06, |
|
"loss": 0.1899, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 9.389130085222705, |
|
"grad_norm": 2.0770111083984375, |
|
"learning_rate": 5.39293886891141e-06, |
|
"loss": 0.1904, |
|
"step": 29200 |
|
}, |
|
{ |
|
"epoch": 9.453449107573565, |
|
"grad_norm": 2.1191565990448, |
|
"learning_rate": 5.36024844720497e-06, |
|
"loss": 0.1903, |
|
"step": 29400 |
|
}, |
|
{ |
|
"epoch": 9.517768129924425, |
|
"grad_norm": 1.8838344812393188, |
|
"learning_rate": 5.327558025498529e-06, |
|
"loss": 0.191, |
|
"step": 29600 |
|
}, |
|
{ |
|
"epoch": 9.582087152275285, |
|
"grad_norm": 2.064694404602051, |
|
"learning_rate": 5.2948676037920895e-06, |
|
"loss": 0.189, |
|
"step": 29800 |
|
}, |
|
{ |
|
"epoch": 9.646406174626145, |
|
"grad_norm": 2.2259907722473145, |
|
"learning_rate": 5.262177182085649e-06, |
|
"loss": 0.1896, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 9.646406174626145, |
|
"eval_loss": 0.20603837072849274, |
|
"eval_runtime": 6469.2735, |
|
"eval_samples_per_second": 4.503, |
|
"eval_steps_per_second": 0.563, |
|
"eval_wer": 29.357595694795236, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 9.710725196977005, |
|
"grad_norm": 2.174774169921875, |
|
"learning_rate": 5.229486760379209e-06, |
|
"loss": 0.1898, |
|
"step": 30200 |
|
}, |
|
{ |
|
"epoch": 9.775044219327865, |
|
"grad_norm": 2.170811414718628, |
|
"learning_rate": 5.1967963386727695e-06, |
|
"loss": 0.1885, |
|
"step": 30400 |
|
}, |
|
{ |
|
"epoch": 9.839363241678726, |
|
"grad_norm": 2.454228401184082, |
|
"learning_rate": 5.164105916966329e-06, |
|
"loss": 0.1889, |
|
"step": 30600 |
|
}, |
|
{ |
|
"epoch": 9.903682264029587, |
|
"grad_norm": 2.278226613998413, |
|
"learning_rate": 5.131578947368422e-06, |
|
"loss": 0.1891, |
|
"step": 30800 |
|
}, |
|
{ |
|
"epoch": 9.968001286380447, |
|
"grad_norm": 2.151634931564331, |
|
"learning_rate": 5.098888525661981e-06, |
|
"loss": 0.1881, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 10.032159511175431, |
|
"grad_norm": 2.1410274505615234, |
|
"learning_rate": 5.0661981039555416e-06, |
|
"loss": 0.1861, |
|
"step": 31200 |
|
}, |
|
{ |
|
"epoch": 10.096478533526291, |
|
"grad_norm": 2.1889536380767822, |
|
"learning_rate": 5.033507682249101e-06, |
|
"loss": 0.184, |
|
"step": 31400 |
|
}, |
|
{ |
|
"epoch": 10.160797555877151, |
|
"grad_norm": 2.2102479934692383, |
|
"learning_rate": 5.000817260542662e-06, |
|
"loss": 0.1849, |
|
"step": 31600 |
|
}, |
|
{ |
|
"epoch": 10.225116578228011, |
|
"grad_norm": 2.0018393993377686, |
|
"learning_rate": 4.9681268388362215e-06, |
|
"loss": 0.1833, |
|
"step": 31800 |
|
}, |
|
{ |
|
"epoch": 10.289435600578871, |
|
"grad_norm": 2.114179849624634, |
|
"learning_rate": 4.935436417129781e-06, |
|
"loss": 0.1831, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 10.353754622929731, |
|
"grad_norm": 1.9474581480026245, |
|
"learning_rate": 4.902745995423342e-06, |
|
"loss": 0.1842, |
|
"step": 32200 |
|
}, |
|
{ |
|
"epoch": 10.418073645280591, |
|
"grad_norm": 2.0356032848358154, |
|
"learning_rate": 4.8700555737169014e-06, |
|
"loss": 0.1852, |
|
"step": 32400 |
|
}, |
|
{ |
|
"epoch": 10.482392667631451, |
|
"grad_norm": 2.0535342693328857, |
|
"learning_rate": 4.837365152010461e-06, |
|
"loss": 0.1866, |
|
"step": 32600 |
|
}, |
|
{ |
|
"epoch": 10.546711689982311, |
|
"grad_norm": 2.2602412700653076, |
|
"learning_rate": 4.804838182412553e-06, |
|
"loss": 0.185, |
|
"step": 32800 |
|
}, |
|
{ |
|
"epoch": 10.611030712333173, |
|
"grad_norm": 2.098829507827759, |
|
"learning_rate": 4.772147760706114e-06, |
|
"loss": 0.1844, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 10.675349734684033, |
|
"grad_norm": 2.0167388916015625, |
|
"learning_rate": 4.7394573389996735e-06, |
|
"loss": 0.1871, |
|
"step": 33200 |
|
}, |
|
{ |
|
"epoch": 10.739668757034893, |
|
"grad_norm": 2.245697259902954, |
|
"learning_rate": 4.706766917293233e-06, |
|
"loss": 0.1866, |
|
"step": 33400 |
|
}, |
|
{ |
|
"epoch": 10.803987779385753, |
|
"grad_norm": 1.9759703874588013, |
|
"learning_rate": 4.674076495586794e-06, |
|
"loss": 0.1849, |
|
"step": 33600 |
|
}, |
|
{ |
|
"epoch": 10.868306801736614, |
|
"grad_norm": 2.1898162364959717, |
|
"learning_rate": 4.6413860738803535e-06, |
|
"loss": 0.1856, |
|
"step": 33800 |
|
}, |
|
{ |
|
"epoch": 10.932625824087474, |
|
"grad_norm": 2.0218801498413086, |
|
"learning_rate": 4.608695652173913e-06, |
|
"loss": 0.1847, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 10.996944846438334, |
|
"grad_norm": 1.9428986310958862, |
|
"learning_rate": 4.576005230467474e-06, |
|
"loss": 0.1863, |
|
"step": 34200 |
|
}, |
|
{ |
|
"epoch": 11.061103071233317, |
|
"grad_norm": 1.9156979322433472, |
|
"learning_rate": 4.543314808761033e-06, |
|
"loss": 0.1788, |
|
"step": 34400 |
|
}, |
|
{ |
|
"epoch": 11.125422093584177, |
|
"grad_norm": 2.0453121662139893, |
|
"learning_rate": 4.510624387054593e-06, |
|
"loss": 0.1817, |
|
"step": 34600 |
|
}, |
|
{ |
|
"epoch": 11.189741115935037, |
|
"grad_norm": 1.9229934215545654, |
|
"learning_rate": 4.477933965348154e-06, |
|
"loss": 0.1808, |
|
"step": 34800 |
|
}, |
|
{ |
|
"epoch": 11.254060138285897, |
|
"grad_norm": 2.319345235824585, |
|
"learning_rate": 4.445406995750246e-06, |
|
"loss": 0.1793, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 11.254060138285897, |
|
"eval_loss": 0.20413178205490112, |
|
"eval_runtime": 6186.7151, |
|
"eval_samples_per_second": 4.708, |
|
"eval_steps_per_second": 0.589, |
|
"eval_wer": 29.14897509945317, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 11.31837916063676, |
|
"grad_norm": 2.07478404045105, |
|
"learning_rate": 4.4127165740438055e-06, |
|
"loss": 0.1798, |
|
"step": 35200 |
|
}, |
|
{ |
|
"epoch": 11.38269818298762, |
|
"grad_norm": 2.0194761753082275, |
|
"learning_rate": 4.380026152337365e-06, |
|
"loss": 0.1792, |
|
"step": 35400 |
|
}, |
|
{ |
|
"epoch": 11.44701720533848, |
|
"grad_norm": 1.903509497642517, |
|
"learning_rate": 4.347335730630926e-06, |
|
"loss": 0.1796, |
|
"step": 35600 |
|
}, |
|
{ |
|
"epoch": 11.51133622768934, |
|
"grad_norm": 2.0315921306610107, |
|
"learning_rate": 4.314645308924485e-06, |
|
"loss": 0.1819, |
|
"step": 35800 |
|
}, |
|
{ |
|
"epoch": 11.5756552500402, |
|
"grad_norm": 2.0309042930603027, |
|
"learning_rate": 4.281954887218046e-06, |
|
"loss": 0.1805, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 11.63997427239106, |
|
"grad_norm": 2.0719332695007324, |
|
"learning_rate": 4.249264465511606e-06, |
|
"loss": 0.1807, |
|
"step": 36200 |
|
}, |
|
{ |
|
"epoch": 11.70429329474192, |
|
"grad_norm": 2.17317271232605, |
|
"learning_rate": 4.216574043805165e-06, |
|
"loss": 0.1832, |
|
"step": 36400 |
|
}, |
|
{ |
|
"epoch": 11.76861231709278, |
|
"grad_norm": 2.0271966457366943, |
|
"learning_rate": 4.183883622098726e-06, |
|
"loss": 0.1804, |
|
"step": 36600 |
|
}, |
|
{ |
|
"epoch": 11.83293133944364, |
|
"grad_norm": 2.235299825668335, |
|
"learning_rate": 4.151193200392286e-06, |
|
"loss": 0.1826, |
|
"step": 36800 |
|
}, |
|
{ |
|
"epoch": 11.897250361794502, |
|
"grad_norm": 1.8323442935943604, |
|
"learning_rate": 4.118666230794378e-06, |
|
"loss": 0.1822, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 11.961569384145362, |
|
"grad_norm": 2.1653242111206055, |
|
"learning_rate": 4.0859758090879374e-06, |
|
"loss": 0.1829, |
|
"step": 37200 |
|
}, |
|
{ |
|
"epoch": 12.025727608940343, |
|
"grad_norm": 2.1361212730407715, |
|
"learning_rate": 4.053285387381498e-06, |
|
"loss": 0.1792, |
|
"step": 37400 |
|
}, |
|
{ |
|
"epoch": 12.090046631291205, |
|
"grad_norm": 2.1540911197662354, |
|
"learning_rate": 4.020594965675058e-06, |
|
"loss": 0.1779, |
|
"step": 37600 |
|
}, |
|
{ |
|
"epoch": 12.154365653642065, |
|
"grad_norm": 2.157705783843994, |
|
"learning_rate": 3.987904543968617e-06, |
|
"loss": 0.1754, |
|
"step": 37800 |
|
}, |
|
{ |
|
"epoch": 12.218684675992925, |
|
"grad_norm": 2.100783586502075, |
|
"learning_rate": 3.955214122262178e-06, |
|
"loss": 0.1761, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 12.283003698343785, |
|
"grad_norm": 1.9860684871673584, |
|
"learning_rate": 3.922523700555738e-06, |
|
"loss": 0.178, |
|
"step": 38200 |
|
}, |
|
{ |
|
"epoch": 12.347322720694645, |
|
"grad_norm": 2.138315200805664, |
|
"learning_rate": 3.889833278849297e-06, |
|
"loss": 0.1769, |
|
"step": 38400 |
|
}, |
|
{ |
|
"epoch": 12.411641743045505, |
|
"grad_norm": 1.8979172706604004, |
|
"learning_rate": 3.857142857142858e-06, |
|
"loss": 0.1761, |
|
"step": 38600 |
|
}, |
|
{ |
|
"epoch": 12.475960765396366, |
|
"grad_norm": 2.144052505493164, |
|
"learning_rate": 3.8244524354364175e-06, |
|
"loss": 0.1772, |
|
"step": 38800 |
|
}, |
|
{ |
|
"epoch": 12.540279787747226, |
|
"grad_norm": 2.2078232765197754, |
|
"learning_rate": 3.7919254658385097e-06, |
|
"loss": 0.1784, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 12.604598810098086, |
|
"grad_norm": 2.0689291954040527, |
|
"learning_rate": 3.75923504413207e-06, |
|
"loss": 0.179, |
|
"step": 39200 |
|
}, |
|
{ |
|
"epoch": 12.668917832448948, |
|
"grad_norm": 2.1173605918884277, |
|
"learning_rate": 3.7265446224256295e-06, |
|
"loss": 0.1761, |
|
"step": 39400 |
|
}, |
|
{ |
|
"epoch": 12.733236854799808, |
|
"grad_norm": 2.0470852851867676, |
|
"learning_rate": 3.6938542007191896e-06, |
|
"loss": 0.1791, |
|
"step": 39600 |
|
}, |
|
{ |
|
"epoch": 12.797555877150668, |
|
"grad_norm": 2.237996816635132, |
|
"learning_rate": 3.6611637790127497e-06, |
|
"loss": 0.1791, |
|
"step": 39800 |
|
}, |
|
{ |
|
"epoch": 12.861874899501528, |
|
"grad_norm": 2.107485055923462, |
|
"learning_rate": 3.62847335730631e-06, |
|
"loss": 0.1776, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 12.861874899501528, |
|
"eval_loss": 0.20320001244544983, |
|
"eval_runtime": 5730.0624, |
|
"eval_samples_per_second": 5.084, |
|
"eval_steps_per_second": 0.636, |
|
"eval_wer": 29.061075282609227, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 12.926193921852388, |
|
"grad_norm": 2.1215012073516846, |
|
"learning_rate": 3.5957829355998696e-06, |
|
"loss": 0.1792, |
|
"step": 40200 |
|
}, |
|
{ |
|
"epoch": 12.990512944203248, |
|
"grad_norm": 2.1944947242736816, |
|
"learning_rate": 3.5630925138934297e-06, |
|
"loss": 0.177, |
|
"step": 40400 |
|
}, |
|
{ |
|
"epoch": 13.054671168998231, |
|
"grad_norm": 2.1703405380249023, |
|
"learning_rate": 3.53040209218699e-06, |
|
"loss": 0.1754, |
|
"step": 40600 |
|
}, |
|
{ |
|
"epoch": 13.118990191349091, |
|
"grad_norm": 2.224743366241455, |
|
"learning_rate": 3.49771167048055e-06, |
|
"loss": 0.1741, |
|
"step": 40800 |
|
}, |
|
{ |
|
"epoch": 13.183309213699951, |
|
"grad_norm": 2.370253324508667, |
|
"learning_rate": 3.4650212487741096e-06, |
|
"loss": 0.1727, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 13.247628236050812, |
|
"grad_norm": 1.8502309322357178, |
|
"learning_rate": 3.4324942791762018e-06, |
|
"loss": 0.1732, |
|
"step": 41200 |
|
}, |
|
{ |
|
"epoch": 13.311947258401672, |
|
"grad_norm": 2.1553802490234375, |
|
"learning_rate": 3.399803857469762e-06, |
|
"loss": 0.1731, |
|
"step": 41400 |
|
}, |
|
{ |
|
"epoch": 13.376266280752532, |
|
"grad_norm": 2.113837242126465, |
|
"learning_rate": 3.3671134357633216e-06, |
|
"loss": 0.1743, |
|
"step": 41600 |
|
}, |
|
{ |
|
"epoch": 13.440585303103393, |
|
"grad_norm": 1.9942282438278198, |
|
"learning_rate": 3.3344230140568817e-06, |
|
"loss": 0.1742, |
|
"step": 41800 |
|
}, |
|
{ |
|
"epoch": 13.504904325454254, |
|
"grad_norm": 2.089869976043701, |
|
"learning_rate": 3.301732592350442e-06, |
|
"loss": 0.1754, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 13.569223347805114, |
|
"grad_norm": 1.8944735527038574, |
|
"learning_rate": 3.269042170644002e-06, |
|
"loss": 0.1742, |
|
"step": 42200 |
|
}, |
|
{ |
|
"epoch": 13.633542370155974, |
|
"grad_norm": 2.06595516204834, |
|
"learning_rate": 3.2363517489375616e-06, |
|
"loss": 0.1736, |
|
"step": 42400 |
|
}, |
|
{ |
|
"epoch": 13.697861392506834, |
|
"grad_norm": 2.0173680782318115, |
|
"learning_rate": 3.2036613272311218e-06, |
|
"loss": 0.1738, |
|
"step": 42600 |
|
}, |
|
{ |
|
"epoch": 13.762180414857694, |
|
"grad_norm": 2.098628282546997, |
|
"learning_rate": 3.170970905524682e-06, |
|
"loss": 0.1735, |
|
"step": 42800 |
|
}, |
|
{ |
|
"epoch": 13.826499437208554, |
|
"grad_norm": 2.226824998855591, |
|
"learning_rate": 3.138280483818241e-06, |
|
"loss": 0.1757, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 13.890818459559414, |
|
"grad_norm": 2.020033121109009, |
|
"learning_rate": 3.1057535142203337e-06, |
|
"loss": 0.1755, |
|
"step": 43200 |
|
}, |
|
{ |
|
"epoch": 13.955137481910274, |
|
"grad_norm": 2.29543399810791, |
|
"learning_rate": 3.073063092513894e-06, |
|
"loss": 0.1762, |
|
"step": 43400 |
|
}, |
|
{ |
|
"epoch": 14.019295706705257, |
|
"grad_norm": 1.9776209592819214, |
|
"learning_rate": 3.040372670807454e-06, |
|
"loss": 0.1735, |
|
"step": 43600 |
|
}, |
|
{ |
|
"epoch": 14.083614729056118, |
|
"grad_norm": 1.7997843027114868, |
|
"learning_rate": 3.0076822491010137e-06, |
|
"loss": 0.1704, |
|
"step": 43800 |
|
}, |
|
{ |
|
"epoch": 14.14793375140698, |
|
"grad_norm": 1.968531847000122, |
|
"learning_rate": 2.9749918273945738e-06, |
|
"loss": 0.1717, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 14.21225277375784, |
|
"grad_norm": 2.061990737915039, |
|
"learning_rate": 2.942301405688134e-06, |
|
"loss": 0.1726, |
|
"step": 44200 |
|
}, |
|
{ |
|
"epoch": 14.2765717961087, |
|
"grad_norm": 2.0133299827575684, |
|
"learning_rate": 2.909610983981694e-06, |
|
"loss": 0.1714, |
|
"step": 44400 |
|
}, |
|
{ |
|
"epoch": 14.34089081845956, |
|
"grad_norm": 2.0535309314727783, |
|
"learning_rate": 2.8769205622752537e-06, |
|
"loss": 0.1717, |
|
"step": 44600 |
|
}, |
|
{ |
|
"epoch": 14.40520984081042, |
|
"grad_norm": 2.1071929931640625, |
|
"learning_rate": 2.8442301405688134e-06, |
|
"loss": 0.1722, |
|
"step": 44800 |
|
}, |
|
{ |
|
"epoch": 14.46952886316128, |
|
"grad_norm": 2.0523617267608643, |
|
"learning_rate": 2.811539718862373e-06, |
|
"loss": 0.17, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 14.46952886316128, |
|
"eval_loss": 0.20234042406082153, |
|
"eval_runtime": 5780.3456, |
|
"eval_samples_per_second": 5.039, |
|
"eval_steps_per_second": 0.63, |
|
"eval_wer": 28.696460806301626, |
|
"step": 45000 |
|
} |
|
], |
|
"logging_steps": 200, |
|
"max_steps": 62180, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 5000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.4178134111256576e+20, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |