whisper-tiny-vaani-hindi / trainer_state.json
{
"best_metric": 28.696460806301626,
"best_model_checkpoint": "./whisper-tiny-hi/checkpoint-45000",
"epoch": 14.46952886316128,
"eval_steps": 5000,
"global_step": 45000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.06431902235086026,
"grad_norm": 4.596249103546143,
"learning_rate": 2.0000000000000003e-06,
"loss": 1.7191,
"step": 200
},
{
"epoch": 0.12863804470172052,
"grad_norm": 3.4318807125091553,
"learning_rate": 4.000000000000001e-06,
"loss": 0.9245,
"step": 400
},
{
"epoch": 0.1929570670525808,
"grad_norm": 3.58982253074646,
"learning_rate": 6e-06,
"loss": 0.6853,
"step": 600
},
{
"epoch": 0.25727608940344104,
"grad_norm": 3.402555227279663,
"learning_rate": 8.000000000000001e-06,
"loss": 0.5814,
"step": 800
},
{
"epoch": 0.32159511175430133,
"grad_norm": 3.178177833557129,
"learning_rate": 1e-05,
"loss": 0.5202,
"step": 1000
},
{
"epoch": 0.3859141341051616,
"grad_norm": 3.04317569732666,
"learning_rate": 9.96730957829356e-06,
"loss": 0.4719,
"step": 1200
},
{
"epoch": 0.45023315645602185,
"grad_norm": 3.090500831604004,
"learning_rate": 9.93461915658712e-06,
"loss": 0.4444,
"step": 1400
},
{
"epoch": 0.5145521788068821,
"grad_norm": 3.2675209045410156,
"learning_rate": 9.90192873488068e-06,
"loss": 0.4275,
"step": 1600
},
{
"epoch": 0.5788712011577424,
"grad_norm": 2.859715223312378,
"learning_rate": 9.869238313174241e-06,
"loss": 0.4076,
"step": 1800
},
{
"epoch": 0.6431902235086027,
"grad_norm": 2.6303207874298096,
"learning_rate": 9.836547891467801e-06,
"loss": 0.3917,
"step": 2000
},
{
"epoch": 0.707509245859463,
"grad_norm": 3.0497164726257324,
"learning_rate": 9.804020921869893e-06,
"loss": 0.381,
"step": 2200
},
{
"epoch": 0.7718282682103232,
"grad_norm": 2.6169981956481934,
"learning_rate": 9.771330500163453e-06,
"loss": 0.3719,
"step": 2400
},
{
"epoch": 0.8361472905611834,
"grad_norm": 2.708853006362915,
"learning_rate": 9.738640078457014e-06,
"loss": 0.3635,
"step": 2600
},
{
"epoch": 0.9004663129120437,
"grad_norm": 2.8617489337921143,
"learning_rate": 9.705949656750572e-06,
"loss": 0.354,
"step": 2800
},
{
"epoch": 0.964785335262904,
"grad_norm": 2.758882999420166,
"learning_rate": 9.673259235044132e-06,
"loss": 0.3474,
"step": 3000
},
{
"epoch": 1.0289435600578871,
"grad_norm": 2.680941343307495,
"learning_rate": 9.640568813337693e-06,
"loss": 0.3395,
"step": 3200
},
{
"epoch": 1.0932625824087474,
"grad_norm": 2.918921709060669,
"learning_rate": 9.607878391631253e-06,
"loss": 0.3299,
"step": 3400
},
{
"epoch": 1.1575816047596077,
"grad_norm": 2.6285266876220703,
"learning_rate": 9.575187969924813e-06,
"loss": 0.3231,
"step": 3600
},
{
"epoch": 1.221900627110468,
"grad_norm": 2.9504473209381104,
"learning_rate": 9.542497548218372e-06,
"loss": 0.3217,
"step": 3800
},
{
"epoch": 1.2862196494613283,
"grad_norm": 2.8353495597839355,
"learning_rate": 9.509807126511932e-06,
"loss": 0.32,
"step": 4000
},
{
"epoch": 1.3505386718121883,
"grad_norm": 2.4936184883117676,
"learning_rate": 9.477280156914024e-06,
"loss": 0.3141,
"step": 4200
},
{
"epoch": 1.4148576941630489,
"grad_norm": 2.6884140968322754,
"learning_rate": 9.444589735207586e-06,
"loss": 0.3086,
"step": 4400
},
{
"epoch": 1.479176716513909,
"grad_norm": 2.8961310386657715,
"learning_rate": 9.411899313501145e-06,
"loss": 0.308,
"step": 4600
},
{
"epoch": 1.5434957388647692,
"grad_norm": 2.5737783908843994,
"learning_rate": 9.379208891794705e-06,
"loss": 0.3019,
"step": 4800
},
{
"epoch": 1.6078147612156295,
"grad_norm": 2.5709033012390137,
"learning_rate": 9.346518470088265e-06,
"loss": 0.2974,
"step": 5000
},
{
"epoch": 1.6078147612156295,
"eval_loss": 0.2771373987197876,
"eval_runtime": 5695.1238,
"eval_samples_per_second": 5.115,
"eval_steps_per_second": 0.639,
"eval_wer": 36.94923502637938,
"step": 5000
},
{
"epoch": 1.6721337835664898,
"grad_norm": 2.3421154022216797,
"learning_rate": 9.313828048381824e-06,
"loss": 0.2959,
"step": 5200
},
{
"epoch": 1.73645280591735,
"grad_norm": 2.662564277648926,
"learning_rate": 9.281137626675386e-06,
"loss": 0.2941,
"step": 5400
},
{
"epoch": 1.8007718282682104,
"grad_norm": 2.4682416915893555,
"learning_rate": 9.248447204968944e-06,
"loss": 0.2932,
"step": 5600
},
{
"epoch": 1.8650908506190707,
"grad_norm": 2.6210873126983643,
"learning_rate": 9.215756783262505e-06,
"loss": 0.2895,
"step": 5800
},
{
"epoch": 1.9294098729699307,
"grad_norm": 2.6296238899230957,
"learning_rate": 9.183066361556065e-06,
"loss": 0.286,
"step": 6000
},
{
"epoch": 1.9937288953207912,
"grad_norm": 2.486929178237915,
"learning_rate": 9.150539391958157e-06,
"loss": 0.2846,
"step": 6200
},
{
"epoch": 2.0578871201157742,
"grad_norm": 2.202758550643921,
"learning_rate": 9.117848970251717e-06,
"loss": 0.2754,
"step": 6400
},
{
"epoch": 2.1222061424666343,
"grad_norm": 2.4207451343536377,
"learning_rate": 9.085158548545276e-06,
"loss": 0.2745,
"step": 6600
},
{
"epoch": 2.186525164817495,
"grad_norm": 2.1694822311401367,
"learning_rate": 9.052468126838838e-06,
"loss": 0.2732,
"step": 6800
},
{
"epoch": 2.250844187168355,
"grad_norm": 2.7656798362731934,
"learning_rate": 9.019777705132396e-06,
"loss": 0.2709,
"step": 7000
},
{
"epoch": 2.3151632095192154,
"grad_norm": 2.2421224117279053,
"learning_rate": 8.987087283425957e-06,
"loss": 0.2691,
"step": 7200
},
{
"epoch": 2.3794822318700755,
"grad_norm": 2.469956874847412,
"learning_rate": 8.954396861719517e-06,
"loss": 0.268,
"step": 7400
},
{
"epoch": 2.443801254220936,
"grad_norm": 2.4373421669006348,
"learning_rate": 8.921706440013077e-06,
"loss": 0.2656,
"step": 7600
},
{
"epoch": 2.508120276571796,
"grad_norm": 2.5110926628112793,
"learning_rate": 8.889016018306636e-06,
"loss": 0.2668,
"step": 7800
},
{
"epoch": 2.5724392989226565,
"grad_norm": 2.559202194213867,
"learning_rate": 8.856325596600198e-06,
"loss": 0.2623,
"step": 8000
},
{
"epoch": 2.6367583212735166,
"grad_norm": 2.6701242923736572,
"learning_rate": 8.82379862700229e-06,
"loss": 0.2642,
"step": 8200
},
{
"epoch": 2.7010773436243767,
"grad_norm": 2.3247900009155273,
"learning_rate": 8.791108205295848e-06,
"loss": 0.2621,
"step": 8400
},
{
"epoch": 2.765396365975237,
"grad_norm": 2.2806427478790283,
"learning_rate": 8.75841778358941e-06,
"loss": 0.2601,
"step": 8600
},
{
"epoch": 2.8297153883260977,
"grad_norm": 2.487159013748169,
"learning_rate": 8.725727361882969e-06,
"loss": 0.2596,
"step": 8800
},
{
"epoch": 2.8940344106769578,
"grad_norm": 2.2094194889068604,
"learning_rate": 8.693036940176529e-06,
"loss": 0.2587,
"step": 9000
},
{
"epoch": 2.958353433027818,
"grad_norm": 2.349698781967163,
"learning_rate": 8.660346518470088e-06,
"loss": 0.2569,
"step": 9200
},
{
"epoch": 3.0225116578228013,
"grad_norm": 2.1951253414154053,
"learning_rate": 8.62765609676365e-06,
"loss": 0.2507,
"step": 9400
},
{
"epoch": 3.0868306801736614,
"grad_norm": 2.2560884952545166,
"learning_rate": 8.594965675057208e-06,
"loss": 0.2469,
"step": 9600
},
{
"epoch": 3.1511497025245214,
"grad_norm": 2.3260977268218994,
"learning_rate": 8.562275253350769e-06,
"loss": 0.2457,
"step": 9800
},
{
"epoch": 3.215468724875382,
"grad_norm": 2.318385601043701,
"learning_rate": 8.529584831644329e-06,
"loss": 0.2463,
"step": 10000
},
{
"epoch": 3.215468724875382,
"eval_loss": 0.23996804654598236,
"eval_runtime": 5706.3154,
"eval_samples_per_second": 5.105,
"eval_steps_per_second": 0.638,
"eval_wer": 33.16859977100777,
"step": 10000
},
{
"epoch": 3.279787747226242,
"grad_norm": 2.278331756591797,
"learning_rate": 8.496894409937889e-06,
"loss": 0.2471,
"step": 10200
},
{
"epoch": 3.3441067695771025,
"grad_norm": 2.1796886920928955,
"learning_rate": 8.46436744033998e-06,
"loss": 0.244,
"step": 10400
},
{
"epoch": 3.4084257919279626,
"grad_norm": 2.3623690605163574,
"learning_rate": 8.43167701863354e-06,
"loss": 0.2453,
"step": 10600
},
{
"epoch": 3.472744814278823,
"grad_norm": 2.242501974105835,
"learning_rate": 8.398986596927102e-06,
"loss": 0.2436,
"step": 10800
},
{
"epoch": 3.537063836629683,
"grad_norm": 2.4463303089141846,
"learning_rate": 8.366296175220661e-06,
"loss": 0.244,
"step": 11000
},
{
"epoch": 3.6013828589805437,
"grad_norm": 2.387897253036499,
"learning_rate": 8.333605753514221e-06,
"loss": 0.2436,
"step": 11200
},
{
"epoch": 3.6657018813314037,
"grad_norm": 2.386038303375244,
"learning_rate": 8.30091533180778e-06,
"loss": 0.2431,
"step": 11400
},
{
"epoch": 3.730020903682264,
"grad_norm": 2.2631173133850098,
"learning_rate": 8.26822491010134e-06,
"loss": 0.2399,
"step": 11600
},
{
"epoch": 3.7943399260331243,
"grad_norm": 2.458087921142578,
"learning_rate": 8.2355344883949e-06,
"loss": 0.2409,
"step": 11800
},
{
"epoch": 3.858658948383985,
"grad_norm": 2.400782585144043,
"learning_rate": 8.202844066688462e-06,
"loss": 0.2396,
"step": 12000
},
{
"epoch": 3.922977970734845,
"grad_norm": 2.1437551975250244,
"learning_rate": 8.170153644982021e-06,
"loss": 0.2375,
"step": 12200
},
{
"epoch": 3.987296993085705,
"grad_norm": 2.8162059783935547,
"learning_rate": 8.137626675384113e-06,
"loss": 0.2385,
"step": 12400
},
{
"epoch": 4.051455217880688,
"grad_norm": 2.505800485610962,
"learning_rate": 8.104936253677673e-06,
"loss": 0.2317,
"step": 12600
},
{
"epoch": 4.1157742402315485,
"grad_norm": 2.2435059547424316,
"learning_rate": 8.072245831971233e-06,
"loss": 0.2301,
"step": 12800
},
{
"epoch": 4.1800932625824085,
"grad_norm": 2.208909034729004,
"learning_rate": 8.039555410264793e-06,
"loss": 0.2305,
"step": 13000
},
{
"epoch": 4.244412284933269,
"grad_norm": 2.0333073139190674,
"learning_rate": 8.006864988558352e-06,
"loss": 0.2283,
"step": 13200
},
{
"epoch": 4.30873130728413,
"grad_norm": 2.437255620956421,
"learning_rate": 7.974174566851914e-06,
"loss": 0.2285,
"step": 13400
},
{
"epoch": 4.37305032963499,
"grad_norm": 2.1313934326171875,
"learning_rate": 7.941484145145473e-06,
"loss": 0.2268,
"step": 13600
},
{
"epoch": 4.43736935198585,
"grad_norm": 2.3695502281188965,
"learning_rate": 7.908793723439033e-06,
"loss": 0.2281,
"step": 13800
},
{
"epoch": 4.50168837433671,
"grad_norm": 2.3714799880981445,
"learning_rate": 7.876103301732593e-06,
"loss": 0.2285,
"step": 14000
},
{
"epoch": 4.566007396687571,
"grad_norm": 2.148329019546509,
"learning_rate": 7.843412880026154e-06,
"loss": 0.2285,
"step": 14200
},
{
"epoch": 4.630326419038431,
"grad_norm": 2.6452717781066895,
"learning_rate": 7.810885910428245e-06,
"loss": 0.2286,
"step": 14400
},
{
"epoch": 4.694645441389291,
"grad_norm": 2.203331470489502,
"learning_rate": 7.778195488721804e-06,
"loss": 0.2262,
"step": 14600
},
{
"epoch": 4.758964463740151,
"grad_norm": 2.239518642425537,
"learning_rate": 7.745505067015366e-06,
"loss": 0.2255,
"step": 14800
},
{
"epoch": 4.823283486091011,
"grad_norm": 2.2322895526885986,
"learning_rate": 7.712814645308925e-06,
"loss": 0.2284,
"step": 15000
},
{
"epoch": 4.823283486091011,
"eval_loss": 0.22340841591358185,
"eval_runtime": 6283.1507,
"eval_samples_per_second": 4.636,
"eval_steps_per_second": 0.58,
"eval_wer": 31.49793737232363,
"step": 15000
},
{
"epoch": 4.887602508441872,
"grad_norm": 2.171865224838257,
"learning_rate": 7.680124223602485e-06,
"loss": 0.2286,
"step": 15200
},
{
"epoch": 4.951921530792732,
"grad_norm": 2.2217211723327637,
"learning_rate": 7.647433801896045e-06,
"loss": 0.2237,
"step": 15400
},
{
"epoch": 5.0160797555877155,
"grad_norm": 2.2957966327667236,
"learning_rate": 7.614743380189605e-06,
"loss": 0.2247,
"step": 15600
},
{
"epoch": 5.0803987779385755,
"grad_norm": 2.2841222286224365,
"learning_rate": 7.582052958483165e-06,
"loss": 0.2184,
"step": 15800
},
{
"epoch": 5.144717800289436,
"grad_norm": 2.9461894035339355,
"learning_rate": 7.5493625367767255e-06,
"loss": 0.2176,
"step": 16000
},
{
"epoch": 5.209036822640296,
"grad_norm": 2.2747585773468018,
"learning_rate": 7.516672115070285e-06,
"loss": 0.2171,
"step": 16200
},
{
"epoch": 5.273355844991156,
"grad_norm": 2.220026969909668,
"learning_rate": 7.4841451454723765e-06,
"loss": 0.217,
"step": 16400
},
{
"epoch": 5.337674867342017,
"grad_norm": 2.335299253463745,
"learning_rate": 7.451454723765937e-06,
"loss": 0.218,
"step": 16600
},
{
"epoch": 5.401993889692877,
"grad_norm": 2.2069528102874756,
"learning_rate": 7.418764302059497e-06,
"loss": 0.2172,
"step": 16800
},
{
"epoch": 5.466312912043737,
"grad_norm": 2.500458002090454,
"learning_rate": 7.386073880353057e-06,
"loss": 0.2174,
"step": 17000
},
{
"epoch": 5.530631934394597,
"grad_norm": 2.2738373279571533,
"learning_rate": 7.353383458646617e-06,
"loss": 0.2186,
"step": 17200
},
{
"epoch": 5.594950956745458,
"grad_norm": 2.0394363403320312,
"learning_rate": 7.3206930369401776e-06,
"loss": 0.2168,
"step": 17400
},
{
"epoch": 5.659269979096318,
"grad_norm": 2.0548949241638184,
"learning_rate": 7.288002615233737e-06,
"loss": 0.2167,
"step": 17600
},
{
"epoch": 5.723589001447178,
"grad_norm": 2.0610129833221436,
"learning_rate": 7.255312193527298e-06,
"loss": 0.2154,
"step": 17800
},
{
"epoch": 5.787908023798038,
"grad_norm": 2.0888965129852295,
"learning_rate": 7.222621771820857e-06,
"loss": 0.2157,
"step": 18000
},
{
"epoch": 5.852227046148899,
"grad_norm": 2.3282668590545654,
"learning_rate": 7.189931350114417e-06,
"loss": 0.2162,
"step": 18200
},
{
"epoch": 5.916546068499759,
"grad_norm": 2.2571957111358643,
"learning_rate": 7.157404380516509e-06,
"loss": 0.2174,
"step": 18400
},
{
"epoch": 5.980865090850619,
"grad_norm": 2.201342821121216,
"learning_rate": 7.124713958810069e-06,
"loss": 0.2137,
"step": 18600
},
{
"epoch": 6.045023315645603,
"grad_norm": 2.3466155529022217,
"learning_rate": 7.09202353710363e-06,
"loss": 0.2098,
"step": 18800
},
{
"epoch": 6.109342337996463,
"grad_norm": 2.0573270320892334,
"learning_rate": 7.059333115397189e-06,
"loss": 0.2089,
"step": 19000
},
{
"epoch": 6.173661360347323,
"grad_norm": 2.026381492614746,
"learning_rate": 7.02664269369075e-06,
"loss": 0.2085,
"step": 19200
},
{
"epoch": 6.237980382698183,
"grad_norm": 2.2646751403808594,
"learning_rate": 6.993952271984309e-06,
"loss": 0.2074,
"step": 19400
},
{
"epoch": 6.302299405049043,
"grad_norm": 2.0940823554992676,
"learning_rate": 6.961261850277869e-06,
"loss": 0.2086,
"step": 19600
},
{
"epoch": 6.366618427399904,
"grad_norm": 2.2303521633148193,
"learning_rate": 6.928571428571429e-06,
"loss": 0.2096,
"step": 19800
},
{
"epoch": 6.430937449750764,
"grad_norm": 2.2108261585235596,
"learning_rate": 6.8958810068649895e-06,
"loss": 0.2089,
"step": 20000
},
{
"epoch": 6.430937449750764,
"eval_loss": 0.2148449569940567,
"eval_runtime": 6212.1239,
"eval_samples_per_second": 4.689,
"eval_steps_per_second": 0.586,
"eval_wer": 30.543488717322866,
"step": 20000
},
{
"epoch": 6.495256472101624,
"grad_norm": 2.3403568267822266,
"learning_rate": 6.863190585158549e-06,
"loss": 0.2096,
"step": 20200
},
{
"epoch": 6.559575494452484,
"grad_norm": 2.071211576461792,
"learning_rate": 6.830663615560641e-06,
"loss": 0.2076,
"step": 20400
},
{
"epoch": 6.623894516803345,
"grad_norm": 2.031785488128662,
"learning_rate": 6.797973193854202e-06,
"loss": 0.2075,
"step": 20600
},
{
"epoch": 6.688213539154205,
"grad_norm": 2.1697540283203125,
"learning_rate": 6.765282772147761e-06,
"loss": 0.2086,
"step": 20800
},
{
"epoch": 6.752532561505065,
"grad_norm": 2.1642816066741943,
"learning_rate": 6.732592350441321e-06,
"loss": 0.2094,
"step": 21000
},
{
"epoch": 6.816851583855925,
"grad_norm": 2.093594789505005,
"learning_rate": 6.699901928734881e-06,
"loss": 0.2065,
"step": 21200
},
{
"epoch": 6.881170606206785,
"grad_norm": 2.0487377643585205,
"learning_rate": 6.6672115070284415e-06,
"loss": 0.2074,
"step": 21400
},
{
"epoch": 6.945489628557646,
"grad_norm": 2.0511105060577393,
"learning_rate": 6.634521085322001e-06,
"loss": 0.2072,
"step": 21600
},
{
"epoch": 7.009647853352629,
"grad_norm": 2.1460208892822266,
"learning_rate": 6.601830663615562e-06,
"loss": 0.204,
"step": 21800
},
{
"epoch": 7.07396687570349,
"grad_norm": 2.093196392059326,
"learning_rate": 6.569140241909121e-06,
"loss": 0.2013,
"step": 22000
},
{
"epoch": 7.13828589805435,
"grad_norm": 2.342313766479492,
"learning_rate": 6.536449820202682e-06,
"loss": 0.1994,
"step": 22200
},
{
"epoch": 7.20260492040521,
"grad_norm": 2.05419921875,
"learning_rate": 6.503759398496241e-06,
"loss": 0.2023,
"step": 22400
},
{
"epoch": 7.26692394275607,
"grad_norm": 2.248352289199829,
"learning_rate": 6.471232428898333e-06,
"loss": 0.2024,
"step": 22600
},
{
"epoch": 7.33124296510693,
"grad_norm": 2.308692216873169,
"learning_rate": 6.4385420071918935e-06,
"loss": 0.2007,
"step": 22800
},
{
"epoch": 7.395561987457791,
"grad_norm": 2.0985047817230225,
"learning_rate": 6.405851585485453e-06,
"loss": 0.1989,
"step": 23000
},
{
"epoch": 7.459881009808651,
"grad_norm": 2.1786727905273438,
"learning_rate": 6.373161163779014e-06,
"loss": 0.2009,
"step": 23200
},
{
"epoch": 7.524200032159511,
"grad_norm": 2.358017921447754,
"learning_rate": 6.3404707420725734e-06,
"loss": 0.2011,
"step": 23400
},
{
"epoch": 7.588519054510371,
"grad_norm": 2.221282482147217,
"learning_rate": 6.307780320366134e-06,
"loss": 0.2,
"step": 23600
},
{
"epoch": 7.652838076861232,
"grad_norm": 2.1816442012786865,
"learning_rate": 6.275089898659693e-06,
"loss": 0.1995,
"step": 23800
},
{
"epoch": 7.717157099212092,
"grad_norm": 2.1074419021606445,
"learning_rate": 6.242399476953253e-06,
"loss": 0.2008,
"step": 24000
},
{
"epoch": 7.781476121562952,
"grad_norm": 2.12418532371521,
"learning_rate": 6.209709055246813e-06,
"loss": 0.2029,
"step": 24200
},
{
"epoch": 7.845795143913812,
"grad_norm": 2.0285849571228027,
"learning_rate": 6.177018633540374e-06,
"loss": 0.2005,
"step": 24400
},
{
"epoch": 7.910114166264673,
"grad_norm": 2.1275014877319336,
"learning_rate": 6.144491663942466e-06,
"loss": 0.2014,
"step": 24600
},
{
"epoch": 7.974433188615533,
"grad_norm": 2.047292709350586,
"learning_rate": 6.1118012422360254e-06,
"loss": 0.1999,
"step": 24800
},
{
"epoch": 8.038591413410517,
"grad_norm": 2.2886574268341064,
"learning_rate": 6.079110820529586e-06,
"loss": 0.197,
"step": 25000
},
{
"epoch": 8.038591413410517,
"eval_loss": 0.20965221524238586,
"eval_runtime": 6307.1939,
"eval_samples_per_second": 4.619,
"eval_steps_per_second": 0.577,
"eval_wer": 29.90140507668599,
"step": 25000
},
{
"epoch": 8.102910435761377,
"grad_norm": 2.158328056335449,
"learning_rate": 6.046420398823145e-06,
"loss": 0.1942,
"step": 25200
},
{
"epoch": 8.167229458112237,
"grad_norm": 2.0762622356414795,
"learning_rate": 6.013729977116705e-06,
"loss": 0.1941,
"step": 25400
},
{
"epoch": 8.231548480463097,
"grad_norm": 2.2431387901306152,
"learning_rate": 5.981039555410265e-06,
"loss": 0.1951,
"step": 25600
},
{
"epoch": 8.295867502813957,
"grad_norm": 1.9715090990066528,
"learning_rate": 5.948349133703826e-06,
"loss": 0.1946,
"step": 25800
},
{
"epoch": 8.360186525164817,
"grad_norm": 2.1715543270111084,
"learning_rate": 5.915658711997385e-06,
"loss": 0.1949,
"step": 26000
},
{
"epoch": 8.424505547515677,
"grad_norm": 2.1786696910858154,
"learning_rate": 5.882968290290946e-06,
"loss": 0.1954,
"step": 26200
},
{
"epoch": 8.488824569866537,
"grad_norm": 2.2478559017181396,
"learning_rate": 5.8502778685845056e-06,
"loss": 0.1943,
"step": 26400
},
{
"epoch": 8.553143592217399,
"grad_norm": 2.0193216800689697,
"learning_rate": 5.817750898986597e-06,
"loss": 0.1931,
"step": 26600
},
{
"epoch": 8.61746261456826,
"grad_norm": 2.024120330810547,
"learning_rate": 5.785060477280157e-06,
"loss": 0.1944,
"step": 26800
},
{
"epoch": 8.68178163691912,
"grad_norm": 1.9868948459625244,
"learning_rate": 5.752370055573717e-06,
"loss": 0.1967,
"step": 27000
},
{
"epoch": 8.74610065926998,
"grad_norm": 2.0132243633270264,
"learning_rate": 5.719679633867278e-06,
"loss": 0.194,
"step": 27200
},
{
"epoch": 8.81041968162084,
"grad_norm": 2.1960537433624268,
"learning_rate": 5.686989212160837e-06,
"loss": 0.1945,
"step": 27400
},
{
"epoch": 8.8747387039717,
"grad_norm": 2.118748903274536,
"learning_rate": 5.654298790454398e-06,
"loss": 0.1943,
"step": 27600
},
{
"epoch": 8.93905772632256,
"grad_norm": 2.0620296001434326,
"learning_rate": 5.621608368747958e-06,
"loss": 0.1948,
"step": 27800
},
{
"epoch": 9.003215951117543,
"grad_norm": 2.0099258422851562,
"learning_rate": 5.588917947041518e-06,
"loss": 0.1953,
"step": 28000
},
{
"epoch": 9.067534973468403,
"grad_norm": 2.1706490516662598,
"learning_rate": 5.556227525335077e-06,
"loss": 0.187,
"step": 28200
},
{
"epoch": 9.131853995819263,
"grad_norm": 1.989235758781433,
"learning_rate": 5.523537103628637e-06,
"loss": 0.1896,
"step": 28400
},
{
"epoch": 9.196173018170123,
"grad_norm": 2.4199020862579346,
"learning_rate": 5.49101013403073e-06,
"loss": 0.1889,
"step": 28600
},
{
"epoch": 9.260492040520983,
"grad_norm": 2.318398952484131,
"learning_rate": 5.458319712324289e-06,
"loss": 0.1899,
"step": 28800
},
{
"epoch": 9.324811062871845,
"grad_norm": 2.014559745788574,
"learning_rate": 5.42562929061785e-06,
"loss": 0.1899,
"step": 29000
},
{
"epoch": 9.389130085222705,
"grad_norm": 2.0770111083984375,
"learning_rate": 5.39293886891141e-06,
"loss": 0.1904,
"step": 29200
},
{
"epoch": 9.453449107573565,
"grad_norm": 2.1191565990448,
"learning_rate": 5.36024844720497e-06,
"loss": 0.1903,
"step": 29400
},
{
"epoch": 9.517768129924425,
"grad_norm": 1.8838344812393188,
"learning_rate": 5.327558025498529e-06,
"loss": 0.191,
"step": 29600
},
{
"epoch": 9.582087152275285,
"grad_norm": 2.064694404602051,
"learning_rate": 5.2948676037920895e-06,
"loss": 0.189,
"step": 29800
},
{
"epoch": 9.646406174626145,
"grad_norm": 2.2259907722473145,
"learning_rate": 5.262177182085649e-06,
"loss": 0.1896,
"step": 30000
},
{
"epoch": 9.646406174626145,
"eval_loss": 0.20603837072849274,
"eval_runtime": 6469.2735,
"eval_samples_per_second": 4.503,
"eval_steps_per_second": 0.563,
"eval_wer": 29.357595694795236,
"step": 30000
},
{
"epoch": 9.710725196977005,
"grad_norm": 2.174774169921875,
"learning_rate": 5.229486760379209e-06,
"loss": 0.1898,
"step": 30200
},
{
"epoch": 9.775044219327865,
"grad_norm": 2.170811414718628,
"learning_rate": 5.1967963386727695e-06,
"loss": 0.1885,
"step": 30400
},
{
"epoch": 9.839363241678726,
"grad_norm": 2.454228401184082,
"learning_rate": 5.164105916966329e-06,
"loss": 0.1889,
"step": 30600
},
{
"epoch": 9.903682264029587,
"grad_norm": 2.278226613998413,
"learning_rate": 5.131578947368422e-06,
"loss": 0.1891,
"step": 30800
},
{
"epoch": 9.968001286380447,
"grad_norm": 2.151634931564331,
"learning_rate": 5.098888525661981e-06,
"loss": 0.1881,
"step": 31000
},
{
"epoch": 10.032159511175431,
"grad_norm": 2.1410274505615234,
"learning_rate": 5.0661981039555416e-06,
"loss": 0.1861,
"step": 31200
},
{
"epoch": 10.096478533526291,
"grad_norm": 2.1889536380767822,
"learning_rate": 5.033507682249101e-06,
"loss": 0.184,
"step": 31400
},
{
"epoch": 10.160797555877151,
"grad_norm": 2.2102479934692383,
"learning_rate": 5.000817260542662e-06,
"loss": 0.1849,
"step": 31600
},
{
"epoch": 10.225116578228011,
"grad_norm": 2.0018393993377686,
"learning_rate": 4.9681268388362215e-06,
"loss": 0.1833,
"step": 31800
},
{
"epoch": 10.289435600578871,
"grad_norm": 2.114179849624634,
"learning_rate": 4.935436417129781e-06,
"loss": 0.1831,
"step": 32000
},
{
"epoch": 10.353754622929731,
"grad_norm": 1.9474581480026245,
"learning_rate": 4.902745995423342e-06,
"loss": 0.1842,
"step": 32200
},
{
"epoch": 10.418073645280591,
"grad_norm": 2.0356032848358154,
"learning_rate": 4.8700555737169014e-06,
"loss": 0.1852,
"step": 32400
},
{
"epoch": 10.482392667631451,
"grad_norm": 2.0535342693328857,
"learning_rate": 4.837365152010461e-06,
"loss": 0.1866,
"step": 32600
},
{
"epoch": 10.546711689982311,
"grad_norm": 2.2602412700653076,
"learning_rate": 4.804838182412553e-06,
"loss": 0.185,
"step": 32800
},
{
"epoch": 10.611030712333173,
"grad_norm": 2.098829507827759,
"learning_rate": 4.772147760706114e-06,
"loss": 0.1844,
"step": 33000
},
{
"epoch": 10.675349734684033,
"grad_norm": 2.0167388916015625,
"learning_rate": 4.7394573389996735e-06,
"loss": 0.1871,
"step": 33200
},
{
"epoch": 10.739668757034893,
"grad_norm": 2.245697259902954,
"learning_rate": 4.706766917293233e-06,
"loss": 0.1866,
"step": 33400
},
{
"epoch": 10.803987779385753,
"grad_norm": 1.9759703874588013,
"learning_rate": 4.674076495586794e-06,
"loss": 0.1849,
"step": 33600
},
{
"epoch": 10.868306801736614,
"grad_norm": 2.1898162364959717,
"learning_rate": 4.6413860738803535e-06,
"loss": 0.1856,
"step": 33800
},
{
"epoch": 10.932625824087474,
"grad_norm": 2.0218801498413086,
"learning_rate": 4.608695652173913e-06,
"loss": 0.1847,
"step": 34000
},
{
"epoch": 10.996944846438334,
"grad_norm": 1.9428986310958862,
"learning_rate": 4.576005230467474e-06,
"loss": 0.1863,
"step": 34200
},
{
"epoch": 11.061103071233317,
"grad_norm": 1.9156979322433472,
"learning_rate": 4.543314808761033e-06,
"loss": 0.1788,
"step": 34400
},
{
"epoch": 11.125422093584177,
"grad_norm": 2.0453121662139893,
"learning_rate": 4.510624387054593e-06,
"loss": 0.1817,
"step": 34600
},
{
"epoch": 11.189741115935037,
"grad_norm": 1.9229934215545654,
"learning_rate": 4.477933965348154e-06,
"loss": 0.1808,
"step": 34800
},
{
"epoch": 11.254060138285897,
"grad_norm": 2.319345235824585,
"learning_rate": 4.445406995750246e-06,
"loss": 0.1793,
"step": 35000
},
{
"epoch": 11.254060138285897,
"eval_loss": 0.20413178205490112,
"eval_runtime": 6186.7151,
"eval_samples_per_second": 4.708,
"eval_steps_per_second": 0.589,
"eval_wer": 29.14897509945317,
"step": 35000
},
{
"epoch": 11.31837916063676,
"grad_norm": 2.07478404045105,
"learning_rate": 4.4127165740438055e-06,
"loss": 0.1798,
"step": 35200
},
{
"epoch": 11.38269818298762,
"grad_norm": 2.0194761753082275,
"learning_rate": 4.380026152337365e-06,
"loss": 0.1792,
"step": 35400
},
{
"epoch": 11.44701720533848,
"grad_norm": 1.903509497642517,
"learning_rate": 4.347335730630926e-06,
"loss": 0.1796,
"step": 35600
},
{
"epoch": 11.51133622768934,
"grad_norm": 2.0315921306610107,
"learning_rate": 4.314645308924485e-06,
"loss": 0.1819,
"step": 35800
},
{
"epoch": 11.5756552500402,
"grad_norm": 2.0309042930603027,
"learning_rate": 4.281954887218046e-06,
"loss": 0.1805,
"step": 36000
},
{
"epoch": 11.63997427239106,
"grad_norm": 2.0719332695007324,
"learning_rate": 4.249264465511606e-06,
"loss": 0.1807,
"step": 36200
},
{
"epoch": 11.70429329474192,
"grad_norm": 2.17317271232605,
"learning_rate": 4.216574043805165e-06,
"loss": 0.1832,
"step": 36400
},
{
"epoch": 11.76861231709278,
"grad_norm": 2.0271966457366943,
"learning_rate": 4.183883622098726e-06,
"loss": 0.1804,
"step": 36600
},
{
"epoch": 11.83293133944364,
"grad_norm": 2.235299825668335,
"learning_rate": 4.151193200392286e-06,
"loss": 0.1826,
"step": 36800
},
{
"epoch": 11.897250361794502,
"grad_norm": 1.8323442935943604,
"learning_rate": 4.118666230794378e-06,
"loss": 0.1822,
"step": 37000
},
{
"epoch": 11.961569384145362,
"grad_norm": 2.1653242111206055,
"learning_rate": 4.0859758090879374e-06,
"loss": 0.1829,
"step": 37200
},
{
"epoch": 12.025727608940343,
"grad_norm": 2.1361212730407715,
"learning_rate": 4.053285387381498e-06,
"loss": 0.1792,
"step": 37400
},
{
"epoch": 12.090046631291205,
"grad_norm": 2.1540911197662354,
"learning_rate": 4.020594965675058e-06,
"loss": 0.1779,
"step": 37600
},
{
"epoch": 12.154365653642065,
"grad_norm": 2.157705783843994,
"learning_rate": 3.987904543968617e-06,
"loss": 0.1754,
"step": 37800
},
{
"epoch": 12.218684675992925,
"grad_norm": 2.100783586502075,
"learning_rate": 3.955214122262178e-06,
"loss": 0.1761,
"step": 38000
},
{
"epoch": 12.283003698343785,
"grad_norm": 1.9860684871673584,
"learning_rate": 3.922523700555738e-06,
"loss": 0.178,
"step": 38200
},
{
"epoch": 12.347322720694645,
"grad_norm": 2.138315200805664,
"learning_rate": 3.889833278849297e-06,
"loss": 0.1769,
"step": 38400
},
{
"epoch": 12.411641743045505,
"grad_norm": 1.8979172706604004,
"learning_rate": 3.857142857142858e-06,
"loss": 0.1761,
"step": 38600
},
{
"epoch": 12.475960765396366,
"grad_norm": 2.144052505493164,
"learning_rate": 3.8244524354364175e-06,
"loss": 0.1772,
"step": 38800
},
{
"epoch": 12.540279787747226,
"grad_norm": 2.2078232765197754,
"learning_rate": 3.7919254658385097e-06,
"loss": 0.1784,
"step": 39000
},
{
"epoch": 12.604598810098086,
"grad_norm": 2.0689291954040527,
"learning_rate": 3.75923504413207e-06,
"loss": 0.179,
"step": 39200
},
{
"epoch": 12.668917832448948,
"grad_norm": 2.1173605918884277,
"learning_rate": 3.7265446224256295e-06,
"loss": 0.1761,
"step": 39400
},
{
"epoch": 12.733236854799808,
"grad_norm": 2.0470852851867676,
"learning_rate": 3.6938542007191896e-06,
"loss": 0.1791,
"step": 39600
},
{
"epoch": 12.797555877150668,
"grad_norm": 2.237996816635132,
"learning_rate": 3.6611637790127497e-06,
"loss": 0.1791,
"step": 39800
},
{
"epoch": 12.861874899501528,
"grad_norm": 2.107485055923462,
"learning_rate": 3.62847335730631e-06,
"loss": 0.1776,
"step": 40000
},
{
"epoch": 12.861874899501528,
"eval_loss": 0.20320001244544983,
"eval_runtime": 5730.0624,
"eval_samples_per_second": 5.084,
"eval_steps_per_second": 0.636,
"eval_wer": 29.061075282609227,
"step": 40000
},
{
"epoch": 12.926193921852388,
"grad_norm": 2.1215012073516846,
"learning_rate": 3.5957829355998696e-06,
"loss": 0.1792,
"step": 40200
},
{
"epoch": 12.990512944203248,
"grad_norm": 2.1944947242736816,
"learning_rate": 3.5630925138934297e-06,
"loss": 0.177,
"step": 40400
},
{
"epoch": 13.054671168998231,
"grad_norm": 2.1703405380249023,
"learning_rate": 3.53040209218699e-06,
"loss": 0.1754,
"step": 40600
},
{
"epoch": 13.118990191349091,
"grad_norm": 2.224743366241455,
"learning_rate": 3.49771167048055e-06,
"loss": 0.1741,
"step": 40800
},
{
"epoch": 13.183309213699951,
"grad_norm": 2.370253324508667,
"learning_rate": 3.4650212487741096e-06,
"loss": 0.1727,
"step": 41000
},
{
"epoch": 13.247628236050812,
"grad_norm": 1.8502309322357178,
"learning_rate": 3.4324942791762018e-06,
"loss": 0.1732,
"step": 41200
},
{
"epoch": 13.311947258401672,
"grad_norm": 2.1553802490234375,
"learning_rate": 3.399803857469762e-06,
"loss": 0.1731,
"step": 41400
},
{
"epoch": 13.376266280752532,
"grad_norm": 2.113837242126465,
"learning_rate": 3.3671134357633216e-06,
"loss": 0.1743,
"step": 41600
},
{
"epoch": 13.440585303103393,
"grad_norm": 1.9942282438278198,
"learning_rate": 3.3344230140568817e-06,
"loss": 0.1742,
"step": 41800
},
{
"epoch": 13.504904325454254,
"grad_norm": 2.089869976043701,
"learning_rate": 3.301732592350442e-06,
"loss": 0.1754,
"step": 42000
},
{
"epoch": 13.569223347805114,
"grad_norm": 1.8944735527038574,
"learning_rate": 3.269042170644002e-06,
"loss": 0.1742,
"step": 42200
},
{
"epoch": 13.633542370155974,
"grad_norm": 2.06595516204834,
"learning_rate": 3.2363517489375616e-06,
"loss": 0.1736,
"step": 42400
},
{
"epoch": 13.697861392506834,
"grad_norm": 2.0173680782318115,
"learning_rate": 3.2036613272311218e-06,
"loss": 0.1738,
"step": 42600
},
{
"epoch": 13.762180414857694,
"grad_norm": 2.098628282546997,
"learning_rate": 3.170970905524682e-06,
"loss": 0.1735,
"step": 42800
},
{
"epoch": 13.826499437208554,
"grad_norm": 2.226824998855591,
"learning_rate": 3.138280483818241e-06,
"loss": 0.1757,
"step": 43000
},
{
"epoch": 13.890818459559414,
"grad_norm": 2.020033121109009,
"learning_rate": 3.1057535142203337e-06,
"loss": 0.1755,
"step": 43200
},
{
"epoch": 13.955137481910274,
"grad_norm": 2.29543399810791,
"learning_rate": 3.073063092513894e-06,
"loss": 0.1762,
"step": 43400
},
{
"epoch": 14.019295706705257,
"grad_norm": 1.9776209592819214,
"learning_rate": 3.040372670807454e-06,
"loss": 0.1735,
"step": 43600
},
{
"epoch": 14.083614729056118,
"grad_norm": 1.7997843027114868,
"learning_rate": 3.0076822491010137e-06,
"loss": 0.1704,
"step": 43800
},
{
"epoch": 14.14793375140698,
"grad_norm": 1.968531847000122,
"learning_rate": 2.9749918273945738e-06,
"loss": 0.1717,
"step": 44000
},
{
"epoch": 14.21225277375784,
"grad_norm": 2.061990737915039,
"learning_rate": 2.942301405688134e-06,
"loss": 0.1726,
"step": 44200
},
{
"epoch": 14.2765717961087,
"grad_norm": 2.0133299827575684,
"learning_rate": 2.909610983981694e-06,
"loss": 0.1714,
"step": 44400
},
{
"epoch": 14.34089081845956,
"grad_norm": 2.0535309314727783,
"learning_rate": 2.8769205622752537e-06,
"loss": 0.1717,
"step": 44600
},
{
"epoch": 14.40520984081042,
"grad_norm": 2.1071929931640625,
"learning_rate": 2.8442301405688134e-06,
"loss": 0.1722,
"step": 44800
},
{
"epoch": 14.46952886316128,
"grad_norm": 2.0523617267608643,
"learning_rate": 2.811539718862373e-06,
"loss": 0.17,
"step": 45000
},
{
"epoch": 14.46952886316128,
"eval_loss": 0.20234042406082153,
"eval_runtime": 5780.3456,
"eval_samples_per_second": 5.039,
"eval_steps_per_second": 0.63,
"eval_wer": 28.696460806301626,
"step": 45000
}
],
"logging_steps": 200,
"max_steps": 62180,
"num_input_tokens_seen": 0,
"num_train_epochs": 20,
"save_steps": 5000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.4178134111256576e+20,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}