{ "best_metric": 28.696460806301626, "best_model_checkpoint": "./whisper-tiny-hi/checkpoint-45000", "epoch": 14.46952886316128, "eval_steps": 5000, "global_step": 45000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.06431902235086026, "grad_norm": 4.596249103546143, "learning_rate": 2.0000000000000003e-06, "loss": 1.7191, "step": 200 }, { "epoch": 0.12863804470172052, "grad_norm": 3.4318807125091553, "learning_rate": 4.000000000000001e-06, "loss": 0.9245, "step": 400 }, { "epoch": 0.1929570670525808, "grad_norm": 3.58982253074646, "learning_rate": 6e-06, "loss": 0.6853, "step": 600 }, { "epoch": 0.25727608940344104, "grad_norm": 3.402555227279663, "learning_rate": 8.000000000000001e-06, "loss": 0.5814, "step": 800 }, { "epoch": 0.32159511175430133, "grad_norm": 3.178177833557129, "learning_rate": 1e-05, "loss": 0.5202, "step": 1000 }, { "epoch": 0.3859141341051616, "grad_norm": 3.04317569732666, "learning_rate": 9.96730957829356e-06, "loss": 0.4719, "step": 1200 }, { "epoch": 0.45023315645602185, "grad_norm": 3.090500831604004, "learning_rate": 9.93461915658712e-06, "loss": 0.4444, "step": 1400 }, { "epoch": 0.5145521788068821, "grad_norm": 3.2675209045410156, "learning_rate": 9.90192873488068e-06, "loss": 0.4275, "step": 1600 }, { "epoch": 0.5788712011577424, "grad_norm": 2.859715223312378, "learning_rate": 9.869238313174241e-06, "loss": 0.4076, "step": 1800 }, { "epoch": 0.6431902235086027, "grad_norm": 2.6303207874298096, "learning_rate": 9.836547891467801e-06, "loss": 0.3917, "step": 2000 }, { "epoch": 0.707509245859463, "grad_norm": 3.0497164726257324, "learning_rate": 9.804020921869893e-06, "loss": 0.381, "step": 2200 }, { "epoch": 0.7718282682103232, "grad_norm": 2.6169981956481934, "learning_rate": 9.771330500163453e-06, "loss": 0.3719, "step": 2400 }, { "epoch": 0.8361472905611834, "grad_norm": 2.708853006362915, "learning_rate": 9.738640078457014e-06, "loss": 0.3635, "step": 2600 }, { "epoch": 0.9004663129120437, "grad_norm": 2.8617489337921143, "learning_rate": 9.705949656750572e-06, "loss": 0.354, "step": 2800 }, { "epoch": 0.964785335262904, "grad_norm": 2.758882999420166, "learning_rate": 9.673259235044132e-06, "loss": 0.3474, "step": 3000 }, { "epoch": 1.0289435600578871, "grad_norm": 2.680941343307495, "learning_rate": 9.640568813337693e-06, "loss": 0.3395, "step": 3200 }, { "epoch": 1.0932625824087474, "grad_norm": 2.918921709060669, "learning_rate": 9.607878391631253e-06, "loss": 0.3299, "step": 3400 }, { "epoch": 1.1575816047596077, "grad_norm": 2.6285266876220703, "learning_rate": 9.575187969924813e-06, "loss": 0.3231, "step": 3600 }, { "epoch": 1.221900627110468, "grad_norm": 2.9504473209381104, "learning_rate": 9.542497548218372e-06, "loss": 0.3217, "step": 3800 }, { "epoch": 1.2862196494613283, "grad_norm": 2.8353495597839355, "learning_rate": 9.509807126511932e-06, "loss": 0.32, "step": 4000 }, { "epoch": 1.3505386718121883, "grad_norm": 2.4936184883117676, "learning_rate": 9.477280156914024e-06, "loss": 0.3141, "step": 4200 }, { "epoch": 1.4148576941630489, "grad_norm": 2.6884140968322754, "learning_rate": 9.444589735207586e-06, "loss": 0.3086, "step": 4400 }, { "epoch": 1.479176716513909, "grad_norm": 2.8961310386657715, "learning_rate": 9.411899313501145e-06, "loss": 0.308, "step": 4600 }, { "epoch": 1.5434957388647692, "grad_norm": 2.5737783908843994, "learning_rate": 9.379208891794705e-06, "loss": 0.3019, "step": 4800 }, { "epoch": 1.6078147612156295, "grad_norm": 2.5709033012390137, 
"learning_rate": 9.346518470088265e-06, "loss": 0.2974, "step": 5000 }, { "epoch": 1.6078147612156295, "eval_loss": 0.2771373987197876, "eval_runtime": 5695.1238, "eval_samples_per_second": 5.115, "eval_steps_per_second": 0.639, "eval_wer": 36.94923502637938, "step": 5000 }, { "epoch": 1.6721337835664898, "grad_norm": 2.3421154022216797, "learning_rate": 9.313828048381824e-06, "loss": 0.2959, "step": 5200 }, { "epoch": 1.73645280591735, "grad_norm": 2.662564277648926, "learning_rate": 9.281137626675386e-06, "loss": 0.2941, "step": 5400 }, { "epoch": 1.8007718282682104, "grad_norm": 2.4682416915893555, "learning_rate": 9.248447204968944e-06, "loss": 0.2932, "step": 5600 }, { "epoch": 1.8650908506190707, "grad_norm": 2.6210873126983643, "learning_rate": 9.215756783262505e-06, "loss": 0.2895, "step": 5800 }, { "epoch": 1.9294098729699307, "grad_norm": 2.6296238899230957, "learning_rate": 9.183066361556065e-06, "loss": 0.286, "step": 6000 }, { "epoch": 1.9937288953207912, "grad_norm": 2.486929178237915, "learning_rate": 9.150539391958157e-06, "loss": 0.2846, "step": 6200 }, { "epoch": 2.0578871201157742, "grad_norm": 2.202758550643921, "learning_rate": 9.117848970251717e-06, "loss": 0.2754, "step": 6400 }, { "epoch": 2.1222061424666343, "grad_norm": 2.4207451343536377, "learning_rate": 9.085158548545276e-06, "loss": 0.2745, "step": 6600 }, { "epoch": 2.186525164817495, "grad_norm": 2.1694822311401367, "learning_rate": 9.052468126838838e-06, "loss": 0.2732, "step": 6800 }, { "epoch": 2.250844187168355, "grad_norm": 2.7656798362731934, "learning_rate": 9.019777705132396e-06, "loss": 0.2709, "step": 7000 }, { "epoch": 2.3151632095192154, "grad_norm": 2.2421224117279053, "learning_rate": 8.987087283425957e-06, "loss": 0.2691, "step": 7200 }, { "epoch": 2.3794822318700755, "grad_norm": 2.469956874847412, "learning_rate": 8.954396861719517e-06, "loss": 0.268, "step": 7400 }, { "epoch": 2.443801254220936, "grad_norm": 2.4373421669006348, "learning_rate": 8.921706440013077e-06, "loss": 0.2656, "step": 7600 }, { "epoch": 2.508120276571796, "grad_norm": 2.5110926628112793, "learning_rate": 8.889016018306636e-06, "loss": 0.2668, "step": 7800 }, { "epoch": 2.5724392989226565, "grad_norm": 2.559202194213867, "learning_rate": 8.856325596600198e-06, "loss": 0.2623, "step": 8000 }, { "epoch": 2.6367583212735166, "grad_norm": 2.6701242923736572, "learning_rate": 8.82379862700229e-06, "loss": 0.2642, "step": 8200 }, { "epoch": 2.7010773436243767, "grad_norm": 2.3247900009155273, "learning_rate": 8.791108205295848e-06, "loss": 0.2621, "step": 8400 }, { "epoch": 2.765396365975237, "grad_norm": 2.2806427478790283, "learning_rate": 8.75841778358941e-06, "loss": 0.2601, "step": 8600 }, { "epoch": 2.8297153883260977, "grad_norm": 2.487159013748169, "learning_rate": 8.725727361882969e-06, "loss": 0.2596, "step": 8800 }, { "epoch": 2.8940344106769578, "grad_norm": 2.2094194889068604, "learning_rate": 8.693036940176529e-06, "loss": 0.2587, "step": 9000 }, { "epoch": 2.958353433027818, "grad_norm": 2.349698781967163, "learning_rate": 8.660346518470088e-06, "loss": 0.2569, "step": 9200 }, { "epoch": 3.0225116578228013, "grad_norm": 2.1951253414154053, "learning_rate": 8.62765609676365e-06, "loss": 0.2507, "step": 9400 }, { "epoch": 3.0868306801736614, "grad_norm": 2.2560884952545166, "learning_rate": 8.594965675057208e-06, "loss": 0.2469, "step": 9600 }, { "epoch": 3.1511497025245214, "grad_norm": 2.3260977268218994, "learning_rate": 8.562275253350769e-06, "loss": 0.2457, "step": 9800 }, { "epoch": 3.215468724875382, 
"grad_norm": 2.318385601043701, "learning_rate": 8.529584831644329e-06, "loss": 0.2463, "step": 10000 }, { "epoch": 3.215468724875382, "eval_loss": 0.23996804654598236, "eval_runtime": 5706.3154, "eval_samples_per_second": 5.105, "eval_steps_per_second": 0.638, "eval_wer": 33.16859977100777, "step": 10000 }, { "epoch": 3.279787747226242, "grad_norm": 2.278331756591797, "learning_rate": 8.496894409937889e-06, "loss": 0.2471, "step": 10200 }, { "epoch": 3.3441067695771025, "grad_norm": 2.1796886920928955, "learning_rate": 8.46436744033998e-06, "loss": 0.244, "step": 10400 }, { "epoch": 3.4084257919279626, "grad_norm": 2.3623690605163574, "learning_rate": 8.43167701863354e-06, "loss": 0.2453, "step": 10600 }, { "epoch": 3.472744814278823, "grad_norm": 2.242501974105835, "learning_rate": 8.398986596927102e-06, "loss": 0.2436, "step": 10800 }, { "epoch": 3.537063836629683, "grad_norm": 2.4463303089141846, "learning_rate": 8.366296175220661e-06, "loss": 0.244, "step": 11000 }, { "epoch": 3.6013828589805437, "grad_norm": 2.387897253036499, "learning_rate": 8.333605753514221e-06, "loss": 0.2436, "step": 11200 }, { "epoch": 3.6657018813314037, "grad_norm": 2.386038303375244, "learning_rate": 8.30091533180778e-06, "loss": 0.2431, "step": 11400 }, { "epoch": 3.730020903682264, "grad_norm": 2.2631173133850098, "learning_rate": 8.26822491010134e-06, "loss": 0.2399, "step": 11600 }, { "epoch": 3.7943399260331243, "grad_norm": 2.458087921142578, "learning_rate": 8.2355344883949e-06, "loss": 0.2409, "step": 11800 }, { "epoch": 3.858658948383985, "grad_norm": 2.400782585144043, "learning_rate": 8.202844066688462e-06, "loss": 0.2396, "step": 12000 }, { "epoch": 3.922977970734845, "grad_norm": 2.1437551975250244, "learning_rate": 8.170153644982021e-06, "loss": 0.2375, "step": 12200 }, { "epoch": 3.987296993085705, "grad_norm": 2.8162059783935547, "learning_rate": 8.137626675384113e-06, "loss": 0.2385, "step": 12400 }, { "epoch": 4.051455217880688, "grad_norm": 2.505800485610962, "learning_rate": 8.104936253677673e-06, "loss": 0.2317, "step": 12600 }, { "epoch": 4.1157742402315485, "grad_norm": 2.2435059547424316, "learning_rate": 8.072245831971233e-06, "loss": 0.2301, "step": 12800 }, { "epoch": 4.1800932625824085, "grad_norm": 2.208909034729004, "learning_rate": 8.039555410264793e-06, "loss": 0.2305, "step": 13000 }, { "epoch": 4.244412284933269, "grad_norm": 2.0333073139190674, "learning_rate": 8.006864988558352e-06, "loss": 0.2283, "step": 13200 }, { "epoch": 4.30873130728413, "grad_norm": 2.437255620956421, "learning_rate": 7.974174566851914e-06, "loss": 0.2285, "step": 13400 }, { "epoch": 4.37305032963499, "grad_norm": 2.1313934326171875, "learning_rate": 7.941484145145473e-06, "loss": 0.2268, "step": 13600 }, { "epoch": 4.43736935198585, "grad_norm": 2.3695502281188965, "learning_rate": 7.908793723439033e-06, "loss": 0.2281, "step": 13800 }, { "epoch": 4.50168837433671, "grad_norm": 2.3714799880981445, "learning_rate": 7.876103301732593e-06, "loss": 0.2285, "step": 14000 }, { "epoch": 4.566007396687571, "grad_norm": 2.148329019546509, "learning_rate": 7.843412880026154e-06, "loss": 0.2285, "step": 14200 }, { "epoch": 4.630326419038431, "grad_norm": 2.6452717781066895, "learning_rate": 7.810885910428245e-06, "loss": 0.2286, "step": 14400 }, { "epoch": 4.694645441389291, "grad_norm": 2.203331470489502, "learning_rate": 7.778195488721804e-06, "loss": 0.2262, "step": 14600 }, { "epoch": 4.758964463740151, "grad_norm": 2.239518642425537, "learning_rate": 7.745505067015366e-06, "loss": 0.2255, "step": 14800 
}, { "epoch": 4.823283486091011, "grad_norm": 2.2322895526885986, "learning_rate": 7.712814645308925e-06, "loss": 0.2284, "step": 15000 }, { "epoch": 4.823283486091011, "eval_loss": 0.22340841591358185, "eval_runtime": 6283.1507, "eval_samples_per_second": 4.636, "eval_steps_per_second": 0.58, "eval_wer": 31.49793737232363, "step": 15000 }, { "epoch": 4.887602508441872, "grad_norm": 2.171865224838257, "learning_rate": 7.680124223602485e-06, "loss": 0.2286, "step": 15200 }, { "epoch": 4.951921530792732, "grad_norm": 2.2217211723327637, "learning_rate": 7.647433801896045e-06, "loss": 0.2237, "step": 15400 }, { "epoch": 5.0160797555877155, "grad_norm": 2.2957966327667236, "learning_rate": 7.614743380189605e-06, "loss": 0.2247, "step": 15600 }, { "epoch": 5.0803987779385755, "grad_norm": 2.2841222286224365, "learning_rate": 7.582052958483165e-06, "loss": 0.2184, "step": 15800 }, { "epoch": 5.144717800289436, "grad_norm": 2.9461894035339355, "learning_rate": 7.5493625367767255e-06, "loss": 0.2176, "step": 16000 }, { "epoch": 5.209036822640296, "grad_norm": 2.2747585773468018, "learning_rate": 7.516672115070285e-06, "loss": 0.2171, "step": 16200 }, { "epoch": 5.273355844991156, "grad_norm": 2.220026969909668, "learning_rate": 7.4841451454723765e-06, "loss": 0.217, "step": 16400 }, { "epoch": 5.337674867342017, "grad_norm": 2.335299253463745, "learning_rate": 7.451454723765937e-06, "loss": 0.218, "step": 16600 }, { "epoch": 5.401993889692877, "grad_norm": 2.2069528102874756, "learning_rate": 7.418764302059497e-06, "loss": 0.2172, "step": 16800 }, { "epoch": 5.466312912043737, "grad_norm": 2.500458002090454, "learning_rate": 7.386073880353057e-06, "loss": 0.2174, "step": 17000 }, { "epoch": 5.530631934394597, "grad_norm": 2.2738373279571533, "learning_rate": 7.353383458646617e-06, "loss": 0.2186, "step": 17200 }, { "epoch": 5.594950956745458, "grad_norm": 2.0394363403320312, "learning_rate": 7.3206930369401776e-06, "loss": 0.2168, "step": 17400 }, { "epoch": 5.659269979096318, "grad_norm": 2.0548949241638184, "learning_rate": 7.288002615233737e-06, "loss": 0.2167, "step": 17600 }, { "epoch": 5.723589001447178, "grad_norm": 2.0610129833221436, "learning_rate": 7.255312193527298e-06, "loss": 0.2154, "step": 17800 }, { "epoch": 5.787908023798038, "grad_norm": 2.0888965129852295, "learning_rate": 7.222621771820857e-06, "loss": 0.2157, "step": 18000 }, { "epoch": 5.852227046148899, "grad_norm": 2.3282668590545654, "learning_rate": 7.189931350114417e-06, "loss": 0.2162, "step": 18200 }, { "epoch": 5.916546068499759, "grad_norm": 2.2571957111358643, "learning_rate": 7.157404380516509e-06, "loss": 0.2174, "step": 18400 }, { "epoch": 5.980865090850619, "grad_norm": 2.201342821121216, "learning_rate": 7.124713958810069e-06, "loss": 0.2137, "step": 18600 }, { "epoch": 6.045023315645603, "grad_norm": 2.3466155529022217, "learning_rate": 7.09202353710363e-06, "loss": 0.2098, "step": 18800 }, { "epoch": 6.109342337996463, "grad_norm": 2.0573270320892334, "learning_rate": 7.059333115397189e-06, "loss": 0.2089, "step": 19000 }, { "epoch": 6.173661360347323, "grad_norm": 2.026381492614746, "learning_rate": 7.02664269369075e-06, "loss": 0.2085, "step": 19200 }, { "epoch": 6.237980382698183, "grad_norm": 2.2646751403808594, "learning_rate": 6.993952271984309e-06, "loss": 0.2074, "step": 19400 }, { "epoch": 6.302299405049043, "grad_norm": 2.0940823554992676, "learning_rate": 6.961261850277869e-06, "loss": 0.2086, "step": 19600 }, { "epoch": 6.366618427399904, "grad_norm": 2.2303521633148193, "learning_rate": 
6.928571428571429e-06, "loss": 0.2096, "step": 19800 }, { "epoch": 6.430937449750764, "grad_norm": 2.2108261585235596, "learning_rate": 6.8958810068649895e-06, "loss": 0.2089, "step": 20000 }, { "epoch": 6.430937449750764, "eval_loss": 0.2148449569940567, "eval_runtime": 6212.1239, "eval_samples_per_second": 4.689, "eval_steps_per_second": 0.586, "eval_wer": 30.543488717322866, "step": 20000 }, { "epoch": 6.495256472101624, "grad_norm": 2.3403568267822266, "learning_rate": 6.863190585158549e-06, "loss": 0.2096, "step": 20200 }, { "epoch": 6.559575494452484, "grad_norm": 2.071211576461792, "learning_rate": 6.830663615560641e-06, "loss": 0.2076, "step": 20400 }, { "epoch": 6.623894516803345, "grad_norm": 2.031785488128662, "learning_rate": 6.797973193854202e-06, "loss": 0.2075, "step": 20600 }, { "epoch": 6.688213539154205, "grad_norm": 2.1697540283203125, "learning_rate": 6.765282772147761e-06, "loss": 0.2086, "step": 20800 }, { "epoch": 6.752532561505065, "grad_norm": 2.1642816066741943, "learning_rate": 6.732592350441321e-06, "loss": 0.2094, "step": 21000 }, { "epoch": 6.816851583855925, "grad_norm": 2.093594789505005, "learning_rate": 6.699901928734881e-06, "loss": 0.2065, "step": 21200 }, { "epoch": 6.881170606206785, "grad_norm": 2.0487377643585205, "learning_rate": 6.6672115070284415e-06, "loss": 0.2074, "step": 21400 }, { "epoch": 6.945489628557646, "grad_norm": 2.0511105060577393, "learning_rate": 6.634521085322001e-06, "loss": 0.2072, "step": 21600 }, { "epoch": 7.009647853352629, "grad_norm": 2.1460208892822266, "learning_rate": 6.601830663615562e-06, "loss": 0.204, "step": 21800 }, { "epoch": 7.07396687570349, "grad_norm": 2.093196392059326, "learning_rate": 6.569140241909121e-06, "loss": 0.2013, "step": 22000 }, { "epoch": 7.13828589805435, "grad_norm": 2.342313766479492, "learning_rate": 6.536449820202682e-06, "loss": 0.1994, "step": 22200 }, { "epoch": 7.20260492040521, "grad_norm": 2.05419921875, "learning_rate": 6.503759398496241e-06, "loss": 0.2023, "step": 22400 }, { "epoch": 7.26692394275607, "grad_norm": 2.248352289199829, "learning_rate": 6.471232428898333e-06, "loss": 0.2024, "step": 22600 }, { "epoch": 7.33124296510693, "grad_norm": 2.308692216873169, "learning_rate": 6.4385420071918935e-06, "loss": 0.2007, "step": 22800 }, { "epoch": 7.395561987457791, "grad_norm": 2.0985047817230225, "learning_rate": 6.405851585485453e-06, "loss": 0.1989, "step": 23000 }, { "epoch": 7.459881009808651, "grad_norm": 2.1786727905273438, "learning_rate": 6.373161163779014e-06, "loss": 0.2009, "step": 23200 }, { "epoch": 7.524200032159511, "grad_norm": 2.358017921447754, "learning_rate": 6.3404707420725734e-06, "loss": 0.2011, "step": 23400 }, { "epoch": 7.588519054510371, "grad_norm": 2.221282482147217, "learning_rate": 6.307780320366134e-06, "loss": 0.2, "step": 23600 }, { "epoch": 7.652838076861232, "grad_norm": 2.1816442012786865, "learning_rate": 6.275089898659693e-06, "loss": 0.1995, "step": 23800 }, { "epoch": 7.717157099212092, "grad_norm": 2.1074419021606445, "learning_rate": 6.242399476953253e-06, "loss": 0.2008, "step": 24000 }, { "epoch": 7.781476121562952, "grad_norm": 2.12418532371521, "learning_rate": 6.209709055246813e-06, "loss": 0.2029, "step": 24200 }, { "epoch": 7.845795143913812, "grad_norm": 2.0285849571228027, "learning_rate": 6.177018633540374e-06, "loss": 0.2005, "step": 24400 }, { "epoch": 7.910114166264673, "grad_norm": 2.1275014877319336, "learning_rate": 6.144491663942466e-06, "loss": 0.2014, "step": 24600 }, { "epoch": 7.974433188615533, "grad_norm": 
2.047292709350586, "learning_rate": 6.1118012422360254e-06, "loss": 0.1999, "step": 24800 }, { "epoch": 8.038591413410517, "grad_norm": 2.2886574268341064, "learning_rate": 6.079110820529586e-06, "loss": 0.197, "step": 25000 }, { "epoch": 8.038591413410517, "eval_loss": 0.20965221524238586, "eval_runtime": 6307.1939, "eval_samples_per_second": 4.619, "eval_steps_per_second": 0.577, "eval_wer": 29.90140507668599, "step": 25000 }, { "epoch": 8.102910435761377, "grad_norm": 2.158328056335449, "learning_rate": 6.046420398823145e-06, "loss": 0.1942, "step": 25200 }, { "epoch": 8.167229458112237, "grad_norm": 2.0762622356414795, "learning_rate": 6.013729977116705e-06, "loss": 0.1941, "step": 25400 }, { "epoch": 8.231548480463097, "grad_norm": 2.2431387901306152, "learning_rate": 5.981039555410265e-06, "loss": 0.1951, "step": 25600 }, { "epoch": 8.295867502813957, "grad_norm": 1.9715090990066528, "learning_rate": 5.948349133703826e-06, "loss": 0.1946, "step": 25800 }, { "epoch": 8.360186525164817, "grad_norm": 2.1715543270111084, "learning_rate": 5.915658711997385e-06, "loss": 0.1949, "step": 26000 }, { "epoch": 8.424505547515677, "grad_norm": 2.1786696910858154, "learning_rate": 5.882968290290946e-06, "loss": 0.1954, "step": 26200 }, { "epoch": 8.488824569866537, "grad_norm": 2.2478559017181396, "learning_rate": 5.8502778685845056e-06, "loss": 0.1943, "step": 26400 }, { "epoch": 8.553143592217399, "grad_norm": 2.0193216800689697, "learning_rate": 5.817750898986597e-06, "loss": 0.1931, "step": 26600 }, { "epoch": 8.61746261456826, "grad_norm": 2.024120330810547, "learning_rate": 5.785060477280157e-06, "loss": 0.1944, "step": 26800 }, { "epoch": 8.68178163691912, "grad_norm": 1.9868948459625244, "learning_rate": 5.752370055573717e-06, "loss": 0.1967, "step": 27000 }, { "epoch": 8.74610065926998, "grad_norm": 2.0132243633270264, "learning_rate": 5.719679633867278e-06, "loss": 0.194, "step": 27200 }, { "epoch": 8.81041968162084, "grad_norm": 2.1960537433624268, "learning_rate": 5.686989212160837e-06, "loss": 0.1945, "step": 27400 }, { "epoch": 8.8747387039717, "grad_norm": 2.118748903274536, "learning_rate": 5.654298790454398e-06, "loss": 0.1943, "step": 27600 }, { "epoch": 8.93905772632256, "grad_norm": 2.0620296001434326, "learning_rate": 5.621608368747958e-06, "loss": 0.1948, "step": 27800 }, { "epoch": 9.003215951117543, "grad_norm": 2.0099258422851562, "learning_rate": 5.588917947041518e-06, "loss": 0.1953, "step": 28000 }, { "epoch": 9.067534973468403, "grad_norm": 2.1706490516662598, "learning_rate": 5.556227525335077e-06, "loss": 0.187, "step": 28200 }, { "epoch": 9.131853995819263, "grad_norm": 1.989235758781433, "learning_rate": 5.523537103628637e-06, "loss": 0.1896, "step": 28400 }, { "epoch": 9.196173018170123, "grad_norm": 2.4199020862579346, "learning_rate": 5.49101013403073e-06, "loss": 0.1889, "step": 28600 }, { "epoch": 9.260492040520983, "grad_norm": 2.318398952484131, "learning_rate": 5.458319712324289e-06, "loss": 0.1899, "step": 28800 }, { "epoch": 9.324811062871845, "grad_norm": 2.014559745788574, "learning_rate": 5.42562929061785e-06, "loss": 0.1899, "step": 29000 }, { "epoch": 9.389130085222705, "grad_norm": 2.0770111083984375, "learning_rate": 5.39293886891141e-06, "loss": 0.1904, "step": 29200 }, { "epoch": 9.453449107573565, "grad_norm": 2.1191565990448, "learning_rate": 5.36024844720497e-06, "loss": 0.1903, "step": 29400 }, { "epoch": 9.517768129924425, "grad_norm": 1.8838344812393188, "learning_rate": 5.327558025498529e-06, "loss": 0.191, "step": 29600 }, { "epoch": 
9.582087152275285, "grad_norm": 2.064694404602051, "learning_rate": 5.2948676037920895e-06, "loss": 0.189, "step": 29800 }, { "epoch": 9.646406174626145, "grad_norm": 2.2259907722473145, "learning_rate": 5.262177182085649e-06, "loss": 0.1896, "step": 30000 }, { "epoch": 9.646406174626145, "eval_loss": 0.20603837072849274, "eval_runtime": 6469.2735, "eval_samples_per_second": 4.503, "eval_steps_per_second": 0.563, "eval_wer": 29.357595694795236, "step": 30000 }, { "epoch": 9.710725196977005, "grad_norm": 2.174774169921875, "learning_rate": 5.229486760379209e-06, "loss": 0.1898, "step": 30200 }, { "epoch": 9.775044219327865, "grad_norm": 2.170811414718628, "learning_rate": 5.1967963386727695e-06, "loss": 0.1885, "step": 30400 }, { "epoch": 9.839363241678726, "grad_norm": 2.454228401184082, "learning_rate": 5.164105916966329e-06, "loss": 0.1889, "step": 30600 }, { "epoch": 9.903682264029587, "grad_norm": 2.278226613998413, "learning_rate": 5.131578947368422e-06, "loss": 0.1891, "step": 30800 }, { "epoch": 9.968001286380447, "grad_norm": 2.151634931564331, "learning_rate": 5.098888525661981e-06, "loss": 0.1881, "step": 31000 }, { "epoch": 10.032159511175431, "grad_norm": 2.1410274505615234, "learning_rate": 5.0661981039555416e-06, "loss": 0.1861, "step": 31200 }, { "epoch": 10.096478533526291, "grad_norm": 2.1889536380767822, "learning_rate": 5.033507682249101e-06, "loss": 0.184, "step": 31400 }, { "epoch": 10.160797555877151, "grad_norm": 2.2102479934692383, "learning_rate": 5.000817260542662e-06, "loss": 0.1849, "step": 31600 }, { "epoch": 10.225116578228011, "grad_norm": 2.0018393993377686, "learning_rate": 4.9681268388362215e-06, "loss": 0.1833, "step": 31800 }, { "epoch": 10.289435600578871, "grad_norm": 2.114179849624634, "learning_rate": 4.935436417129781e-06, "loss": 0.1831, "step": 32000 }, { "epoch": 10.353754622929731, "grad_norm": 1.9474581480026245, "learning_rate": 4.902745995423342e-06, "loss": 0.1842, "step": 32200 }, { "epoch": 10.418073645280591, "grad_norm": 2.0356032848358154, "learning_rate": 4.8700555737169014e-06, "loss": 0.1852, "step": 32400 }, { "epoch": 10.482392667631451, "grad_norm": 2.0535342693328857, "learning_rate": 4.837365152010461e-06, "loss": 0.1866, "step": 32600 }, { "epoch": 10.546711689982311, "grad_norm": 2.2602412700653076, "learning_rate": 4.804838182412553e-06, "loss": 0.185, "step": 32800 }, { "epoch": 10.611030712333173, "grad_norm": 2.098829507827759, "learning_rate": 4.772147760706114e-06, "loss": 0.1844, "step": 33000 }, { "epoch": 10.675349734684033, "grad_norm": 2.0167388916015625, "learning_rate": 4.7394573389996735e-06, "loss": 0.1871, "step": 33200 }, { "epoch": 10.739668757034893, "grad_norm": 2.245697259902954, "learning_rate": 4.706766917293233e-06, "loss": 0.1866, "step": 33400 }, { "epoch": 10.803987779385753, "grad_norm": 1.9759703874588013, "learning_rate": 4.674076495586794e-06, "loss": 0.1849, "step": 33600 }, { "epoch": 10.868306801736614, "grad_norm": 2.1898162364959717, "learning_rate": 4.6413860738803535e-06, "loss": 0.1856, "step": 33800 }, { "epoch": 10.932625824087474, "grad_norm": 2.0218801498413086, "learning_rate": 4.608695652173913e-06, "loss": 0.1847, "step": 34000 }, { "epoch": 10.996944846438334, "grad_norm": 1.9428986310958862, "learning_rate": 4.576005230467474e-06, "loss": 0.1863, "step": 34200 }, { "epoch": 11.061103071233317, "grad_norm": 1.9156979322433472, "learning_rate": 4.543314808761033e-06, "loss": 0.1788, "step": 34400 }, { "epoch": 11.125422093584177, "grad_norm": 2.0453121662139893, "learning_rate": 
4.510624387054593e-06, "loss": 0.1817, "step": 34600 }, { "epoch": 11.189741115935037, "grad_norm": 1.9229934215545654, "learning_rate": 4.477933965348154e-06, "loss": 0.1808, "step": 34800 }, { "epoch": 11.254060138285897, "grad_norm": 2.319345235824585, "learning_rate": 4.445406995750246e-06, "loss": 0.1793, "step": 35000 }, { "epoch": 11.254060138285897, "eval_loss": 0.20413178205490112, "eval_runtime": 6186.7151, "eval_samples_per_second": 4.708, "eval_steps_per_second": 0.589, "eval_wer": 29.14897509945317, "step": 35000 }, { "epoch": 11.31837916063676, "grad_norm": 2.07478404045105, "learning_rate": 4.4127165740438055e-06, "loss": 0.1798, "step": 35200 }, { "epoch": 11.38269818298762, "grad_norm": 2.0194761753082275, "learning_rate": 4.380026152337365e-06, "loss": 0.1792, "step": 35400 }, { "epoch": 11.44701720533848, "grad_norm": 1.903509497642517, "learning_rate": 4.347335730630926e-06, "loss": 0.1796, "step": 35600 }, { "epoch": 11.51133622768934, "grad_norm": 2.0315921306610107, "learning_rate": 4.314645308924485e-06, "loss": 0.1819, "step": 35800 }, { "epoch": 11.5756552500402, "grad_norm": 2.0309042930603027, "learning_rate": 4.281954887218046e-06, "loss": 0.1805, "step": 36000 }, { "epoch": 11.63997427239106, "grad_norm": 2.0719332695007324, "learning_rate": 4.249264465511606e-06, "loss": 0.1807, "step": 36200 }, { "epoch": 11.70429329474192, "grad_norm": 2.17317271232605, "learning_rate": 4.216574043805165e-06, "loss": 0.1832, "step": 36400 }, { "epoch": 11.76861231709278, "grad_norm": 2.0271966457366943, "learning_rate": 4.183883622098726e-06, "loss": 0.1804, "step": 36600 }, { "epoch": 11.83293133944364, "grad_norm": 2.235299825668335, "learning_rate": 4.151193200392286e-06, "loss": 0.1826, "step": 36800 }, { "epoch": 11.897250361794502, "grad_norm": 1.8323442935943604, "learning_rate": 4.118666230794378e-06, "loss": 0.1822, "step": 37000 }, { "epoch": 11.961569384145362, "grad_norm": 2.1653242111206055, "learning_rate": 4.0859758090879374e-06, "loss": 0.1829, "step": 37200 }, { "epoch": 12.025727608940343, "grad_norm": 2.1361212730407715, "learning_rate": 4.053285387381498e-06, "loss": 0.1792, "step": 37400 }, { "epoch": 12.090046631291205, "grad_norm": 2.1540911197662354, "learning_rate": 4.020594965675058e-06, "loss": 0.1779, "step": 37600 }, { "epoch": 12.154365653642065, "grad_norm": 2.157705783843994, "learning_rate": 3.987904543968617e-06, "loss": 0.1754, "step": 37800 }, { "epoch": 12.218684675992925, "grad_norm": 2.100783586502075, "learning_rate": 3.955214122262178e-06, "loss": 0.1761, "step": 38000 }, { "epoch": 12.283003698343785, "grad_norm": 1.9860684871673584, "learning_rate": 3.922523700555738e-06, "loss": 0.178, "step": 38200 }, { "epoch": 12.347322720694645, "grad_norm": 2.138315200805664, "learning_rate": 3.889833278849297e-06, "loss": 0.1769, "step": 38400 }, { "epoch": 12.411641743045505, "grad_norm": 1.8979172706604004, "learning_rate": 3.857142857142858e-06, "loss": 0.1761, "step": 38600 }, { "epoch": 12.475960765396366, "grad_norm": 2.144052505493164, "learning_rate": 3.8244524354364175e-06, "loss": 0.1772, "step": 38800 }, { "epoch": 12.540279787747226, "grad_norm": 2.2078232765197754, "learning_rate": 3.7919254658385097e-06, "loss": 0.1784, "step": 39000 }, { "epoch": 12.604598810098086, "grad_norm": 2.0689291954040527, "learning_rate": 3.75923504413207e-06, "loss": 0.179, "step": 39200 }, { "epoch": 12.668917832448948, "grad_norm": 2.1173605918884277, "learning_rate": 3.7265446224256295e-06, "loss": 0.1761, "step": 39400 }, { "epoch": 
12.733236854799808, "grad_norm": 2.0470852851867676, "learning_rate": 3.6938542007191896e-06, "loss": 0.1791, "step": 39600 }, { "epoch": 12.797555877150668, "grad_norm": 2.237996816635132, "learning_rate": 3.6611637790127497e-06, "loss": 0.1791, "step": 39800 }, { "epoch": 12.861874899501528, "grad_norm": 2.107485055923462, "learning_rate": 3.62847335730631e-06, "loss": 0.1776, "step": 40000 }, { "epoch": 12.861874899501528, "eval_loss": 0.20320001244544983, "eval_runtime": 5730.0624, "eval_samples_per_second": 5.084, "eval_steps_per_second": 0.636, "eval_wer": 29.061075282609227, "step": 40000 }, { "epoch": 12.926193921852388, "grad_norm": 2.1215012073516846, "learning_rate": 3.5957829355998696e-06, "loss": 0.1792, "step": 40200 }, { "epoch": 12.990512944203248, "grad_norm": 2.1944947242736816, "learning_rate": 3.5630925138934297e-06, "loss": 0.177, "step": 40400 }, { "epoch": 13.054671168998231, "grad_norm": 2.1703405380249023, "learning_rate": 3.53040209218699e-06, "loss": 0.1754, "step": 40600 }, { "epoch": 13.118990191349091, "grad_norm": 2.224743366241455, "learning_rate": 3.49771167048055e-06, "loss": 0.1741, "step": 40800 }, { "epoch": 13.183309213699951, "grad_norm": 2.370253324508667, "learning_rate": 3.4650212487741096e-06, "loss": 0.1727, "step": 41000 }, { "epoch": 13.247628236050812, "grad_norm": 1.8502309322357178, "learning_rate": 3.4324942791762018e-06, "loss": 0.1732, "step": 41200 }, { "epoch": 13.311947258401672, "grad_norm": 2.1553802490234375, "learning_rate": 3.399803857469762e-06, "loss": 0.1731, "step": 41400 }, { "epoch": 13.376266280752532, "grad_norm": 2.113837242126465, "learning_rate": 3.3671134357633216e-06, "loss": 0.1743, "step": 41600 }, { "epoch": 13.440585303103393, "grad_norm": 1.9942282438278198, "learning_rate": 3.3344230140568817e-06, "loss": 0.1742, "step": 41800 }, { "epoch": 13.504904325454254, "grad_norm": 2.089869976043701, "learning_rate": 3.301732592350442e-06, "loss": 0.1754, "step": 42000 }, { "epoch": 13.569223347805114, "grad_norm": 1.8944735527038574, "learning_rate": 3.269042170644002e-06, "loss": 0.1742, "step": 42200 }, { "epoch": 13.633542370155974, "grad_norm": 2.06595516204834, "learning_rate": 3.2363517489375616e-06, "loss": 0.1736, "step": 42400 }, { "epoch": 13.697861392506834, "grad_norm": 2.0173680782318115, "learning_rate": 3.2036613272311218e-06, "loss": 0.1738, "step": 42600 }, { "epoch": 13.762180414857694, "grad_norm": 2.098628282546997, "learning_rate": 3.170970905524682e-06, "loss": 0.1735, "step": 42800 }, { "epoch": 13.826499437208554, "grad_norm": 2.226824998855591, "learning_rate": 3.138280483818241e-06, "loss": 0.1757, "step": 43000 }, { "epoch": 13.890818459559414, "grad_norm": 2.020033121109009, "learning_rate": 3.1057535142203337e-06, "loss": 0.1755, "step": 43200 }, { "epoch": 13.955137481910274, "grad_norm": 2.29543399810791, "learning_rate": 3.073063092513894e-06, "loss": 0.1762, "step": 43400 }, { "epoch": 14.019295706705257, "grad_norm": 1.9776209592819214, "learning_rate": 3.040372670807454e-06, "loss": 0.1735, "step": 43600 }, { "epoch": 14.083614729056118, "grad_norm": 1.7997843027114868, "learning_rate": 3.0076822491010137e-06, "loss": 0.1704, "step": 43800 }, { "epoch": 14.14793375140698, "grad_norm": 1.968531847000122, "learning_rate": 2.9749918273945738e-06, "loss": 0.1717, "step": 44000 }, { "epoch": 14.21225277375784, "grad_norm": 2.061990737915039, "learning_rate": 2.942301405688134e-06, "loss": 0.1726, "step": 44200 }, { "epoch": 14.2765717961087, "grad_norm": 2.0133299827575684, "learning_rate": 
2.909610983981694e-06, "loss": 0.1714, "step": 44400 }, { "epoch": 14.34089081845956, "grad_norm": 2.0535309314727783, "learning_rate": 2.8769205622752537e-06, "loss": 0.1717, "step": 44600 }, { "epoch": 14.40520984081042, "grad_norm": 2.1071929931640625, "learning_rate": 2.8442301405688134e-06, "loss": 0.1722, "step": 44800 }, { "epoch": 14.46952886316128, "grad_norm": 2.0523617267608643, "learning_rate": 2.811539718862373e-06, "loss": 0.17, "step": 45000 }, { "epoch": 14.46952886316128, "eval_loss": 0.20234042406082153, "eval_runtime": 5780.3456, "eval_samples_per_second": 5.039, "eval_steps_per_second": 0.63, "eval_wer": 28.696460806301626, "step": 45000 } ], "logging_steps": 200, "max_steps": 62180, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 5000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.4178134111256576e+20, "train_batch_size": 32, "trial_name": null, "trial_params": null }
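The object above is a Hugging Face Trainer state log for a Whisper-tiny Hindi fine-tuning run: eval WER improves from roughly 36.95 at step 5000 to 28.70 at step 45000, which matches "best_metric" and "best_model_checkpoint". As a minimal sketch (assuming the object is saved under the usual checkpoint filename trainer_state.json; that filename and the console printout are illustrative, not part of the file itself), the snippet below shows how one might pull the per-checkpoint evaluation records out of log_history using only the standard library.

import json

# Load the Trainer state; "trainer_state.json" is an assumed path for this sketch.
with open("trainer_state.json", "r", encoding="utf-8") as f:
    state = json.load(f)

# Evaluation records are the log_history entries that carry "eval_wer";
# the plain training-loss entries do not have that key.
evals = [e for e in state["log_history"] if "eval_wer" in e]
for e in evals:
    print(f'step {e["step"]:>6}  epoch {e["epoch"]:.2f}  '
          f'eval_loss {e["eval_loss"]:.4f}  eval_wer {e["eval_wer"]:.2f}')

print("best WER:", state["best_metric"])
print("best checkpoint:", state["best_model_checkpoint"])

Run against this file, the loop would print one line per 5000-step evaluation (steps 5000 through 45000), ending with the best WER of about 28.70 at ./whisper-tiny-hi/checkpoint-45000.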