{ "best_metric": null, "best_model_checkpoint": null, "epoch": 39.96271826333176, "eval_steps": 100.0, "global_step": 42360, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.4719207173194903, "grad_norm": 60.370758056640625, "learning_rate": 1.188e-06, "loss": 28.5881, "step": 500 }, { "epoch": 0.9438414346389806, "grad_norm": 47.990840911865234, "learning_rate": 2.3880000000000003e-06, "loss": 11.3302, "step": 1000 }, { "epoch": 1.0, "eval_cer": 1.1283783783783783, "eval_loss": 10.027889251708984, "eval_runtime": 143.8913, "eval_samples_per_second": 47.536, "eval_steps_per_second": 5.942, "eval_wer": 1.0, "step": 1060 }, { "epoch": 1.4152902312411515, "grad_norm": 40.47557067871094, "learning_rate": 3.588e-06, "loss": 9.2112, "step": 1500 }, { "epoch": 1.8872109485606419, "grad_norm": 25.79726219177246, "learning_rate": 4.788e-06, "loss": 7.0068, "step": 2000 }, { "epoch": 2.0, "eval_cer": 1.1283487434803223, "eval_loss": 5.3823137283325195, "eval_runtime": 138.9276, "eval_samples_per_second": 49.234, "eval_steps_per_second": 6.154, "eval_wer": 1.0, "step": 2120 }, { "epoch": 2.3586597451628126, "grad_norm": 10.715571403503418, "learning_rate": 5.988e-06, "loss": 4.9098, "step": 2500 }, { "epoch": 2.830580462482303, "grad_norm": 2.4069929122924805, "learning_rate": 7.1880000000000005e-06, "loss": 3.682, "step": 3000 }, { "epoch": 3.0, "eval_cer": 1.1283487434803223, "eval_loss": 3.189443349838257, "eval_runtime": 133.8287, "eval_samples_per_second": 51.11, "eval_steps_per_second": 6.389, "eval_wer": 1.0, "step": 3180 }, { "epoch": 3.302029259084474, "grad_norm": 1.6513175964355469, "learning_rate": 8.388e-06, "loss": 3.1332, "step": 3500 }, { "epoch": 3.773949976403964, "grad_norm": 1.9935938119888306, "learning_rate": 9.588e-06, "loss": 2.725, "step": 4000 }, { "epoch": 4.0, "eval_cer": 1.1283783783783783, "eval_loss": 2.161285400390625, "eval_runtime": 133.493, "eval_samples_per_second": 51.239, "eval_steps_per_second": 6.405, "eval_wer": 1.0, "step": 4240 }, { "epoch": 4.245398773006135, "grad_norm": 2.0250744819641113, "learning_rate": 1.0787999999999999e-05, "loss": 2.2125, "step": 4500 }, { "epoch": 4.717319490325625, "grad_norm": 6.084537506103516, "learning_rate": 1.1988000000000001e-05, "loss": 1.4317, "step": 5000 }, { "epoch": 5.0, "eval_cer": 1.1232219061166429, "eval_loss": 0.7817353010177612, "eval_runtime": 140.0025, "eval_samples_per_second": 48.856, "eval_steps_per_second": 6.107, "eval_wer": 1.0, "step": 5300 }, { "epoch": 5.188768286927796, "grad_norm": 8.009575843811035, "learning_rate": 1.3188e-05, "loss": 0.8716, "step": 5500 }, { "epoch": 5.660689004247287, "grad_norm": 6.703843116760254, "learning_rate": 1.4388000000000002e-05, "loss": 0.5734, "step": 6000 }, { "epoch": 6.0, "eval_cer": 1.0337541488857278, "eval_loss": 0.2728799879550934, "eval_runtime": 137.2562, "eval_samples_per_second": 49.834, "eval_steps_per_second": 6.229, "eval_wer": 0.9991228070175439, "step": 6360 }, { "epoch": 6.132137800849457, "grad_norm": 6.002254009246826, "learning_rate": 1.5588e-05, "loss": 0.4288, "step": 6500 }, { "epoch": 6.604058518168948, "grad_norm": 8.313450813293457, "learning_rate": 1.6788e-05, "loss": 0.3485, "step": 7000 }, { "epoch": 7.0, "eval_cer": 1.0258712660028448, "eval_loss": 0.16624058783054352, "eval_runtime": 68.7385, "eval_samples_per_second": 99.508, "eval_steps_per_second": 12.438, "eval_wer": 0.9988304093567252, "step": 7420 }, { "epoch": 7.0755073147711185, "grad_norm": 2.5701239109039307, "learning_rate": 1.7988e-05, "loss": 0.2995, "step": 7500 }, { "epoch": 7.547428032090608, "grad_norm": 5.836677551269531, "learning_rate": 1.9188e-05, "loss": 0.2631, "step": 8000 }, { "epoch": 8.0, "eval_cer": 1.0223743480322427, "eval_loss": 0.083324134349823, "eval_runtime": 70.5926, "eval_samples_per_second": 96.894, "eval_steps_per_second": 12.112, "eval_wer": 0.9988304093567252, "step": 8480 }, { "epoch": 8.018876828692779, "grad_norm": 5.063877105712891, "learning_rate": 2.0388e-05, "loss": 0.2388, "step": 8500 }, { "epoch": 8.49079754601227, "grad_norm": 3.660646677017212, "learning_rate": 2.1588e-05, "loss": 0.218, "step": 9000 }, { "epoch": 8.96271826333176, "grad_norm": 6.175236225128174, "learning_rate": 2.2788000000000003e-05, "loss": 0.2068, "step": 9500 }, { "epoch": 9.0, "eval_cer": 1.0209518729255571, "eval_loss": 0.06159723177552223, "eval_runtime": 68.413, "eval_samples_per_second": 99.981, "eval_steps_per_second": 12.498, "eval_wer": 0.9989766081871345, "step": 9540 }, { "epoch": 9.43416705993393, "grad_norm": 0.2773754894733429, "learning_rate": 2.3988e-05, "loss": 0.2023, "step": 10000 }, { "epoch": 9.906087777253422, "grad_norm": 1.5312055349349976, "learning_rate": 2.5188e-05, "loss": 0.1846, "step": 10500 }, { "epoch": 10.0, "eval_cer": 1.0198850165955429, "eval_loss": 0.07861880213022232, "eval_runtime": 59.5147, "eval_samples_per_second": 114.929, "eval_steps_per_second": 14.366, "eval_wer": 0.9988304093567252, "step": 10600 }, { "epoch": 10.377536573855592, "grad_norm": 6.2259297370910645, "learning_rate": 2.6388000000000002e-05, "loss": 0.1781, "step": 11000 }, { "epoch": 10.849457291175083, "grad_norm": 5.571898460388184, "learning_rate": 2.7585600000000002e-05, "loss": 0.1792, "step": 11500 }, { "epoch": 11.0, "eval_cer": 1.019825746799431, "eval_loss": 0.0471838042140007, "eval_runtime": 65.0557, "eval_samples_per_second": 105.141, "eval_steps_per_second": 13.143, "eval_wer": 0.9989766081871345, "step": 11660 }, { "epoch": 11.320906087777253, "grad_norm": 3.878492832183838, "learning_rate": 2.87856e-05, "loss": 0.1662, "step": 12000 }, { "epoch": 11.792826805096745, "grad_norm": 6.27423095703125, "learning_rate": 2.99856e-05, "loss": 0.1669, "step": 12500 }, { "epoch": 12.0, "eval_cer": 1.0206555239449977, "eval_loss": 0.05153690651059151, "eval_runtime": 65.9815, "eval_samples_per_second": 103.665, "eval_steps_per_second": 12.958, "eval_wer": 0.9988304093567252, "step": 12720 }, { "epoch": 12.264275601698914, "grad_norm": 3.428331136703491, "learning_rate": 2.9979744753439937e-05, "loss": 0.159, "step": 13000 }, { "epoch": 12.736196319018404, "grad_norm": 3.60664701461792, "learning_rate": 2.991821289408868e-05, "loss": 0.1609, "step": 13500 }, { "epoch": 13.0, "eval_cer": 1.0218705547652915, "eval_loss": 0.05291549116373062, "eval_runtime": 81.1439, "eval_samples_per_second": 84.295, "eval_steps_per_second": 10.537, "eval_wer": 0.9988304093567252, "step": 13780 }, { "epoch": 13.207645115620576, "grad_norm": 3.0601279735565186, "learning_rate": 2.981532510892707e-05, "loss": 0.156, "step": 14000 }, { "epoch": 13.679565832940066, "grad_norm": 1.1705658435821533, "learning_rate": 2.9671447959906427e-05, "loss": 0.1508, "step": 14500 }, { "epoch": 14.0, "eval_cer": 1.0183143669985775, "eval_loss": 0.043218065053224564, "eval_runtime": 72.3333, "eval_samples_per_second": 94.562, "eval_steps_per_second": 11.82, "eval_wer": 0.9988304093567252, "step": 14840 }, { "epoch": 14.151014629542237, "grad_norm": 3.3974997997283936, "learning_rate": 2.9486979510030355e-05, "loss": 0.1454, "step": 15000 }, { "epoch": 14.622935346861727, "grad_norm": 1.8044121265411377, "learning_rate": 2.9262430125717204e-05, "loss": 0.1427, "step": 15500 }, { "epoch": 15.0, "eval_cer": 1.0147878141299194, "eval_loss": 0.08599487692117691, "eval_runtime": 59.8056, "eval_samples_per_second": 114.37, "eval_steps_per_second": 14.296, "eval_wer": 0.9988304093567252, "step": 15900 }, { "epoch": 15.094384143463898, "grad_norm": 2.6493678092956543, "learning_rate": 2.899842106477605e-05, "loss": 0.1398, "step": 16000 }, { "epoch": 15.566304860783388, "grad_norm": 4.24639892578125, "learning_rate": 2.869568275758094e-05, "loss": 0.1316, "step": 16500 }, { "epoch": 16.0, "eval_cer": 1.0185218112849692, "eval_loss": 0.0349772572517395, "eval_runtime": 60.1896, "eval_samples_per_second": 113.641, "eval_steps_per_second": 14.205, "eval_wer": 0.9988304093567252, "step": 16960 }, { "epoch": 16.037753657385558, "grad_norm": 4.061686992645264, "learning_rate": 2.8355052786198856e-05, "loss": 0.1306, "step": 17000 }, { "epoch": 16.50967437470505, "grad_norm": 2.1003165245056152, "learning_rate": 2.7977473567062487e-05, "loss": 0.1248, "step": 17500 }, { "epoch": 16.98159509202454, "grad_norm": 3.3991761207580566, "learning_rate": 2.7563989743599158e-05, "loss": 0.1296, "step": 18000 }, { "epoch": 17.0, "eval_cer": 1.0190552394499763, "eval_loss": 0.04494578763842583, "eval_runtime": 86.2502, "eval_samples_per_second": 79.304, "eval_steps_per_second": 9.913, "eval_wer": 0.9988304093567252, "step": 18020 }, { "epoch": 17.45304388862671, "grad_norm": 3.5397164821624756, "learning_rate": 2.7116675667417045e-05, "loss": 0.1239, "step": 18500 }, { "epoch": 17.9249646059462, "grad_norm": 3.1016619205474854, "learning_rate": 2.663497648875447e-05, "loss": 0.1236, "step": 19000 }, { "epoch": 18.0, "eval_cer": 1.0170400663821717, "eval_loss": 0.04502090439200401, "eval_runtime": 67.5469, "eval_samples_per_second": 101.263, "eval_steps_per_second": 12.658, "eval_wer": 0.9988304093567252, "step": 19080 }, { "epoch": 18.396413402548372, "grad_norm": 1.3774545192718506, "learning_rate": 2.6121086974660007e-05, "loss": 0.1168, "step": 19500 }, { "epoch": 18.86833411986786, "grad_norm": 3.927860736846924, "learning_rate": 2.557642889644962e-05, "loss": 0.1149, "step": 20000 }, { "epoch": 19.0, "eval_cer": 1.018136557610242, "eval_loss": 0.03580431640148163, "eval_runtime": 62.0438, "eval_samples_per_second": 110.245, "eval_steps_per_second": 13.781, "eval_wer": 0.9989766081871345, "step": 20140 }, { "epoch": 19.339782916470032, "grad_norm": 1.4613689184188843, "learning_rate": 2.500368515269978e-05, "loss": 0.1097, "step": 20500 }, { "epoch": 19.811703633789524, "grad_norm": 1.7840094566345215, "learning_rate": 2.440214531030174e-05, "loss": 0.1149, "step": 21000 }, { "epoch": 20.0, "eval_cer": 1.0176327643432908, "eval_loss": 0.0317009836435318, "eval_runtime": 64.2537, "eval_samples_per_second": 106.453, "eval_steps_per_second": 13.307, "eval_wer": 0.9989766081871345, "step": 21200 }, { "epoch": 20.283152430391695, "grad_norm": 9.317741394042969, "learning_rate": 2.3774592676935842e-05, "loss": 0.1096, "step": 21500 }, { "epoch": 20.755073147711183, "grad_norm": 2.2778842449188232, "learning_rate": 2.3122763494165503e-05, "loss": 0.106, "step": 22000 }, { "epoch": 21.0, "eval_cer": 1.0170104314841157, "eval_loss": 0.03687233105301857, "eval_runtime": 70.0875, "eval_samples_per_second": 97.592, "eval_steps_per_second": 12.199, "eval_wer": 0.9988304093567252, "step": 22260 }, { "epoch": 21.226521944313355, "grad_norm": 5.864170551300049, "learning_rate": 2.2449830971856908e-05, "loss": 0.1042, "step": 22500 }, { "epoch": 21.698442661632846, "grad_norm": 1.5493875741958618, "learning_rate": 2.1754960402430945e-05, "loss": 0.102, "step": 23000 }, { "epoch": 22.0, "eval_cer": 1.018018018018018, "eval_loss": 0.034220367670059204, "eval_runtime": 60.8599, "eval_samples_per_second": 112.389, "eval_steps_per_second": 14.049, "eval_wer": 0.9988304093567252, "step": 23320 }, { "epoch": 22.169891458235018, "grad_norm": 2.6867339611053467, "learning_rate": 2.1041400973479267e-05, "loss": 0.099, "step": 23500 }, { "epoch": 22.641812175554506, "grad_norm": 6.666200637817383, "learning_rate": 2.031112688042812e-05, "loss": 0.1011, "step": 24000 }, { "epoch": 23.0, "eval_cer": 1.0178994784257942, "eval_loss": 0.04105741158127785, "eval_runtime": 63.1164, "eval_samples_per_second": 108.371, "eval_steps_per_second": 13.546, "eval_wer": 0.9988304093567252, "step": 24380 }, { "epoch": 23.113260972156677, "grad_norm": 2.3484959602355957, "learning_rate": 1.956766180067445e-05, "loss": 0.0991, "step": 24500 }, { "epoch": 23.58518168947617, "grad_norm": 3.396759510040283, "learning_rate": 1.881008353783706e-05, "loss": 0.0948, "step": 25000 }, { "epoch": 24.0, "eval_cer": 1.0163288288288288, "eval_loss": 0.031411658972501755, "eval_runtime": 60.6777, "eval_samples_per_second": 112.727, "eval_steps_per_second": 14.091, "eval_wer": 0.9988304093567252, "step": 25440 }, { "epoch": 24.05663048607834, "grad_norm": 0.44371920824050903, "learning_rate": 1.8041963967312025e-05, "loss": 0.0963, "step": 25500 }, { "epoch": 24.52855120339783, "grad_norm": 2.3664116859436035, "learning_rate": 1.7265428235347026e-05, "loss": 0.0906, "step": 26000 }, { "epoch": 25.0, "grad_norm": 2.171415090560913, "learning_rate": 1.6482624773072205e-05, "loss": 0.0934, "step": 26500 }, { "epoch": 25.0, "eval_cer": 1.017454954954955, "eval_loss": 0.030156882479786873, "eval_runtime": 61.4594, "eval_samples_per_second": 111.293, "eval_steps_per_second": 13.912, "eval_wer": 0.9988304093567252, "step": 26500 }, { "epoch": 25.47192071731949, "grad_norm": 3.737889051437378, "learning_rate": 1.5697295811286228e-05, "loss": 0.0844, "step": 27000 }, { "epoch": 25.94384143463898, "grad_norm": 5.188897609710693, "learning_rate": 1.4908467225519327e-05, "loss": 0.0843, "step": 27500 }, { "epoch": 26.0, "eval_cer": 1.0172475106685632, "eval_loss": 0.04401924088597298, "eval_runtime": 59.6879, "eval_samples_per_second": 114.596, "eval_steps_per_second": 14.325, "eval_wer": 0.9988304093567252, "step": 27560 }, { "epoch": 26.41529023124115, "grad_norm": 3.3139688968658447, "learning_rate": 1.4119891882267897e-05, "loss": 0.0789, "step": 28000 }, { "epoch": 26.887210948560643, "grad_norm": 3.2081096172332764, "learning_rate": 1.3333751522495247e-05, "loss": 0.0833, "step": 28500 }, { "epoch": 27.0, "eval_cer": 1.016743717401612, "eval_loss": 0.034057337790727615, "eval_runtime": 60.1277, "eval_samples_per_second": 113.758, "eval_steps_per_second": 14.22, "eval_wer": 0.9988304093567252, "step": 28620 }, { "epoch": 27.35865974516281, "grad_norm": 1.3224974870681763, "learning_rate": 1.2552221150328132e-05, "loss": 0.0749, "step": 29000 }, { "epoch": 27.830580462482303, "grad_norm": 0.9007667303085327, "learning_rate": 1.1779004344640104e-05, "loss": 0.0781, "step": 29500 }, { "epoch": 28.0, "eval_cer": 1.0157064959696538, "eval_loss": 0.05653638020157814, "eval_runtime": 63.0029, "eval_samples_per_second": 108.566, "eval_steps_per_second": 13.571, "eval_wer": 0.9988304093567252, "step": 29680 }, { "epoch": 28.302029259084474, "grad_norm": 1.9431039094924927, "learning_rate": 1.101314200491722e-05, "loss": 0.072, "step": 30000 }, { "epoch": 28.773949976403966, "grad_norm": 0.08316487073898315, "learning_rate": 1.0258310052432775e-05, "loss": 0.0741, "step": 30500 }, { "epoch": 29.0, "eval_cer": 1.0160917496443813, "eval_loss": 0.03570393845438957, "eval_runtime": 61.5019, "eval_samples_per_second": 111.216, "eval_steps_per_second": 13.902, "eval_wer": 0.9988304093567252, "step": 30740 }, { "epoch": 29.245398773006134, "grad_norm": 0.031967077404260635, "learning_rate": 9.516596870757504e-06, "loss": 0.0717, "step": 31000 }, { "epoch": 29.717319490325625, "grad_norm": 0.1671179085969925, "learning_rate": 8.79005454793263e-06, "loss": 0.0704, "step": 31500 }, { "epoch": 30.0, "eval_cer": 1.016239924134661, "eval_loss": 0.030619077384471893, "eval_runtime": 59.7711, "eval_samples_per_second": 114.437, "eval_steps_per_second": 14.305, "eval_wer": 0.9988304093567252, "step": 31800 }, { "epoch": 30.188768286927797, "grad_norm": 4.69877815246582, "learning_rate": 8.080693198985019e-06, "loss": 0.0723, "step": 32000 }, { "epoch": 30.660689004247285, "grad_norm": 1.8107829093933105, "learning_rate": 7.39319559220429e-06, "loss": 0.0678, "step": 32500 }, { "epoch": 31.0, "eval_cer": 1.0159435751541015, "eval_loss": 0.02890847437083721, "eval_runtime": 62.096, "eval_samples_per_second": 110.152, "eval_steps_per_second": 13.769, "eval_wer": 0.9988304093567252, "step": 32860 }, { "epoch": 31.132137800849456, "grad_norm": 2.0504305362701416, "learning_rate": 6.7239430275857026e-06, "loss": 0.0628, "step": 33000 }, { "epoch": 31.604058518168948, "grad_norm": 3.536367654800415, "learning_rate": 6.077587720237289e-06, "loss": 0.067, "step": 33500 }, { "epoch": 32.0, "eval_cer": 1.0158843053579896, "eval_loss": 0.027865121141076088, "eval_runtime": 60.0098, "eval_samples_per_second": 113.981, "eval_steps_per_second": 14.248, "eval_wer": 0.9988304093567252, "step": 33920 }, { "epoch": 32.075507314771116, "grad_norm": 0.30863332748413086, "learning_rate": 5.4559179328316745e-06, "loss": 0.0617, "step": 34000 }, { "epoch": 32.54742803209061, "grad_norm": 1.2579513788223267, "learning_rate": 4.860653630940242e-06, "loss": 0.0641, "step": 34500 }, { "epoch": 33.0, "eval_cer": 1.0159732100521575, "eval_loss": 0.032531462609767914, "eval_runtime": 61.6714, "eval_samples_per_second": 110.91, "eval_steps_per_second": 13.864, "eval_wer": 0.9988304093567252, "step": 34980 }, { "epoch": 33.01887682869278, "grad_norm": 1.1170421838760376, "learning_rate": 4.293441724427136e-06, "loss": 0.0582, "step": 35000 }, { "epoch": 33.49079754601227, "grad_norm": 2.8375914096832275, "learning_rate": 3.7558515109655663e-06, "loss": 0.0609, "step": 35500 }, { "epoch": 33.96271826333176, "grad_norm": 0.19227160513401031, "learning_rate": 3.249370334282695e-06, "loss": 0.0595, "step": 36000 }, { "epoch": 34.0, "eval_cer": 1.0166251778093884, "eval_loss": 0.033033788204193115, "eval_runtime": 59.4597, "eval_samples_per_second": 115.036, "eval_steps_per_second": 14.379, "eval_wer": 0.9988304093567252, "step": 36040 }, { "epoch": 34.434167059933934, "grad_norm": 1.7270156145095825, "learning_rate": 2.775399469145443e-06, "loss": 0.0614, "step": 36500 }, { "epoch": 34.90608777725342, "grad_norm": 0.2758292257785797, "learning_rate": 2.336095963596342e-06, "loss": 0.055, "step": 37000 }, { "epoch": 35.0, "eval_cer": 1.0164177335229967, "eval_loss": 0.030884480103850365, "eval_runtime": 58.7794, "eval_samples_per_second": 116.367, "eval_steps_per_second": 14.546, "eval_wer": 0.9988304093567252, "step": 37100 }, { "epoch": 35.37753657385559, "grad_norm": 2.685821771621704, "learning_rate": 1.9309149042102644e-06, "loss": 0.0564, "step": 37500 }, { "epoch": 35.849457291175085, "grad_norm": 2.9631125926971436, "learning_rate": 1.5618919095549582e-06, "loss": 0.055, "step": 38000 }, { "epoch": 36.0, "eval_cer": 1.016714082503556, "eval_loss": 0.03032144159078598, "eval_runtime": 71.6691, "eval_samples_per_second": 95.439, "eval_steps_per_second": 11.93, "eval_wer": 0.9988304093567252, "step": 38160 }, { "epoch": 36.32090608777725, "grad_norm": 0.7306509613990784, "learning_rate": 1.2300479506647383e-06, "loss": 0.0554, "step": 38500 }, { "epoch": 36.792826805096745, "grad_norm": 1.8589800596237183, "learning_rate": 9.363011358278406e-07, "loss": 0.0555, "step": 39000 }, { "epoch": 37.0, "eval_cer": 1.0165955429113325, "eval_loss": 0.03112892434000969, "eval_runtime": 70.4338, "eval_samples_per_second": 97.112, "eval_steps_per_second": 12.139, "eval_wer": 0.9988304093567252, "step": 39220 }, { "epoch": 37.26427560169891, "grad_norm": 1.8065155744552612, "learning_rate": 6.814641704683633e-07, "loss": 0.0568, "step": 39500 }, { "epoch": 37.736196319018404, "grad_norm": 1.0842024087905884, "learning_rate": 4.6663260289360743e-07, "loss": 0.0533, "step": 40000 }, { "epoch": 38.0, "eval_cer": 1.0165659080132765, "eval_loss": 0.031128019094467163, "eval_runtime": 62.073, "eval_samples_per_second": 110.193, "eval_steps_per_second": 13.774, "eval_wer": 0.9988304093567252, "step": 40280 }, { "epoch": 38.20764511562058, "grad_norm": 1.8371340036392212, "learning_rate": 2.915399553636205e-07, "loss": 0.0571, "step": 40500 }, { "epoch": 38.679565832940064, "grad_norm": 1.5724213123321533, "learning_rate": 1.5714100953393373e-07, "loss": 0.0528, "step": 41000 }, { "epoch": 39.0, "eval_cer": 1.0165659080132765, "eval_loss": 0.031000742688775063, "eval_runtime": 61.7575, "eval_samples_per_second": 110.756, "eval_steps_per_second": 13.844, "eval_wer": 0.9988304093567252, "step": 41340 }, { "epoch": 39.15101462954224, "grad_norm": 1.2510491609573364, "learning_rate": 6.380760518884532e-08, "loss": 0.0568, "step": 41500 }, { "epoch": 39.62293534686173, "grad_norm": 0.06529413163661957, "learning_rate": 1.1860630053380229e-08, "loss": 0.0509, "step": 42000 }, { "epoch": 39.96271826333176, "eval_cer": 1.0165659080132765, "eval_loss": 0.03096253238618374, "eval_runtime": 109.6526, "eval_samples_per_second": 62.379, "eval_steps_per_second": 7.797, "eval_wer": 0.9988304093567252, "step": 42360 }, { "epoch": 39.96271826333176, "step": 42360, "total_flos": 2.2101401353322127e+19, "train_loss": 0.9936401144753547, "train_runtime": 36758.8562, "train_samples_per_second": 73.778, "train_steps_per_second": 1.152 } ], "logging_steps": 500, "max_steps": 42360, "num_input_tokens_seen": 0, "num_train_epochs": 40, "save_steps": 400, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.2101401353322127e+19, "train_batch_size": 32, "trial_name": null, "trial_params": null }