{ "best_metric": 8.593125499600319, "best_model_checkpoint": "kotoba_v2_enc_logs_epoch2_2/checkpoint-20500", "epoch": 0.08395413705427207, "eval_steps": 500, "global_step": 24500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 3.4266994716029415e-05, "grad_norm": 1.0561553239822388, "learning_rate": 1e-05, "loss": 0.2361, "step": 10 }, { "epoch": 6.853398943205883e-05, "grad_norm": 1.1626238822937012, "learning_rate": 1e-05, "loss": 0.2265, "step": 20 }, { "epoch": 0.00010280098414808825, "grad_norm": 0.9845689535140991, "learning_rate": 1e-05, "loss": 0.2279, "step": 30 }, { "epoch": 0.00013706797886411766, "grad_norm": 1.142356276512146, "learning_rate": 1e-05, "loss": 0.2382, "step": 40 }, { "epoch": 0.00017133497358014707, "grad_norm": 1.0053240060806274, "learning_rate": 1e-05, "loss": 0.2473, "step": 50 }, { "epoch": 0.0002056019682961765, "grad_norm": 1.1098105907440186, "learning_rate": 1e-05, "loss": 0.2438, "step": 60 }, { "epoch": 0.0002398689630122059, "grad_norm": 1.191983699798584, "learning_rate": 1e-05, "loss": 0.2293, "step": 70 }, { "epoch": 0.0002741359577282353, "grad_norm": 1.1295104026794434, "learning_rate": 1e-05, "loss": 0.2362, "step": 80 }, { "epoch": 0.0003084029524442647, "grad_norm": 1.037972092628479, "learning_rate": 1e-05, "loss": 0.2455, "step": 90 }, { "epoch": 0.00034266994716029413, "grad_norm": 1.1975648403167725, "learning_rate": 1e-05, "loss": 0.2459, "step": 100 }, { "epoch": 0.00037693694187632354, "grad_norm": 1.0676342248916626, "learning_rate": 1e-05, "loss": 0.2271, "step": 110 }, { "epoch": 0.000411203936592353, "grad_norm": 1.0749495029449463, "learning_rate": 1e-05, "loss": 0.2417, "step": 120 }, { "epoch": 0.0004454709313083824, "grad_norm": 1.094260811805725, "learning_rate": 1e-05, "loss": 0.2354, "step": 130 }, { "epoch": 0.0004797379260244118, "grad_norm": 1.0395853519439697, "learning_rate": 1e-05, "loss": 0.2381, "step": 140 }, { 
"epoch": 0.0005140049207404412, "grad_norm": 1.2008885145187378, "learning_rate": 1e-05, "loss": 0.2354, "step": 150 }, { "epoch": 0.0005482719154564706, "grad_norm": 1.0647832155227661, "learning_rate": 1e-05, "loss": 0.2321, "step": 160 }, { "epoch": 0.0005825389101725, "grad_norm": 1.327071189880371, "learning_rate": 1e-05, "loss": 0.238, "step": 170 }, { "epoch": 0.0006168059048885295, "grad_norm": 1.1184055805206299, "learning_rate": 1e-05, "loss": 0.2242, "step": 180 }, { "epoch": 0.0006510728996045589, "grad_norm": 1.2512784004211426, "learning_rate": 1e-05, "loss": 0.2437, "step": 190 }, { "epoch": 0.0006853398943205883, "grad_norm": 1.0614465475082397, "learning_rate": 1e-05, "loss": 0.2382, "step": 200 }, { "epoch": 0.0007196068890366177, "grad_norm": 1.0607149600982666, "learning_rate": 1e-05, "loss": 0.2381, "step": 210 }, { "epoch": 0.0007538738837526471, "grad_norm": 1.0422028303146362, "learning_rate": 1e-05, "loss": 0.2294, "step": 220 }, { "epoch": 0.0007881408784686765, "grad_norm": 1.0162984132766724, "learning_rate": 1e-05, "loss": 0.2275, "step": 230 }, { "epoch": 0.000822407873184706, "grad_norm": 1.1085543632507324, "learning_rate": 1e-05, "loss": 0.2161, "step": 240 }, { "epoch": 0.0008566748679007354, "grad_norm": 1.1854636669158936, "learning_rate": 1e-05, "loss": 0.2382, "step": 250 }, { "epoch": 0.0008909418626167648, "grad_norm": 1.40137779712677, "learning_rate": 1e-05, "loss": 0.2579, "step": 260 }, { "epoch": 0.0009252088573327942, "grad_norm": 1.0814112424850464, "learning_rate": 1e-05, "loss": 0.2612, "step": 270 }, { "epoch": 0.0009594758520488236, "grad_norm": 1.083736538887024, "learning_rate": 1e-05, "loss": 0.2711, "step": 280 }, { "epoch": 0.000993742846764853, "grad_norm": 1.0861411094665527, "learning_rate": 1e-05, "loss": 0.2642, "step": 290 }, { "epoch": 0.0010280098414808825, "grad_norm": 1.1141265630722046, "learning_rate": 1e-05, "loss": 0.2585, "step": 300 }, { "epoch": 0.0010622768361969119, "grad_norm": 
1.326241374015808, "learning_rate": 1e-05, "loss": 0.2858, "step": 310 }, { "epoch": 0.0010965438309129413, "grad_norm": 1.393750786781311, "learning_rate": 1e-05, "loss": 0.2635, "step": 320 }, { "epoch": 0.0011308108256289707, "grad_norm": 1.0851459503173828, "learning_rate": 1e-05, "loss": 0.2565, "step": 330 }, { "epoch": 0.001165077820345, "grad_norm": 1.2323757410049438, "learning_rate": 1e-05, "loss": 0.2465, "step": 340 }, { "epoch": 0.0011993448150610295, "grad_norm": 1.376953125, "learning_rate": 1e-05, "loss": 0.2671, "step": 350 }, { "epoch": 0.001233611809777059, "grad_norm": 1.084592580795288, "learning_rate": 1e-05, "loss": 0.2643, "step": 360 }, { "epoch": 0.0012678788044930883, "grad_norm": 1.2907005548477173, "learning_rate": 1e-05, "loss": 0.2584, "step": 370 }, { "epoch": 0.0013021457992091177, "grad_norm": 1.0698130130767822, "learning_rate": 1e-05, "loss": 0.2526, "step": 380 }, { "epoch": 0.0013364127939251471, "grad_norm": 1.1399807929992676, "learning_rate": 1e-05, "loss": 0.2759, "step": 390 }, { "epoch": 0.0013706797886411765, "grad_norm": 1.1480791568756104, "learning_rate": 1e-05, "loss": 0.2499, "step": 400 }, { "epoch": 0.001404946783357206, "grad_norm": 1.3095237016677856, "learning_rate": 1e-05, "loss": 0.2536, "step": 410 }, { "epoch": 0.0014392137780732353, "grad_norm": 1.068246841430664, "learning_rate": 1e-05, "loss": 0.2604, "step": 420 }, { "epoch": 0.0014734807727892648, "grad_norm": 1.2310419082641602, "learning_rate": 1e-05, "loss": 0.2632, "step": 430 }, { "epoch": 0.0015077477675052942, "grad_norm": 1.161867380142212, "learning_rate": 1e-05, "loss": 0.2584, "step": 440 }, { "epoch": 0.0015420147622213236, "grad_norm": 1.1461217403411865, "learning_rate": 1e-05, "loss": 0.2592, "step": 450 }, { "epoch": 0.001576281756937353, "grad_norm": 1.3006030321121216, "learning_rate": 1e-05, "loss": 0.2607, "step": 460 }, { "epoch": 0.0016105487516533824, "grad_norm": 1.1223125457763672, "learning_rate": 1e-05, "loss": 0.2433, 
"step": 470 }, { "epoch": 0.001644815746369412, "grad_norm": 1.2909380197525024, "learning_rate": 1e-05, "loss": 0.2693, "step": 480 }, { "epoch": 0.0016790827410854414, "grad_norm": 1.2270597219467163, "learning_rate": 1e-05, "loss": 0.2661, "step": 490 }, { "epoch": 0.0017133497358014708, "grad_norm": 1.1439770460128784, "learning_rate": 1e-05, "loss": 0.2517, "step": 500 }, { "epoch": 0.0017133497358014708, "eval_cer": 13.0358087846181, "eval_loss": 0.25224336981773376, "eval_normalized_cer": 9.4224620303757, "eval_runtime": 227.2174, "eval_samples_per_second": 2.253, "eval_steps_per_second": 0.035, "step": 500 }, { "epoch": 0.0017476167305175002, "grad_norm": 1.1377454996109009, "learning_rate": 1e-05, "loss": 0.2579, "step": 510 }, { "epoch": 0.0017818837252335296, "grad_norm": 1.2096498012542725, "learning_rate": 1e-05, "loss": 0.2727, "step": 520 }, { "epoch": 0.001816150719949559, "grad_norm": 1.187213659286499, "learning_rate": 1e-05, "loss": 0.2562, "step": 530 }, { "epoch": 0.0018504177146655885, "grad_norm": 0.969393253326416, "learning_rate": 1e-05, "loss": 0.2378, "step": 540 }, { "epoch": 0.0018846847093816179, "grad_norm": 0.9745528697967529, "learning_rate": 1e-05, "loss": 0.2774, "step": 550 }, { "epoch": 0.0019189517040976473, "grad_norm": 1.0725352764129639, "learning_rate": 1e-05, "loss": 0.2541, "step": 560 }, { "epoch": 0.0019532186988136767, "grad_norm": 1.217871904373169, "learning_rate": 1e-05, "loss": 0.2395, "step": 570 }, { "epoch": 0.001987485693529706, "grad_norm": 1.3582627773284912, "learning_rate": 1e-05, "loss": 0.2594, "step": 580 }, { "epoch": 0.0020217526882457355, "grad_norm": 1.2415379285812378, "learning_rate": 1e-05, "loss": 0.2582, "step": 590 }, { "epoch": 0.002056019682961765, "grad_norm": 0.9810131192207336, "learning_rate": 1e-05, "loss": 0.2284, "step": 600 }, { "epoch": 0.0020902866776777943, "grad_norm": 0.9806564450263977, "learning_rate": 1e-05, "loss": 0.2688, "step": 610 }, { "epoch": 0.0021245536723938237, 
"grad_norm": 1.2755467891693115, "learning_rate": 1e-05, "loss": 0.2591, "step": 620 }, { "epoch": 0.002158820667109853, "grad_norm": 0.9300326704978943, "learning_rate": 1e-05, "loss": 0.2444, "step": 630 }, { "epoch": 0.0021930876618258825, "grad_norm": 1.1276524066925049, "learning_rate": 1e-05, "loss": 0.236, "step": 640 }, { "epoch": 0.002227354656541912, "grad_norm": 1.1786876916885376, "learning_rate": 1e-05, "loss": 0.2443, "step": 650 }, { "epoch": 0.0022616216512579414, "grad_norm": 1.1702712774276733, "learning_rate": 1e-05, "loss": 0.2627, "step": 660 }, { "epoch": 0.0022958886459739708, "grad_norm": 1.2837899923324585, "learning_rate": 1e-05, "loss": 0.2378, "step": 670 }, { "epoch": 0.00233015564069, "grad_norm": 1.0623608827590942, "learning_rate": 1e-05, "loss": 0.2491, "step": 680 }, { "epoch": 0.0023644226354060296, "grad_norm": 1.1288243532180786, "learning_rate": 1e-05, "loss": 0.2773, "step": 690 }, { "epoch": 0.002398689630122059, "grad_norm": 1.0192692279815674, "learning_rate": 1e-05, "loss": 0.2492, "step": 700 }, { "epoch": 0.0024329566248380884, "grad_norm": 1.2274680137634277, "learning_rate": 1e-05, "loss": 0.2345, "step": 710 }, { "epoch": 0.002467223619554118, "grad_norm": 1.240645170211792, "learning_rate": 1e-05, "loss": 0.2624, "step": 720 }, { "epoch": 0.002501490614270147, "grad_norm": 1.0681366920471191, "learning_rate": 1e-05, "loss": 0.2553, "step": 730 }, { "epoch": 0.0025357576089861766, "grad_norm": 1.0161867141723633, "learning_rate": 1e-05, "loss": 0.2547, "step": 740 }, { "epoch": 0.002570024603702206, "grad_norm": 1.2384017705917358, "learning_rate": 1e-05, "loss": 0.2449, "step": 750 }, { "epoch": 0.0026042915984182354, "grad_norm": 1.1739261150360107, "learning_rate": 1e-05, "loss": 0.2523, "step": 760 }, { "epoch": 0.002638558593134265, "grad_norm": 1.0396535396575928, "learning_rate": 1e-05, "loss": 0.2535, "step": 770 }, { "epoch": 0.0026728255878502943, "grad_norm": 1.14767324924469, "learning_rate": 1e-05, 
"loss": 0.2594, "step": 780 }, { "epoch": 0.0027070925825663237, "grad_norm": 1.1783303022384644, "learning_rate": 1e-05, "loss": 0.2546, "step": 790 }, { "epoch": 0.002741359577282353, "grad_norm": 1.1065645217895508, "learning_rate": 1e-05, "loss": 0.2547, "step": 800 }, { "epoch": 0.0027756265719983825, "grad_norm": 1.256645917892456, "learning_rate": 1e-05, "loss": 0.2548, "step": 810 }, { "epoch": 0.002809893566714412, "grad_norm": 1.058158278465271, "learning_rate": 1e-05, "loss": 0.257, "step": 820 }, { "epoch": 0.0028441605614304413, "grad_norm": 1.0647656917572021, "learning_rate": 1e-05, "loss": 0.2479, "step": 830 }, { "epoch": 0.0028784275561464707, "grad_norm": 1.1984691619873047, "learning_rate": 1e-05, "loss": 0.2503, "step": 840 }, { "epoch": 0.0029126945508625, "grad_norm": 1.1380070447921753, "learning_rate": 1e-05, "loss": 0.245, "step": 850 }, { "epoch": 0.0029469615455785295, "grad_norm": 1.2131065130233765, "learning_rate": 1e-05, "loss": 0.242, "step": 860 }, { "epoch": 0.002981228540294559, "grad_norm": 1.1822234392166138, "learning_rate": 1e-05, "loss": 0.2613, "step": 870 }, { "epoch": 0.0030154955350105883, "grad_norm": 1.0591018199920654, "learning_rate": 1e-05, "loss": 0.2654, "step": 880 }, { "epoch": 0.0030497625297266177, "grad_norm": 1.2318428754806519, "learning_rate": 1e-05, "loss": 0.2525, "step": 890 }, { "epoch": 0.003084029524442647, "grad_norm": 1.0146839618682861, "learning_rate": 1e-05, "loss": 0.2609, "step": 900 }, { "epoch": 0.0031182965191586766, "grad_norm": 1.1508561372756958, "learning_rate": 1e-05, "loss": 0.2541, "step": 910 }, { "epoch": 0.003152563513874706, "grad_norm": 1.1494849920272827, "learning_rate": 1e-05, "loss": 0.2461, "step": 920 }, { "epoch": 0.0031868305085907354, "grad_norm": 1.2423807382583618, "learning_rate": 1e-05, "loss": 0.2573, "step": 930 }, { "epoch": 0.0032210975033067648, "grad_norm": 1.2714438438415527, "learning_rate": 1e-05, "loss": 0.2545, "step": 940 }, { "epoch": 
0.0032553644980227946, "grad_norm": 1.2088007926940918, "learning_rate": 1e-05, "loss": 0.2773, "step": 950 }, { "epoch": 0.003289631492738824, "grad_norm": 1.0737963914871216, "learning_rate": 1e-05, "loss": 0.2495, "step": 960 }, { "epoch": 0.0033238984874548534, "grad_norm": 1.0942472219467163, "learning_rate": 1e-05, "loss": 0.2401, "step": 970 }, { "epoch": 0.003358165482170883, "grad_norm": 1.1282986402511597, "learning_rate": 1e-05, "loss": 0.2638, "step": 980 }, { "epoch": 0.0033924324768869123, "grad_norm": 1.0762425661087036, "learning_rate": 1e-05, "loss": 0.2619, "step": 990 }, { "epoch": 0.0034266994716029417, "grad_norm": 1.09200119972229, "learning_rate": 1e-05, "loss": 0.2464, "step": 1000 }, { "epoch": 0.0034266994716029417, "eval_cer": 13.80313988357735, "eval_loss": 0.25397512316703796, "eval_normalized_cer": 9.952038369304557, "eval_runtime": 227.5088, "eval_samples_per_second": 2.25, "eval_steps_per_second": 0.035, "step": 1000 }, { "epoch": 0.003460966466318971, "grad_norm": 0.9681844711303711, "learning_rate": 1e-05, "loss": 0.2567, "step": 1010 }, { "epoch": 0.0034952334610350005, "grad_norm": 1.0064711570739746, "learning_rate": 1e-05, "loss": 0.2514, "step": 1020 }, { "epoch": 0.00352950045575103, "grad_norm": 1.190294623374939, "learning_rate": 1e-05, "loss": 0.2654, "step": 1030 }, { "epoch": 0.0035637674504670593, "grad_norm": 1.332492709159851, "learning_rate": 1e-05, "loss": 0.2725, "step": 1040 }, { "epoch": 0.0035980344451830887, "grad_norm": 1.1110397577285767, "learning_rate": 1e-05, "loss": 0.2504, "step": 1050 }, { "epoch": 0.003632301439899118, "grad_norm": 1.2327215671539307, "learning_rate": 1e-05, "loss": 0.2733, "step": 1060 }, { "epoch": 0.0036665684346151475, "grad_norm": 1.1694815158843994, "learning_rate": 1e-05, "loss": 0.2611, "step": 1070 }, { "epoch": 0.003700835429331177, "grad_norm": 1.212570309638977, "learning_rate": 1e-05, "loss": 0.2556, "step": 1080 }, { "epoch": 0.0037351024240472063, "grad_norm": 
1.1467297077178955, "learning_rate": 1e-05, "loss": 0.2485, "step": 1090 }, { "epoch": 0.0037693694187632357, "grad_norm": 0.9628469347953796, "learning_rate": 1e-05, "loss": 0.2523, "step": 1100 }, { "epoch": 0.003803636413479265, "grad_norm": 1.1593494415283203, "learning_rate": 1e-05, "loss": 0.2635, "step": 1110 }, { "epoch": 0.0038379034081952946, "grad_norm": 1.1376386880874634, "learning_rate": 1e-05, "loss": 0.2504, "step": 1120 }, { "epoch": 0.003872170402911324, "grad_norm": 1.129338026046753, "learning_rate": 1e-05, "loss": 0.2601, "step": 1130 }, { "epoch": 0.003906437397627353, "grad_norm": 1.0889575481414795, "learning_rate": 1e-05, "loss": 0.2455, "step": 1140 }, { "epoch": 0.003940704392343382, "grad_norm": 1.1437270641326904, "learning_rate": 1e-05, "loss": 0.253, "step": 1150 }, { "epoch": 0.003974971387059412, "grad_norm": 1.0283392667770386, "learning_rate": 1e-05, "loss": 0.2507, "step": 1160 }, { "epoch": 0.004009238381775441, "grad_norm": 1.130747675895691, "learning_rate": 1e-05, "loss": 0.2715, "step": 1170 }, { "epoch": 0.004043505376491471, "grad_norm": 1.3483778238296509, "learning_rate": 1e-05, "loss": 0.2742, "step": 1180 }, { "epoch": 0.0040777723712075, "grad_norm": 1.0879924297332764, "learning_rate": 1e-05, "loss": 0.2641, "step": 1190 }, { "epoch": 0.00411203936592353, "grad_norm": 1.1242927312850952, "learning_rate": 1e-05, "loss": 0.2586, "step": 1200 }, { "epoch": 0.004146306360639559, "grad_norm": 1.0185858011245728, "learning_rate": 1e-05, "loss": 0.2465, "step": 1210 }, { "epoch": 0.004180573355355589, "grad_norm": 0.9555259943008423, "learning_rate": 1e-05, "loss": 0.2528, "step": 1220 }, { "epoch": 0.004214840350071618, "grad_norm": 1.210371971130371, "learning_rate": 1e-05, "loss": 0.2613, "step": 1230 }, { "epoch": 0.0042491073447876474, "grad_norm": 1.1261368989944458, "learning_rate": 1e-05, "loss": 0.2551, "step": 1240 }, { "epoch": 0.004283374339503676, "grad_norm": 1.2142603397369385, "learning_rate": 1e-05, "loss": 
0.264, "step": 1250 }, { "epoch": 0.004317641334219706, "grad_norm": 1.057758092880249, "learning_rate": 1e-05, "loss": 0.2587, "step": 1260 }, { "epoch": 0.004351908328935736, "grad_norm": 1.0871245861053467, "learning_rate": 1e-05, "loss": 0.2549, "step": 1270 }, { "epoch": 0.004386175323651765, "grad_norm": 1.1214648485183716, "learning_rate": 1e-05, "loss": 0.2582, "step": 1280 }, { "epoch": 0.004420442318367795, "grad_norm": 1.0265707969665527, "learning_rate": 1e-05, "loss": 0.2123, "step": 1290 }, { "epoch": 0.004454709313083824, "grad_norm": 1.1180216073989868, "learning_rate": 1e-05, "loss": 0.2245, "step": 1300 }, { "epoch": 0.004488976307799854, "grad_norm": 1.028238296508789, "learning_rate": 1e-05, "loss": 0.2118, "step": 1310 }, { "epoch": 0.004523243302515883, "grad_norm": 1.0321682691574097, "learning_rate": 1e-05, "loss": 0.2196, "step": 1320 }, { "epoch": 0.0045575102972319126, "grad_norm": 1.1180269718170166, "learning_rate": 1e-05, "loss": 0.2403, "step": 1330 }, { "epoch": 0.0045917772919479415, "grad_norm": 1.079560399055481, "learning_rate": 1e-05, "loss": 0.2309, "step": 1340 }, { "epoch": 0.004626044286663971, "grad_norm": 1.0062284469604492, "learning_rate": 1e-05, "loss": 0.228, "step": 1350 }, { "epoch": 0.00466031128138, "grad_norm": 1.1098395586013794, "learning_rate": 1e-05, "loss": 0.2435, "step": 1360 }, { "epoch": 0.00469457827609603, "grad_norm": 1.0619688034057617, "learning_rate": 1e-05, "loss": 0.2342, "step": 1370 }, { "epoch": 0.004728845270812059, "grad_norm": 1.1943925619125366, "learning_rate": 1e-05, "loss": 0.2315, "step": 1380 }, { "epoch": 0.004763112265528089, "grad_norm": 1.0958552360534668, "learning_rate": 1e-05, "loss": 0.2379, "step": 1390 }, { "epoch": 0.004797379260244118, "grad_norm": 1.0984197854995728, "learning_rate": 1e-05, "loss": 0.2208, "step": 1400 }, { "epoch": 0.004831646254960148, "grad_norm": 1.0741859674453735, "learning_rate": 1e-05, "loss": 0.2378, "step": 1410 }, { "epoch": 
0.004865913249676177, "grad_norm": 1.1457058191299438, "learning_rate": 1e-05, "loss": 0.2516, "step": 1420 }, { "epoch": 0.004900180244392207, "grad_norm": 0.9849014282226562, "learning_rate": 1e-05, "loss": 0.2406, "step": 1430 }, { "epoch": 0.004934447239108236, "grad_norm": 1.1174912452697754, "learning_rate": 1e-05, "loss": 0.2122, "step": 1440 }, { "epoch": 0.0049687142338242654, "grad_norm": 1.0292854309082031, "learning_rate": 1e-05, "loss": 0.2349, "step": 1450 }, { "epoch": 0.005002981228540294, "grad_norm": 1.0343785285949707, "learning_rate": 1e-05, "loss": 0.2158, "step": 1460 }, { "epoch": 0.005037248223256324, "grad_norm": 1.1178008317947388, "learning_rate": 1e-05, "loss": 0.2264, "step": 1470 }, { "epoch": 0.005071515217972353, "grad_norm": 1.0238450765609741, "learning_rate": 1e-05, "loss": 0.2287, "step": 1480 }, { "epoch": 0.005105782212688383, "grad_norm": 1.1728886365890503, "learning_rate": 1e-05, "loss": 0.2373, "step": 1490 }, { "epoch": 0.005140049207404412, "grad_norm": 1.227034091949463, "learning_rate": 1e-05, "loss": 0.222, "step": 1500 }, { "epoch": 0.005140049207404412, "eval_cer": 13.150467454577527, "eval_loss": 0.25801682472229004, "eval_normalized_cer": 9.452438049560353, "eval_runtime": 227.9378, "eval_samples_per_second": 2.246, "eval_steps_per_second": 0.035, "step": 1500 }, { "epoch": 0.005174316202120442, "grad_norm": 1.0703920125961304, "learning_rate": 1e-05, "loss": 0.2156, "step": 1510 }, { "epoch": 0.005208583196836471, "grad_norm": 1.1343841552734375, "learning_rate": 1e-05, "loss": 0.2126, "step": 1520 }, { "epoch": 0.005242850191552501, "grad_norm": 1.1743741035461426, "learning_rate": 1e-05, "loss": 0.2491, "step": 1530 }, { "epoch": 0.00527711718626853, "grad_norm": 1.1476744413375854, "learning_rate": 1e-05, "loss": 0.236, "step": 1540 }, { "epoch": 0.0053113841809845595, "grad_norm": 1.0899590253829956, "learning_rate": 1e-05, "loss": 0.2361, "step": 1550 }, { "epoch": 0.0053456511757005885, "grad_norm": 
1.0281250476837158, "learning_rate": 1e-05, "loss": 0.2226, "step": 1560 }, { "epoch": 0.005379918170416618, "grad_norm": 0.9932867884635925, "learning_rate": 1e-05, "loss": 0.2301, "step": 1570 }, { "epoch": 0.005414185165132647, "grad_norm": 1.1992309093475342, "learning_rate": 1e-05, "loss": 0.2179, "step": 1580 }, { "epoch": 0.005448452159848677, "grad_norm": 1.0017774105072021, "learning_rate": 1e-05, "loss": 0.2244, "step": 1590 }, { "epoch": 0.005482719154564706, "grad_norm": 1.0827686786651611, "learning_rate": 1e-05, "loss": 0.2313, "step": 1600 }, { "epoch": 0.005516986149280736, "grad_norm": 1.2260409593582153, "learning_rate": 1e-05, "loss": 0.229, "step": 1610 }, { "epoch": 0.005551253143996765, "grad_norm": 1.2530804872512817, "learning_rate": 1e-05, "loss": 0.2437, "step": 1620 }, { "epoch": 0.005585520138712795, "grad_norm": 1.068452000617981, "learning_rate": 1e-05, "loss": 0.2138, "step": 1630 }, { "epoch": 0.005619787133428824, "grad_norm": 1.3108712434768677, "learning_rate": 1e-05, "loss": 0.2284, "step": 1640 }, { "epoch": 0.005654054128144854, "grad_norm": 1.0919209718704224, "learning_rate": 1e-05, "loss": 0.213, "step": 1650 }, { "epoch": 0.005688321122860883, "grad_norm": 1.1530914306640625, "learning_rate": 1e-05, "loss": 0.2292, "step": 1660 }, { "epoch": 0.005722588117576912, "grad_norm": 1.084028959274292, "learning_rate": 1e-05, "loss": 0.2393, "step": 1670 }, { "epoch": 0.005756855112292941, "grad_norm": 1.247847557067871, "learning_rate": 1e-05, "loss": 0.2452, "step": 1680 }, { "epoch": 0.005791122107008971, "grad_norm": 1.03806734085083, "learning_rate": 1e-05, "loss": 0.2317, "step": 1690 }, { "epoch": 0.005825389101725, "grad_norm": 1.1643092632293701, "learning_rate": 1e-05, "loss": 0.2348, "step": 1700 }, { "epoch": 0.00585965609644103, "grad_norm": 1.1066207885742188, "learning_rate": 1e-05, "loss": 0.2348, "step": 1710 }, { "epoch": 0.005893923091157059, "grad_norm": 1.1813760995864868, "learning_rate": 1e-05, "loss": 
0.2295, "step": 1720 }, { "epoch": 0.005928190085873089, "grad_norm": 1.1444518566131592, "learning_rate": 1e-05, "loss": 0.2101, "step": 1730 }, { "epoch": 0.005962457080589118, "grad_norm": 1.1485129594802856, "learning_rate": 1e-05, "loss": 0.2397, "step": 1740 }, { "epoch": 0.005996724075305148, "grad_norm": 1.1813607215881348, "learning_rate": 1e-05, "loss": 0.231, "step": 1750 }, { "epoch": 0.006030991070021177, "grad_norm": 1.4075005054473877, "learning_rate": 1e-05, "loss": 0.2306, "step": 1760 }, { "epoch": 0.0060652580647372065, "grad_norm": 1.2183804512023926, "learning_rate": 1e-05, "loss": 0.2227, "step": 1770 }, { "epoch": 0.0060995250594532355, "grad_norm": 1.3654927015304565, "learning_rate": 1e-05, "loss": 0.2341, "step": 1780 }, { "epoch": 0.006133792054169265, "grad_norm": 1.2806668281555176, "learning_rate": 1e-05, "loss": 0.2226, "step": 1790 }, { "epoch": 0.006168059048885294, "grad_norm": 1.2949618101119995, "learning_rate": 1e-05, "loss": 0.2698, "step": 1800 }, { "epoch": 0.006202326043601324, "grad_norm": 1.3080159425735474, "learning_rate": 1e-05, "loss": 0.2691, "step": 1810 }, { "epoch": 0.006236593038317353, "grad_norm": 1.1831908226013184, "learning_rate": 1e-05, "loss": 0.2644, "step": 1820 }, { "epoch": 0.006270860033033383, "grad_norm": 1.1216965913772583, "learning_rate": 1e-05, "loss": 0.2582, "step": 1830 }, { "epoch": 0.006305127027749412, "grad_norm": 1.1943161487579346, "learning_rate": 1e-05, "loss": 0.2769, "step": 1840 }, { "epoch": 0.006339394022465442, "grad_norm": 1.0856040716171265, "learning_rate": 1e-05, "loss": 0.2526, "step": 1850 }, { "epoch": 0.006373661017181471, "grad_norm": 1.1100040674209595, "learning_rate": 1e-05, "loss": 0.2576, "step": 1860 }, { "epoch": 0.006407928011897501, "grad_norm": 1.3369051218032837, "learning_rate": 1e-05, "loss": 0.2684, "step": 1870 }, { "epoch": 0.0064421950066135296, "grad_norm": 1.158797264099121, "learning_rate": 1e-05, "loss": 0.2474, "step": 1880 }, { "epoch": 
0.006476462001329559, "grad_norm": 1.1821873188018799, "learning_rate": 1e-05, "loss": 0.272, "step": 1890 }, { "epoch": 0.006510728996045589, "grad_norm": 1.0739686489105225, "learning_rate": 1e-05, "loss": 0.2798, "step": 1900 }, { "epoch": 0.006544995990761618, "grad_norm": 1.0639653205871582, "learning_rate": 1e-05, "loss": 0.2682, "step": 1910 }, { "epoch": 0.006579262985477648, "grad_norm": 1.2149512767791748, "learning_rate": 1e-05, "loss": 0.2586, "step": 1920 }, { "epoch": 0.006613529980193677, "grad_norm": 1.1057014465332031, "learning_rate": 1e-05, "loss": 0.2719, "step": 1930 }, { "epoch": 0.006647796974909707, "grad_norm": 1.0929185152053833, "learning_rate": 1e-05, "loss": 0.2703, "step": 1940 }, { "epoch": 0.006682063969625736, "grad_norm": 1.0322917699813843, "learning_rate": 1e-05, "loss": 0.2477, "step": 1950 }, { "epoch": 0.006716330964341766, "grad_norm": 1.2460272312164307, "learning_rate": 1e-05, "loss": 0.2816, "step": 1960 }, { "epoch": 0.006750597959057795, "grad_norm": 1.2049859762191772, "learning_rate": 1e-05, "loss": 0.2648, "step": 1970 }, { "epoch": 0.0067848649537738245, "grad_norm": 1.1182633638381958, "learning_rate": 1e-05, "loss": 0.2549, "step": 1980 }, { "epoch": 0.0068191319484898535, "grad_norm": 1.1514990329742432, "learning_rate": 1e-05, "loss": 0.2695, "step": 1990 }, { "epoch": 0.006853398943205883, "grad_norm": 1.0150858163833618, "learning_rate": 1e-05, "loss": 0.2532, "step": 2000 }, { "epoch": 0.006853398943205883, "eval_cer": 13.565002645969306, "eval_loss": 0.2523655593395233, "eval_normalized_cer": 9.942046362909672, "eval_runtime": 226.5571, "eval_samples_per_second": 2.26, "eval_steps_per_second": 0.035, "step": 2000 }, { "epoch": 0.006887665937921912, "grad_norm": 1.0476700067520142, "learning_rate": 1e-05, "loss": 0.2555, "step": 2010 }, { "epoch": 0.006921932932637942, "grad_norm": 1.1178691387176514, "learning_rate": 1e-05, "loss": 0.2489, "step": 2020 }, { "epoch": 0.006956199927353971, "grad_norm": 
1.2596313953399658, "learning_rate": 1e-05, "loss": 0.2884, "step": 2030 }, { "epoch": 0.006990466922070001, "grad_norm": 1.1929702758789062, "learning_rate": 1e-05, "loss": 0.262, "step": 2040 }, { "epoch": 0.00702473391678603, "grad_norm": 1.1269497871398926, "learning_rate": 1e-05, "loss": 0.2758, "step": 2050 }, { "epoch": 0.00705900091150206, "grad_norm": 1.1495511531829834, "learning_rate": 1e-05, "loss": 0.2668, "step": 2060 }, { "epoch": 0.007093267906218089, "grad_norm": 1.0648061037063599, "learning_rate": 1e-05, "loss": 0.2548, "step": 2070 }, { "epoch": 0.007127534900934119, "grad_norm": 1.3193435668945312, "learning_rate": 1e-05, "loss": 0.2743, "step": 2080 }, { "epoch": 0.0071618018956501476, "grad_norm": 1.2877907752990723, "learning_rate": 1e-05, "loss": 0.248, "step": 2090 }, { "epoch": 0.007196068890366177, "grad_norm": 1.2012474536895752, "learning_rate": 1e-05, "loss": 0.2662, "step": 2100 }, { "epoch": 0.007230335885082206, "grad_norm": 1.1491566896438599, "learning_rate": 1e-05, "loss": 0.2666, "step": 2110 }, { "epoch": 0.007264602879798236, "grad_norm": 1.1861019134521484, "learning_rate": 1e-05, "loss": 0.2618, "step": 2120 }, { "epoch": 0.007298869874514265, "grad_norm": 1.123963713645935, "learning_rate": 1e-05, "loss": 0.2646, "step": 2130 }, { "epoch": 0.007333136869230295, "grad_norm": 1.2697441577911377, "learning_rate": 1e-05, "loss": 0.2713, "step": 2140 }, { "epoch": 0.007367403863946324, "grad_norm": 0.9741083383560181, "learning_rate": 1e-05, "loss": 0.2463, "step": 2150 }, { "epoch": 0.007401670858662354, "grad_norm": 1.0292670726776123, "learning_rate": 1e-05, "loss": 0.2542, "step": 2160 }, { "epoch": 0.007435937853378383, "grad_norm": 1.0958001613616943, "learning_rate": 1e-05, "loss": 0.2463, "step": 2170 }, { "epoch": 0.007470204848094413, "grad_norm": 1.166869044303894, "learning_rate": 1e-05, "loss": 0.2454, "step": 2180 }, { "epoch": 0.007504471842810442, "grad_norm": 1.2552424669265747, "learning_rate": 1e-05, "loss": 
0.2498, "step": 2190 }, { "epoch": 0.0075387388375264715, "grad_norm": 1.1589868068695068, "learning_rate": 1e-05, "loss": 0.2659, "step": 2200 }, { "epoch": 0.0075730058322425004, "grad_norm": 1.1640287637710571, "learning_rate": 1e-05, "loss": 0.257, "step": 2210 }, { "epoch": 0.00760727282695853, "grad_norm": 1.0953587293624878, "learning_rate": 1e-05, "loss": 0.2444, "step": 2220 }, { "epoch": 0.007641539821674559, "grad_norm": 1.2174441814422607, "learning_rate": 1e-05, "loss": 0.2626, "step": 2230 }, { "epoch": 0.007675806816390589, "grad_norm": 1.1194220781326294, "learning_rate": 1e-05, "loss": 0.241, "step": 2240 }, { "epoch": 0.007710073811106618, "grad_norm": 1.0677419900894165, "learning_rate": 1e-05, "loss": 0.2718, "step": 2250 }, { "epoch": 0.007744340805822648, "grad_norm": 1.0956069231033325, "learning_rate": 1e-05, "loss": 0.2493, "step": 2260 }, { "epoch": 0.007778607800538677, "grad_norm": 1.1772819757461548, "learning_rate": 1e-05, "loss": 0.2614, "step": 2270 }, { "epoch": 0.007812874795254707, "grad_norm": 1.0341110229492188, "learning_rate": 1e-05, "loss": 0.2488, "step": 2280 }, { "epoch": 0.007847141789970737, "grad_norm": 1.174186110496521, "learning_rate": 1e-05, "loss": 0.2542, "step": 2290 }, { "epoch": 0.007881408784686765, "grad_norm": 0.9867792725563049, "learning_rate": 1e-05, "loss": 0.2582, "step": 2300 }, { "epoch": 0.007915675779402795, "grad_norm": 1.1443661451339722, "learning_rate": 1e-05, "loss": 0.2331, "step": 2310 }, { "epoch": 0.007949942774118824, "grad_norm": 1.117896318435669, "learning_rate": 1e-05, "loss": 0.2277, "step": 2320 }, { "epoch": 0.007984209768834854, "grad_norm": 1.13510000705719, "learning_rate": 1e-05, "loss": 0.2137, "step": 2330 }, { "epoch": 0.008018476763550882, "grad_norm": 0.9749162793159485, "learning_rate": 1e-05, "loss": 0.2161, "step": 2340 }, { "epoch": 0.008052743758266912, "grad_norm": 1.1519534587860107, "learning_rate": 1e-05, "loss": 0.2254, "step": 2350 }, { "epoch": 
0.008087010752982942, "grad_norm": 1.0861778259277344, "learning_rate": 1e-05, "loss": 0.2153, "step": 2360 }, { "epoch": 0.008121277747698972, "grad_norm": 1.0184444189071655, "learning_rate": 1e-05, "loss": 0.2066, "step": 2370 }, { "epoch": 0.008155544742415, "grad_norm": 1.0581239461898804, "learning_rate": 1e-05, "loss": 0.2243, "step": 2380 }, { "epoch": 0.00818981173713103, "grad_norm": 0.9954540729522705, "learning_rate": 1e-05, "loss": 0.2171, "step": 2390 }, { "epoch": 0.00822407873184706, "grad_norm": 1.121960163116455, "learning_rate": 1e-05, "loss": 0.2216, "step": 2400 }, { "epoch": 0.00825834572656309, "grad_norm": 1.097725510597229, "learning_rate": 1e-05, "loss": 0.2142, "step": 2410 }, { "epoch": 0.008292612721279118, "grad_norm": 1.0566459894180298, "learning_rate": 1e-05, "loss": 0.2272, "step": 2420 }, { "epoch": 0.008326879715995147, "grad_norm": 1.0077927112579346, "learning_rate": 1e-05, "loss": 0.211, "step": 2430 }, { "epoch": 0.008361146710711177, "grad_norm": 1.176035761833191, "learning_rate": 1e-05, "loss": 0.2125, "step": 2440 }, { "epoch": 0.008395413705427207, "grad_norm": 1.0064568519592285, "learning_rate": 1e-05, "loss": 0.2066, "step": 2450 }, { "epoch": 0.008429680700143235, "grad_norm": 1.1852171421051025, "learning_rate": 1e-05, "loss": 0.2087, "step": 2460 }, { "epoch": 0.008463947694859265, "grad_norm": 0.9580971002578735, "learning_rate": 1e-05, "loss": 0.2172, "step": 2470 }, { "epoch": 0.008498214689575295, "grad_norm": 1.1230813264846802, "learning_rate": 1e-05, "loss": 0.2104, "step": 2480 }, { "epoch": 0.008532481684291325, "grad_norm": 1.1891340017318726, "learning_rate": 1e-05, "loss": 0.229, "step": 2490 }, { "epoch": 0.008566748679007353, "grad_norm": 1.2579045295715332, "learning_rate": 1e-05, "loss": 0.2109, "step": 2500 }, { "epoch": 0.008566748679007353, "eval_cer": 13.300405715293703, "eval_loss": 0.26059621572494507, "eval_normalized_cer": 9.502398081534773, "eval_runtime": 226.5522, 
"eval_samples_per_second": 2.26, "eval_steps_per_second": 0.035, "step": 2500 }, { "epoch": 0.008601015673723383, "grad_norm": 1.0522507429122925, "learning_rate": 1e-05, "loss": 0.2154, "step": 2510 }, { "epoch": 0.008635282668439413, "grad_norm": 1.0875492095947266, "learning_rate": 1e-05, "loss": 0.2251, "step": 2520 }, { "epoch": 0.008669549663155442, "grad_norm": 1.0868346691131592, "learning_rate": 1e-05, "loss": 0.2086, "step": 2530 }, { "epoch": 0.008703816657871472, "grad_norm": 1.0993175506591797, "learning_rate": 1e-05, "loss": 0.205, "step": 2540 }, { "epoch": 0.0087380836525875, "grad_norm": 1.0495941638946533, "learning_rate": 1e-05, "loss": 0.2135, "step": 2550 }, { "epoch": 0.00877235064730353, "grad_norm": 1.0326807498931885, "learning_rate": 1e-05, "loss": 0.2105, "step": 2560 }, { "epoch": 0.00880661764201956, "grad_norm": 1.0804367065429688, "learning_rate": 1e-05, "loss": 0.2438, "step": 2570 }, { "epoch": 0.00884088463673559, "grad_norm": 1.0738023519515991, "learning_rate": 1e-05, "loss": 0.2537, "step": 2580 }, { "epoch": 0.008875151631451618, "grad_norm": 1.1695871353149414, "learning_rate": 1e-05, "loss": 0.2518, "step": 2590 }, { "epoch": 0.008909418626167648, "grad_norm": 1.155653476715088, "learning_rate": 1e-05, "loss": 0.2592, "step": 2600 }, { "epoch": 0.008943685620883678, "grad_norm": 1.1516027450561523, "learning_rate": 1e-05, "loss": 0.2387, "step": 2610 }, { "epoch": 0.008977952615599707, "grad_norm": 1.2618260383605957, "learning_rate": 1e-05, "loss": 0.2638, "step": 2620 }, { "epoch": 0.009012219610315736, "grad_norm": 1.2422987222671509, "learning_rate": 1e-05, "loss": 0.2459, "step": 2630 }, { "epoch": 0.009046486605031765, "grad_norm": 1.1460082530975342, "learning_rate": 1e-05, "loss": 0.2509, "step": 2640 }, { "epoch": 0.009080753599747795, "grad_norm": 1.2502261400222778, "learning_rate": 1e-05, "loss": 0.2595, "step": 2650 }, { "epoch": 0.009115020594463825, "grad_norm": 1.139840006828308, "learning_rate": 1e-05, 
"loss": 0.255, "step": 2660 }, { "epoch": 0.009149287589179853, "grad_norm": 1.3247896432876587, "learning_rate": 1e-05, "loss": 0.2721, "step": 2670 }, { "epoch": 0.009183554583895883, "grad_norm": 1.1355103254318237, "learning_rate": 1e-05, "loss": 0.2604, "step": 2680 }, { "epoch": 0.009217821578611913, "grad_norm": 1.106541633605957, "learning_rate": 1e-05, "loss": 0.2374, "step": 2690 }, { "epoch": 0.009252088573327943, "grad_norm": 1.2375975847244263, "learning_rate": 1e-05, "loss": 0.2719, "step": 2700 }, { "epoch": 0.00928635556804397, "grad_norm": 1.1048275232315063, "learning_rate": 1e-05, "loss": 0.2791, "step": 2710 }, { "epoch": 0.00932062256276, "grad_norm": 0.9889766573905945, "learning_rate": 1e-05, "loss": 0.2457, "step": 2720 }, { "epoch": 0.00935488955747603, "grad_norm": 1.1566202640533447, "learning_rate": 1e-05, "loss": 0.252, "step": 2730 }, { "epoch": 0.00938915655219206, "grad_norm": 1.1586074829101562, "learning_rate": 1e-05, "loss": 0.2517, "step": 2740 }, { "epoch": 0.009423423546908088, "grad_norm": 0.990419328212738, "learning_rate": 1e-05, "loss": 0.2572, "step": 2750 }, { "epoch": 0.009457690541624118, "grad_norm": 1.1101089715957642, "learning_rate": 1e-05, "loss": 0.2525, "step": 2760 }, { "epoch": 0.009491957536340148, "grad_norm": 1.0488269329071045, "learning_rate": 1e-05, "loss": 0.2452, "step": 2770 }, { "epoch": 0.009526224531056178, "grad_norm": 1.1127737760543823, "learning_rate": 1e-05, "loss": 0.2578, "step": 2780 }, { "epoch": 0.009560491525772206, "grad_norm": 1.2353262901306152, "learning_rate": 1e-05, "loss": 0.2412, "step": 2790 }, { "epoch": 0.009594758520488236, "grad_norm": 1.1262571811676025, "learning_rate": 1e-05, "loss": 0.2438, "step": 2800 }, { "epoch": 0.009629025515204266, "grad_norm": 1.294323205947876, "learning_rate": 1e-05, "loss": 0.2512, "step": 2810 }, { "epoch": 0.009663292509920296, "grad_norm": 1.0706703662872314, "learning_rate": 1e-05, "loss": 0.2595, "step": 2820 }, { "epoch": 
0.009697559504636324, "grad_norm": 1.0089077949523926, "learning_rate": 1e-05, "loss": 0.2522, "step": 2830 }, { "epoch": 0.009731826499352354, "grad_norm": 0.9697763323783875, "learning_rate": 1e-05, "loss": 0.2684, "step": 2840 }, { "epoch": 0.009766093494068383, "grad_norm": 1.1122509241104126, "learning_rate": 1e-05, "loss": 0.2629, "step": 2850 }, { "epoch": 0.009800360488784413, "grad_norm": 1.0381057262420654, "learning_rate": 1e-05, "loss": 0.2482, "step": 2860 }, { "epoch": 0.009834627483500441, "grad_norm": 1.126947045326233, "learning_rate": 1e-05, "loss": 0.2674, "step": 2870 }, { "epoch": 0.009868894478216471, "grad_norm": 1.0714973211288452, "learning_rate": 1e-05, "loss": 0.2634, "step": 2880 }, { "epoch": 0.009903161472932501, "grad_norm": 1.0942039489746094, "learning_rate": 1e-05, "loss": 0.2751, "step": 2890 }, { "epoch": 0.009937428467648531, "grad_norm": 1.1503955125808716, "learning_rate": 1e-05, "loss": 0.272, "step": 2900 }, { "epoch": 0.009971695462364559, "grad_norm": 1.1912988424301147, "learning_rate": 1e-05, "loss": 0.2645, "step": 2910 }, { "epoch": 0.010005962457080589, "grad_norm": 1.0941249132156372, "learning_rate": 1e-05, "loss": 0.2531, "step": 2920 }, { "epoch": 0.010040229451796619, "grad_norm": 1.2545968294143677, "learning_rate": 1e-05, "loss": 0.2562, "step": 2930 }, { "epoch": 0.010074496446512649, "grad_norm": 1.3605022430419922, "learning_rate": 1e-05, "loss": 0.2601, "step": 2940 }, { "epoch": 0.010108763441228677, "grad_norm": 1.0911775827407837, "learning_rate": 1e-05, "loss": 0.2605, "step": 2950 }, { "epoch": 0.010143030435944706, "grad_norm": 1.133867859840393, "learning_rate": 1e-05, "loss": 0.2554, "step": 2960 }, { "epoch": 0.010177297430660736, "grad_norm": 1.2511764764785767, "learning_rate": 1e-05, "loss": 0.2658, "step": 2970 }, { "epoch": 0.010211564425376766, "grad_norm": 1.1705303192138672, "learning_rate": 1e-05, "loss": 0.2737, "step": 2980 }, { "epoch": 0.010245831420092794, "grad_norm": 
1.132071614265442, "learning_rate": 1e-05, "loss": 0.2665, "step": 2990 }, { "epoch": 0.010280098414808824, "grad_norm": 1.2301791906356812, "learning_rate": 1e-05, "loss": 0.2645, "step": 3000 }, { "epoch": 0.010280098414808824, "eval_cer": 12.938789910037043, "eval_loss": 0.2511608302593231, "eval_normalized_cer": 9.152677857713828, "eval_runtime": 227.4553, "eval_samples_per_second": 2.251, "eval_steps_per_second": 0.035, "step": 3000 }, { "epoch": 0.010314365409524854, "grad_norm": 1.1527032852172852, "learning_rate": 1e-05, "loss": 0.2508, "step": 3010 }, { "epoch": 0.010348632404240884, "grad_norm": 1.1162952184677124, "learning_rate": 1e-05, "loss": 0.2728, "step": 3020 }, { "epoch": 0.010382899398956912, "grad_norm": 1.062084436416626, "learning_rate": 1e-05, "loss": 0.2496, "step": 3030 }, { "epoch": 0.010417166393672942, "grad_norm": 1.1536457538604736, "learning_rate": 1e-05, "loss": 0.2633, "step": 3040 }, { "epoch": 0.010451433388388972, "grad_norm": 1.2096189260482788, "learning_rate": 1e-05, "loss": 0.2498, "step": 3050 }, { "epoch": 0.010485700383105001, "grad_norm": 0.9950299263000488, "learning_rate": 1e-05, "loss": 0.246, "step": 3060 }, { "epoch": 0.01051996737782103, "grad_norm": 1.0628243684768677, "learning_rate": 1e-05, "loss": 0.2544, "step": 3070 }, { "epoch": 0.01055423437253706, "grad_norm": 1.042555570602417, "learning_rate": 1e-05, "loss": 0.2401, "step": 3080 }, { "epoch": 0.01058850136725309, "grad_norm": 1.22646164894104, "learning_rate": 1e-05, "loss": 0.2503, "step": 3090 }, { "epoch": 0.010622768361969119, "grad_norm": 1.0862691402435303, "learning_rate": 1e-05, "loss": 0.2508, "step": 3100 }, { "epoch": 0.010657035356685147, "grad_norm": 1.148868203163147, "learning_rate": 1e-05, "loss": 0.2526, "step": 3110 }, { "epoch": 0.010691302351401177, "grad_norm": 1.1677169799804688, "learning_rate": 1e-05, "loss": 0.2481, "step": 3120 }, { "epoch": 0.010725569346117207, "grad_norm": 0.990696132183075, "learning_rate": 1e-05, "loss": 
0.2421, "step": 3130 }, { "epoch": 0.010759836340833237, "grad_norm": 1.2869263887405396, "learning_rate": 1e-05, "loss": 0.2463, "step": 3140 }, { "epoch": 0.010794103335549265, "grad_norm": 1.0741721391677856, "learning_rate": 1e-05, "loss": 0.2617, "step": 3150 }, { "epoch": 0.010828370330265295, "grad_norm": 1.103102445602417, "learning_rate": 1e-05, "loss": 0.2442, "step": 3160 }, { "epoch": 0.010862637324981324, "grad_norm": 1.2562378644943237, "learning_rate": 1e-05, "loss": 0.2589, "step": 3170 }, { "epoch": 0.010896904319697354, "grad_norm": 1.2153191566467285, "learning_rate": 1e-05, "loss": 0.2417, "step": 3180 }, { "epoch": 0.010931171314413384, "grad_norm": 1.0507330894470215, "learning_rate": 1e-05, "loss": 0.2607, "step": 3190 }, { "epoch": 0.010965438309129412, "grad_norm": 1.1882787942886353, "learning_rate": 1e-05, "loss": 0.2469, "step": 3200 }, { "epoch": 0.010999705303845442, "grad_norm": 1.1394702196121216, "learning_rate": 1e-05, "loss": 0.2574, "step": 3210 }, { "epoch": 0.011033972298561472, "grad_norm": 1.2482614517211914, "learning_rate": 1e-05, "loss": 0.2456, "step": 3220 }, { "epoch": 0.011068239293277502, "grad_norm": 1.0362995862960815, "learning_rate": 1e-05, "loss": 0.2589, "step": 3230 }, { "epoch": 0.01110250628799353, "grad_norm": 1.1730456352233887, "learning_rate": 1e-05, "loss": 0.2497, "step": 3240 }, { "epoch": 0.01113677328270956, "grad_norm": 1.1563142538070679, "learning_rate": 1e-05, "loss": 0.2439, "step": 3250 }, { "epoch": 0.01117104027742559, "grad_norm": 1.1030769348144531, "learning_rate": 1e-05, "loss": 0.2671, "step": 3260 }, { "epoch": 0.01120530727214162, "grad_norm": 1.1719223260879517, "learning_rate": 1e-05, "loss": 0.2501, "step": 3270 }, { "epoch": 0.011239574266857648, "grad_norm": 1.1840440034866333, "learning_rate": 1e-05, "loss": 0.2643, "step": 3280 }, { "epoch": 0.011273841261573677, "grad_norm": 1.1928170919418335, "learning_rate": 1e-05, "loss": 0.2629, "step": 3290 }, { "epoch": 
0.011308108256289707, "grad_norm": 1.0311812162399292, "learning_rate": 1e-05, "loss": 0.2552, "step": 3300 }, { "epoch": 0.011342375251005737, "grad_norm": 1.1625889539718628, "learning_rate": 1e-05, "loss": 0.2561, "step": 3310 }, { "epoch": 0.011376642245721765, "grad_norm": 1.0287625789642334, "learning_rate": 1e-05, "loss": 0.2341, "step": 3320 }, { "epoch": 0.011410909240437795, "grad_norm": 1.1310815811157227, "learning_rate": 1e-05, "loss": 0.2554, "step": 3330 }, { "epoch": 0.011445176235153825, "grad_norm": 1.1266168355941772, "learning_rate": 1e-05, "loss": 0.234, "step": 3340 }, { "epoch": 0.011479443229869855, "grad_norm": 1.1979014873504639, "learning_rate": 1e-05, "loss": 0.2559, "step": 3350 }, { "epoch": 0.011513710224585883, "grad_norm": 1.0378515720367432, "learning_rate": 1e-05, "loss": 0.2502, "step": 3360 }, { "epoch": 0.011547977219301913, "grad_norm": 1.1832512617111206, "learning_rate": 1e-05, "loss": 0.236, "step": 3370 }, { "epoch": 0.011582244214017942, "grad_norm": 0.9605569839477539, "learning_rate": 1e-05, "loss": 0.2349, "step": 3380 }, { "epoch": 0.011616511208733972, "grad_norm": 1.0463056564331055, "learning_rate": 1e-05, "loss": 0.2328, "step": 3390 }, { "epoch": 0.01165077820345, "grad_norm": 1.1021932363510132, "learning_rate": 1e-05, "loss": 0.2383, "step": 3400 }, { "epoch": 0.01168504519816603, "grad_norm": 1.040493130683899, "learning_rate": 1e-05, "loss": 0.2374, "step": 3410 }, { "epoch": 0.01171931219288206, "grad_norm": 1.1483063697814941, "learning_rate": 1e-05, "loss": 0.2398, "step": 3420 }, { "epoch": 0.01175357918759809, "grad_norm": 1.0316531658172607, "learning_rate": 1e-05, "loss": 0.2329, "step": 3430 }, { "epoch": 0.011787846182314118, "grad_norm": 1.1677886247634888, "learning_rate": 1e-05, "loss": 0.2493, "step": 3440 }, { "epoch": 0.011822113177030148, "grad_norm": 1.2078930139541626, "learning_rate": 1e-05, "loss": 0.2337, "step": 3450 }, { "epoch": 0.011856380171746178, "grad_norm": 1.178202509880066, 
"learning_rate": 1e-05, "loss": 0.239, "step": 3460 }, { "epoch": 0.011890647166462208, "grad_norm": 1.0453248023986816, "learning_rate": 1e-05, "loss": 0.2233, "step": 3470 }, { "epoch": 0.011924914161178236, "grad_norm": 1.0171067714691162, "learning_rate": 1e-05, "loss": 0.2338, "step": 3480 }, { "epoch": 0.011959181155894266, "grad_norm": 1.051792860031128, "learning_rate": 1e-05, "loss": 0.2394, "step": 3490 }, { "epoch": 0.011993448150610295, "grad_norm": 1.1237847805023193, "learning_rate": 1e-05, "loss": 0.2428, "step": 3500 }, { "epoch": 0.011993448150610295, "eval_cer": 13.071088375374845, "eval_loss": 0.25454944372177124, "eval_normalized_cer": 9.542366107114308, "eval_runtime": 228.9468, "eval_samples_per_second": 2.236, "eval_steps_per_second": 0.035, "step": 3500 }, { "epoch": 0.012027715145326325, "grad_norm": 1.1366350650787354, "learning_rate": 1e-05, "loss": 0.2353, "step": 3510 }, { "epoch": 0.012061982140042353, "grad_norm": 1.136927604675293, "learning_rate": 1e-05, "loss": 0.2358, "step": 3520 }, { "epoch": 0.012096249134758383, "grad_norm": 1.1875656843185425, "learning_rate": 1e-05, "loss": 0.2305, "step": 3530 }, { "epoch": 0.012130516129474413, "grad_norm": 1.2016057968139648, "learning_rate": 1e-05, "loss": 0.2435, "step": 3540 }, { "epoch": 0.012164783124190443, "grad_norm": 1.209622859954834, "learning_rate": 1e-05, "loss": 0.2361, "step": 3550 }, { "epoch": 0.012199050118906471, "grad_norm": 1.0696970224380493, "learning_rate": 1e-05, "loss": 0.2385, "step": 3560 }, { "epoch": 0.0122333171136225, "grad_norm": 1.2674167156219482, "learning_rate": 1e-05, "loss": 0.243, "step": 3570 }, { "epoch": 0.01226758410833853, "grad_norm": 1.2928141355514526, "learning_rate": 1e-05, "loss": 0.2491, "step": 3580 }, { "epoch": 0.01230185110305456, "grad_norm": 1.0642272233963013, "learning_rate": 1e-05, "loss": 0.2356, "step": 3590 }, { "epoch": 0.012336118097770589, "grad_norm": 1.0935972929000854, "learning_rate": 1e-05, "loss": 0.2389, "step": 
3600 }, { "epoch": 0.012370385092486618, "grad_norm": 1.180668830871582, "learning_rate": 1e-05, "loss": 0.2409, "step": 3610 }, { "epoch": 0.012404652087202648, "grad_norm": 1.2312487363815308, "learning_rate": 1e-05, "loss": 0.2478, "step": 3620 }, { "epoch": 0.012438919081918678, "grad_norm": 0.947522759437561, "learning_rate": 1e-05, "loss": 0.2281, "step": 3630 }, { "epoch": 0.012473186076634706, "grad_norm": 1.0618727207183838, "learning_rate": 1e-05, "loss": 0.2423, "step": 3640 }, { "epoch": 0.012507453071350736, "grad_norm": 1.0766098499298096, "learning_rate": 1e-05, "loss": 0.2364, "step": 3650 }, { "epoch": 0.012541720066066766, "grad_norm": 1.1174747943878174, "learning_rate": 1e-05, "loss": 0.238, "step": 3660 }, { "epoch": 0.012575987060782796, "grad_norm": 1.1940118074417114, "learning_rate": 1e-05, "loss": 0.2212, "step": 3670 }, { "epoch": 0.012610254055498824, "grad_norm": 1.1407246589660645, "learning_rate": 1e-05, "loss": 0.2423, "step": 3680 }, { "epoch": 0.012644521050214854, "grad_norm": 1.2646050453186035, "learning_rate": 1e-05, "loss": 0.2252, "step": 3690 }, { "epoch": 0.012678788044930884, "grad_norm": 1.130337119102478, "learning_rate": 1e-05, "loss": 0.2131, "step": 3700 }, { "epoch": 0.012713055039646913, "grad_norm": 1.1432557106018066, "learning_rate": 1e-05, "loss": 0.2386, "step": 3710 }, { "epoch": 0.012747322034362941, "grad_norm": 1.1370545625686646, "learning_rate": 1e-05, "loss": 0.2347, "step": 3720 }, { "epoch": 0.012781589029078971, "grad_norm": 1.3126403093338013, "learning_rate": 1e-05, "loss": 0.2159, "step": 3730 }, { "epoch": 0.012815856023795001, "grad_norm": 1.2375295162200928, "learning_rate": 1e-05, "loss": 0.2275, "step": 3740 }, { "epoch": 0.012850123018511031, "grad_norm": 1.0877372026443481, "learning_rate": 1e-05, "loss": 0.2201, "step": 3750 }, { "epoch": 0.012884390013227059, "grad_norm": 1.1122978925704956, "learning_rate": 1e-05, "loss": 0.229, "step": 3760 }, { "epoch": 0.012918657007943089, 
"grad_norm": 1.0270159244537354, "learning_rate": 1e-05, "loss": 0.2313, "step": 3770 }, { "epoch": 0.012952924002659119, "grad_norm": 1.1370947360992432, "learning_rate": 1e-05, "loss": 0.229, "step": 3780 }, { "epoch": 0.012987190997375149, "grad_norm": 1.2888813018798828, "learning_rate": 1e-05, "loss": 0.2384, "step": 3790 }, { "epoch": 0.013021457992091178, "grad_norm": 1.2443634271621704, "learning_rate": 1e-05, "loss": 0.2218, "step": 3800 }, { "epoch": 0.013055724986807207, "grad_norm": 1.1919447183609009, "learning_rate": 1e-05, "loss": 0.2277, "step": 3810 }, { "epoch": 0.013089991981523236, "grad_norm": 1.140600562095642, "learning_rate": 1e-05, "loss": 0.2317, "step": 3820 }, { "epoch": 0.013124258976239266, "grad_norm": 1.074697494506836, "learning_rate": 1e-05, "loss": 0.2273, "step": 3830 }, { "epoch": 0.013158525970955296, "grad_norm": 1.1003391742706299, "learning_rate": 1e-05, "loss": 0.2217, "step": 3840 }, { "epoch": 0.013192792965671324, "grad_norm": 1.1427338123321533, "learning_rate": 1e-05, "loss": 0.2377, "step": 3850 }, { "epoch": 0.013227059960387354, "grad_norm": 1.0806514024734497, "learning_rate": 1e-05, "loss": 0.2332, "step": 3860 }, { "epoch": 0.013261326955103384, "grad_norm": 1.1547067165374756, "learning_rate": 1e-05, "loss": 0.2306, "step": 3870 }, { "epoch": 0.013295593949819414, "grad_norm": 1.2483099699020386, "learning_rate": 1e-05, "loss": 0.2166, "step": 3880 }, { "epoch": 0.013329860944535442, "grad_norm": 1.096939206123352, "learning_rate": 1e-05, "loss": 0.2253, "step": 3890 }, { "epoch": 0.013364127939251472, "grad_norm": 1.1876115798950195, "learning_rate": 1e-05, "loss": 0.2377, "step": 3900 }, { "epoch": 0.013398394933967502, "grad_norm": 1.1380902528762817, "learning_rate": 1e-05, "loss": 0.2256, "step": 3910 }, { "epoch": 0.013432661928683531, "grad_norm": 1.0738089084625244, "learning_rate": 1e-05, "loss": 0.2307, "step": 3920 }, { "epoch": 0.01346692892339956, "grad_norm": 1.0351170301437378, "learning_rate": 
1e-05, "loss": 0.2296, "step": 3930 }, { "epoch": 0.01350119591811559, "grad_norm": 1.2752678394317627, "learning_rate": 1e-05, "loss": 0.2462, "step": 3940 }, { "epoch": 0.01353546291283162, "grad_norm": 1.2618532180786133, "learning_rate": 1e-05, "loss": 0.2364, "step": 3950 }, { "epoch": 0.013569729907547649, "grad_norm": 1.1907076835632324, "learning_rate": 1e-05, "loss": 0.2397, "step": 3960 }, { "epoch": 0.013603996902263677, "grad_norm": 0.9435076117515564, "learning_rate": 1e-05, "loss": 0.2391, "step": 3970 }, { "epoch": 0.013638263896979707, "grad_norm": 1.0608407258987427, "learning_rate": 1e-05, "loss": 0.2241, "step": 3980 }, { "epoch": 0.013672530891695737, "grad_norm": 1.0729584693908691, "learning_rate": 1e-05, "loss": 0.2237, "step": 3990 }, { "epoch": 0.013706797886411767, "grad_norm": 1.2006182670593262, "learning_rate": 1e-05, "loss": 0.2386, "step": 4000 }, { "epoch": 0.013706797886411767, "eval_cer": 12.594813900158758, "eval_loss": 0.25156331062316895, "eval_normalized_cer": 8.912869704236611, "eval_runtime": 228.7977, "eval_samples_per_second": 2.238, "eval_steps_per_second": 0.035, "step": 4000 }, { "epoch": 0.013741064881127795, "grad_norm": 1.2020457983016968, "learning_rate": 1e-05, "loss": 0.2318, "step": 4010 }, { "epoch": 0.013775331875843825, "grad_norm": 1.0251790285110474, "learning_rate": 1e-05, "loss": 0.248, "step": 4020 }, { "epoch": 0.013809598870559854, "grad_norm": 1.160437822341919, "learning_rate": 1e-05, "loss": 0.2385, "step": 4030 }, { "epoch": 0.013843865865275884, "grad_norm": 1.025770664215088, "learning_rate": 1e-05, "loss": 0.2293, "step": 4040 }, { "epoch": 0.013878132859991912, "grad_norm": 1.111954689025879, "learning_rate": 1e-05, "loss": 0.2377, "step": 4050 }, { "epoch": 0.013912399854707942, "grad_norm": 1.0644809007644653, "learning_rate": 1e-05, "loss": 0.2195, "step": 4060 }, { "epoch": 0.013946666849423972, "grad_norm": 1.2926712036132812, "learning_rate": 1e-05, "loss": 0.2508, "step": 4070 }, { 
"epoch": 0.013980933844140002, "grad_norm": 1.2169601917266846, "learning_rate": 1e-05, "loss": 0.2401, "step": 4080 }, { "epoch": 0.01401520083885603, "grad_norm": 1.1396681070327759, "learning_rate": 1e-05, "loss": 0.2305, "step": 4090 }, { "epoch": 0.01404946783357206, "grad_norm": 1.2242721319198608, "learning_rate": 1e-05, "loss": 0.2301, "step": 4100 }, { "epoch": 0.01408373482828809, "grad_norm": 1.195324420928955, "learning_rate": 1e-05, "loss": 0.2368, "step": 4110 }, { "epoch": 0.01411800182300412, "grad_norm": 1.2345412969589233, "learning_rate": 1e-05, "loss": 0.2301, "step": 4120 }, { "epoch": 0.014152268817720148, "grad_norm": 1.1502156257629395, "learning_rate": 1e-05, "loss": 0.2327, "step": 4130 }, { "epoch": 0.014186535812436177, "grad_norm": 1.2128121852874756, "learning_rate": 1e-05, "loss": 0.2458, "step": 4140 }, { "epoch": 0.014220802807152207, "grad_norm": 1.2618858814239502, "learning_rate": 1e-05, "loss": 0.231, "step": 4150 }, { "epoch": 0.014255069801868237, "grad_norm": 1.0879299640655518, "learning_rate": 1e-05, "loss": 0.2302, "step": 4160 }, { "epoch": 0.014289336796584265, "grad_norm": 0.9794358015060425, "learning_rate": 1e-05, "loss": 0.239, "step": 4170 }, { "epoch": 0.014323603791300295, "grad_norm": 1.1454006433486938, "learning_rate": 1e-05, "loss": 0.2328, "step": 4180 }, { "epoch": 0.014357870786016325, "grad_norm": 1.223686933517456, "learning_rate": 1e-05, "loss": 0.2211, "step": 4190 }, { "epoch": 0.014392137780732355, "grad_norm": 1.1423155069351196, "learning_rate": 1e-05, "loss": 0.2391, "step": 4200 }, { "epoch": 0.014426404775448383, "grad_norm": 1.1027394533157349, "learning_rate": 1e-05, "loss": 0.2279, "step": 4210 }, { "epoch": 0.014460671770164413, "grad_norm": 1.1777397394180298, "learning_rate": 1e-05, "loss": 0.2293, "step": 4220 }, { "epoch": 0.014494938764880443, "grad_norm": 1.01688551902771, "learning_rate": 1e-05, "loss": 0.2275, "step": 4230 }, { "epoch": 0.014529205759596472, "grad_norm": 
1.1520488262176514, "learning_rate": 1e-05, "loss": 0.2301, "step": 4240 }, { "epoch": 0.0145634727543125, "grad_norm": 1.2820484638214111, "learning_rate": 1e-05, "loss": 0.2205, "step": 4250 }, { "epoch": 0.01459773974902853, "grad_norm": 1.169291377067566, "learning_rate": 1e-05, "loss": 0.2389, "step": 4260 }, { "epoch": 0.01463200674374456, "grad_norm": 1.1135886907577515, "learning_rate": 1e-05, "loss": 0.2384, "step": 4270 }, { "epoch": 0.01466627373846059, "grad_norm": 1.0846205949783325, "learning_rate": 1e-05, "loss": 0.223, "step": 4280 }, { "epoch": 0.014700540733176618, "grad_norm": 0.981488049030304, "learning_rate": 1e-05, "loss": 0.2092, "step": 4290 }, { "epoch": 0.014734807727892648, "grad_norm": 1.0437407493591309, "learning_rate": 1e-05, "loss": 0.2293, "step": 4300 }, { "epoch": 0.014769074722608678, "grad_norm": 1.005792260169983, "learning_rate": 1e-05, "loss": 0.2286, "step": 4310 }, { "epoch": 0.014803341717324708, "grad_norm": 1.1903142929077148, "learning_rate": 1e-05, "loss": 0.231, "step": 4320 }, { "epoch": 0.014837608712040736, "grad_norm": 1.1308993101119995, "learning_rate": 1e-05, "loss": 0.2458, "step": 4330 }, { "epoch": 0.014871875706756766, "grad_norm": 1.0948210954666138, "learning_rate": 1e-05, "loss": 0.213, "step": 4340 }, { "epoch": 0.014906142701472795, "grad_norm": 1.2674663066864014, "learning_rate": 1e-05, "loss": 0.2432, "step": 4350 }, { "epoch": 0.014940409696188825, "grad_norm": 1.4228485822677612, "learning_rate": 1e-05, "loss": 0.2491, "step": 4360 }, { "epoch": 0.014974676690904853, "grad_norm": 1.1533160209655762, "learning_rate": 1e-05, "loss": 0.2485, "step": 4370 }, { "epoch": 0.015008943685620883, "grad_norm": 1.1454424858093262, "learning_rate": 1e-05, "loss": 0.2635, "step": 4380 }, { "epoch": 0.015043210680336913, "grad_norm": 1.2944281101226807, "learning_rate": 1e-05, "loss": 0.2651, "step": 4390 }, { "epoch": 0.015077477675052943, "grad_norm": 1.2148584127426147, "learning_rate": 1e-05, "loss": 
0.2694, "step": 4400 }, { "epoch": 0.015111744669768971, "grad_norm": 1.091282844543457, "learning_rate": 1e-05, "loss": 0.2672, "step": 4410 }, { "epoch": 0.015146011664485001, "grad_norm": 1.2254445552825928, "learning_rate": 1e-05, "loss": 0.2583, "step": 4420 }, { "epoch": 0.01518027865920103, "grad_norm": 1.367516279220581, "learning_rate": 1e-05, "loss": 0.2586, "step": 4430 }, { "epoch": 0.01521454565391706, "grad_norm": 1.1858383417129517, "learning_rate": 1e-05, "loss": 0.2764, "step": 4440 }, { "epoch": 0.01524881264863309, "grad_norm": 1.1331857442855835, "learning_rate": 1e-05, "loss": 0.2577, "step": 4450 }, { "epoch": 0.015283079643349119, "grad_norm": 1.2343239784240723, "learning_rate": 1e-05, "loss": 0.2661, "step": 4460 }, { "epoch": 0.015317346638065148, "grad_norm": 1.0893656015396118, "learning_rate": 1e-05, "loss": 0.2538, "step": 4470 }, { "epoch": 0.015351613632781178, "grad_norm": 1.1467857360839844, "learning_rate": 1e-05, "loss": 0.2496, "step": 4480 }, { "epoch": 0.015385880627497208, "grad_norm": 1.2753335237503052, "learning_rate": 1e-05, "loss": 0.2797, "step": 4490 }, { "epoch": 0.015420147622213236, "grad_norm": 1.1355762481689453, "learning_rate": 1e-05, "loss": 0.2672, "step": 4500 }, { "epoch": 0.015420147622213236, "eval_cer": 13.159287352266713, "eval_loss": 0.24996142089366913, "eval_normalized_cer": 9.59232613908873, "eval_runtime": 228.0477, "eval_samples_per_second": 2.245, "eval_steps_per_second": 0.035, "step": 4500 }, { "epoch": 0.015454414616929266, "grad_norm": 1.2256762981414795, "learning_rate": 1e-05, "loss": 0.2662, "step": 4510 }, { "epoch": 0.015488681611645296, "grad_norm": 1.0631389617919922, "learning_rate": 1e-05, "loss": 0.2596, "step": 4520 }, { "epoch": 0.015522948606361326, "grad_norm": 1.0759390592575073, "learning_rate": 1e-05, "loss": 0.2553, "step": 4530 }, { "epoch": 0.015557215601077354, "grad_norm": 1.1867231130599976, "learning_rate": 1e-05, "loss": 0.2498, "step": 4540 }, { "epoch": 
0.015591482595793384, "grad_norm": 1.1203633546829224, "learning_rate": 1e-05, "loss": 0.2732, "step": 4550 }, { "epoch": 0.015625749590509413, "grad_norm": 1.1223920583724976, "learning_rate": 1e-05, "loss": 0.2535, "step": 4560 }, { "epoch": 0.015660016585225443, "grad_norm": 1.066497564315796, "learning_rate": 1e-05, "loss": 0.2456, "step": 4570 }, { "epoch": 0.015694283579941473, "grad_norm": 1.2520133256912231, "learning_rate": 1e-05, "loss": 0.2558, "step": 4580 }, { "epoch": 0.015728550574657503, "grad_norm": 1.3602423667907715, "learning_rate": 1e-05, "loss": 0.2698, "step": 4590 }, { "epoch": 0.01576281756937353, "grad_norm": 1.1748729944229126, "learning_rate": 1e-05, "loss": 0.2621, "step": 4600 }, { "epoch": 0.01579708456408956, "grad_norm": 0.9431802034378052, "learning_rate": 1e-05, "loss": 0.2433, "step": 4610 }, { "epoch": 0.01583135155880559, "grad_norm": 1.0146753787994385, "learning_rate": 1e-05, "loss": 0.239, "step": 4620 }, { "epoch": 0.01586561855352162, "grad_norm": 1.1340891122817993, "learning_rate": 1e-05, "loss": 0.2437, "step": 4630 }, { "epoch": 0.01589988554823765, "grad_norm": 1.1456454992294312, "learning_rate": 1e-05, "loss": 0.2307, "step": 4640 }, { "epoch": 0.01593415254295368, "grad_norm": 1.1026827096939087, "learning_rate": 1e-05, "loss": 0.2295, "step": 4650 }, { "epoch": 0.01596841953766971, "grad_norm": 1.2215088605880737, "learning_rate": 1e-05, "loss": 0.245, "step": 4660 }, { "epoch": 0.01600268653238574, "grad_norm": 1.1760615110397339, "learning_rate": 1e-05, "loss": 0.2461, "step": 4670 }, { "epoch": 0.016036953527101765, "grad_norm": 1.1690876483917236, "learning_rate": 1e-05, "loss": 0.2282, "step": 4680 }, { "epoch": 0.016071220521817794, "grad_norm": 1.182026743888855, "learning_rate": 1e-05, "loss": 0.2351, "step": 4690 }, { "epoch": 0.016105487516533824, "grad_norm": 1.0182474851608276, "learning_rate": 1e-05, "loss": 0.2284, "step": 4700 }, { "epoch": 0.016139754511249854, "grad_norm": 1.2531431913375854, 
"learning_rate": 1e-05, "loss": 0.244, "step": 4710 }, { "epoch": 0.016174021505965884, "grad_norm": 0.9633692502975464, "learning_rate": 1e-05, "loss": 0.2297, "step": 4720 }, { "epoch": 0.016208288500681914, "grad_norm": 1.1144667863845825, "learning_rate": 1e-05, "loss": 0.2475, "step": 4730 }, { "epoch": 0.016242555495397944, "grad_norm": 1.0768555402755737, "learning_rate": 1e-05, "loss": 0.2216, "step": 4740 }, { "epoch": 0.016276822490113974, "grad_norm": 1.2052035331726074, "learning_rate": 1e-05, "loss": 0.2278, "step": 4750 }, { "epoch": 0.01631108948483, "grad_norm": 1.0291496515274048, "learning_rate": 1e-05, "loss": 0.2226, "step": 4760 }, { "epoch": 0.01634535647954603, "grad_norm": 1.2100346088409424, "learning_rate": 1e-05, "loss": 0.2278, "step": 4770 }, { "epoch": 0.01637962347426206, "grad_norm": 1.214861273765564, "learning_rate": 1e-05, "loss": 0.2313, "step": 4780 }, { "epoch": 0.01641389046897809, "grad_norm": 1.137210726737976, "learning_rate": 1e-05, "loss": 0.2235, "step": 4790 }, { "epoch": 0.01644815746369412, "grad_norm": 1.046673059463501, "learning_rate": 1e-05, "loss": 0.2231, "step": 4800 }, { "epoch": 0.01648242445841015, "grad_norm": 1.08164644241333, "learning_rate": 1e-05, "loss": 0.2235, "step": 4810 }, { "epoch": 0.01651669145312618, "grad_norm": 1.1432491540908813, "learning_rate": 1e-05, "loss": 0.246, "step": 4820 }, { "epoch": 0.01655095844784221, "grad_norm": 1.1684173345565796, "learning_rate": 1e-05, "loss": 0.218, "step": 4830 }, { "epoch": 0.016585225442558235, "grad_norm": 1.0895615816116333, "learning_rate": 1e-05, "loss": 0.2109, "step": 4840 }, { "epoch": 0.016619492437274265, "grad_norm": 1.1505770683288574, "learning_rate": 1e-05, "loss": 0.2283, "step": 4850 }, { "epoch": 0.016653759431990295, "grad_norm": 1.3385730981826782, "learning_rate": 1e-05, "loss": 0.2344, "step": 4860 }, { "epoch": 0.016688026426706325, "grad_norm": 1.109035611152649, "learning_rate": 1e-05, "loss": 0.2558, "step": 4870 }, { "epoch": 
0.016722293421422355, "grad_norm": 1.1834880113601685, "learning_rate": 1e-05, "loss": 0.2247, "step": 4880 }, { "epoch": 0.016756560416138384, "grad_norm": 1.2369152307510376, "learning_rate": 1e-05, "loss": 0.2449, "step": 4890 }, { "epoch": 0.016790827410854414, "grad_norm": 1.131173014640808, "learning_rate": 1e-05, "loss": 0.2458, "step": 4900 }, { "epoch": 0.016825094405570444, "grad_norm": 1.1100351810455322, "learning_rate": 1e-05, "loss": 0.2523, "step": 4910 }, { "epoch": 0.01685936140028647, "grad_norm": 1.1857340335845947, "learning_rate": 1e-05, "loss": 0.2523, "step": 4920 }, { "epoch": 0.0168936283950025, "grad_norm": 1.1568819284439087, "learning_rate": 1e-05, "loss": 0.2549, "step": 4930 }, { "epoch": 0.01692789538971853, "grad_norm": 1.104872465133667, "learning_rate": 1e-05, "loss": 0.2449, "step": 4940 }, { "epoch": 0.01696216238443456, "grad_norm": 1.0907660722732544, "learning_rate": 1e-05, "loss": 0.2496, "step": 4950 }, { "epoch": 0.01699642937915059, "grad_norm": 1.1100903749465942, "learning_rate": 1e-05, "loss": 0.239, "step": 4960 }, { "epoch": 0.01703069637386662, "grad_norm": 1.141200065612793, "learning_rate": 1e-05, "loss": 0.2459, "step": 4970 }, { "epoch": 0.01706496336858265, "grad_norm": 1.2853361368179321, "learning_rate": 1e-05, "loss": 0.2452, "step": 4980 }, { "epoch": 0.01709923036329868, "grad_norm": 1.1542645692825317, "learning_rate": 1e-05, "loss": 0.2635, "step": 4990 }, { "epoch": 0.017133497358014706, "grad_norm": 1.2022640705108643, "learning_rate": 1e-05, "loss": 0.2371, "step": 5000 }, { "epoch": 0.017133497358014706, "eval_cer": 12.92115011465867, "eval_loss": 0.2521001100540161, "eval_normalized_cer": 9.30255795363709, "eval_runtime": 227.4868, "eval_samples_per_second": 2.251, "eval_steps_per_second": 0.035, "step": 5000 }, { "epoch": 0.017167764352730736, "grad_norm": 1.0765001773834229, "learning_rate": 1e-05, "loss": 0.2455, "step": 5010 }, { "epoch": 0.017202031347446765, "grad_norm": 1.0711493492126465, 
"learning_rate": 1e-05, "loss": 0.2422, "step": 5020 }, { "epoch": 0.017236298342162795, "grad_norm": 1.0719484090805054, "learning_rate": 1e-05, "loss": 0.2531, "step": 5030 }, { "epoch": 0.017270565336878825, "grad_norm": 1.1884721517562866, "learning_rate": 1e-05, "loss": 0.2508, "step": 5040 }, { "epoch": 0.017304832331594855, "grad_norm": 1.068827509880066, "learning_rate": 1e-05, "loss": 0.2474, "step": 5050 }, { "epoch": 0.017339099326310885, "grad_norm": 1.1308655738830566, "learning_rate": 1e-05, "loss": 0.2627, "step": 5060 }, { "epoch": 0.017373366321026915, "grad_norm": 1.1527314186096191, "learning_rate": 1e-05, "loss": 0.2535, "step": 5070 }, { "epoch": 0.017407633315742944, "grad_norm": 1.1800657510757446, "learning_rate": 1e-05, "loss": 0.2587, "step": 5080 }, { "epoch": 0.01744190031045897, "grad_norm": 1.095189094543457, "learning_rate": 1e-05, "loss": 0.2424, "step": 5090 }, { "epoch": 0.017476167305175, "grad_norm": 1.109617829322815, "learning_rate": 1e-05, "loss": 0.2543, "step": 5100 }, { "epoch": 0.01751043429989103, "grad_norm": 1.2110544443130493, "learning_rate": 1e-05, "loss": 0.2687, "step": 5110 }, { "epoch": 0.01754470129460706, "grad_norm": 1.0466723442077637, "learning_rate": 1e-05, "loss": 0.2424, "step": 5120 }, { "epoch": 0.01757896828932309, "grad_norm": 1.2060648202896118, "learning_rate": 1e-05, "loss": 0.2337, "step": 5130 }, { "epoch": 0.01761323528403912, "grad_norm": 1.203142762184143, "learning_rate": 1e-05, "loss": 0.2556, "step": 5140 }, { "epoch": 0.01764750227875515, "grad_norm": 1.0751283168792725, "learning_rate": 1e-05, "loss": 0.2235, "step": 5150 }, { "epoch": 0.01768176927347118, "grad_norm": 1.1377781629562378, "learning_rate": 1e-05, "loss": 0.2448, "step": 5160 }, { "epoch": 0.017716036268187206, "grad_norm": 1.147454023361206, "learning_rate": 1e-05, "loss": 0.2172, "step": 5170 }, { "epoch": 0.017750303262903236, "grad_norm": 1.129897952079773, "learning_rate": 1e-05, "loss": 0.2418, "step": 5180 }, { 
"epoch": 0.017784570257619266, "grad_norm": 1.1261131763458252, "learning_rate": 1e-05, "loss": 0.2328, "step": 5190 }, { "epoch": 0.017818837252335296, "grad_norm": 1.0794824361801147, "learning_rate": 1e-05, "loss": 0.2546, "step": 5200 }, { "epoch": 0.017853104247051325, "grad_norm": 1.1870142221450806, "learning_rate": 1e-05, "loss": 0.249, "step": 5210 }, { "epoch": 0.017887371241767355, "grad_norm": 1.0414400100708008, "learning_rate": 1e-05, "loss": 0.2285, "step": 5220 }, { "epoch": 0.017921638236483385, "grad_norm": 1.173405647277832, "learning_rate": 1e-05, "loss": 0.2529, "step": 5230 }, { "epoch": 0.017955905231199415, "grad_norm": 1.039650797843933, "learning_rate": 1e-05, "loss": 0.2321, "step": 5240 }, { "epoch": 0.01799017222591544, "grad_norm": 1.0359266996383667, "learning_rate": 1e-05, "loss": 0.2433, "step": 5250 }, { "epoch": 0.01802443922063147, "grad_norm": 1.0630840063095093, "learning_rate": 1e-05, "loss": 0.2117, "step": 5260 }, { "epoch": 0.0180587062153475, "grad_norm": 1.0937180519104004, "learning_rate": 1e-05, "loss": 0.2454, "step": 5270 }, { "epoch": 0.01809297321006353, "grad_norm": 1.1015993356704712, "learning_rate": 1e-05, "loss": 0.238, "step": 5280 }, { "epoch": 0.01812724020477956, "grad_norm": 1.060584545135498, "learning_rate": 1e-05, "loss": 0.2475, "step": 5290 }, { "epoch": 0.01816150719949559, "grad_norm": 1.1389795541763306, "learning_rate": 1e-05, "loss": 0.233, "step": 5300 }, { "epoch": 0.01819577419421162, "grad_norm": 1.0018917322158813, "learning_rate": 1e-05, "loss": 0.2453, "step": 5310 }, { "epoch": 0.01823004118892765, "grad_norm": 1.0546092987060547, "learning_rate": 1e-05, "loss": 0.2333, "step": 5320 }, { "epoch": 0.018264308183643677, "grad_norm": 1.1121848821640015, "learning_rate": 1e-05, "loss": 0.2317, "step": 5330 }, { "epoch": 0.018298575178359706, "grad_norm": 1.1613191366195679, "learning_rate": 1e-05, "loss": 0.2549, "step": 5340 }, { "epoch": 0.018332842173075736, "grad_norm": 
1.1250524520874023, "learning_rate": 1e-05, "loss": 0.2471, "step": 5350 }, { "epoch": 0.018367109167791766, "grad_norm": 1.0905226469039917, "learning_rate": 1e-05, "loss": 0.229, "step": 5360 }, { "epoch": 0.018401376162507796, "grad_norm": 0.9885173439979553, "learning_rate": 1e-05, "loss": 0.2542, "step": 5370 }, { "epoch": 0.018435643157223826, "grad_norm": 1.288758635520935, "learning_rate": 1e-05, "loss": 0.2472, "step": 5380 }, { "epoch": 0.018469910151939856, "grad_norm": 1.2433462142944336, "learning_rate": 1e-05, "loss": 0.2427, "step": 5390 }, { "epoch": 0.018504177146655885, "grad_norm": 1.2367336750030518, "learning_rate": 1e-05, "loss": 0.2511, "step": 5400 }, { "epoch": 0.018538444141371912, "grad_norm": 1.1871395111083984, "learning_rate": 1e-05, "loss": 0.2276, "step": 5410 }, { "epoch": 0.01857271113608794, "grad_norm": 0.9569379091262817, "learning_rate": 1e-05, "loss": 0.2475, "step": 5420 }, { "epoch": 0.01860697813080397, "grad_norm": 1.1487014293670654, "learning_rate": 1e-05, "loss": 0.2295, "step": 5430 }, { "epoch": 0.01864124512552, "grad_norm": 1.0800844430923462, "learning_rate": 1e-05, "loss": 0.2247, "step": 5440 }, { "epoch": 0.01867551212023603, "grad_norm": 1.1834380626678467, "learning_rate": 1e-05, "loss": 0.226, "step": 5450 }, { "epoch": 0.01870977911495206, "grad_norm": 1.0035191774368286, "learning_rate": 1e-05, "loss": 0.2414, "step": 5460 }, { "epoch": 0.01874404610966809, "grad_norm": 1.0685466527938843, "learning_rate": 1e-05, "loss": 0.2449, "step": 5470 }, { "epoch": 0.01877831310438412, "grad_norm": 1.1921565532684326, "learning_rate": 1e-05, "loss": 0.2419, "step": 5480 }, { "epoch": 0.018812580099100147, "grad_norm": 1.1201281547546387, "learning_rate": 1e-05, "loss": 0.255, "step": 5490 }, { "epoch": 0.018846847093816177, "grad_norm": 1.1162866353988647, "learning_rate": 1e-05, "loss": 0.2426, "step": 5500 }, { "epoch": 0.018846847093816177, "eval_cer": 13.238666431469396, "eval_loss": 0.25262224674224854, 
"eval_normalized_cer": 9.562350119904076, "eval_runtime": 229.0802, "eval_samples_per_second": 2.235, "eval_steps_per_second": 0.035, "step": 5500 }, { "epoch": 0.018881114088532207, "grad_norm": 1.0215845108032227, "learning_rate": 1e-05, "loss": 0.2368, "step": 5510 }, { "epoch": 0.018915381083248237, "grad_norm": 1.0062447786331177, "learning_rate": 1e-05, "loss": 0.2308, "step": 5520 }, { "epoch": 0.018949648077964266, "grad_norm": 1.223649024963379, "learning_rate": 1e-05, "loss": 0.2409, "step": 5530 }, { "epoch": 0.018983915072680296, "grad_norm": 1.2076172828674316, "learning_rate": 1e-05, "loss": 0.2236, "step": 5540 }, { "epoch": 0.019018182067396326, "grad_norm": 1.154416561126709, "learning_rate": 1e-05, "loss": 0.2419, "step": 5550 }, { "epoch": 0.019052449062112356, "grad_norm": 1.284858226776123, "learning_rate": 1e-05, "loss": 0.2321, "step": 5560 }, { "epoch": 0.019086716056828382, "grad_norm": 1.0406948328018188, "learning_rate": 1e-05, "loss": 0.2485, "step": 5570 }, { "epoch": 0.019120983051544412, "grad_norm": 1.1980571746826172, "learning_rate": 1e-05, "loss": 0.2274, "step": 5580 }, { "epoch": 0.019155250046260442, "grad_norm": 1.073560357093811, "learning_rate": 1e-05, "loss": 0.2498, "step": 5590 }, { "epoch": 0.019189517040976472, "grad_norm": 1.0982617139816284, "learning_rate": 1e-05, "loss": 0.2391, "step": 5600 }, { "epoch": 0.019223784035692502, "grad_norm": 1.015085220336914, "learning_rate": 1e-05, "loss": 0.2269, "step": 5610 }, { "epoch": 0.01925805103040853, "grad_norm": 1.238585352897644, "learning_rate": 1e-05, "loss": 0.2428, "step": 5620 }, { "epoch": 0.01929231802512456, "grad_norm": 1.3326079845428467, "learning_rate": 1e-05, "loss": 0.25, "step": 5630 }, { "epoch": 0.01932658501984059, "grad_norm": 1.1263608932495117, "learning_rate": 1e-05, "loss": 0.234, "step": 5640 }, { "epoch": 0.019360852014556618, "grad_norm": 1.083595633506775, "learning_rate": 1e-05, "loss": 0.2504, "step": 5650 }, { "epoch": 0.019395119009272647, 
"grad_norm": 1.0787022113800049, "learning_rate": 1e-05, "loss": 0.2248, "step": 5660 }, { "epoch": 0.019429386003988677, "grad_norm": 1.312565803527832, "learning_rate": 1e-05, "loss": 0.263, "step": 5670 }, { "epoch": 0.019463652998704707, "grad_norm": 1.0305407047271729, "learning_rate": 1e-05, "loss": 0.2358, "step": 5680 }, { "epoch": 0.019497919993420737, "grad_norm": 1.0905306339263916, "learning_rate": 1e-05, "loss": 0.2358, "step": 5690 }, { "epoch": 0.019532186988136767, "grad_norm": 1.1105730533599854, "learning_rate": 1e-05, "loss": 0.2371, "step": 5700 }, { "epoch": 0.019566453982852797, "grad_norm": 1.1664555072784424, "learning_rate": 1e-05, "loss": 0.244, "step": 5710 }, { "epoch": 0.019600720977568827, "grad_norm": 1.0702719688415527, "learning_rate": 1e-05, "loss": 0.2305, "step": 5720 }, { "epoch": 0.019634987972284856, "grad_norm": 1.0736626386642456, "learning_rate": 1e-05, "loss": 0.2406, "step": 5730 }, { "epoch": 0.019669254967000883, "grad_norm": 1.0510461330413818, "learning_rate": 1e-05, "loss": 0.2335, "step": 5740 }, { "epoch": 0.019703521961716913, "grad_norm": 1.0435370206832886, "learning_rate": 1e-05, "loss": 0.2211, "step": 5750 }, { "epoch": 0.019737788956432942, "grad_norm": 1.2461049556732178, "learning_rate": 1e-05, "loss": 0.2188, "step": 5760 }, { "epoch": 0.019772055951148972, "grad_norm": 1.0351046323776245, "learning_rate": 1e-05, "loss": 0.2269, "step": 5770 }, { "epoch": 0.019806322945865002, "grad_norm": 1.124671459197998, "learning_rate": 1e-05, "loss": 0.2284, "step": 5780 }, { "epoch": 0.019840589940581032, "grad_norm": 1.145488977432251, "learning_rate": 1e-05, "loss": 0.2415, "step": 5790 }, { "epoch": 0.019874856935297062, "grad_norm": 1.1410046815872192, "learning_rate": 1e-05, "loss": 0.2296, "step": 5800 }, { "epoch": 0.01990912393001309, "grad_norm": 1.2782517671585083, "learning_rate": 1e-05, "loss": 0.2367, "step": 5810 }, { "epoch": 0.019943390924729118, "grad_norm": 1.204562783241272, "learning_rate": 
1e-05, "loss": 0.2289, "step": 5820 }, { "epoch": 0.019977657919445148, "grad_norm": 1.1141811609268188, "learning_rate": 1e-05, "loss": 0.2223, "step": 5830 }, { "epoch": 0.020011924914161178, "grad_norm": 1.1790316104888916, "learning_rate": 1e-05, "loss": 0.2308, "step": 5840 }, { "epoch": 0.020046191908877208, "grad_norm": 1.0944266319274902, "learning_rate": 1e-05, "loss": 0.2366, "step": 5850 }, { "epoch": 0.020080458903593237, "grad_norm": 1.0892263650894165, "learning_rate": 1e-05, "loss": 0.2384, "step": 5860 }, { "epoch": 0.020114725898309267, "grad_norm": 1.1419873237609863, "learning_rate": 1e-05, "loss": 0.2414, "step": 5870 }, { "epoch": 0.020148992893025297, "grad_norm": 1.2230783700942993, "learning_rate": 1e-05, "loss": 0.2394, "step": 5880 }, { "epoch": 0.020183259887741327, "grad_norm": 1.1309173107147217, "learning_rate": 1e-05, "loss": 0.2561, "step": 5890 }, { "epoch": 0.020217526882457353, "grad_norm": 1.2405802011489868, "learning_rate": 1e-05, "loss": 0.259, "step": 5900 }, { "epoch": 0.020251793877173383, "grad_norm": 1.2853388786315918, "learning_rate": 1e-05, "loss": 0.2668, "step": 5910 }, { "epoch": 0.020286060871889413, "grad_norm": 1.299046277999878, "learning_rate": 1e-05, "loss": 0.251, "step": 5920 }, { "epoch": 0.020320327866605443, "grad_norm": 1.142052173614502, "learning_rate": 1e-05, "loss": 0.2655, "step": 5930 }, { "epoch": 0.020354594861321473, "grad_norm": 1.3770766258239746, "learning_rate": 1e-05, "loss": 0.2508, "step": 5940 }, { "epoch": 0.020388861856037502, "grad_norm": 1.1458237171173096, "learning_rate": 1e-05, "loss": 0.2742, "step": 5950 }, { "epoch": 0.020423128850753532, "grad_norm": 1.3130786418914795, "learning_rate": 1e-05, "loss": 0.2514, "step": 5960 }, { "epoch": 0.020457395845469562, "grad_norm": 1.2816088199615479, "learning_rate": 1e-05, "loss": 0.2593, "step": 5970 }, { "epoch": 0.02049166284018559, "grad_norm": 1.0405460596084595, "learning_rate": 1e-05, "loss": 0.2608, "step": 5980 }, { "epoch": 
0.02052592983490162, "grad_norm": 1.2035329341888428, "learning_rate": 1e-05, "loss": 0.2558, "step": 5990 }, { "epoch": 0.020560196829617648, "grad_norm": 1.0495450496673584, "learning_rate": 1e-05, "loss": 0.2468, "step": 6000 }, { "epoch": 0.020560196829617648, "eval_cer": 13.079908273064033, "eval_loss": 0.2540421485900879, "eval_normalized_cer": 9.292565947242206, "eval_runtime": 227.4153, "eval_samples_per_second": 2.251, "eval_steps_per_second": 0.035, "step": 6000 }, { "epoch": 0.020594463824333678, "grad_norm": 1.1614056825637817, "learning_rate": 1e-05, "loss": 0.2527, "step": 6010 }, { "epoch": 0.020628730819049708, "grad_norm": 1.1835705041885376, "learning_rate": 1e-05, "loss": 0.2592, "step": 6020 }, { "epoch": 0.020662997813765738, "grad_norm": 1.1335136890411377, "learning_rate": 1e-05, "loss": 0.2727, "step": 6030 }, { "epoch": 0.020697264808481768, "grad_norm": 1.052079439163208, "learning_rate": 1e-05, "loss": 0.2514, "step": 6040 }, { "epoch": 0.020731531803197797, "grad_norm": 1.096330165863037, "learning_rate": 1e-05, "loss": 0.2684, "step": 6050 }, { "epoch": 0.020765798797913824, "grad_norm": 1.2359880208969116, "learning_rate": 1e-05, "loss": 0.2638, "step": 6060 }, { "epoch": 0.020800065792629854, "grad_norm": 1.2259430885314941, "learning_rate": 1e-05, "loss": 0.2488, "step": 6070 }, { "epoch": 0.020834332787345883, "grad_norm": 1.0531619787216187, "learning_rate": 1e-05, "loss": 0.2584, "step": 6080 }, { "epoch": 0.020868599782061913, "grad_norm": 1.1754058599472046, "learning_rate": 1e-05, "loss": 0.254, "step": 6090 }, { "epoch": 0.020902866776777943, "grad_norm": 1.0922538042068481, "learning_rate": 1e-05, "loss": 0.2522, "step": 6100 }, { "epoch": 0.020937133771493973, "grad_norm": 1.1970179080963135, "learning_rate": 1e-05, "loss": 0.267, "step": 6110 }, { "epoch": 0.020971400766210003, "grad_norm": 1.2625236511230469, "learning_rate": 1e-05, "loss": 0.2379, "step": 6120 }, { "epoch": 0.021005667760926033, "grad_norm": 
1.152846336364746, "learning_rate": 1e-05, "loss": 0.2429, "step": 6130 }, { "epoch": 0.02103993475564206, "grad_norm": 1.1184160709381104, "learning_rate": 1e-05, "loss": 0.2566, "step": 6140 }, { "epoch": 0.02107420175035809, "grad_norm": 1.1153484582901, "learning_rate": 1e-05, "loss": 0.2583, "step": 6150 }, { "epoch": 0.02110846874507412, "grad_norm": 1.2822504043579102, "learning_rate": 1e-05, "loss": 0.2535, "step": 6160 }, { "epoch": 0.02114273573979015, "grad_norm": 1.1332992315292358, "learning_rate": 1e-05, "loss": 0.2799, "step": 6170 }, { "epoch": 0.02117700273450618, "grad_norm": 1.0284112691879272, "learning_rate": 1e-05, "loss": 0.2458, "step": 6180 }, { "epoch": 0.02121126972922221, "grad_norm": 1.1097975969314575, "learning_rate": 1e-05, "loss": 0.2513, "step": 6190 }, { "epoch": 0.021245536723938238, "grad_norm": 1.168990969657898, "learning_rate": 1e-05, "loss": 0.2843, "step": 6200 }, { "epoch": 0.021279803718654268, "grad_norm": 0.9956926107406616, "learning_rate": 1e-05, "loss": 0.247, "step": 6210 }, { "epoch": 0.021314070713370294, "grad_norm": 1.2191492319107056, "learning_rate": 1e-05, "loss": 0.2608, "step": 6220 }, { "epoch": 0.021348337708086324, "grad_norm": 1.0872688293457031, "learning_rate": 1e-05, "loss": 0.2463, "step": 6230 }, { "epoch": 0.021382604702802354, "grad_norm": 1.0746614933013916, "learning_rate": 1e-05, "loss": 0.244, "step": 6240 }, { "epoch": 0.021416871697518384, "grad_norm": 1.1560328006744385, "learning_rate": 1e-05, "loss": 0.2639, "step": 6250 }, { "epoch": 0.021451138692234414, "grad_norm": 1.1529641151428223, "learning_rate": 1e-05, "loss": 0.2585, "step": 6260 }, { "epoch": 0.021485405686950444, "grad_norm": 1.0708386898040771, "learning_rate": 1e-05, "loss": 0.2669, "step": 6270 }, { "epoch": 0.021519672681666473, "grad_norm": 1.208079218864441, "learning_rate": 1e-05, "loss": 0.2436, "step": 6280 }, { "epoch": 0.021553939676382503, "grad_norm": 1.1871508359909058, "learning_rate": 1e-05, "loss": 0.2655, 
"step": 6290 }, { "epoch": 0.02158820667109853, "grad_norm": 1.0997953414916992, "learning_rate": 1e-05, "loss": 0.2578, "step": 6300 }, { "epoch": 0.02162247366581456, "grad_norm": 1.2404417991638184, "learning_rate": 1e-05, "loss": 0.2726, "step": 6310 }, { "epoch": 0.02165674066053059, "grad_norm": 1.1724058389663696, "learning_rate": 1e-05, "loss": 0.2611, "step": 6320 }, { "epoch": 0.02169100765524662, "grad_norm": 1.124932885169983, "learning_rate": 1e-05, "loss": 0.2582, "step": 6330 }, { "epoch": 0.02172527464996265, "grad_norm": 1.129584550857544, "learning_rate": 1e-05, "loss": 0.2651, "step": 6340 }, { "epoch": 0.02175954164467868, "grad_norm": 1.1869479417800903, "learning_rate": 1e-05, "loss": 0.2451, "step": 6350 }, { "epoch": 0.02179380863939471, "grad_norm": 1.1753504276275635, "learning_rate": 1e-05, "loss": 0.2509, "step": 6360 }, { "epoch": 0.02182807563411074, "grad_norm": 1.1704761981964111, "learning_rate": 1e-05, "loss": 0.2614, "step": 6370 }, { "epoch": 0.02186234262882677, "grad_norm": 1.347970724105835, "learning_rate": 1e-05, "loss": 0.253, "step": 6380 }, { "epoch": 0.021896609623542795, "grad_norm": 1.0677597522735596, "learning_rate": 1e-05, "loss": 0.2539, "step": 6390 }, { "epoch": 0.021930876618258825, "grad_norm": 1.1567541360855103, "learning_rate": 1e-05, "loss": 0.2621, "step": 6400 }, { "epoch": 0.021965143612974854, "grad_norm": 1.1231553554534912, "learning_rate": 1e-05, "loss": 0.2453, "step": 6410 }, { "epoch": 0.021999410607690884, "grad_norm": 1.0485198497772217, "learning_rate": 1e-05, "loss": 0.2503, "step": 6420 }, { "epoch": 0.022033677602406914, "grad_norm": 1.12228262424469, "learning_rate": 1e-05, "loss": 0.2488, "step": 6430 }, { "epoch": 0.022067944597122944, "grad_norm": 1.2610136270523071, "learning_rate": 1e-05, "loss": 0.2445, "step": 6440 }, { "epoch": 0.022102211591838974, "grad_norm": 0.9546436071395874, "learning_rate": 1e-05, "loss": 0.226, "step": 6450 }, { "epoch": 0.022136478586555004, "grad_norm": 
1.3363466262817383, "learning_rate": 1e-05, "loss": 0.2489, "step": 6460 }, { "epoch": 0.02217074558127103, "grad_norm": 1.1454704999923706, "learning_rate": 1e-05, "loss": 0.2434, "step": 6470 }, { "epoch": 0.02220501257598706, "grad_norm": 1.1578549146652222, "learning_rate": 1e-05, "loss": 0.2549, "step": 6480 }, { "epoch": 0.02223927957070309, "grad_norm": 1.096081018447876, "learning_rate": 1e-05, "loss": 0.2472, "step": 6490 }, { "epoch": 0.02227354656541912, "grad_norm": 1.2388731241226196, "learning_rate": 1e-05, "loss": 0.2457, "step": 6500 }, { "epoch": 0.02227354656541912, "eval_cer": 12.929970012347859, "eval_loss": 0.24839338660240173, "eval_normalized_cer": 9.242605915267786, "eval_runtime": 227.6401, "eval_samples_per_second": 2.249, "eval_steps_per_second": 0.035, "step": 6500 }, { "epoch": 0.02230781356013515, "grad_norm": 1.0306715965270996, "learning_rate": 1e-05, "loss": 0.2393, "step": 6510 }, { "epoch": 0.02234208055485118, "grad_norm": 1.1339504718780518, "learning_rate": 1e-05, "loss": 0.2563, "step": 6520 }, { "epoch": 0.02237634754956721, "grad_norm": 0.912266731262207, "learning_rate": 1e-05, "loss": 0.2465, "step": 6530 }, { "epoch": 0.02241061454428324, "grad_norm": 1.1917020082473755, "learning_rate": 1e-05, "loss": 0.2395, "step": 6540 }, { "epoch": 0.022444881538999265, "grad_norm": 1.248515248298645, "learning_rate": 1e-05, "loss": 0.2479, "step": 6550 }, { "epoch": 0.022479148533715295, "grad_norm": 1.180799961090088, "learning_rate": 1e-05, "loss": 0.2616, "step": 6560 }, { "epoch": 0.022513415528431325, "grad_norm": 1.0700205564498901, "learning_rate": 1e-05, "loss": 0.2401, "step": 6570 }, { "epoch": 0.022547682523147355, "grad_norm": 1.1814614534378052, "learning_rate": 1e-05, "loss": 0.2471, "step": 6580 }, { "epoch": 0.022581949517863385, "grad_norm": 1.3973134756088257, "learning_rate": 1e-05, "loss": 0.2383, "step": 6590 }, { "epoch": 0.022616216512579414, "grad_norm": 1.244265079498291, "learning_rate": 1e-05, "loss": 
0.2548, "step": 6600 }, { "epoch": 0.022650483507295444, "grad_norm": 1.1685833930969238, "learning_rate": 1e-05, "loss": 0.2499, "step": 6610 }, { "epoch": 0.022684750502011474, "grad_norm": 1.1566667556762695, "learning_rate": 1e-05, "loss": 0.2443, "step": 6620 }, { "epoch": 0.0227190174967275, "grad_norm": 1.0241929292678833, "learning_rate": 1e-05, "loss": 0.2412, "step": 6630 }, { "epoch": 0.02275328449144353, "grad_norm": 1.0359474420547485, "learning_rate": 1e-05, "loss": 0.2374, "step": 6640 }, { "epoch": 0.02278755148615956, "grad_norm": 1.040810227394104, "learning_rate": 1e-05, "loss": 0.2254, "step": 6650 }, { "epoch": 0.02282181848087559, "grad_norm": 1.0343252420425415, "learning_rate": 1e-05, "loss": 0.2366, "step": 6660 }, { "epoch": 0.02285608547559162, "grad_norm": 1.052739143371582, "learning_rate": 1e-05, "loss": 0.2273, "step": 6670 }, { "epoch": 0.02289035247030765, "grad_norm": 1.0414966344833374, "learning_rate": 1e-05, "loss": 0.2082, "step": 6680 }, { "epoch": 0.02292461946502368, "grad_norm": 1.2340532541275024, "learning_rate": 1e-05, "loss": 0.2241, "step": 6690 }, { "epoch": 0.02295888645973971, "grad_norm": 0.9693310260772705, "learning_rate": 1e-05, "loss": 0.2322, "step": 6700 }, { "epoch": 0.022993153454455736, "grad_norm": 1.103025197982788, "learning_rate": 1e-05, "loss": 0.236, "step": 6710 }, { "epoch": 0.023027420449171766, "grad_norm": 1.119689702987671, "learning_rate": 1e-05, "loss": 0.214, "step": 6720 }, { "epoch": 0.023061687443887795, "grad_norm": 0.93172287940979, "learning_rate": 1e-05, "loss": 0.2094, "step": 6730 }, { "epoch": 0.023095954438603825, "grad_norm": 1.0207446813583374, "learning_rate": 1e-05, "loss": 0.2238, "step": 6740 }, { "epoch": 0.023130221433319855, "grad_norm": 1.200201392173767, "learning_rate": 1e-05, "loss": 0.218, "step": 6750 }, { "epoch": 0.023164488428035885, "grad_norm": 1.1485291719436646, "learning_rate": 1e-05, "loss": 0.2314, "step": 6760 }, { "epoch": 0.023198755422751915, 
"grad_norm": 1.2236285209655762, "learning_rate": 1e-05, "loss": 0.2326, "step": 6770 }, { "epoch": 0.023233022417467945, "grad_norm": 1.1756523847579956, "learning_rate": 1e-05, "loss": 0.2122, "step": 6780 }, { "epoch": 0.02326728941218397, "grad_norm": 1.0356839895248413, "learning_rate": 1e-05, "loss": 0.2078, "step": 6790 }, { "epoch": 0.0233015564069, "grad_norm": 1.1896883249282837, "learning_rate": 1e-05, "loss": 0.2072, "step": 6800 }, { "epoch": 0.02333582340161603, "grad_norm": 1.1080976724624634, "learning_rate": 1e-05, "loss": 0.2127, "step": 6810 }, { "epoch": 0.02337009039633206, "grad_norm": 1.128263235092163, "learning_rate": 1e-05, "loss": 0.2282, "step": 6820 }, { "epoch": 0.02340435739104809, "grad_norm": 1.0398188829421997, "learning_rate": 1e-05, "loss": 0.2095, "step": 6830 }, { "epoch": 0.02343862438576412, "grad_norm": 1.1791975498199463, "learning_rate": 1e-05, "loss": 0.2216, "step": 6840 }, { "epoch": 0.02347289138048015, "grad_norm": 1.1444710493087769, "learning_rate": 1e-05, "loss": 0.2447, "step": 6850 }, { "epoch": 0.02350715837519618, "grad_norm": 1.136607050895691, "learning_rate": 1e-05, "loss": 0.2093, "step": 6860 }, { "epoch": 0.023541425369912206, "grad_norm": 1.0915231704711914, "learning_rate": 1e-05, "loss": 0.2128, "step": 6870 }, { "epoch": 0.023575692364628236, "grad_norm": 1.0416276454925537, "learning_rate": 1e-05, "loss": 0.2092, "step": 6880 }, { "epoch": 0.023609959359344266, "grad_norm": 1.3693732023239136, "learning_rate": 1e-05, "loss": 0.2137, "step": 6890 }, { "epoch": 0.023644226354060296, "grad_norm": 1.1747677326202393, "learning_rate": 1e-05, "loss": 0.2215, "step": 6900 }, { "epoch": 0.023678493348776326, "grad_norm": 1.1593588590621948, "learning_rate": 1e-05, "loss": 0.2234, "step": 6910 }, { "epoch": 0.023712760343492355, "grad_norm": 1.2322016954421997, "learning_rate": 1e-05, "loss": 0.2437, "step": 6920 }, { "epoch": 0.023747027338208385, "grad_norm": 1.167648196220398, "learning_rate": 1e-05, 
"loss": 0.2461, "step": 6930 }, { "epoch": 0.023781294332924415, "grad_norm": 1.0984666347503662, "learning_rate": 1e-05, "loss": 0.2584, "step": 6940 }, { "epoch": 0.023815561327640445, "grad_norm": 1.1234291791915894, "learning_rate": 1e-05, "loss": 0.2532, "step": 6950 }, { "epoch": 0.02384982832235647, "grad_norm": 1.2158063650131226, "learning_rate": 1e-05, "loss": 0.2567, "step": 6960 }, { "epoch": 0.0238840953170725, "grad_norm": 1.0958101749420166, "learning_rate": 1e-05, "loss": 0.2387, "step": 6970 }, { "epoch": 0.02391836231178853, "grad_norm": 1.1536844968795776, "learning_rate": 1e-05, "loss": 0.2712, "step": 6980 }, { "epoch": 0.02395262930650456, "grad_norm": 1.2437007427215576, "learning_rate": 1e-05, "loss": 0.2563, "step": 6990 }, { "epoch": 0.02398689630122059, "grad_norm": 1.0884592533111572, "learning_rate": 1e-05, "loss": 0.2379, "step": 7000 }, { "epoch": 0.02398689630122059, "eval_cer": 13.079908273064033, "eval_loss": 0.2514401376247406, "eval_normalized_cer": 9.622302158273381, "eval_runtime": 227.6705, "eval_samples_per_second": 2.249, "eval_steps_per_second": 0.035, "step": 7000 }, { "epoch": 0.02402116329593662, "grad_norm": 1.2332980632781982, "learning_rate": 1e-05, "loss": 0.2543, "step": 7010 }, { "epoch": 0.02405543029065265, "grad_norm": 1.1041260957717896, "learning_rate": 1e-05, "loss": 0.2663, "step": 7020 }, { "epoch": 0.02408969728536868, "grad_norm": 1.1479183435440063, "learning_rate": 1e-05, "loss": 0.2528, "step": 7030 }, { "epoch": 0.024123964280084707, "grad_norm": 1.103766918182373, "learning_rate": 1e-05, "loss": 0.2336, "step": 7040 }, { "epoch": 0.024158231274800736, "grad_norm": 1.238996148109436, "learning_rate": 1e-05, "loss": 0.2436, "step": 7050 }, { "epoch": 0.024192498269516766, "grad_norm": 1.2652095556259155, "learning_rate": 1e-05, "loss": 0.2464, "step": 7060 }, { "epoch": 0.024226765264232796, "grad_norm": 1.180665373802185, "learning_rate": 1e-05, "loss": 0.2541, "step": 7070 }, { "epoch": 
0.024261032258948826, "grad_norm": 1.1601506471633911, "learning_rate": 1e-05, "loss": 0.2508, "step": 7080 }, { "epoch": 0.024295299253664856, "grad_norm": 1.257034420967102, "learning_rate": 1e-05, "loss": 0.2446, "step": 7090 }, { "epoch": 0.024329566248380886, "grad_norm": 1.0813285112380981, "learning_rate": 1e-05, "loss": 0.2546, "step": 7100 }, { "epoch": 0.024363833243096916, "grad_norm": 1.1124157905578613, "learning_rate": 1e-05, "loss": 0.2379, "step": 7110 }, { "epoch": 0.024398100237812942, "grad_norm": 1.0615211725234985, "learning_rate": 1e-05, "loss": 0.253, "step": 7120 }, { "epoch": 0.024432367232528972, "grad_norm": 1.185677409172058, "learning_rate": 1e-05, "loss": 0.2383, "step": 7130 }, { "epoch": 0.024466634227245, "grad_norm": 1.1810061931610107, "learning_rate": 1e-05, "loss": 0.2603, "step": 7140 }, { "epoch": 0.02450090122196103, "grad_norm": 1.155860424041748, "learning_rate": 1e-05, "loss": 0.2434, "step": 7150 }, { "epoch": 0.02453516821667706, "grad_norm": 1.113008737564087, "learning_rate": 1e-05, "loss": 0.2529, "step": 7160 }, { "epoch": 0.02456943521139309, "grad_norm": 1.1276872158050537, "learning_rate": 1e-05, "loss": 0.2265, "step": 7170 }, { "epoch": 0.02460370220610912, "grad_norm": 1.149792194366455, "learning_rate": 1e-05, "loss": 0.2349, "step": 7180 }, { "epoch": 0.02463796920082515, "grad_norm": 1.1619532108306885, "learning_rate": 1e-05, "loss": 0.2336, "step": 7190 }, { "epoch": 0.024672236195541177, "grad_norm": 1.0760303735733032, "learning_rate": 1e-05, "loss": 0.2315, "step": 7200 }, { "epoch": 0.024706503190257207, "grad_norm": 1.2807782888412476, "learning_rate": 1e-05, "loss": 0.2382, "step": 7210 }, { "epoch": 0.024740770184973237, "grad_norm": 1.0910037755966187, "learning_rate": 1e-05, "loss": 0.2333, "step": 7220 }, { "epoch": 0.024775037179689267, "grad_norm": 1.2938390970230103, "learning_rate": 1e-05, "loss": 0.2147, "step": 7230 }, { "epoch": 0.024809304174405297, "grad_norm": 1.185542106628418, 
"learning_rate": 1e-05, "loss": 0.2232, "step": 7240 }, { "epoch": 0.024843571169121326, "grad_norm": 1.0598995685577393, "learning_rate": 1e-05, "loss": 0.2278, "step": 7250 }, { "epoch": 0.024877838163837356, "grad_norm": 1.1860477924346924, "learning_rate": 1e-05, "loss": 0.2179, "step": 7260 }, { "epoch": 0.024912105158553386, "grad_norm": 1.1935844421386719, "learning_rate": 1e-05, "loss": 0.238, "step": 7270 }, { "epoch": 0.024946372153269412, "grad_norm": 1.0449039936065674, "learning_rate": 1e-05, "loss": 0.2307, "step": 7280 }, { "epoch": 0.024980639147985442, "grad_norm": 1.0651369094848633, "learning_rate": 1e-05, "loss": 0.2379, "step": 7290 }, { "epoch": 0.025014906142701472, "grad_norm": 1.0416852235794067, "learning_rate": 1e-05, "loss": 0.2208, "step": 7300 }, { "epoch": 0.025049173137417502, "grad_norm": 1.0064860582351685, "learning_rate": 1e-05, "loss": 0.2227, "step": 7310 }, { "epoch": 0.025083440132133532, "grad_norm": 1.0357342958450317, "learning_rate": 1e-05, "loss": 0.22, "step": 7320 }, { "epoch": 0.02511770712684956, "grad_norm": 1.019918441772461, "learning_rate": 1e-05, "loss": 0.2396, "step": 7330 }, { "epoch": 0.02515197412156559, "grad_norm": 1.0327798128128052, "learning_rate": 1e-05, "loss": 0.2118, "step": 7340 }, { "epoch": 0.02518624111628162, "grad_norm": 0.9973874092102051, "learning_rate": 1e-05, "loss": 0.2275, "step": 7350 }, { "epoch": 0.025220508110997648, "grad_norm": 1.093544840812683, "learning_rate": 1e-05, "loss": 0.2214, "step": 7360 }, { "epoch": 0.025254775105713678, "grad_norm": 1.118829369544983, "learning_rate": 1e-05, "loss": 0.237, "step": 7370 }, { "epoch": 0.025289042100429707, "grad_norm": 1.2009224891662598, "learning_rate": 1e-05, "loss": 0.2447, "step": 7380 }, { "epoch": 0.025323309095145737, "grad_norm": 1.1427584886550903, "learning_rate": 1e-05, "loss": 0.234, "step": 7390 }, { "epoch": 0.025357576089861767, "grad_norm": 0.9685842394828796, "learning_rate": 1e-05, "loss": 0.2231, "step": 7400 }, { 
"epoch": 0.025391843084577797, "grad_norm": 1.165501356124878, "learning_rate": 1e-05, "loss": 0.2139, "step": 7410 }, { "epoch": 0.025426110079293827, "grad_norm": 1.4023411273956299, "learning_rate": 1e-05, "loss": 0.236, "step": 7420 }, { "epoch": 0.025460377074009857, "grad_norm": 1.218546748161316, "learning_rate": 1e-05, "loss": 0.2433, "step": 7430 }, { "epoch": 0.025494644068725883, "grad_norm": 1.4930671453475952, "learning_rate": 1e-05, "loss": 0.2466, "step": 7440 }, { "epoch": 0.025528911063441913, "grad_norm": 1.145317554473877, "learning_rate": 1e-05, "loss": 0.2535, "step": 7450 }, { "epoch": 0.025563178058157943, "grad_norm": 1.2366299629211426, "learning_rate": 1e-05, "loss": 0.2606, "step": 7460 }, { "epoch": 0.025597445052873972, "grad_norm": 1.0542744398117065, "learning_rate": 1e-05, "loss": 0.2493, "step": 7470 }, { "epoch": 0.025631712047590002, "grad_norm": 1.2272337675094604, "learning_rate": 1e-05, "loss": 0.2537, "step": 7480 }, { "epoch": 0.025665979042306032, "grad_norm": 1.169912576675415, "learning_rate": 1e-05, "loss": 0.2581, "step": 7490 }, { "epoch": 0.025700246037022062, "grad_norm": 1.1997913122177124, "learning_rate": 1e-05, "loss": 0.2547, "step": 7500 }, { "epoch": 0.025700246037022062, "eval_cer": 12.859410830834362, "eval_loss": 0.2470153123140335, "eval_normalized_cer": 9.162669864108713, "eval_runtime": 227.7782, "eval_samples_per_second": 2.248, "eval_steps_per_second": 0.035, "step": 7500 }, { "epoch": 0.025734513031738092, "grad_norm": 1.0920944213867188, "learning_rate": 1e-05, "loss": 0.2498, "step": 7510 }, { "epoch": 0.025768780026454118, "grad_norm": 1.349660038948059, "learning_rate": 1e-05, "loss": 0.2591, "step": 7520 }, { "epoch": 0.025803047021170148, "grad_norm": 1.0097490549087524, "learning_rate": 1e-05, "loss": 0.2514, "step": 7530 }, { "epoch": 0.025837314015886178, "grad_norm": 1.118241548538208, "learning_rate": 1e-05, "loss": 0.2603, "step": 7540 }, { "epoch": 0.025871581010602208, "grad_norm": 
1.078802466392517, "learning_rate": 1e-05, "loss": 0.2532, "step": 7550 }, { "epoch": 0.025905848005318238, "grad_norm": 1.0794482231140137, "learning_rate": 1e-05, "loss": 0.2521, "step": 7560 }, { "epoch": 0.025940115000034267, "grad_norm": 1.130106806755066, "learning_rate": 1e-05, "loss": 0.2574, "step": 7570 }, { "epoch": 0.025974381994750297, "grad_norm": 1.112724781036377, "learning_rate": 1e-05, "loss": 0.253, "step": 7580 }, { "epoch": 0.026008648989466327, "grad_norm": 1.2646088600158691, "learning_rate": 1e-05, "loss": 0.2548, "step": 7590 }, { "epoch": 0.026042915984182357, "grad_norm": 1.1961979866027832, "learning_rate": 1e-05, "loss": 0.2548, "step": 7600 }, { "epoch": 0.026077182978898383, "grad_norm": 1.2568695545196533, "learning_rate": 1e-05, "loss": 0.245, "step": 7610 }, { "epoch": 0.026111449973614413, "grad_norm": 1.0233054161071777, "learning_rate": 1e-05, "loss": 0.2429, "step": 7620 }, { "epoch": 0.026145716968330443, "grad_norm": 1.4355731010437012, "learning_rate": 1e-05, "loss": 0.2623, "step": 7630 }, { "epoch": 0.026179983963046473, "grad_norm": 0.9781149625778198, "learning_rate": 1e-05, "loss": 0.2436, "step": 7640 }, { "epoch": 0.026214250957762503, "grad_norm": 1.085255742073059, "learning_rate": 1e-05, "loss": 0.2475, "step": 7650 }, { "epoch": 0.026248517952478533, "grad_norm": 1.0647081136703491, "learning_rate": 1e-05, "loss": 0.2596, "step": 7660 }, { "epoch": 0.026282784947194562, "grad_norm": 1.3411939144134521, "learning_rate": 1e-05, "loss": 0.2444, "step": 7670 }, { "epoch": 0.026317051941910592, "grad_norm": 1.0778676271438599, "learning_rate": 1e-05, "loss": 0.2499, "step": 7680 }, { "epoch": 0.02635131893662662, "grad_norm": 1.1606541872024536, "learning_rate": 1e-05, "loss": 0.2537, "step": 7690 }, { "epoch": 0.02638558593134265, "grad_norm": 1.0706511735916138, "learning_rate": 1e-05, "loss": 0.2324, "step": 7700 }, { "epoch": 0.02641985292605868, "grad_norm": 1.2074836492538452, "learning_rate": 1e-05, "loss": 
0.2487, "step": 7710 }, { "epoch": 0.026454119920774708, "grad_norm": 1.0147804021835327, "learning_rate": 1e-05, "loss": 0.2202, "step": 7720 }, { "epoch": 0.026488386915490738, "grad_norm": 1.1806961297988892, "learning_rate": 1e-05, "loss": 0.2464, "step": 7730 }, { "epoch": 0.026522653910206768, "grad_norm": 1.1552751064300537, "learning_rate": 1e-05, "loss": 0.2244, "step": 7740 }, { "epoch": 0.026556920904922798, "grad_norm": 1.115871548652649, "learning_rate": 1e-05, "loss": 0.2389, "step": 7750 }, { "epoch": 0.026591187899638827, "grad_norm": 1.0924640893936157, "learning_rate": 1e-05, "loss": 0.2237, "step": 7760 }, { "epoch": 0.026625454894354854, "grad_norm": 1.021644115447998, "learning_rate": 1e-05, "loss": 0.2257, "step": 7770 }, { "epoch": 0.026659721889070884, "grad_norm": 1.1757131814956665, "learning_rate": 1e-05, "loss": 0.2278, "step": 7780 }, { "epoch": 0.026693988883786914, "grad_norm": 1.1914074420928955, "learning_rate": 1e-05, "loss": 0.2266, "step": 7790 }, { "epoch": 0.026728255878502943, "grad_norm": 1.0416505336761475, "learning_rate": 1e-05, "loss": 0.2273, "step": 7800 }, { "epoch": 0.026762522873218973, "grad_norm": 1.0241059064865112, "learning_rate": 1e-05, "loss": 0.2342, "step": 7810 }, { "epoch": 0.026796789867935003, "grad_norm": 1.133334994316101, "learning_rate": 1e-05, "loss": 0.2303, "step": 7820 }, { "epoch": 0.026831056862651033, "grad_norm": 1.1711792945861816, "learning_rate": 1e-05, "loss": 0.2333, "step": 7830 }, { "epoch": 0.026865323857367063, "grad_norm": 1.1120338439941406, "learning_rate": 1e-05, "loss": 0.2474, "step": 7840 }, { "epoch": 0.02689959085208309, "grad_norm": 1.1995311975479126, "learning_rate": 1e-05, "loss": 0.2472, "step": 7850 }, { "epoch": 0.02693385784679912, "grad_norm": 1.1725718975067139, "learning_rate": 1e-05, "loss": 0.2361, "step": 7860 }, { "epoch": 0.02696812484151515, "grad_norm": 0.9564438462257385, "learning_rate": 1e-05, "loss": 0.2266, "step": 7870 }, { "epoch": 
0.02700239183623118, "grad_norm": 1.140692114830017, "learning_rate": 1e-05, "loss": 0.2319, "step": 7880 }, { "epoch": 0.02703665883094721, "grad_norm": 1.0812654495239258, "learning_rate": 1e-05, "loss": 0.2434, "step": 7890 }, { "epoch": 0.02707092582566324, "grad_norm": 1.179500937461853, "learning_rate": 1e-05, "loss": 0.2191, "step": 7900 }, { "epoch": 0.027105192820379268, "grad_norm": 1.1073647737503052, "learning_rate": 1e-05, "loss": 0.2315, "step": 7910 }, { "epoch": 0.027139459815095298, "grad_norm": 1.093070387840271, "learning_rate": 1e-05, "loss": 0.2256, "step": 7920 }, { "epoch": 0.027173726809811324, "grad_norm": 1.2253212928771973, "learning_rate": 1e-05, "loss": 0.2413, "step": 7930 }, { "epoch": 0.027207993804527354, "grad_norm": 1.1531736850738525, "learning_rate": 1e-05, "loss": 0.2514, "step": 7940 }, { "epoch": 0.027242260799243384, "grad_norm": 1.0366076231002808, "learning_rate": 1e-05, "loss": 0.2475, "step": 7950 }, { "epoch": 0.027276527793959414, "grad_norm": 1.1657369136810303, "learning_rate": 1e-05, "loss": 0.2475, "step": 7960 }, { "epoch": 0.027310794788675444, "grad_norm": 1.3050105571746826, "learning_rate": 1e-05, "loss": 0.2704, "step": 7970 }, { "epoch": 0.027345061783391474, "grad_norm": 1.1378298997879028, "learning_rate": 1e-05, "loss": 0.2481, "step": 7980 }, { "epoch": 0.027379328778107503, "grad_norm": 1.1434043645858765, "learning_rate": 1e-05, "loss": 0.2671, "step": 7990 }, { "epoch": 0.027413595772823533, "grad_norm": 1.0899518728256226, "learning_rate": 1e-05, "loss": 0.2573, "step": 8000 }, { "epoch": 0.027413595772823533, "eval_cer": 12.903510319280295, "eval_loss": 0.2475583553314209, "eval_normalized_cer": 9.362509992006395, "eval_runtime": 228.4278, "eval_samples_per_second": 2.241, "eval_steps_per_second": 0.035, "step": 8000 }, { "epoch": 0.02744786276753956, "grad_norm": 0.970212459564209, "learning_rate": 1e-05, "loss": 0.238, "step": 8010 }, { "epoch": 0.02748212976225559, "grad_norm": 
1.0460257530212402, "learning_rate": 1e-05, "loss": 0.2606, "step": 8020 }, { "epoch": 0.02751639675697162, "grad_norm": 1.116742491722107, "learning_rate": 1e-05, "loss": 0.2571, "step": 8030 }, { "epoch": 0.02755066375168765, "grad_norm": 1.2562140226364136, "learning_rate": 1e-05, "loss": 0.2561, "step": 8040 }, { "epoch": 0.02758493074640368, "grad_norm": 1.2499713897705078, "learning_rate": 1e-05, "loss": 0.2683, "step": 8050 }, { "epoch": 0.02761919774111971, "grad_norm": 1.151715874671936, "learning_rate": 1e-05, "loss": 0.2463, "step": 8060 }, { "epoch": 0.02765346473583574, "grad_norm": 1.2527892589569092, "learning_rate": 1e-05, "loss": 0.261, "step": 8070 }, { "epoch": 0.02768773173055177, "grad_norm": 1.1776025295257568, "learning_rate": 1e-05, "loss": 0.2616, "step": 8080 }, { "epoch": 0.027721998725267795, "grad_norm": 1.1632285118103027, "learning_rate": 1e-05, "loss": 0.2508, "step": 8090 }, { "epoch": 0.027756265719983825, "grad_norm": 1.3266422748565674, "learning_rate": 1e-05, "loss": 0.2667, "step": 8100 }, { "epoch": 0.027790532714699855, "grad_norm": 1.240424633026123, "learning_rate": 1e-05, "loss": 0.2582, "step": 8110 }, { "epoch": 0.027824799709415884, "grad_norm": 1.1874525547027588, "learning_rate": 1e-05, "loss": 0.2505, "step": 8120 }, { "epoch": 0.027859066704131914, "grad_norm": 1.0850279331207275, "learning_rate": 1e-05, "loss": 0.2556, "step": 8130 }, { "epoch": 0.027893333698847944, "grad_norm": 1.203342318534851, "learning_rate": 1e-05, "loss": 0.2526, "step": 8140 }, { "epoch": 0.027927600693563974, "grad_norm": 0.9685319066047668, "learning_rate": 1e-05, "loss": 0.2614, "step": 8150 }, { "epoch": 0.027961867688280004, "grad_norm": 1.020749807357788, "learning_rate": 1e-05, "loss": 0.2763, "step": 8160 }, { "epoch": 0.02799613468299603, "grad_norm": 1.1530399322509766, "learning_rate": 1e-05, "loss": 0.2544, "step": 8170 }, { "epoch": 0.02803040167771206, "grad_norm": 1.0800687074661255, "learning_rate": 1e-05, "loss": 0.2628, 
"step": 8180 }, { "epoch": 0.02806466867242809, "grad_norm": 1.1825618743896484, "learning_rate": 1e-05, "loss": 0.2524, "step": 8190 }, { "epoch": 0.02809893566714412, "grad_norm": 1.176870346069336, "learning_rate": 1e-05, "loss": 0.2401, "step": 8200 }, { "epoch": 0.02813320266186015, "grad_norm": 1.19720458984375, "learning_rate": 1e-05, "loss": 0.2801, "step": 8210 }, { "epoch": 0.02816746965657618, "grad_norm": 1.0634618997573853, "learning_rate": 1e-05, "loss": 0.2607, "step": 8220 }, { "epoch": 0.02820173665129221, "grad_norm": 1.1780894994735718, "learning_rate": 1e-05, "loss": 0.2558, "step": 8230 }, { "epoch": 0.02823600364600824, "grad_norm": 1.18949294090271, "learning_rate": 1e-05, "loss": 0.2432, "step": 8240 }, { "epoch": 0.02827027064072427, "grad_norm": 1.3350197076797485, "learning_rate": 1e-05, "loss": 0.2644, "step": 8250 }, { "epoch": 0.028304537635440295, "grad_norm": 1.1507694721221924, "learning_rate": 1e-05, "loss": 0.254, "step": 8260 }, { "epoch": 0.028338804630156325, "grad_norm": 1.0806615352630615, "learning_rate": 1e-05, "loss": 0.2479, "step": 8270 }, { "epoch": 0.028373071624872355, "grad_norm": 1.1201471090316772, "learning_rate": 1e-05, "loss": 0.2553, "step": 8280 }, { "epoch": 0.028407338619588385, "grad_norm": 1.0681666135787964, "learning_rate": 1e-05, "loss": 0.258, "step": 8290 }, { "epoch": 0.028441605614304415, "grad_norm": 1.0958445072174072, "learning_rate": 1e-05, "loss": 0.2502, "step": 8300 }, { "epoch": 0.028475872609020444, "grad_norm": 1.165635585784912, "learning_rate": 1e-05, "loss": 0.2642, "step": 8310 }, { "epoch": 0.028510139603736474, "grad_norm": 0.9674690961837769, "learning_rate": 1e-05, "loss": 0.2385, "step": 8320 }, { "epoch": 0.028544406598452504, "grad_norm": 1.239996314048767, "learning_rate": 1e-05, "loss": 0.2706, "step": 8330 }, { "epoch": 0.02857867359316853, "grad_norm": 1.0063962936401367, "learning_rate": 1e-05, "loss": 0.2448, "step": 8340 }, { "epoch": 0.02861294058788456, "grad_norm": 
1.0466179847717285, "learning_rate": 1e-05, "loss": 0.2452, "step": 8350 }, { "epoch": 0.02864720758260059, "grad_norm": null, "learning_rate": 1e-05, "loss": 0.2595, "step": 8360 }, { "epoch": 0.02868147457731662, "grad_norm": 1.1461595296859741, "learning_rate": 1e-05, "loss": 0.2515, "step": 8370 }, { "epoch": 0.02871574157203265, "grad_norm": 1.2697845697402954, "learning_rate": 1e-05, "loss": 0.2641, "step": 8380 }, { "epoch": 0.02875000856674868, "grad_norm": 1.2665945291519165, "learning_rate": 1e-05, "loss": 0.2613, "step": 8390 }, { "epoch": 0.02878427556146471, "grad_norm": 1.1350281238555908, "learning_rate": 1e-05, "loss": 0.2524, "step": 8400 }, { "epoch": 0.02881854255618074, "grad_norm": 1.0341808795928955, "learning_rate": 1e-05, "loss": 0.2466, "step": 8410 }, { "epoch": 0.028852809550896766, "grad_norm": 1.1108484268188477, "learning_rate": 1e-05, "loss": 0.2471, "step": 8420 }, { "epoch": 0.028887076545612796, "grad_norm": 1.059414029121399, "learning_rate": 1e-05, "loss": 0.2695, "step": 8430 }, { "epoch": 0.028921343540328825, "grad_norm": 1.0888679027557373, "learning_rate": 1e-05, "loss": 0.2683, "step": 8440 }, { "epoch": 0.028955610535044855, "grad_norm": 1.1649068593978882, "learning_rate": 1e-05, "loss": 0.2485, "step": 8450 }, { "epoch": 0.028989877529760885, "grad_norm": 1.218563199043274, "learning_rate": 1e-05, "loss": 0.2456, "step": 8460 }, { "epoch": 0.029024144524476915, "grad_norm": 1.3558833599090576, "learning_rate": 1e-05, "loss": 0.2517, "step": 8470 }, { "epoch": 0.029058411519192945, "grad_norm": 1.2579597234725952, "learning_rate": 1e-05, "loss": 0.2516, "step": 8480 }, { "epoch": 0.029092678513908975, "grad_norm": 1.185253381729126, "learning_rate": 1e-05, "loss": 0.2475, "step": 8490 }, { "epoch": 0.029126945508625, "grad_norm": 1.1937752962112427, "learning_rate": 1e-05, "loss": 0.2654, "step": 8500 }, { "epoch": 0.029126945508625, "eval_cer": 12.89469042159111, "eval_loss": 0.2503049969673157, 
"eval_normalized_cer": 8.952837729816148, "eval_runtime": 229.0216, "eval_samples_per_second": 2.236, "eval_steps_per_second": 0.035, "step": 8500 }, { "epoch": 0.02916121250334103, "grad_norm": 1.1005933284759521, "learning_rate": 1e-05, "loss": 0.247, "step": 8510 }, { "epoch": 0.02919547949805706, "grad_norm": 1.0437865257263184, "learning_rate": 1e-05, "loss": 0.2492, "step": 8520 }, { "epoch": 0.02922974649277309, "grad_norm": 1.0478579998016357, "learning_rate": 1e-05, "loss": 0.2508, "step": 8530 }, { "epoch": 0.02926401348748912, "grad_norm": 1.0615030527114868, "learning_rate": 1e-05, "loss": 0.2305, "step": 8540 }, { "epoch": 0.02929828048220515, "grad_norm": 1.105209469795227, "learning_rate": 1e-05, "loss": 0.2572, "step": 8550 }, { "epoch": 0.02933254747692118, "grad_norm": 1.139857530593872, "learning_rate": 1e-05, "loss": 0.237, "step": 8560 }, { "epoch": 0.02936681447163721, "grad_norm": 1.0326822996139526, "learning_rate": 1e-05, "loss": 0.2587, "step": 8570 }, { "epoch": 0.029401081466353236, "grad_norm": 1.4446253776550293, "learning_rate": 1e-05, "loss": 0.2488, "step": 8580 }, { "epoch": 0.029435348461069266, "grad_norm": 1.070698857307434, "learning_rate": 1e-05, "loss": 0.236, "step": 8590 }, { "epoch": 0.029469615455785296, "grad_norm": 1.119545817375183, "learning_rate": 1e-05, "loss": 0.2406, "step": 8600 }, { "epoch": 0.029503882450501326, "grad_norm": 1.1146609783172607, "learning_rate": 1e-05, "loss": 0.2429, "step": 8610 }, { "epoch": 0.029538149445217356, "grad_norm": 1.107639193534851, "learning_rate": 1e-05, "loss": 0.2412, "step": 8620 }, { "epoch": 0.029572416439933386, "grad_norm": 1.0722100734710693, "learning_rate": 1e-05, "loss": 0.2444, "step": 8630 }, { "epoch": 0.029606683434649415, "grad_norm": 1.3313097953796387, "learning_rate": 1e-05, "loss": 0.2482, "step": 8640 }, { "epoch": 0.029640950429365445, "grad_norm": 1.13486647605896, "learning_rate": 1e-05, "loss": 0.2397, "step": 8650 }, { "epoch": 0.02967521742408147, 
"grad_norm": 1.1610273122787476, "learning_rate": 1e-05, "loss": 0.2594, "step": 8660 }, { "epoch": 0.0297094844187975, "grad_norm": 1.3555855751037598, "learning_rate": 1e-05, "loss": 0.2521, "step": 8670 }, { "epoch": 0.02974375141351353, "grad_norm": 1.2834869623184204, "learning_rate": 1e-05, "loss": 0.2638, "step": 8680 }, { "epoch": 0.02977801840822956, "grad_norm": 1.1000789403915405, "learning_rate": 1e-05, "loss": 0.2371, "step": 8690 }, { "epoch": 0.02981228540294559, "grad_norm": 0.9901896119117737, "learning_rate": 1e-05, "loss": 0.2535, "step": 8700 }, { "epoch": 0.02984655239766162, "grad_norm": 1.0514518022537231, "learning_rate": 1e-05, "loss": 0.2443, "step": 8710 }, { "epoch": 0.02988081939237765, "grad_norm": 1.1127166748046875, "learning_rate": 1e-05, "loss": 0.2259, "step": 8720 }, { "epoch": 0.02991508638709368, "grad_norm": 1.0674943923950195, "learning_rate": 1e-05, "loss": 0.2309, "step": 8730 }, { "epoch": 0.029949353381809707, "grad_norm": 1.0397884845733643, "learning_rate": 1e-05, "loss": 0.2411, "step": 8740 }, { "epoch": 0.029983620376525737, "grad_norm": 1.2052630186080933, "learning_rate": 1e-05, "loss": 0.2294, "step": 8750 }, { "epoch": 0.030017887371241767, "grad_norm": 1.1350561380386353, "learning_rate": 1e-05, "loss": 0.2264, "step": 8760 }, { "epoch": 0.030052154365957796, "grad_norm": 1.1187571287155151, "learning_rate": 1e-05, "loss": 0.2325, "step": 8770 }, { "epoch": 0.030086421360673826, "grad_norm": 1.0860145092010498, "learning_rate": 1e-05, "loss": 0.2328, "step": 8780 }, { "epoch": 0.030120688355389856, "grad_norm": 1.1102906465530396, "learning_rate": 1e-05, "loss": 0.2271, "step": 8790 }, { "epoch": 0.030154955350105886, "grad_norm": 1.0239520072937012, "learning_rate": 1e-05, "loss": 0.2489, "step": 8800 }, { "epoch": 0.030189222344821916, "grad_norm": 1.0980205535888672, "learning_rate": 1e-05, "loss": 0.2314, "step": 8810 }, { "epoch": 0.030223489339537942, "grad_norm": 1.1887843608856201, "learning_rate": 
1e-05, "loss": 0.24, "step": 8820 }, { "epoch": 0.030257756334253972, "grad_norm": 1.2101106643676758, "learning_rate": 1e-05, "loss": 0.2353, "step": 8830 }, { "epoch": 0.030292023328970002, "grad_norm": 1.1793345212936401, "learning_rate": 1e-05, "loss": 0.2482, "step": 8840 }, { "epoch": 0.03032629032368603, "grad_norm": 1.1983866691589355, "learning_rate": 1e-05, "loss": 0.2441, "step": 8850 }, { "epoch": 0.03036055731840206, "grad_norm": 0.9888906478881836, "learning_rate": 1e-05, "loss": 0.2271, "step": 8860 }, { "epoch": 0.03039482431311809, "grad_norm": 1.217824101448059, "learning_rate": 1e-05, "loss": 0.2298, "step": 8870 }, { "epoch": 0.03042909130783412, "grad_norm": 0.9851268529891968, "learning_rate": 1e-05, "loss": 0.2318, "step": 8880 }, { "epoch": 0.03046335830255015, "grad_norm": 1.0329748392105103, "learning_rate": 1e-05, "loss": 0.2654, "step": 8890 }, { "epoch": 0.03049762529726618, "grad_norm": 1.067325234413147, "learning_rate": 1e-05, "loss": 0.23, "step": 8900 }, { "epoch": 0.030531892291982207, "grad_norm": 1.2636964321136475, "learning_rate": 1e-05, "loss": 0.2303, "step": 8910 }, { "epoch": 0.030566159286698237, "grad_norm": 1.1565788984298706, "learning_rate": 1e-05, "loss": 0.2494, "step": 8920 }, { "epoch": 0.030600426281414267, "grad_norm": 1.2197197675704956, "learning_rate": 1e-05, "loss": 0.2338, "step": 8930 }, { "epoch": 0.030634693276130297, "grad_norm": 1.1062088012695312, "learning_rate": 1e-05, "loss": 0.2157, "step": 8940 }, { "epoch": 0.030668960270846327, "grad_norm": 1.107677936553955, "learning_rate": 1e-05, "loss": 0.2387, "step": 8950 }, { "epoch": 0.030703227265562356, "grad_norm": 1.1791733503341675, "learning_rate": 1e-05, "loss": 0.2337, "step": 8960 }, { "epoch": 0.030737494260278386, "grad_norm": 1.1337239742279053, "learning_rate": 1e-05, "loss": 0.2127, "step": 8970 }, { "epoch": 0.030771761254994416, "grad_norm": 1.0418322086334229, "learning_rate": 1e-05, "loss": 0.222, "step": 8980 }, { "epoch": 
0.030806028249710442, "grad_norm": 1.1591708660125732, "learning_rate": 1e-05, "loss": 0.2388, "step": 8990 }, { "epoch": 0.030840295244426472, "grad_norm": 1.0103886127471924, "learning_rate": 1e-05, "loss": 0.2264, "step": 9000 }, { "epoch": 0.030840295244426472, "eval_cer": 13.273946022226143, "eval_loss": 0.2514854073524475, "eval_normalized_cer": 9.492406075139888, "eval_runtime": 227.5807, "eval_samples_per_second": 2.25, "eval_steps_per_second": 0.035, "step": 9000 }, { "epoch": 0.030874562239142502, "grad_norm": 1.15034019947052, "learning_rate": 1e-05, "loss": 0.2215, "step": 9010 }, { "epoch": 0.030908829233858532, "grad_norm": 1.183698296546936, "learning_rate": 1e-05, "loss": 0.2235, "step": 9020 }, { "epoch": 0.030943096228574562, "grad_norm": 1.1930736303329468, "learning_rate": 1e-05, "loss": 0.25, "step": 9030 }, { "epoch": 0.03097736322329059, "grad_norm": 1.0650999546051025, "learning_rate": 1e-05, "loss": 0.224, "step": 9040 }, { "epoch": 0.03101163021800662, "grad_norm": 1.0613574981689453, "learning_rate": 1e-05, "loss": 0.2379, "step": 9050 }, { "epoch": 0.03104589721272265, "grad_norm": 1.3004292249679565, "learning_rate": 1e-05, "loss": 0.2286, "step": 9060 }, { "epoch": 0.031080164207438678, "grad_norm": 1.144573450088501, "learning_rate": 1e-05, "loss": 0.2375, "step": 9070 }, { "epoch": 0.031114431202154708, "grad_norm": 1.3552353382110596, "learning_rate": 1e-05, "loss": 0.216, "step": 9080 }, { "epoch": 0.031148698196870737, "grad_norm": 1.139901041984558, "learning_rate": 1e-05, "loss": 0.2377, "step": 9090 }, { "epoch": 0.031182965191586767, "grad_norm": 1.179685354232788, "learning_rate": 1e-05, "loss": 0.2389, "step": 9100 }, { "epoch": 0.031217232186302797, "grad_norm": 1.1494848728179932, "learning_rate": 1e-05, "loss": 0.2275, "step": 9110 }, { "epoch": 0.03125149918101883, "grad_norm": 1.0726871490478516, "learning_rate": 1e-05, "loss": 0.211, "step": 9120 }, { "epoch": 0.03128576617573486, "grad_norm": 1.254655361175537, 
"learning_rate": 1e-05, "loss": 0.2332, "step": 9130 }, { "epoch": 0.03132003317045089, "grad_norm": 1.1774569749832153, "learning_rate": 1e-05, "loss": 0.2441, "step": 9140 }, { "epoch": 0.031354300165166916, "grad_norm": 1.0810356140136719, "learning_rate": 1e-05, "loss": 0.2354, "step": 9150 }, { "epoch": 0.031388567159882946, "grad_norm": 1.0976495742797852, "learning_rate": 1e-05, "loss": 0.2347, "step": 9160 }, { "epoch": 0.031422834154598976, "grad_norm": 1.2417839765548706, "learning_rate": 1e-05, "loss": 0.2261, "step": 9170 }, { "epoch": 0.031457101149315006, "grad_norm": 1.0022953748703003, "learning_rate": 1e-05, "loss": 0.2277, "step": 9180 }, { "epoch": 0.03149136814403103, "grad_norm": 1.1461567878723145, "learning_rate": 1e-05, "loss": 0.218, "step": 9190 }, { "epoch": 0.03152563513874706, "grad_norm": 1.1877334117889404, "learning_rate": 1e-05, "loss": 0.2195, "step": 9200 }, { "epoch": 0.03155990213346309, "grad_norm": 1.1513786315917969, "learning_rate": 1e-05, "loss": 0.2354, "step": 9210 }, { "epoch": 0.03159416912817912, "grad_norm": 1.057938814163208, "learning_rate": 1e-05, "loss": 0.2428, "step": 9220 }, { "epoch": 0.03162843612289515, "grad_norm": 1.0752719640731812, "learning_rate": 1e-05, "loss": 0.2499, "step": 9230 }, { "epoch": 0.03166270311761118, "grad_norm": 1.1784312725067139, "learning_rate": 1e-05, "loss": 0.2556, "step": 9240 }, { "epoch": 0.03169697011232721, "grad_norm": 1.0176231861114502, "learning_rate": 1e-05, "loss": 0.2552, "step": 9250 }, { "epoch": 0.03173123710704324, "grad_norm": 1.0849392414093018, "learning_rate": 1e-05, "loss": 0.2483, "step": 9260 }, { "epoch": 0.03176550410175927, "grad_norm": 1.0042351484298706, "learning_rate": 1e-05, "loss": 0.2472, "step": 9270 }, { "epoch": 0.0317997710964753, "grad_norm": 1.0480408668518066, "learning_rate": 1e-05, "loss": 0.238, "step": 9280 }, { "epoch": 0.03183403809119133, "grad_norm": 1.0602933168411255, "learning_rate": 1e-05, "loss": 0.2657, "step": 9290 }, { 
"epoch": 0.03186830508590736, "grad_norm": 1.1704037189483643, "learning_rate": 1e-05, "loss": 0.2634, "step": 9300 }, { "epoch": 0.03190257208062339, "grad_norm": 1.2454304695129395, "learning_rate": 1e-05, "loss": 0.2648, "step": 9310 }, { "epoch": 0.03193683907533942, "grad_norm": 1.0540211200714111, "learning_rate": 1e-05, "loss": 0.2462, "step": 9320 }, { "epoch": 0.03197110607005545, "grad_norm": 1.1440715789794922, "learning_rate": 1e-05, "loss": 0.2609, "step": 9330 }, { "epoch": 0.03200537306477148, "grad_norm": 1.0083932876586914, "learning_rate": 1e-05, "loss": 0.2422, "step": 9340 }, { "epoch": 0.0320396400594875, "grad_norm": 1.0180490016937256, "learning_rate": 1e-05, "loss": 0.2402, "step": 9350 }, { "epoch": 0.03207390705420353, "grad_norm": 1.1158274412155151, "learning_rate": 1e-05, "loss": 0.2378, "step": 9360 }, { "epoch": 0.03210817404891956, "grad_norm": 1.2014826536178589, "learning_rate": 1e-05, "loss": 0.2433, "step": 9370 }, { "epoch": 0.03214244104363559, "grad_norm": 1.1604617834091187, "learning_rate": 1e-05, "loss": 0.2396, "step": 9380 }, { "epoch": 0.03217670803835162, "grad_norm": 1.1088517904281616, "learning_rate": 1e-05, "loss": 0.2364, "step": 9390 }, { "epoch": 0.03221097503306765, "grad_norm": 1.1905596256256104, "learning_rate": 1e-05, "loss": 0.2335, "step": 9400 }, { "epoch": 0.03224524202778368, "grad_norm": 1.2237385511398315, "learning_rate": 1e-05, "loss": 0.2522, "step": 9410 }, { "epoch": 0.03227950902249971, "grad_norm": 1.0582191944122314, "learning_rate": 1e-05, "loss": 0.2467, "step": 9420 }, { "epoch": 0.03231377601721574, "grad_norm": 1.2065699100494385, "learning_rate": 1e-05, "loss": 0.2359, "step": 9430 }, { "epoch": 0.03234804301193177, "grad_norm": 1.1399251222610474, "learning_rate": 1e-05, "loss": 0.2507, "step": 9440 }, { "epoch": 0.0323823100066478, "grad_norm": 1.070038914680481, "learning_rate": 1e-05, "loss": 0.2732, "step": 9450 }, { "epoch": 0.03241657700136383, "grad_norm": 1.0940920114517212, 
"learning_rate": 1e-05, "loss": 0.2609, "step": 9460 }, { "epoch": 0.03245084399607986, "grad_norm": 1.2042659521102905, "learning_rate": 1e-05, "loss": 0.2511, "step": 9470 }, { "epoch": 0.03248511099079589, "grad_norm": 1.0865731239318848, "learning_rate": 1e-05, "loss": 0.2693, "step": 9480 }, { "epoch": 0.03251937798551192, "grad_norm": 1.0884779691696167, "learning_rate": 1e-05, "loss": 0.2303, "step": 9490 }, { "epoch": 0.03255364498022795, "grad_norm": 1.0557243824005127, "learning_rate": 1e-05, "loss": 0.2611, "step": 9500 }, { "epoch": 0.03255364498022795, "eval_cer": 13.300405715293703, "eval_loss": 0.2491595298051834, "eval_normalized_cer": 9.722222222222223, "eval_runtime": 227.5398, "eval_samples_per_second": 2.25, "eval_steps_per_second": 0.035, "step": 9500 }, { "epoch": 0.03258791197494398, "grad_norm": 1.1241854429244995, "learning_rate": 1e-05, "loss": 0.2531, "step": 9510 }, { "epoch": 0.03262217896966, "grad_norm": 1.091977834701538, "learning_rate": 1e-05, "loss": 0.2437, "step": 9520 }, { "epoch": 0.03265644596437603, "grad_norm": 1.0969996452331543, "learning_rate": 1e-05, "loss": 0.2595, "step": 9530 }, { "epoch": 0.03269071295909206, "grad_norm": 1.1597386598587036, "learning_rate": 1e-05, "loss": 0.246, "step": 9540 }, { "epoch": 0.03272497995380809, "grad_norm": 1.0741667747497559, "learning_rate": 1e-05, "loss": 0.2435, "step": 9550 }, { "epoch": 0.03275924694852412, "grad_norm": 1.1600459814071655, "learning_rate": 1e-05, "loss": 0.2476, "step": 9560 }, { "epoch": 0.03279351394324015, "grad_norm": 1.0636577606201172, "learning_rate": 1e-05, "loss": 0.2323, "step": 9570 }, { "epoch": 0.03282778093795618, "grad_norm": 1.0010998249053955, "learning_rate": 1e-05, "loss": 0.2436, "step": 9580 }, { "epoch": 0.03286204793267221, "grad_norm": 1.1286782026290894, "learning_rate": 1e-05, "loss": 0.2585, "step": 9590 }, { "epoch": 0.03289631492738824, "grad_norm": 1.1575970649719238, "learning_rate": 1e-05, "loss": 0.2529, "step": 9600 }, { 
"epoch": 0.03293058192210427, "grad_norm": 1.2235829830169678, "learning_rate": 1e-05, "loss": 0.2617, "step": 9610 }, { "epoch": 0.0329648489168203, "grad_norm": 1.203371286392212, "learning_rate": 1e-05, "loss": 0.2299, "step": 9620 }, { "epoch": 0.03299911591153633, "grad_norm": 1.2438814640045166, "learning_rate": 1e-05, "loss": 0.2544, "step": 9630 }, { "epoch": 0.03303338290625236, "grad_norm": 1.2102582454681396, "learning_rate": 1e-05, "loss": 0.2415, "step": 9640 }, { "epoch": 0.03306764990096839, "grad_norm": 0.9984006285667419, "learning_rate": 1e-05, "loss": 0.2633, "step": 9650 }, { "epoch": 0.03310191689568442, "grad_norm": 1.0693376064300537, "learning_rate": 1e-05, "loss": 0.2394, "step": 9660 }, { "epoch": 0.03313618389040045, "grad_norm": 1.2770649194717407, "learning_rate": 1e-05, "loss": 0.2577, "step": 9670 }, { "epoch": 0.03317045088511647, "grad_norm": 1.3751499652862549, "learning_rate": 1e-05, "loss": 0.2595, "step": 9680 }, { "epoch": 0.0332047178798325, "grad_norm": 1.247056484222412, "learning_rate": 1e-05, "loss": 0.2526, "step": 9690 }, { "epoch": 0.03323898487454853, "grad_norm": 1.2248870134353638, "learning_rate": 1e-05, "loss": 0.2609, "step": 9700 }, { "epoch": 0.03327325186926456, "grad_norm": 1.0467997789382935, "learning_rate": 1e-05, "loss": 0.2353, "step": 9710 }, { "epoch": 0.03330751886398059, "grad_norm": 1.2705328464508057, "learning_rate": 1e-05, "loss": 0.2483, "step": 9720 }, { "epoch": 0.03334178585869662, "grad_norm": 1.0360983610153198, "learning_rate": 1e-05, "loss": 0.2497, "step": 9730 }, { "epoch": 0.03337605285341265, "grad_norm": 1.1409697532653809, "learning_rate": 1e-05, "loss": 0.2506, "step": 9740 }, { "epoch": 0.03341031984812868, "grad_norm": 1.0656344890594482, "learning_rate": 1e-05, "loss": 0.252, "step": 9750 }, { "epoch": 0.03344458684284471, "grad_norm": 1.1420108079910278, "learning_rate": 1e-05, "loss": 0.2383, "step": 9760 }, { "epoch": 0.03347885383756074, "grad_norm": 1.0792863368988037, 
"learning_rate": 1e-05, "loss": 0.2461, "step": 9770 }, { "epoch": 0.03351312083227677, "grad_norm": 1.1297261714935303, "learning_rate": 1e-05, "loss": 0.2501, "step": 9780 }, { "epoch": 0.0335473878269928, "grad_norm": 1.2627495527267456, "learning_rate": 1e-05, "loss": 0.2452, "step": 9790 }, { "epoch": 0.03358165482170883, "grad_norm": 0.982812762260437, "learning_rate": 1e-05, "loss": 0.2443, "step": 9800 }, { "epoch": 0.03361592181642486, "grad_norm": 1.048766016960144, "learning_rate": 1e-05, "loss": 0.2531, "step": 9810 }, { "epoch": 0.03365018881114089, "grad_norm": 1.141780972480774, "learning_rate": 1e-05, "loss": 0.2271, "step": 9820 }, { "epoch": 0.03368445580585692, "grad_norm": 1.101762056350708, "learning_rate": 1e-05, "loss": 0.2338, "step": 9830 }, { "epoch": 0.03371872280057294, "grad_norm": 1.0150196552276611, "learning_rate": 1e-05, "loss": 0.2404, "step": 9840 }, { "epoch": 0.03375298979528897, "grad_norm": 1.1550086736679077, "learning_rate": 1e-05, "loss": 0.2479, "step": 9850 }, { "epoch": 0.033787256790005, "grad_norm": 1.1246519088745117, "learning_rate": 1e-05, "loss": 0.2525, "step": 9860 }, { "epoch": 0.03382152378472103, "grad_norm": 0.9673643708229065, "learning_rate": 1e-05, "loss": 0.2453, "step": 9870 }, { "epoch": 0.03385579077943706, "grad_norm": 1.019649863243103, "learning_rate": 1e-05, "loss": 0.2407, "step": 9880 }, { "epoch": 0.03389005777415309, "grad_norm": 1.1477577686309814, "learning_rate": 1e-05, "loss": 0.2374, "step": 9890 }, { "epoch": 0.03392432476886912, "grad_norm": 0.9760174751281738, "learning_rate": 1e-05, "loss": 0.248, "step": 9900 }, { "epoch": 0.03395859176358515, "grad_norm": 1.1654585599899292, "learning_rate": 1e-05, "loss": 0.253, "step": 9910 }, { "epoch": 0.03399285875830118, "grad_norm": 1.0818895101547241, "learning_rate": 1e-05, "loss": 0.2397, "step": 9920 }, { "epoch": 0.03402712575301721, "grad_norm": 1.1635690927505493, "learning_rate": 1e-05, "loss": 0.2539, "step": 9930 }, { "epoch": 
0.03406139274773324, "grad_norm": 1.0819408893585205, "learning_rate": 1e-05, "loss": 0.2352, "step": 9940 }, { "epoch": 0.03409565974244927, "grad_norm": 1.0151749849319458, "learning_rate": 1e-05, "loss": 0.2378, "step": 9950 }, { "epoch": 0.0341299267371653, "grad_norm": 1.092203140258789, "learning_rate": 1e-05, "loss": 0.2406, "step": 9960 }, { "epoch": 0.03416419373188133, "grad_norm": 1.1004047393798828, "learning_rate": 1e-05, "loss": 0.2375, "step": 9970 }, { "epoch": 0.03419846072659736, "grad_norm": 1.2596560716629028, "learning_rate": 1e-05, "loss": 0.2601, "step": 9980 }, { "epoch": 0.03423272772131339, "grad_norm": 1.0936402082443237, "learning_rate": 1e-05, "loss": 0.2451, "step": 9990 }, { "epoch": 0.03426699471602941, "grad_norm": 1.1660488843917847, "learning_rate": 1e-05, "loss": 0.2523, "step": 10000 }, { "epoch": 0.03426699471602941, "eval_cer": 12.656553183983066, "eval_loss": 0.24734708666801453, "eval_normalized_cer": 8.912869704236611, "eval_runtime": 226.9924, "eval_samples_per_second": 2.256, "eval_steps_per_second": 0.035, "step": 10000 }, { "epoch": 0.03430126171074544, "grad_norm": 1.0807442665100098, "learning_rate": 1e-05, "loss": 0.2583, "step": 10010 }, { "epoch": 0.03433552870546147, "grad_norm": 1.17780339717865, "learning_rate": 1e-05, "loss": 0.2685, "step": 10020 }, { "epoch": 0.0343697957001775, "grad_norm": 0.98011314868927, "learning_rate": 1e-05, "loss": 0.2445, "step": 10030 }, { "epoch": 0.03440406269489353, "grad_norm": 1.3025845289230347, "learning_rate": 1e-05, "loss": 0.2526, "step": 10040 }, { "epoch": 0.03443832968960956, "grad_norm": 1.4656189680099487, "learning_rate": 1e-05, "loss": 0.2618, "step": 10050 }, { "epoch": 0.03447259668432559, "grad_norm": 1.2372117042541504, "learning_rate": 1e-05, "loss": 0.2545, "step": 10060 }, { "epoch": 0.03450686367904162, "grad_norm": 1.1028844118118286, "learning_rate": 1e-05, "loss": 0.2511, "step": 10070 }, { "epoch": 0.03454113067375765, "grad_norm": 1.2402809858322144, 
"learning_rate": 1e-05, "loss": 0.2507, "step": 10080 }, { "epoch": 0.03457539766847368, "grad_norm": 1.1127125024795532, "learning_rate": 1e-05, "loss": 0.2634, "step": 10090 }, { "epoch": 0.03460966466318971, "grad_norm": 1.1486737728118896, "learning_rate": 1e-05, "loss": 0.2495, "step": 10100 }, { "epoch": 0.03464393165790574, "grad_norm": 1.0792872905731201, "learning_rate": 1e-05, "loss": 0.2347, "step": 10110 }, { "epoch": 0.03467819865262177, "grad_norm": 1.0346540212631226, "learning_rate": 1e-05, "loss": 0.2468, "step": 10120 }, { "epoch": 0.0347124656473378, "grad_norm": 1.165614128112793, "learning_rate": 1e-05, "loss": 0.2419, "step": 10130 }, { "epoch": 0.03474673264205383, "grad_norm": 1.1619865894317627, "learning_rate": 1e-05, "loss": 0.2428, "step": 10140 }, { "epoch": 0.03478099963676986, "grad_norm": 1.2419089078903198, "learning_rate": 1e-05, "loss": 0.2652, "step": 10150 }, { "epoch": 0.03481526663148589, "grad_norm": 1.0978246927261353, "learning_rate": 1e-05, "loss": 0.2396, "step": 10160 }, { "epoch": 0.03484953362620191, "grad_norm": 0.9916633367538452, "learning_rate": 1e-05, "loss": 0.2603, "step": 10170 }, { "epoch": 0.03488380062091794, "grad_norm": 1.026318907737732, "learning_rate": 1e-05, "loss": 0.2564, "step": 10180 }, { "epoch": 0.03491806761563397, "grad_norm": 1.1151725053787231, "learning_rate": 1e-05, "loss": 0.2587, "step": 10190 }, { "epoch": 0.03495233461035, "grad_norm": 1.1420174837112427, "learning_rate": 1e-05, "loss": 0.2695, "step": 10200 }, { "epoch": 0.03498660160506603, "grad_norm": 1.0311987400054932, "learning_rate": 1e-05, "loss": 0.2574, "step": 10210 }, { "epoch": 0.03502086859978206, "grad_norm": 0.9759404063224792, "learning_rate": 1e-05, "loss": 0.2398, "step": 10220 }, { "epoch": 0.03505513559449809, "grad_norm": 1.5086033344268799, "learning_rate": 1e-05, "loss": 0.2589, "step": 10230 }, { "epoch": 0.03508940258921412, "grad_norm": 1.167893409729004, "learning_rate": 1e-05, "loss": 0.2536, "step": 10240 
}, { "epoch": 0.03512366958393015, "grad_norm": 1.0426411628723145, "learning_rate": 1e-05, "loss": 0.237, "step": 10250 }, { "epoch": 0.03515793657864618, "grad_norm": 1.4766713380813599, "learning_rate": 1e-05, "loss": 0.2363, "step": 10260 }, { "epoch": 0.03519220357336221, "grad_norm": 1.019641399383545, "learning_rate": 1e-05, "loss": 0.2281, "step": 10270 }, { "epoch": 0.03522647056807824, "grad_norm": 1.1627217531204224, "learning_rate": 1e-05, "loss": 0.2524, "step": 10280 }, { "epoch": 0.03526073756279427, "grad_norm": 1.165414571762085, "learning_rate": 1e-05, "loss": 0.2437, "step": 10290 }, { "epoch": 0.0352950045575103, "grad_norm": 1.1816645860671997, "learning_rate": 1e-05, "loss": 0.2471, "step": 10300 }, { "epoch": 0.03532927155222633, "grad_norm": 1.0116764307022095, "learning_rate": 1e-05, "loss": 0.226, "step": 10310 }, { "epoch": 0.03536353854694236, "grad_norm": 1.2257869243621826, "learning_rate": 1e-05, "loss": 0.2441, "step": 10320 }, { "epoch": 0.03539780554165838, "grad_norm": 1.1971989870071411, "learning_rate": 1e-05, "loss": 0.2268, "step": 10330 }, { "epoch": 0.03543207253637441, "grad_norm": 1.057354211807251, "learning_rate": 1e-05, "loss": 0.2142, "step": 10340 }, { "epoch": 0.03546633953109044, "grad_norm": 1.2593644857406616, "learning_rate": 1e-05, "loss": 0.2149, "step": 10350 }, { "epoch": 0.03550060652580647, "grad_norm": 1.0903648138046265, "learning_rate": 1e-05, "loss": 0.2494, "step": 10360 }, { "epoch": 0.0355348735205225, "grad_norm": 1.2079240083694458, "learning_rate": 1e-05, "loss": 0.2273, "step": 10370 }, { "epoch": 0.03556914051523853, "grad_norm": 1.206696629524231, "learning_rate": 1e-05, "loss": 0.2142, "step": 10380 }, { "epoch": 0.03560340750995456, "grad_norm": 1.2176302671432495, "learning_rate": 1e-05, "loss": 0.2277, "step": 10390 }, { "epoch": 0.03563767450467059, "grad_norm": 0.9780252575874329, "learning_rate": 1e-05, "loss": 0.2089, "step": 10400 }, { "epoch": 0.03567194149938662, "grad_norm": 
1.1160544157028198, "learning_rate": 1e-05, "loss": 0.2268, "step": 10410 }, { "epoch": 0.03570620849410265, "grad_norm": 1.1231842041015625, "learning_rate": 1e-05, "loss": 0.2385, "step": 10420 }, { "epoch": 0.03574047548881868, "grad_norm": 1.142675518989563, "learning_rate": 1e-05, "loss": 0.2386, "step": 10430 }, { "epoch": 0.03577474248353471, "grad_norm": 1.1563806533813477, "learning_rate": 1e-05, "loss": 0.2217, "step": 10440 }, { "epoch": 0.03580900947825074, "grad_norm": 1.0655251741409302, "learning_rate": 1e-05, "loss": 0.2207, "step": 10450 }, { "epoch": 0.03584327647296677, "grad_norm": 1.1469309329986572, "learning_rate": 1e-05, "loss": 0.2211, "step": 10460 }, { "epoch": 0.0358775434676828, "grad_norm": 1.182896375656128, "learning_rate": 1e-05, "loss": 0.2316, "step": 10470 }, { "epoch": 0.03591181046239883, "grad_norm": 1.018953800201416, "learning_rate": 1e-05, "loss": 0.2308, "step": 10480 }, { "epoch": 0.03594607745711485, "grad_norm": 1.1578072309494019, "learning_rate": 1e-05, "loss": 0.2272, "step": 10490 }, { "epoch": 0.03598034445183088, "grad_norm": 1.1483505964279175, "learning_rate": 1e-05, "loss": 0.2469, "step": 10500 }, { "epoch": 0.03598034445183088, "eval_cer": 13.026988886928912, "eval_loss": 0.24956555664539337, "eval_normalized_cer": 9.442446043165468, "eval_runtime": 227.6026, "eval_samples_per_second": 2.25, "eval_steps_per_second": 0.035, "step": 10500 }, { "epoch": 0.03601461144654691, "grad_norm": 1.1269420385360718, "learning_rate": 1e-05, "loss": 0.2482, "step": 10510 }, { "epoch": 0.03604887844126294, "grad_norm": 1.0399614572525024, "learning_rate": 1e-05, "loss": 0.2473, "step": 10520 }, { "epoch": 0.03608314543597897, "grad_norm": 1.11722731590271, "learning_rate": 1e-05, "loss": 0.2566, "step": 10530 }, { "epoch": 0.036117412430695, "grad_norm": 1.0773251056671143, "learning_rate": 1e-05, "loss": 0.2479, "step": 10540 }, { "epoch": 0.03615167942541103, "grad_norm": 1.0123059749603271, "learning_rate": 1e-05, "loss": 
0.2527, "step": 10550 }, { "epoch": 0.03618594642012706, "grad_norm": 1.14670991897583, "learning_rate": 1e-05, "loss": 0.2474, "step": 10560 }, { "epoch": 0.03622021341484309, "grad_norm": 1.021543025970459, "learning_rate": 1e-05, "loss": 0.2314, "step": 10570 }, { "epoch": 0.03625448040955912, "grad_norm": 1.1485329866409302, "learning_rate": 1e-05, "loss": 0.2603, "step": 10580 }, { "epoch": 0.03628874740427515, "grad_norm": 1.239241600036621, "learning_rate": 1e-05, "loss": 0.2479, "step": 10590 }, { "epoch": 0.03632301439899118, "grad_norm": 1.2692322731018066, "learning_rate": 1e-05, "loss": 0.2411, "step": 10600 }, { "epoch": 0.03635728139370721, "grad_norm": 1.1440175771713257, "learning_rate": 1e-05, "loss": 0.256, "step": 10610 }, { "epoch": 0.03639154838842324, "grad_norm": 1.0153664350509644, "learning_rate": 1e-05, "loss": 0.2468, "step": 10620 }, { "epoch": 0.03642581538313927, "grad_norm": 1.0046017169952393, "learning_rate": 1e-05, "loss": 0.2463, "step": 10630 }, { "epoch": 0.0364600823778553, "grad_norm": 1.023366928100586, "learning_rate": 1e-05, "loss": 0.2499, "step": 10640 }, { "epoch": 0.03649434937257132, "grad_norm": 1.1663336753845215, "learning_rate": 1e-05, "loss": 0.2476, "step": 10650 }, { "epoch": 0.03652861636728735, "grad_norm": 1.2302120923995972, "learning_rate": 1e-05, "loss": 0.2469, "step": 10660 }, { "epoch": 0.03656288336200338, "grad_norm": 1.0369274616241455, "learning_rate": 1e-05, "loss": 0.2468, "step": 10670 }, { "epoch": 0.03659715035671941, "grad_norm": 1.3913209438323975, "learning_rate": 1e-05, "loss": 0.2499, "step": 10680 }, { "epoch": 0.03663141735143544, "grad_norm": 1.0193636417388916, "learning_rate": 1e-05, "loss": 0.25, "step": 10690 }, { "epoch": 0.03666568434615147, "grad_norm": 1.2572286128997803, "learning_rate": 1e-05, "loss": 0.2441, "step": 10700 }, { "epoch": 0.0366999513408675, "grad_norm": 1.1679438352584839, "learning_rate": 1e-05, "loss": 0.2462, "step": 10710 }, { "epoch": 0.03673421833558353, 
"grad_norm": 1.1959030628204346, "learning_rate": 1e-05, "loss": 0.2641, "step": 10720 }, { "epoch": 0.03676848533029956, "grad_norm": 1.1327241659164429, "learning_rate": 1e-05, "loss": 0.2538, "step": 10730 }, { "epoch": 0.03680275232501559, "grad_norm": 1.0999104976654053, "learning_rate": 1e-05, "loss": 0.2492, "step": 10740 }, { "epoch": 0.03683701931973162, "grad_norm": 1.1578527688980103, "learning_rate": 1e-05, "loss": 0.2506, "step": 10750 }, { "epoch": 0.03687128631444765, "grad_norm": 1.243034839630127, "learning_rate": 1e-05, "loss": 0.2404, "step": 10760 }, { "epoch": 0.03690555330916368, "grad_norm": 1.1157968044281006, "learning_rate": 1e-05, "loss": 0.2597, "step": 10770 }, { "epoch": 0.03693982030387971, "grad_norm": 1.0121145248413086, "learning_rate": 1e-05, "loss": 0.2457, "step": 10780 }, { "epoch": 0.03697408729859574, "grad_norm": 1.1467009782791138, "learning_rate": 1e-05, "loss": 0.2714, "step": 10790 }, { "epoch": 0.03700835429331177, "grad_norm": 1.1445188522338867, "learning_rate": 1e-05, "loss": 0.2652, "step": 10800 }, { "epoch": 0.0370426212880278, "grad_norm": 0.9861304759979248, "learning_rate": 1e-05, "loss": 0.2408, "step": 10810 }, { "epoch": 0.037076888282743824, "grad_norm": 1.1524399518966675, "learning_rate": 1e-05, "loss": 0.2645, "step": 10820 }, { "epoch": 0.037111155277459854, "grad_norm": 1.4263722896575928, "learning_rate": 1e-05, "loss": 0.2364, "step": 10830 }, { "epoch": 0.03714542227217588, "grad_norm": 1.057739019393921, "learning_rate": 1e-05, "loss": 0.2526, "step": 10840 }, { "epoch": 0.03717968926689191, "grad_norm": 1.232234239578247, "learning_rate": 1e-05, "loss": 0.2605, "step": 10850 }, { "epoch": 0.03721395626160794, "grad_norm": 1.2277271747589111, "learning_rate": 1e-05, "loss": 0.2646, "step": 10860 }, { "epoch": 0.03724822325632397, "grad_norm": 1.1447609663009644, "learning_rate": 1e-05, "loss": 0.2508, "step": 10870 }, { "epoch": 0.03728249025104, "grad_norm": 1.5821764469146729, "learning_rate": 
1e-05, "loss": 0.259, "step": 10880 }, { "epoch": 0.03731675724575603, "grad_norm": 1.0948309898376465, "learning_rate": 1e-05, "loss": 0.2502, "step": 10890 }, { "epoch": 0.03735102424047206, "grad_norm": 1.0639638900756836, "learning_rate": 1e-05, "loss": 0.2796, "step": 10900 }, { "epoch": 0.03738529123518809, "grad_norm": 1.0613322257995605, "learning_rate": 1e-05, "loss": 0.2606, "step": 10910 }, { "epoch": 0.03741955822990412, "grad_norm": 1.1825395822525024, "learning_rate": 1e-05, "loss": 0.2382, "step": 10920 }, { "epoch": 0.03745382522462015, "grad_norm": 1.1198760271072388, "learning_rate": 1e-05, "loss": 0.2601, "step": 10930 }, { "epoch": 0.03748809221933618, "grad_norm": 0.9922842383384705, "learning_rate": 1e-05, "loss": 0.2567, "step": 10940 }, { "epoch": 0.03752235921405221, "grad_norm": 1.0075231790542603, "learning_rate": 1e-05, "loss": 0.2373, "step": 10950 }, { "epoch": 0.03755662620876824, "grad_norm": 1.2739824056625366, "learning_rate": 1e-05, "loss": 0.2571, "step": 10960 }, { "epoch": 0.03759089320348427, "grad_norm": 1.049249529838562, "learning_rate": 1e-05, "loss": 0.2505, "step": 10970 }, { "epoch": 0.037625160198200294, "grad_norm": 1.0130621194839478, "learning_rate": 1e-05, "loss": 0.246, "step": 10980 }, { "epoch": 0.037659427192916324, "grad_norm": 1.143740177154541, "learning_rate": 1e-05, "loss": 0.2584, "step": 10990 }, { "epoch": 0.037693694187632354, "grad_norm": 1.1482734680175781, "learning_rate": 1e-05, "loss": 0.245, "step": 11000 }, { "epoch": 0.037693694187632354, "eval_cer": 13.150467454577527, "eval_loss": 0.24854739010334015, "eval_normalized_cer": 9.492406075139888, "eval_runtime": 227.9, "eval_samples_per_second": 2.247, "eval_steps_per_second": 0.035, "step": 11000 }, { "epoch": 0.037727961182348384, "grad_norm": 1.175059199333191, "learning_rate": 1e-05, "loss": 0.235, "step": 11010 }, { "epoch": 0.037762228177064414, "grad_norm": 1.06391441822052, "learning_rate": 1e-05, "loss": 0.2548, "step": 11020 }, { 
"epoch": 0.03779649517178044, "grad_norm": 1.1072652339935303, "learning_rate": 1e-05, "loss": 0.2563, "step": 11030 }, { "epoch": 0.03783076216649647, "grad_norm": 1.0364381074905396, "learning_rate": 1e-05, "loss": 0.2353, "step": 11040 }, { "epoch": 0.0378650291612125, "grad_norm": 1.120888113975525, "learning_rate": 1e-05, "loss": 0.2564, "step": 11050 }, { "epoch": 0.03789929615592853, "grad_norm": 1.1769522428512573, "learning_rate": 1e-05, "loss": 0.2485, "step": 11060 }, { "epoch": 0.03793356315064456, "grad_norm": 1.103209137916565, "learning_rate": 1e-05, "loss": 0.2579, "step": 11070 }, { "epoch": 0.03796783014536059, "grad_norm": 1.191256046295166, "learning_rate": 1e-05, "loss": 0.258, "step": 11080 }, { "epoch": 0.03800209714007662, "grad_norm": 1.103756070137024, "learning_rate": 1e-05, "loss": 0.2542, "step": 11090 }, { "epoch": 0.03803636413479265, "grad_norm": 1.2097468376159668, "learning_rate": 1e-05, "loss": 0.2581, "step": 11100 }, { "epoch": 0.03807063112950868, "grad_norm": 1.0787367820739746, "learning_rate": 1e-05, "loss": 0.2428, "step": 11110 }, { "epoch": 0.03810489812422471, "grad_norm": 1.0501831769943237, "learning_rate": 1e-05, "loss": 0.2585, "step": 11120 }, { "epoch": 0.03813916511894074, "grad_norm": 1.1210603713989258, "learning_rate": 1e-05, "loss": 0.2435, "step": 11130 }, { "epoch": 0.038173432113656765, "grad_norm": 1.0846295356750488, "learning_rate": 1e-05, "loss": 0.2525, "step": 11140 }, { "epoch": 0.038207699108372795, "grad_norm": 1.176488995552063, "learning_rate": 1e-05, "loss": 0.2499, "step": 11150 }, { "epoch": 0.038241966103088824, "grad_norm": 1.034157395362854, "learning_rate": 1e-05, "loss": 0.2701, "step": 11160 }, { "epoch": 0.038276233097804854, "grad_norm": 1.0763425827026367, "learning_rate": 1e-05, "loss": 0.2327, "step": 11170 }, { "epoch": 0.038310500092520884, "grad_norm": 1.010324478149414, "learning_rate": 1e-05, "loss": 0.2506, "step": 11180 }, { "epoch": 0.038344767087236914, "grad_norm": 
1.0999796390533447, "learning_rate": 1e-05, "loss": 0.2482, "step": 11190 }, { "epoch": 0.038379034081952944, "grad_norm": 1.2314294576644897, "learning_rate": 1e-05, "loss": 0.2413, "step": 11200 }, { "epoch": 0.038413301076668974, "grad_norm": 1.109063982963562, "learning_rate": 1e-05, "loss": 0.2433, "step": 11210 }, { "epoch": 0.038447568071385003, "grad_norm": 1.0760470628738403, "learning_rate": 1e-05, "loss": 0.2572, "step": 11220 }, { "epoch": 0.03848183506610103, "grad_norm": 1.2139952182769775, "learning_rate": 1e-05, "loss": 0.2542, "step": 11230 }, { "epoch": 0.03851610206081706, "grad_norm": 1.0825960636138916, "learning_rate": 1e-05, "loss": 0.2446, "step": 11240 }, { "epoch": 0.03855036905553309, "grad_norm": 1.1650110483169556, "learning_rate": 1e-05, "loss": 0.2486, "step": 11250 }, { "epoch": 0.03858463605024912, "grad_norm": 1.074236512184143, "learning_rate": 1e-05, "loss": 0.2623, "step": 11260 }, { "epoch": 0.03861890304496515, "grad_norm": 1.0651731491088867, "learning_rate": 1e-05, "loss": 0.2409, "step": 11270 }, { "epoch": 0.03865317003968118, "grad_norm": 1.0689282417297363, "learning_rate": 1e-05, "loss": 0.2329, "step": 11280 }, { "epoch": 0.03868743703439721, "grad_norm": 1.1548572778701782, "learning_rate": 1e-05, "loss": 0.2572, "step": 11290 }, { "epoch": 0.038721704029113235, "grad_norm": 1.1205992698669434, "learning_rate": 1e-05, "loss": 0.2424, "step": 11300 }, { "epoch": 0.038755971023829265, "grad_norm": 1.0806999206542969, "learning_rate": 1e-05, "loss": 0.2336, "step": 11310 }, { "epoch": 0.038790238018545295, "grad_norm": 1.0330880880355835, "learning_rate": 1e-05, "loss": 0.2371, "step": 11320 }, { "epoch": 0.038824505013261325, "grad_norm": 1.2580816745758057, "learning_rate": 1e-05, "loss": 0.2297, "step": 11330 }, { "epoch": 0.038858772007977355, "grad_norm": 1.3312656879425049, "learning_rate": 1e-05, "loss": 0.2478, "step": 11340 }, { "epoch": 0.038893039002693384, "grad_norm": 1.0043836832046509, "learning_rate": 
1e-05, "loss": 0.2388, "step": 11350 }, { "epoch": 0.038927305997409414, "grad_norm": 1.0665231943130493, "learning_rate": 1e-05, "loss": 0.234, "step": 11360 }, { "epoch": 0.038961572992125444, "grad_norm": 1.1114041805267334, "learning_rate": 1e-05, "loss": 0.2407, "step": 11370 }, { "epoch": 0.038995839986841474, "grad_norm": 1.043134093284607, "learning_rate": 1e-05, "loss": 0.2352, "step": 11380 }, { "epoch": 0.039030106981557504, "grad_norm": 1.1435351371765137, "learning_rate": 1e-05, "loss": 0.2347, "step": 11390 }, { "epoch": 0.039064373976273534, "grad_norm": 1.2625036239624023, "learning_rate": 1e-05, "loss": 0.2515, "step": 11400 }, { "epoch": 0.039098640970989564, "grad_norm": 1.101953148841858, "learning_rate": 1e-05, "loss": 0.2354, "step": 11410 }, { "epoch": 0.03913290796570559, "grad_norm": 1.0906771421432495, "learning_rate": 1e-05, "loss": 0.2145, "step": 11420 }, { "epoch": 0.03916717496042162, "grad_norm": 1.2161511182785034, "learning_rate": 1e-05, "loss": 0.2366, "step": 11430 }, { "epoch": 0.03920144195513765, "grad_norm": 1.0494539737701416, "learning_rate": 1e-05, "loss": 0.2445, "step": 11440 }, { "epoch": 0.03923570894985368, "grad_norm": 1.098476529121399, "learning_rate": 1e-05, "loss": 0.2392, "step": 11450 }, { "epoch": 0.03926997594456971, "grad_norm": 1.1904308795928955, "learning_rate": 1e-05, "loss": 0.2452, "step": 11460 }, { "epoch": 0.039304242939285736, "grad_norm": 1.1967114210128784, "learning_rate": 1e-05, "loss": 0.2489, "step": 11470 }, { "epoch": 0.039338509934001765, "grad_norm": 1.2335082292556763, "learning_rate": 1e-05, "loss": 0.2428, "step": 11480 }, { "epoch": 0.039372776928717795, "grad_norm": 1.0949335098266602, "learning_rate": 1e-05, "loss": 0.2433, "step": 11490 }, { "epoch": 0.039407043923433825, "grad_norm": 0.9655303955078125, "learning_rate": 1e-05, "loss": 0.2298, "step": 11500 }, { "epoch": 0.039407043923433825, "eval_cer": 13.238666431469396, "eval_loss": 0.24733339250087738, "eval_normalized_cer": 
9.532374100719425, "eval_runtime": 228.8524, "eval_samples_per_second": 2.237, "eval_steps_per_second": 0.035, "step": 11500 }, { "epoch": 0.039441310918149855, "grad_norm": 1.303244709968567, "learning_rate": 1e-05, "loss": 0.2312, "step": 11510 }, { "epoch": 0.039475577912865885, "grad_norm": 1.2272698879241943, "learning_rate": 1e-05, "loss": 0.2371, "step": 11520 }, { "epoch": 0.039509844907581915, "grad_norm": 1.2555683851242065, "learning_rate": 1e-05, "loss": 0.248, "step": 11530 }, { "epoch": 0.039544111902297945, "grad_norm": 1.2972025871276855, "learning_rate": 1e-05, "loss": 0.2637, "step": 11540 }, { "epoch": 0.039578378897013974, "grad_norm": 0.9733885526657104, "learning_rate": 1e-05, "loss": 0.2397, "step": 11550 }, { "epoch": 0.039612645891730004, "grad_norm": 1.1637110710144043, "learning_rate": 1e-05, "loss": 0.2468, "step": 11560 }, { "epoch": 0.039646912886446034, "grad_norm": 1.2355756759643555, "learning_rate": 1e-05, "loss": 0.2407, "step": 11570 }, { "epoch": 0.039681179881162064, "grad_norm": 1.1079312562942505, "learning_rate": 1e-05, "loss": 0.2511, "step": 11580 }, { "epoch": 0.039715446875878094, "grad_norm": 1.13614821434021, "learning_rate": 1e-05, "loss": 0.2442, "step": 11590 }, { "epoch": 0.039749713870594124, "grad_norm": 1.2050237655639648, "learning_rate": 1e-05, "loss": 0.2505, "step": 11600 }, { "epoch": 0.03978398086531015, "grad_norm": 1.020393967628479, "learning_rate": 1e-05, "loss": 0.2305, "step": 11610 }, { "epoch": 0.03981824786002618, "grad_norm": 1.088463544845581, "learning_rate": 1e-05, "loss": 0.2542, "step": 11620 }, { "epoch": 0.039852514854742206, "grad_norm": 1.121472716331482, "learning_rate": 1e-05, "loss": 0.2489, "step": 11630 }, { "epoch": 0.039886781849458236, "grad_norm": 1.109485149383545, "learning_rate": 1e-05, "loss": 0.2488, "step": 11640 }, { "epoch": 0.039921048844174266, "grad_norm": 1.0824321508407593, "learning_rate": 1e-05, "loss": 0.2395, "step": 11650 }, { "epoch": 0.039955315838890296, 
"grad_norm": 1.1438390016555786, "learning_rate": 1e-05, "loss": 0.2318, "step": 11660 }, { "epoch": 0.039989582833606326, "grad_norm": 1.005821943283081, "learning_rate": 1e-05, "loss": 0.2482, "step": 11670 }, { "epoch": 0.040023849828322355, "grad_norm": 1.187921166419983, "learning_rate": 1e-05, "loss": 0.2479, "step": 11680 }, { "epoch": 0.040058116823038385, "grad_norm": 1.0112143754959106, "learning_rate": 1e-05, "loss": 0.2555, "step": 11690 }, { "epoch": 0.040092383817754415, "grad_norm": 1.1568365097045898, "learning_rate": 1e-05, "loss": 0.2483, "step": 11700 }, { "epoch": 0.040126650812470445, "grad_norm": 1.1201119422912598, "learning_rate": 1e-05, "loss": 0.256, "step": 11710 }, { "epoch": 0.040160917807186475, "grad_norm": 1.1281081438064575, "learning_rate": 1e-05, "loss": 0.2462, "step": 11720 }, { "epoch": 0.040195184801902505, "grad_norm": 0.9730721116065979, "learning_rate": 1e-05, "loss": 0.2534, "step": 11730 }, { "epoch": 0.040229451796618534, "grad_norm": 1.1069108247756958, "learning_rate": 1e-05, "loss": 0.2497, "step": 11740 }, { "epoch": 0.040263718791334564, "grad_norm": 1.1095935106277466, "learning_rate": 1e-05, "loss": 0.2448, "step": 11750 }, { "epoch": 0.040297985786050594, "grad_norm": 0.9690611958503723, "learning_rate": 1e-05, "loss": 0.2448, "step": 11760 }, { "epoch": 0.040332252780766624, "grad_norm": 1.0263612270355225, "learning_rate": 1e-05, "loss": 0.2555, "step": 11770 }, { "epoch": 0.040366519775482654, "grad_norm": 1.0731168985366821, "learning_rate": 1e-05, "loss": 0.2321, "step": 11780 }, { "epoch": 0.04040078677019868, "grad_norm": 1.1446433067321777, "learning_rate": 1e-05, "loss": 0.239, "step": 11790 }, { "epoch": 0.04043505376491471, "grad_norm": 1.0776352882385254, "learning_rate": 1e-05, "loss": 0.2255, "step": 11800 }, { "epoch": 0.040469320759630736, "grad_norm": 0.9721156358718872, "learning_rate": 1e-05, "loss": 0.2234, "step": 11810 }, { "epoch": 0.040503587754346766, "grad_norm": 0.9534703493118286, 
"learning_rate": 1e-05, "loss": 0.2163, "step": 11820 }, { "epoch": 0.040537854749062796, "grad_norm": 1.0248794555664062, "learning_rate": 1e-05, "loss": 0.2406, "step": 11830 }, { "epoch": 0.040572121743778826, "grad_norm": 1.1740145683288574, "learning_rate": 1e-05, "loss": 0.2394, "step": 11840 }, { "epoch": 0.040606388738494856, "grad_norm": 1.1622172594070435, "learning_rate": 1e-05, "loss": 0.2387, "step": 11850 }, { "epoch": 0.040640655733210886, "grad_norm": 1.0684759616851807, "learning_rate": 1e-05, "loss": 0.2196, "step": 11860 }, { "epoch": 0.040674922727926915, "grad_norm": 1.024851679801941, "learning_rate": 1e-05, "loss": 0.2178, "step": 11870 }, { "epoch": 0.040709189722642945, "grad_norm": 1.2293421030044556, "learning_rate": 1e-05, "loss": 0.2372, "step": 11880 }, { "epoch": 0.040743456717358975, "grad_norm": 1.2226061820983887, "learning_rate": 1e-05, "loss": 0.2466, "step": 11890 }, { "epoch": 0.040777723712075005, "grad_norm": 1.0775419473648071, "learning_rate": 1e-05, "loss": 0.221, "step": 11900 }, { "epoch": 0.040811990706791035, "grad_norm": 1.0354384183883667, "learning_rate": 1e-05, "loss": 0.2338, "step": 11910 }, { "epoch": 0.040846257701507065, "grad_norm": 0.9725399613380432, "learning_rate": 1e-05, "loss": 0.2312, "step": 11920 }, { "epoch": 0.040880524696223094, "grad_norm": 0.9638645648956299, "learning_rate": 1e-05, "loss": 0.2238, "step": 11930 }, { "epoch": 0.040914791690939124, "grad_norm": 1.1646082401275635, "learning_rate": 1e-05, "loss": 0.2339, "step": 11940 }, { "epoch": 0.04094905868565515, "grad_norm": 1.049614429473877, "learning_rate": 1e-05, "loss": 0.229, "step": 11950 }, { "epoch": 0.04098332568037118, "grad_norm": 1.1187442541122437, "learning_rate": 1e-05, "loss": 0.2437, "step": 11960 }, { "epoch": 0.04101759267508721, "grad_norm": 1.1472731828689575, "learning_rate": 1e-05, "loss": 0.2416, "step": 11970 }, { "epoch": 0.04105185966980324, "grad_norm": 1.0694329738616943, "learning_rate": 1e-05, "loss": 0.2308, 
"step": 11980 }, { "epoch": 0.04108612666451927, "grad_norm": 0.9863060116767883, "learning_rate": 1e-05, "loss": 0.2258, "step": 11990 }, { "epoch": 0.041120393659235296, "grad_norm": 1.1150392293930054, "learning_rate": 1e-05, "loss": 0.2342, "step": 12000 }, { "epoch": 0.041120393659235296, "eval_cer": 12.797671547010056, "eval_loss": 0.2504700720310211, "eval_normalized_cer": 9.152677857713828, "eval_runtime": 227.2028, "eval_samples_per_second": 2.253, "eval_steps_per_second": 0.035, "step": 12000 }, { "epoch": 0.041154660653951326, "grad_norm": 1.0455725193023682, "learning_rate": 1e-05, "loss": 0.2263, "step": 12010 }, { "epoch": 0.041188927648667356, "grad_norm": 1.2993946075439453, "learning_rate": 1e-05, "loss": 0.2273, "step": 12020 }, { "epoch": 0.041223194643383386, "grad_norm": 1.159058690071106, "learning_rate": 1e-05, "loss": 0.2259, "step": 12030 }, { "epoch": 0.041257461638099416, "grad_norm": 1.1908732652664185, "learning_rate": 1e-05, "loss": 0.2489, "step": 12040 }, { "epoch": 0.041291728632815446, "grad_norm": 1.3122719526290894, "learning_rate": 1e-05, "loss": 0.2544, "step": 12050 }, { "epoch": 0.041325995627531475, "grad_norm": 1.1521992683410645, "learning_rate": 1e-05, "loss": 0.2496, "step": 12060 }, { "epoch": 0.041360262622247505, "grad_norm": 1.1007260084152222, "learning_rate": 1e-05, "loss": 0.2453, "step": 12070 }, { "epoch": 0.041394529616963535, "grad_norm": 1.1719632148742676, "learning_rate": 1e-05, "loss": 0.2419, "step": 12080 }, { "epoch": 0.041428796611679565, "grad_norm": 1.088536024093628, "learning_rate": 1e-05, "loss": 0.2408, "step": 12090 }, { "epoch": 0.041463063606395595, "grad_norm": 1.1596314907073975, "learning_rate": 1e-05, "loss": 0.2463, "step": 12100 }, { "epoch": 0.041497330601111625, "grad_norm": 1.1144077777862549, "learning_rate": 1e-05, "loss": 0.2411, "step": 12110 }, { "epoch": 0.04153159759582765, "grad_norm": 1.2945681810379028, "learning_rate": 1e-05, "loss": 0.2381, "step": 12120 }, { "epoch": 
0.04156586459054368, "grad_norm": 1.0997275114059448, "learning_rate": 1e-05, "loss": 0.2704, "step": 12130 }, { "epoch": 0.04160013158525971, "grad_norm": 1.0945874452590942, "learning_rate": 1e-05, "loss": 0.2249, "step": 12140 }, { "epoch": 0.04163439857997574, "grad_norm": 1.2051665782928467, "learning_rate": 1e-05, "loss": 0.2442, "step": 12150 }, { "epoch": 0.04166866557469177, "grad_norm": 0.9178060293197632, "learning_rate": 1e-05, "loss": 0.2519, "step": 12160 }, { "epoch": 0.0417029325694078, "grad_norm": 1.1225532293319702, "learning_rate": 1e-05, "loss": 0.2425, "step": 12170 }, { "epoch": 0.04173719956412383, "grad_norm": 1.0947092771530151, "learning_rate": 1e-05, "loss": 0.2305, "step": 12180 }, { "epoch": 0.041771466558839856, "grad_norm": 1.0374338626861572, "learning_rate": 1e-05, "loss": 0.2445, "step": 12190 }, { "epoch": 0.041805733553555886, "grad_norm": 1.1471805572509766, "learning_rate": 1e-05, "loss": 0.2447, "step": 12200 }, { "epoch": 0.041840000548271916, "grad_norm": 1.1241774559020996, "learning_rate": 1e-05, "loss": 0.2235, "step": 12210 }, { "epoch": 0.041874267542987946, "grad_norm": 1.243691086769104, "learning_rate": 1e-05, "loss": 0.2409, "step": 12220 }, { "epoch": 0.041908534537703976, "grad_norm": 1.1077616214752197, "learning_rate": 1e-05, "loss": 0.2533, "step": 12230 }, { "epoch": 0.041942801532420006, "grad_norm": 1.0907562971115112, "learning_rate": 1e-05, "loss": 0.2443, "step": 12240 }, { "epoch": 0.041977068527136036, "grad_norm": 1.3562718629837036, "learning_rate": 1e-05, "loss": 0.2523, "step": 12250 }, { "epoch": 0.042011335521852065, "grad_norm": 1.0229142904281616, "learning_rate": 1e-05, "loss": 0.2328, "step": 12260 }, { "epoch": 0.042045602516568095, "grad_norm": 1.0843278169631958, "learning_rate": 1e-05, "loss": 0.2328, "step": 12270 }, { "epoch": 0.04207986951128412, "grad_norm": 1.2029650211334229, "learning_rate": 1e-05, "loss": 0.2458, "step": 12280 }, { "epoch": 0.04211413650600015, "grad_norm": 
0.9493764638900757, "learning_rate": 1e-05, "loss": 0.2232, "step": 12290 }, { "epoch": 0.04214840350071618, "grad_norm": 1.2031728029251099, "learning_rate": 1e-05, "loss": 0.2473, "step": 12300 }, { "epoch": 0.04218267049543221, "grad_norm": 1.1091227531433105, "learning_rate": 1e-05, "loss": 0.2429, "step": 12310 }, { "epoch": 0.04221693749014824, "grad_norm": 1.1729086637496948, "learning_rate": 1e-05, "loss": 0.2594, "step": 12320 }, { "epoch": 0.04225120448486427, "grad_norm": 1.0592730045318604, "learning_rate": 1e-05, "loss": 0.2506, "step": 12330 }, { "epoch": 0.0422854714795803, "grad_norm": 1.2366282939910889, "learning_rate": 1e-05, "loss": 0.2457, "step": 12340 }, { "epoch": 0.04231973847429633, "grad_norm": 1.12427818775177, "learning_rate": 1e-05, "loss": 0.246, "step": 12350 }, { "epoch": 0.04235400546901236, "grad_norm": 1.1663504838943481, "learning_rate": 1e-05, "loss": 0.2491, "step": 12360 }, { "epoch": 0.04238827246372839, "grad_norm": 1.2383378744125366, "learning_rate": 1e-05, "loss": 0.2492, "step": 12370 }, { "epoch": 0.04242253945844442, "grad_norm": 1.184813380241394, "learning_rate": 1e-05, "loss": 0.2498, "step": 12380 }, { "epoch": 0.042456806453160446, "grad_norm": 1.035650610923767, "learning_rate": 1e-05, "loss": 0.2317, "step": 12390 }, { "epoch": 0.042491073447876476, "grad_norm": 1.0495967864990234, "learning_rate": 1e-05, "loss": 0.2467, "step": 12400 }, { "epoch": 0.042525340442592506, "grad_norm": 1.0791754722595215, "learning_rate": 1e-05, "loss": 0.245, "step": 12410 }, { "epoch": 0.042559607437308536, "grad_norm": 1.1513383388519287, "learning_rate": 1e-05, "loss": 0.2599, "step": 12420 }, { "epoch": 0.042593874432024566, "grad_norm": 1.1093658208847046, "learning_rate": 1e-05, "loss": 0.2584, "step": 12430 }, { "epoch": 0.04262814142674059, "grad_norm": 1.0504255294799805, "learning_rate": 1e-05, "loss": 0.2355, "step": 12440 }, { "epoch": 0.04266240842145662, "grad_norm": 1.121837854385376, "learning_rate": 1e-05, 
"loss": 0.2309, "step": 12450 }, { "epoch": 0.04269667541617265, "grad_norm": 1.2266592979431152, "learning_rate": 1e-05, "loss": 0.2384, "step": 12460 }, { "epoch": 0.04273094241088868, "grad_norm": 1.0912328958511353, "learning_rate": 1e-05, "loss": 0.2392, "step": 12470 }, { "epoch": 0.04276520940560471, "grad_norm": 1.1504424810409546, "learning_rate": 1e-05, "loss": 0.2716, "step": 12480 }, { "epoch": 0.04279947640032074, "grad_norm": 1.011088490486145, "learning_rate": 1e-05, "loss": 0.2501, "step": 12490 }, { "epoch": 0.04283374339503677, "grad_norm": 1.2423217296600342, "learning_rate": 1e-05, "loss": 0.2562, "step": 12500 }, { "epoch": 0.04283374339503677, "eval_cer": 12.92115011465867, "eval_loss": 0.24911069869995117, "eval_normalized_cer": 9.452438049560353, "eval_runtime": 228.1651, "eval_samples_per_second": 2.244, "eval_steps_per_second": 0.035, "step": 12500 }, { "epoch": 0.0428680103897528, "grad_norm": 1.4834926128387451, "learning_rate": 1e-05, "loss": 0.2354, "step": 12510 }, { "epoch": 0.04290227738446883, "grad_norm": 1.26629638671875, "learning_rate": 1e-05, "loss": 0.2569, "step": 12520 }, { "epoch": 0.04293654437918486, "grad_norm": 1.204516053199768, "learning_rate": 1e-05, "loss": 0.2502, "step": 12530 }, { "epoch": 0.04297081137390089, "grad_norm": 1.0527433156967163, "learning_rate": 1e-05, "loss": 0.2503, "step": 12540 }, { "epoch": 0.04300507836861692, "grad_norm": 1.0310479402542114, "learning_rate": 1e-05, "loss": 0.2289, "step": 12550 }, { "epoch": 0.04303934536333295, "grad_norm": 1.2252111434936523, "learning_rate": 1e-05, "loss": 0.2485, "step": 12560 }, { "epoch": 0.04307361235804898, "grad_norm": 1.0729095935821533, "learning_rate": 1e-05, "loss": 0.2302, "step": 12570 }, { "epoch": 0.043107879352765006, "grad_norm": 1.000106930732727, "learning_rate": 1e-05, "loss": 0.2192, "step": 12580 }, { "epoch": 0.043142146347481036, "grad_norm": 1.0674782991409302, "learning_rate": 1e-05, "loss": 0.2334, "step": 12590 }, { "epoch": 
0.04317641334219706, "grad_norm": 1.1148403882980347, "learning_rate": 1e-05, "loss": 0.2402, "step": 12600 }, { "epoch": 0.04321068033691309, "grad_norm": 1.0144375562667847, "learning_rate": 1e-05, "loss": 0.2492, "step": 12610 }, { "epoch": 0.04324494733162912, "grad_norm": 1.1123058795928955, "learning_rate": 1e-05, "loss": 0.2334, "step": 12620 }, { "epoch": 0.04327921432634515, "grad_norm": 1.1008777618408203, "learning_rate": 1e-05, "loss": 0.2319, "step": 12630 }, { "epoch": 0.04331348132106118, "grad_norm": 1.1487098932266235, "learning_rate": 1e-05, "loss": 0.2451, "step": 12640 }, { "epoch": 0.04334774831577721, "grad_norm": 1.1339664459228516, "learning_rate": 1e-05, "loss": 0.2267, "step": 12650 }, { "epoch": 0.04338201531049324, "grad_norm": 1.198195219039917, "learning_rate": 1e-05, "loss": 0.2472, "step": 12660 }, { "epoch": 0.04341628230520927, "grad_norm": 0.9989431500434875, "learning_rate": 1e-05, "loss": 0.2381, "step": 12670 }, { "epoch": 0.0434505492999253, "grad_norm": 1.4252516031265259, "learning_rate": 1e-05, "loss": 0.2513, "step": 12680 }, { "epoch": 0.04348481629464133, "grad_norm": 1.1313762664794922, "learning_rate": 1e-05, "loss": 0.2591, "step": 12690 }, { "epoch": 0.04351908328935736, "grad_norm": 1.0512256622314453, "learning_rate": 1e-05, "loss": 0.238, "step": 12700 }, { "epoch": 0.04355335028407339, "grad_norm": 1.158078670501709, "learning_rate": 1e-05, "loss": 0.2446, "step": 12710 }, { "epoch": 0.04358761727878942, "grad_norm": 1.0620396137237549, "learning_rate": 1e-05, "loss": 0.2403, "step": 12720 }, { "epoch": 0.04362188427350545, "grad_norm": 1.0640372037887573, "learning_rate": 1e-05, "loss": 0.2378, "step": 12730 }, { "epoch": 0.04365615126822148, "grad_norm": 1.113105058670044, "learning_rate": 1e-05, "loss": 0.236, "step": 12740 }, { "epoch": 0.04369041826293751, "grad_norm": 1.0416456460952759, "learning_rate": 1e-05, "loss": 0.2424, "step": 12750 }, { "epoch": 0.04372468525765354, "grad_norm": 1.2312722206115723, 
"learning_rate": 1e-05, "loss": 0.2397, "step": 12760 }, { "epoch": 0.04375895225236956, "grad_norm": 1.228950023651123, "learning_rate": 1e-05, "loss": 0.2462, "step": 12770 }, { "epoch": 0.04379321924708559, "grad_norm": 1.0736054182052612, "learning_rate": 1e-05, "loss": 0.2299, "step": 12780 }, { "epoch": 0.04382748624180162, "grad_norm": 1.4141355752944946, "learning_rate": 1e-05, "loss": 0.2377, "step": 12790 }, { "epoch": 0.04386175323651765, "grad_norm": 1.2224112749099731, "learning_rate": 1e-05, "loss": 0.2334, "step": 12800 }, { "epoch": 0.04389602023123368, "grad_norm": 1.3090282678604126, "learning_rate": 1e-05, "loss": 0.2548, "step": 12810 }, { "epoch": 0.04393028722594971, "grad_norm": 1.0864715576171875, "learning_rate": 1e-05, "loss": 0.2711, "step": 12820 }, { "epoch": 0.04396455422066574, "grad_norm": 1.0953795909881592, "learning_rate": 1e-05, "loss": 0.2467, "step": 12830 }, { "epoch": 0.04399882121538177, "grad_norm": 0.9681864976882935, "learning_rate": 1e-05, "loss": 0.2217, "step": 12840 }, { "epoch": 0.0440330882100978, "grad_norm": 0.9268914461135864, "learning_rate": 1e-05, "loss": 0.2492, "step": 12850 }, { "epoch": 0.04406735520481383, "grad_norm": 1.0900733470916748, "learning_rate": 1e-05, "loss": 0.2421, "step": 12860 }, { "epoch": 0.04410162219952986, "grad_norm": 1.1551947593688965, "learning_rate": 1e-05, "loss": 0.2549, "step": 12870 }, { "epoch": 0.04413588919424589, "grad_norm": 1.0035364627838135, "learning_rate": 1e-05, "loss": 0.248, "step": 12880 }, { "epoch": 0.04417015618896192, "grad_norm": 1.2478151321411133, "learning_rate": 1e-05, "loss": 0.253, "step": 12890 }, { "epoch": 0.04420442318367795, "grad_norm": 1.1512874364852905, "learning_rate": 1e-05, "loss": 0.2247, "step": 12900 }, { "epoch": 0.04423869017839398, "grad_norm": 1.2012622356414795, "learning_rate": 1e-05, "loss": 0.2635, "step": 12910 }, { "epoch": 0.04427295717311001, "grad_norm": 1.1266357898712158, "learning_rate": 1e-05, "loss": 0.2464, "step": 
12920 }, { "epoch": 0.04430722416782603, "grad_norm": 1.11850905418396, "learning_rate": 1e-05, "loss": 0.2499, "step": 12930 }, { "epoch": 0.04434149116254206, "grad_norm": 1.1375716924667358, "learning_rate": 1e-05, "loss": 0.2474, "step": 12940 }, { "epoch": 0.04437575815725809, "grad_norm": 1.3423253297805786, "learning_rate": 1e-05, "loss": 0.2525, "step": 12950 }, { "epoch": 0.04441002515197412, "grad_norm": 1.0608446598052979, "learning_rate": 1e-05, "loss": 0.2445, "step": 12960 }, { "epoch": 0.04444429214669015, "grad_norm": 1.059899091720581, "learning_rate": 1e-05, "loss": 0.2444, "step": 12970 }, { "epoch": 0.04447855914140618, "grad_norm": 1.117346167564392, "learning_rate": 1e-05, "loss": 0.2529, "step": 12980 }, { "epoch": 0.04451282613612221, "grad_norm": 1.2896045446395874, "learning_rate": 1e-05, "loss": 0.2473, "step": 12990 }, { "epoch": 0.04454709313083824, "grad_norm": 0.9721153974533081, "learning_rate": 1e-05, "loss": 0.2461, "step": 13000 }, { "epoch": 0.04454709313083824, "eval_cer": 12.84177103545599, "eval_loss": 0.2521709203720093, "eval_normalized_cer": 9.152677857713828, "eval_runtime": 226.778, "eval_samples_per_second": 2.258, "eval_steps_per_second": 0.035, "step": 13000 }, { "epoch": 0.04458136012555427, "grad_norm": 1.232352375984192, "learning_rate": 1e-05, "loss": 0.2387, "step": 13010 }, { "epoch": 0.0446156271202703, "grad_norm": 1.2386256456375122, "learning_rate": 1e-05, "loss": 0.255, "step": 13020 }, { "epoch": 0.04464989411498633, "grad_norm": 1.2183597087860107, "learning_rate": 1e-05, "loss": 0.2635, "step": 13030 }, { "epoch": 0.04468416110970236, "grad_norm": 1.166823387145996, "learning_rate": 1e-05, "loss": 0.2563, "step": 13040 }, { "epoch": 0.04471842810441839, "grad_norm": 1.1574853658676147, "learning_rate": 1e-05, "loss": 0.2557, "step": 13050 }, { "epoch": 0.04475269509913442, "grad_norm": 1.1207836866378784, "learning_rate": 1e-05, "loss": 0.2578, "step": 13060 }, { "epoch": 0.04478696209385045, "grad_norm": 
1.2590343952178955, "learning_rate": 1e-05, "loss": 0.2551, "step": 13070 }, { "epoch": 0.04482122908856648, "grad_norm": 1.0984435081481934, "learning_rate": 1e-05, "loss": 0.2816, "step": 13080 }, { "epoch": 0.0448554960832825, "grad_norm": 1.1435647010803223, "learning_rate": 1e-05, "loss": 0.2481, "step": 13090 }, { "epoch": 0.04488976307799853, "grad_norm": 1.1446672677993774, "learning_rate": 1e-05, "loss": 0.2561, "step": 13100 }, { "epoch": 0.04492403007271456, "grad_norm": 1.0957670211791992, "learning_rate": 1e-05, "loss": 0.2458, "step": 13110 }, { "epoch": 0.04495829706743059, "grad_norm": 1.1321167945861816, "learning_rate": 1e-05, "loss": 0.2621, "step": 13120 }, { "epoch": 0.04499256406214662, "grad_norm": 1.140914797782898, "learning_rate": 1e-05, "loss": 0.2376, "step": 13130 }, { "epoch": 0.04502683105686265, "grad_norm": 1.1879481077194214, "learning_rate": 1e-05, "loss": 0.2457, "step": 13140 }, { "epoch": 0.04506109805157868, "grad_norm": 1.240084171295166, "learning_rate": 1e-05, "loss": 0.2486, "step": 13150 }, { "epoch": 0.04509536504629471, "grad_norm": 1.1524683237075806, "learning_rate": 1e-05, "loss": 0.2533, "step": 13160 }, { "epoch": 0.04512963204101074, "grad_norm": 1.1614208221435547, "learning_rate": 1e-05, "loss": 0.2495, "step": 13170 }, { "epoch": 0.04516389903572677, "grad_norm": 1.1307048797607422, "learning_rate": 1e-05, "loss": 0.2549, "step": 13180 }, { "epoch": 0.0451981660304428, "grad_norm": 1.0327478647232056, "learning_rate": 1e-05, "loss": 0.2451, "step": 13190 }, { "epoch": 0.04523243302515883, "grad_norm": 1.2401607036590576, "learning_rate": 1e-05, "loss": 0.2492, "step": 13200 }, { "epoch": 0.04526670001987486, "grad_norm": 1.095413088798523, "learning_rate": 1e-05, "loss": 0.2487, "step": 13210 }, { "epoch": 0.04530096701459089, "grad_norm": 1.2537821531295776, "learning_rate": 1e-05, "loss": 0.246, "step": 13220 }, { "epoch": 0.04533523400930692, "grad_norm": 1.1861079931259155, "learning_rate": 1e-05, "loss": 
0.2487, "step": 13230 }, { "epoch": 0.04536950100402295, "grad_norm": 1.1059224605560303, "learning_rate": 1e-05, "loss": 0.2465, "step": 13240 }, { "epoch": 0.04540376799873898, "grad_norm": 1.159122109413147, "learning_rate": 1e-05, "loss": 0.2472, "step": 13250 }, { "epoch": 0.045438034993455, "grad_norm": 1.0307060480117798, "learning_rate": 1e-05, "loss": 0.2583, "step": 13260 }, { "epoch": 0.04547230198817103, "grad_norm": 1.0377501249313354, "learning_rate": 1e-05, "loss": 0.2547, "step": 13270 }, { "epoch": 0.04550656898288706, "grad_norm": 1.124543309211731, "learning_rate": 1e-05, "loss": 0.2396, "step": 13280 }, { "epoch": 0.04554083597760309, "grad_norm": 1.0829116106033325, "learning_rate": 1e-05, "loss": 0.2417, "step": 13290 }, { "epoch": 0.04557510297231912, "grad_norm": 1.057477355003357, "learning_rate": 1e-05, "loss": 0.2527, "step": 13300 }, { "epoch": 0.04560936996703515, "grad_norm": 1.062674641609192, "learning_rate": 1e-05, "loss": 0.2423, "step": 13310 }, { "epoch": 0.04564363696175118, "grad_norm": 1.131895661354065, "learning_rate": 1e-05, "loss": 0.2517, "step": 13320 }, { "epoch": 0.04567790395646721, "grad_norm": 1.016940951347351, "learning_rate": 1e-05, "loss": 0.2419, "step": 13330 }, { "epoch": 0.04571217095118324, "grad_norm": 1.273378849029541, "learning_rate": 1e-05, "loss": 0.2465, "step": 13340 }, { "epoch": 0.04574643794589927, "grad_norm": 1.0757806301116943, "learning_rate": 1e-05, "loss": 0.2447, "step": 13350 }, { "epoch": 0.0457807049406153, "grad_norm": 1.3264166116714478, "learning_rate": 1e-05, "loss": 0.2545, "step": 13360 }, { "epoch": 0.04581497193533133, "grad_norm": 1.1011106967926025, "learning_rate": 1e-05, "loss": 0.269, "step": 13370 }, { "epoch": 0.04584923893004736, "grad_norm": 1.0483593940734863, "learning_rate": 1e-05, "loss": 0.2443, "step": 13380 }, { "epoch": 0.04588350592476339, "grad_norm": 1.2940049171447754, "learning_rate": 1e-05, "loss": 0.2459, "step": 13390 }, { "epoch": 0.04591777291947942, 
"grad_norm": 1.1630951166152954, "learning_rate": 1e-05, "loss": 0.2702, "step": 13400 }, { "epoch": 0.04595203991419545, "grad_norm": 1.0715082883834839, "learning_rate": 1e-05, "loss": 0.2402, "step": 13410 }, { "epoch": 0.04598630690891147, "grad_norm": 1.0946441888809204, "learning_rate": 1e-05, "loss": 0.2452, "step": 13420 }, { "epoch": 0.0460205739036275, "grad_norm": 1.0796674489974976, "learning_rate": 1e-05, "loss": 0.2449, "step": 13430 }, { "epoch": 0.04605484089834353, "grad_norm": 1.0534013509750366, "learning_rate": 1e-05, "loss": 0.2385, "step": 13440 }, { "epoch": 0.04608910789305956, "grad_norm": 1.0427377223968506, "learning_rate": 1e-05, "loss": 0.2557, "step": 13450 }, { "epoch": 0.04612337488777559, "grad_norm": 1.1708178520202637, "learning_rate": 1e-05, "loss": 0.2452, "step": 13460 }, { "epoch": 0.04615764188249162, "grad_norm": 1.0531684160232544, "learning_rate": 1e-05, "loss": 0.2594, "step": 13470 }, { "epoch": 0.04619190887720765, "grad_norm": 1.1972299814224243, "learning_rate": 1e-05, "loss": 0.2566, "step": 13480 }, { "epoch": 0.04622617587192368, "grad_norm": 1.0194915533065796, "learning_rate": 1e-05, "loss": 0.2504, "step": 13490 }, { "epoch": 0.04626044286663971, "grad_norm": 1.2437708377838135, "learning_rate": 1e-05, "loss": 0.251, "step": 13500 }, { "epoch": 0.04626044286663971, "eval_cer": 12.744752160874933, "eval_loss": 0.24938170611858368, "eval_normalized_cer": 8.952837729816148, "eval_runtime": 229.544, "eval_samples_per_second": 2.231, "eval_steps_per_second": 0.035, "step": 13500 }, { "epoch": 0.04629470986135574, "grad_norm": 1.2545411586761475, "learning_rate": 1e-05, "loss": 0.2714, "step": 13510 }, { "epoch": 0.04632897685607177, "grad_norm": 1.0949839353561401, "learning_rate": 1e-05, "loss": 0.2442, "step": 13520 }, { "epoch": 0.0463632438507878, "grad_norm": 1.1343241930007935, "learning_rate": 1e-05, "loss": 0.2454, "step": 13530 }, { "epoch": 0.04639751084550383, "grad_norm": 1.2311172485351562, 
"learning_rate": 1e-05, "loss": 0.2608, "step": 13540 }, { "epoch": 0.04643177784021986, "grad_norm": 1.0095285177230835, "learning_rate": 1e-05, "loss": 0.2558, "step": 13550 }, { "epoch": 0.04646604483493589, "grad_norm": 1.0752111673355103, "learning_rate": 1e-05, "loss": 0.237, "step": 13560 }, { "epoch": 0.04650031182965192, "grad_norm": 1.0850863456726074, "learning_rate": 1e-05, "loss": 0.2552, "step": 13570 }, { "epoch": 0.04653457882436794, "grad_norm": 1.185105323791504, "learning_rate": 1e-05, "loss": 0.2567, "step": 13580 }, { "epoch": 0.04656884581908397, "grad_norm": 1.221077799797058, "learning_rate": 1e-05, "loss": 0.2591, "step": 13590 }, { "epoch": 0.0466031128138, "grad_norm": 1.23322594165802, "learning_rate": 1e-05, "loss": 0.2485, "step": 13600 }, { "epoch": 0.04663737980851603, "grad_norm": 1.1576459407806396, "learning_rate": 1e-05, "loss": 0.2573, "step": 13610 }, { "epoch": 0.04667164680323206, "grad_norm": 1.202359914779663, "learning_rate": 1e-05, "loss": 0.2555, "step": 13620 }, { "epoch": 0.04670591379794809, "grad_norm": 1.1896847486495972, "learning_rate": 1e-05, "loss": 0.2586, "step": 13630 }, { "epoch": 0.04674018079266412, "grad_norm": 0.8355448246002197, "learning_rate": 1e-05, "loss": 0.2374, "step": 13640 }, { "epoch": 0.04677444778738015, "grad_norm": 1.1680315732955933, "learning_rate": 1e-05, "loss": 0.2365, "step": 13650 }, { "epoch": 0.04680871478209618, "grad_norm": 1.2542601823806763, "learning_rate": 1e-05, "loss": 0.2494, "step": 13660 }, { "epoch": 0.04684298177681221, "grad_norm": 1.1530771255493164, "learning_rate": 1e-05, "loss": 0.2268, "step": 13670 }, { "epoch": 0.04687724877152824, "grad_norm": 1.0363566875457764, "learning_rate": 1e-05, "loss": 0.2623, "step": 13680 }, { "epoch": 0.04691151576624427, "grad_norm": 1.0039604902267456, "learning_rate": 1e-05, "loss": 0.2524, "step": 13690 }, { "epoch": 0.0469457827609603, "grad_norm": 1.2030223608016968, "learning_rate": 1e-05, "loss": 0.2346, "step": 13700 }, { 
"epoch": 0.04698004975567633, "grad_norm": 1.1221191883087158, "learning_rate": 1e-05, "loss": 0.2483, "step": 13710 }, { "epoch": 0.04701431675039236, "grad_norm": 0.9777095913887024, "learning_rate": 1e-05, "loss": 0.2484, "step": 13720 }, { "epoch": 0.04704858374510839, "grad_norm": 1.1439709663391113, "learning_rate": 1e-05, "loss": 0.2447, "step": 13730 }, { "epoch": 0.04708285073982441, "grad_norm": 1.0818963050842285, "learning_rate": 1e-05, "loss": 0.2378, "step": 13740 }, { "epoch": 0.04711711773454044, "grad_norm": 1.1691282987594604, "learning_rate": 1e-05, "loss": 0.2433, "step": 13750 }, { "epoch": 0.04715138472925647, "grad_norm": 1.1172744035720825, "learning_rate": 1e-05, "loss": 0.2297, "step": 13760 }, { "epoch": 0.0471856517239725, "grad_norm": 1.170318841934204, "learning_rate": 1e-05, "loss": 0.2451, "step": 13770 }, { "epoch": 0.04721991871868853, "grad_norm": 1.152566909790039, "learning_rate": 1e-05, "loss": 0.2497, "step": 13780 }, { "epoch": 0.04725418571340456, "grad_norm": 1.023813247680664, "learning_rate": 1e-05, "loss": 0.2654, "step": 13790 }, { "epoch": 0.04728845270812059, "grad_norm": 1.159853458404541, "learning_rate": 1e-05, "loss": 0.2537, "step": 13800 }, { "epoch": 0.04732271970283662, "grad_norm": 1.0579999685287476, "learning_rate": 1e-05, "loss": 0.2524, "step": 13810 }, { "epoch": 0.04735698669755265, "grad_norm": 1.1741176843643188, "learning_rate": 1e-05, "loss": 0.2574, "step": 13820 }, { "epoch": 0.04739125369226868, "grad_norm": 1.1931475400924683, "learning_rate": 1e-05, "loss": 0.2504, "step": 13830 }, { "epoch": 0.04742552068698471, "grad_norm": 0.9977951645851135, "learning_rate": 1e-05, "loss": 0.2515, "step": 13840 }, { "epoch": 0.04745978768170074, "grad_norm": 1.177424669265747, "learning_rate": 1e-05, "loss": 0.2399, "step": 13850 }, { "epoch": 0.04749405467641677, "grad_norm": 1.0663032531738281, "learning_rate": 1e-05, "loss": 0.2464, "step": 13860 }, { "epoch": 0.0475283216711328, "grad_norm": 
0.9985357522964478, "learning_rate": 1e-05, "loss": 0.2518, "step": 13870 }, { "epoch": 0.04756258866584883, "grad_norm": 1.1378836631774902, "learning_rate": 1e-05, "loss": 0.2366, "step": 13880 }, { "epoch": 0.04759685566056486, "grad_norm": 1.130223035812378, "learning_rate": 1e-05, "loss": 0.2521, "step": 13890 }, { "epoch": 0.04763112265528089, "grad_norm": 1.1601965427398682, "learning_rate": 1e-05, "loss": 0.2635, "step": 13900 }, { "epoch": 0.04766538964999691, "grad_norm": 1.0559511184692383, "learning_rate": 1e-05, "loss": 0.2496, "step": 13910 }, { "epoch": 0.04769965664471294, "grad_norm": 1.0258302688598633, "learning_rate": 1e-05, "loss": 0.2472, "step": 13920 }, { "epoch": 0.04773392363942897, "grad_norm": 1.1403284072875977, "learning_rate": 1e-05, "loss": 0.2544, "step": 13930 }, { "epoch": 0.047768190634145, "grad_norm": 1.1353199481964111, "learning_rate": 1e-05, "loss": 0.2502, "step": 13940 }, { "epoch": 0.04780245762886103, "grad_norm": 1.2859739065170288, "learning_rate": 1e-05, "loss": 0.2314, "step": 13950 }, { "epoch": 0.04783672462357706, "grad_norm": 1.1392145156860352, "learning_rate": 1e-05, "loss": 0.2493, "step": 13960 }, { "epoch": 0.04787099161829309, "grad_norm": 1.055909276008606, "learning_rate": 1e-05, "loss": 0.2406, "step": 13970 }, { "epoch": 0.04790525861300912, "grad_norm": 1.0544830560684204, "learning_rate": 1e-05, "loss": 0.248, "step": 13980 }, { "epoch": 0.04793952560772515, "grad_norm": 1.0616220235824585, "learning_rate": 1e-05, "loss": 0.2489, "step": 13990 }, { "epoch": 0.04797379260244118, "grad_norm": 1.0163713693618774, "learning_rate": 1e-05, "loss": 0.2359, "step": 14000 }, { "epoch": 0.04797379260244118, "eval_cer": 12.683012877050626, "eval_loss": 0.24888557195663452, "eval_normalized_cer": 9.082733812949641, "eval_runtime": 228.6884, "eval_samples_per_second": 2.239, "eval_steps_per_second": 0.035, "step": 14000 }, { "epoch": 0.04800805959715721, "grad_norm": 1.0705435276031494, "learning_rate": 1e-05, 
"loss": 0.2407, "step": 14010 }, { "epoch": 0.04804232659187324, "grad_norm": 0.9681057333946228, "learning_rate": 1e-05, "loss": 0.2506, "step": 14020 }, { "epoch": 0.04807659358658927, "grad_norm": 1.0451055765151978, "learning_rate": 1e-05, "loss": 0.2523, "step": 14030 }, { "epoch": 0.0481108605813053, "grad_norm": 1.0783635377883911, "learning_rate": 1e-05, "loss": 0.242, "step": 14040 }, { "epoch": 0.04814512757602133, "grad_norm": 1.1066149473190308, "learning_rate": 1e-05, "loss": 0.2514, "step": 14050 }, { "epoch": 0.04817939457073736, "grad_norm": 1.163319706916809, "learning_rate": 1e-05, "loss": 0.2336, "step": 14060 }, { "epoch": 0.04821366156545338, "grad_norm": 1.1571089029312134, "learning_rate": 1e-05, "loss": 0.2626, "step": 14070 }, { "epoch": 0.04824792856016941, "grad_norm": 1.153372049331665, "learning_rate": 1e-05, "loss": 0.2499, "step": 14080 }, { "epoch": 0.04828219555488544, "grad_norm": 1.1359412670135498, "learning_rate": 1e-05, "loss": 0.245, "step": 14090 }, { "epoch": 0.04831646254960147, "grad_norm": 1.1467552185058594, "learning_rate": 1e-05, "loss": 0.2185, "step": 14100 }, { "epoch": 0.0483507295443175, "grad_norm": 1.1838459968566895, "learning_rate": 1e-05, "loss": 0.2236, "step": 14110 }, { "epoch": 0.04838499653903353, "grad_norm": 1.1658817529678345, "learning_rate": 1e-05, "loss": 0.2527, "step": 14120 }, { "epoch": 0.04841926353374956, "grad_norm": 1.0678468942642212, "learning_rate": 1e-05, "loss": 0.2299, "step": 14130 }, { "epoch": 0.04845353052846559, "grad_norm": 1.169755220413208, "learning_rate": 1e-05, "loss": 0.256, "step": 14140 }, { "epoch": 0.04848779752318162, "grad_norm": 1.1786571741104126, "learning_rate": 1e-05, "loss": 0.2294, "step": 14150 }, { "epoch": 0.04852206451789765, "grad_norm": 1.0959957838058472, "learning_rate": 1e-05, "loss": 0.2415, "step": 14160 }, { "epoch": 0.04855633151261368, "grad_norm": 1.3755067586898804, "learning_rate": 1e-05, "loss": 0.2402, "step": 14170 }, { "epoch": 
0.04859059850732971, "grad_norm": 1.0811392068862915, "learning_rate": 1e-05, "loss": 0.2565, "step": 14180 }, { "epoch": 0.04862486550204574, "grad_norm": 1.0909180641174316, "learning_rate": 1e-05, "loss": 0.2338, "step": 14190 }, { "epoch": 0.04865913249676177, "grad_norm": 1.168319821357727, "learning_rate": 1e-05, "loss": 0.2422, "step": 14200 }, { "epoch": 0.0486933994914778, "grad_norm": 1.0762922763824463, "learning_rate": 1e-05, "loss": 0.2419, "step": 14210 }, { "epoch": 0.04872766648619383, "grad_norm": 0.9447901248931885, "learning_rate": 1e-05, "loss": 0.2257, "step": 14220 }, { "epoch": 0.048761933480909854, "grad_norm": 1.0396913290023804, "learning_rate": 1e-05, "loss": 0.2351, "step": 14230 }, { "epoch": 0.048796200475625884, "grad_norm": 1.0487229824066162, "learning_rate": 1e-05, "loss": 0.229, "step": 14240 }, { "epoch": 0.048830467470341914, "grad_norm": 1.1879597902297974, "learning_rate": 1e-05, "loss": 0.251, "step": 14250 }, { "epoch": 0.048864734465057943, "grad_norm": 1.2501291036605835, "learning_rate": 1e-05, "loss": 0.2421, "step": 14260 }, { "epoch": 0.04889900145977397, "grad_norm": 1.119624137878418, "learning_rate": 1e-05, "loss": 0.242, "step": 14270 }, { "epoch": 0.04893326845449, "grad_norm": 1.0920354127883911, "learning_rate": 1e-05, "loss": 0.2352, "step": 14280 }, { "epoch": 0.04896753544920603, "grad_norm": 1.0519214868545532, "learning_rate": 1e-05, "loss": 0.2466, "step": 14290 }, { "epoch": 0.04900180244392206, "grad_norm": 1.1111136674880981, "learning_rate": 1e-05, "loss": 0.2438, "step": 14300 }, { "epoch": 0.04903606943863809, "grad_norm": 1.2424713373184204, "learning_rate": 1e-05, "loss": 0.2358, "step": 14310 }, { "epoch": 0.04907033643335412, "grad_norm": 1.0222002267837524, "learning_rate": 1e-05, "loss": 0.2302, "step": 14320 }, { "epoch": 0.04910460342807015, "grad_norm": 0.9916470050811768, "learning_rate": 1e-05, "loss": 0.2068, "step": 14330 }, { "epoch": 0.04913887042278618, "grad_norm": 1.31315016746521, 
"learning_rate": 1e-05, "loss": 0.2489, "step": 14340 }, { "epoch": 0.04917313741750221, "grad_norm": 1.1765952110290527, "learning_rate": 1e-05, "loss": 0.2504, "step": 14350 }, { "epoch": 0.04920740441221824, "grad_norm": 1.1690514087677002, "learning_rate": 1e-05, "loss": 0.2441, "step": 14360 }, { "epoch": 0.04924167140693427, "grad_norm": 1.1756752729415894, "learning_rate": 1e-05, "loss": 0.2669, "step": 14370 }, { "epoch": 0.0492759384016503, "grad_norm": 1.1072384119033813, "learning_rate": 1e-05, "loss": 0.2426, "step": 14380 }, { "epoch": 0.049310205396366324, "grad_norm": 1.2008529901504517, "learning_rate": 1e-05, "loss": 0.2545, "step": 14390 }, { "epoch": 0.049344472391082354, "grad_norm": 1.1737167835235596, "learning_rate": 1e-05, "loss": 0.25, "step": 14400 }, { "epoch": 0.049378739385798384, "grad_norm": 1.0450342893600464, "learning_rate": 1e-05, "loss": 0.2539, "step": 14410 }, { "epoch": 0.049413006380514414, "grad_norm": 1.0435712337493896, "learning_rate": 1e-05, "loss": 0.2404, "step": 14420 }, { "epoch": 0.049447273375230444, "grad_norm": 1.2220741510391235, "learning_rate": 1e-05, "loss": 0.2544, "step": 14430 }, { "epoch": 0.049481540369946474, "grad_norm": 1.285495400428772, "learning_rate": 1e-05, "loss": 0.2563, "step": 14440 }, { "epoch": 0.049515807364662504, "grad_norm": 1.2037091255187988, "learning_rate": 1e-05, "loss": 0.269, "step": 14450 }, { "epoch": 0.04955007435937853, "grad_norm": 1.1641725301742554, "learning_rate": 1e-05, "loss": 0.2582, "step": 14460 }, { "epoch": 0.04958434135409456, "grad_norm": 1.30322265625, "learning_rate": 1e-05, "loss": 0.2637, "step": 14470 }, { "epoch": 0.04961860834881059, "grad_norm": 1.0870246887207031, "learning_rate": 1e-05, "loss": 0.2319, "step": 14480 }, { "epoch": 0.04965287534352662, "grad_norm": 1.1934154033660889, "learning_rate": 1e-05, "loss": 0.2654, "step": 14490 }, { "epoch": 0.04968714233824265, "grad_norm": 1.0471813678741455, "learning_rate": 1e-05, "loss": 0.2672, "step": 
14500 }, { "epoch": 0.04968714233824265, "eval_cer": 12.859410830834362, "eval_loss": 0.2491358071565628, "eval_normalized_cer": 9.292565947242206, "eval_runtime": 235.6794, "eval_samples_per_second": 2.172, "eval_steps_per_second": 0.034, "step": 14500 }, { "epoch": 0.04972140933295868, "grad_norm": 1.1028441190719604, "learning_rate": 1e-05, "loss": 0.2506, "step": 14510 }, { "epoch": 0.04975567632767471, "grad_norm": 1.0829942226409912, "learning_rate": 1e-05, "loss": 0.2624, "step": 14520 }, { "epoch": 0.04978994332239074, "grad_norm": 1.094115138053894, "learning_rate": 1e-05, "loss": 0.2393, "step": 14530 }, { "epoch": 0.04982421031710677, "grad_norm": 1.1440691947937012, "learning_rate": 1e-05, "loss": 0.2454, "step": 14540 }, { "epoch": 0.0498584773118228, "grad_norm": 1.171846866607666, "learning_rate": 1e-05, "loss": 0.2528, "step": 14550 }, { "epoch": 0.049892744306538825, "grad_norm": 1.1416808366775513, "learning_rate": 1e-05, "loss": 0.2545, "step": 14560 }, { "epoch": 0.049927011301254855, "grad_norm": 1.2240614891052246, "learning_rate": 1e-05, "loss": 0.2511, "step": 14570 }, { "epoch": 0.049961278295970885, "grad_norm": 1.224147081375122, "learning_rate": 1e-05, "loss": 0.2662, "step": 14580 }, { "epoch": 0.049995545290686914, "grad_norm": 1.0746549367904663, "learning_rate": 1e-05, "loss": 0.2466, "step": 14590 }, { "epoch": 0.050029812285402944, "grad_norm": 1.0464617013931274, "learning_rate": 1e-05, "loss": 0.2483, "step": 14600 }, { "epoch": 0.050064079280118974, "grad_norm": 1.0312144756317139, "learning_rate": 1e-05, "loss": 0.2496, "step": 14610 }, { "epoch": 0.050098346274835004, "grad_norm": 1.109796166419983, "learning_rate": 1e-05, "loss": 0.2459, "step": 14620 }, { "epoch": 0.050132613269551034, "grad_norm": 1.100741982460022, "learning_rate": 1e-05, "loss": 0.2442, "step": 14630 }, { "epoch": 0.050166880264267064, "grad_norm": 1.187683343887329, "learning_rate": 1e-05, "loss": 0.2434, "step": 14640 }, { "epoch": 0.05020114725898309, 
"grad_norm": 1.1926337480545044, "learning_rate": 1e-05, "loss": 0.2554, "step": 14650 }, { "epoch": 0.05023541425369912, "grad_norm": 0.9210227727890015, "learning_rate": 1e-05, "loss": 0.2497, "step": 14660 }, { "epoch": 0.05026968124841515, "grad_norm": 1.010360836982727, "learning_rate": 1e-05, "loss": 0.2505, "step": 14670 }, { "epoch": 0.05030394824313118, "grad_norm": 1.0252940654754639, "learning_rate": 1e-05, "loss": 0.2461, "step": 14680 }, { "epoch": 0.05033821523784721, "grad_norm": 1.297200322151184, "learning_rate": 1e-05, "loss": 0.2526, "step": 14690 }, { "epoch": 0.05037248223256324, "grad_norm": 1.2773388624191284, "learning_rate": 1e-05, "loss": 0.2484, "step": 14700 }, { "epoch": 0.05040674922727927, "grad_norm": 1.110484004020691, "learning_rate": 1e-05, "loss": 0.2542, "step": 14710 }, { "epoch": 0.050441016221995295, "grad_norm": 1.0664464235305786, "learning_rate": 1e-05, "loss": 0.2528, "step": 14720 }, { "epoch": 0.050475283216711325, "grad_norm": 1.1421204805374146, "learning_rate": 1e-05, "loss": 0.2464, "step": 14730 }, { "epoch": 0.050509550211427355, "grad_norm": 1.115225911140442, "learning_rate": 1e-05, "loss": 0.2584, "step": 14740 }, { "epoch": 0.050543817206143385, "grad_norm": 1.1027865409851074, "learning_rate": 1e-05, "loss": 0.2606, "step": 14750 }, { "epoch": 0.050578084200859415, "grad_norm": 1.1306570768356323, "learning_rate": 1e-05, "loss": 0.2561, "step": 14760 }, { "epoch": 0.050612351195575445, "grad_norm": 1.0982325077056885, "learning_rate": 1e-05, "loss": 0.2688, "step": 14770 }, { "epoch": 0.050646618190291474, "grad_norm": 1.004801630973816, "learning_rate": 1e-05, "loss": 0.2596, "step": 14780 }, { "epoch": 0.050680885185007504, "grad_norm": 1.2542450428009033, "learning_rate": 1e-05, "loss": 0.2521, "step": 14790 }, { "epoch": 0.050715152179723534, "grad_norm": 1.311880111694336, "learning_rate": 1e-05, "loss": 0.2426, "step": 14800 }, { "epoch": 0.050749419174439564, "grad_norm": 1.119271159172058, 
"learning_rate": 1e-05, "loss": 0.2345, "step": 14810 }, { "epoch": 0.050783686169155594, "grad_norm": 1.1003872156143188, "learning_rate": 1e-05, "loss": 0.2518, "step": 14820 }, { "epoch": 0.050817953163871624, "grad_norm": 1.17613685131073, "learning_rate": 1e-05, "loss": 0.2546, "step": 14830 }, { "epoch": 0.050852220158587653, "grad_norm": 1.188706398010254, "learning_rate": 1e-05, "loss": 0.247, "step": 14840 }, { "epoch": 0.05088648715330368, "grad_norm": 1.1993244886398315, "learning_rate": 1e-05, "loss": 0.2619, "step": 14850 }, { "epoch": 0.05092075414801971, "grad_norm": 1.0679277181625366, "learning_rate": 1e-05, "loss": 0.2452, "step": 14860 }, { "epoch": 0.05095502114273574, "grad_norm": 1.1368016004562378, "learning_rate": 1e-05, "loss": 0.2453, "step": 14870 }, { "epoch": 0.050989288137451766, "grad_norm": 1.0620750188827515, "learning_rate": 1e-05, "loss": 0.2365, "step": 14880 }, { "epoch": 0.051023555132167796, "grad_norm": 1.1344637870788574, "learning_rate": 1e-05, "loss": 0.2479, "step": 14890 }, { "epoch": 0.051057822126883826, "grad_norm": 1.1324440240859985, "learning_rate": 1e-05, "loss": 0.2501, "step": 14900 }, { "epoch": 0.051092089121599855, "grad_norm": 1.139269232749939, "learning_rate": 1e-05, "loss": 0.2467, "step": 14910 }, { "epoch": 0.051126356116315885, "grad_norm": 1.1169798374176025, "learning_rate": 1e-05, "loss": 0.2616, "step": 14920 }, { "epoch": 0.051160623111031915, "grad_norm": 1.057564377784729, "learning_rate": 1e-05, "loss": 0.2554, "step": 14930 }, { "epoch": 0.051194890105747945, "grad_norm": 1.084874153137207, "learning_rate": 1e-05, "loss": 0.2399, "step": 14940 }, { "epoch": 0.051229157100463975, "grad_norm": 1.1470558643341064, "learning_rate": 1e-05, "loss": 0.252, "step": 14950 }, { "epoch": 0.051263424095180005, "grad_norm": 1.0080534219741821, "learning_rate": 1e-05, "loss": 0.256, "step": 14960 }, { "epoch": 0.051297691089896034, "grad_norm": 1.071164608001709, "learning_rate": 1e-05, "loss": 0.2371, 
"step": 14970 }, { "epoch": 0.051331958084612064, "grad_norm": 1.1828765869140625, "learning_rate": 1e-05, "loss": 0.2346, "step": 14980 }, { "epoch": 0.051366225079328094, "grad_norm": 1.2067548036575317, "learning_rate": 1e-05, "loss": 0.2414, "step": 14990 }, { "epoch": 0.051400492074044124, "grad_norm": 1.0730314254760742, "learning_rate": 1e-05, "loss": 0.2563, "step": 15000 }, { "epoch": 0.051400492074044124, "eval_cer": 12.568354207091199, "eval_loss": 0.2477913200855255, "eval_normalized_cer": 8.832933653077538, "eval_runtime": 229.5255, "eval_samples_per_second": 2.231, "eval_steps_per_second": 0.035, "step": 15000 }, { "epoch": 0.051434759068760154, "grad_norm": 1.0130051374435425, "learning_rate": 1e-05, "loss": 0.2571, "step": 15010 }, { "epoch": 0.051469026063476184, "grad_norm": 1.1843127012252808, "learning_rate": 1e-05, "loss": 0.2501, "step": 15020 }, { "epoch": 0.051503293058192214, "grad_norm": 1.1182798147201538, "learning_rate": 1e-05, "loss": 0.2559, "step": 15030 }, { "epoch": 0.051537560052908236, "grad_norm": 1.1553055047988892, "learning_rate": 1e-05, "loss": 0.2396, "step": 15040 }, { "epoch": 0.051571827047624266, "grad_norm": 1.030463695526123, "learning_rate": 1e-05, "loss": 0.2368, "step": 15050 }, { "epoch": 0.051606094042340296, "grad_norm": 1.2701278924942017, "learning_rate": 1e-05, "loss": 0.2495, "step": 15060 }, { "epoch": 0.051640361037056326, "grad_norm": 1.1329874992370605, "learning_rate": 1e-05, "loss": 0.2389, "step": 15070 }, { "epoch": 0.051674628031772356, "grad_norm": 1.132430911064148, "learning_rate": 1e-05, "loss": 0.2479, "step": 15080 }, { "epoch": 0.051708895026488386, "grad_norm": 1.1582975387573242, "learning_rate": 1e-05, "loss": 0.2511, "step": 15090 }, { "epoch": 0.051743162021204415, "grad_norm": 1.0918657779693604, "learning_rate": 1e-05, "loss": 0.2335, "step": 15100 }, { "epoch": 0.051777429015920445, "grad_norm": 1.1993087530136108, "learning_rate": 1e-05, "loss": 0.2468, "step": 15110 }, { "epoch": 
0.051811696010636475, "grad_norm": 1.1809076070785522, "learning_rate": 1e-05, "loss": 0.2652, "step": 15120 }, { "epoch": 0.051845963005352505, "grad_norm": 1.2104005813598633, "learning_rate": 1e-05, "loss": 0.2444, "step": 15130 }, { "epoch": 0.051880230000068535, "grad_norm": 1.0615415573120117, "learning_rate": 1e-05, "loss": 0.2558, "step": 15140 }, { "epoch": 0.051914496994784565, "grad_norm": 1.1397675275802612, "learning_rate": 1e-05, "loss": 0.2543, "step": 15150 }, { "epoch": 0.051948763989500595, "grad_norm": 1.1353163719177246, "learning_rate": 1e-05, "loss": 0.2445, "step": 15160 }, { "epoch": 0.051983030984216624, "grad_norm": 1.1440542936325073, "learning_rate": 1e-05, "loss": 0.2396, "step": 15170 }, { "epoch": 0.052017297978932654, "grad_norm": 1.2371265888214111, "learning_rate": 1e-05, "loss": 0.2318, "step": 15180 }, { "epoch": 0.052051564973648684, "grad_norm": 1.03389310836792, "learning_rate": 1e-05, "loss": 0.2312, "step": 15190 }, { "epoch": 0.052085831968364714, "grad_norm": 1.3557147979736328, "learning_rate": 1e-05, "loss": 0.2387, "step": 15200 }, { "epoch": 0.05212009896308074, "grad_norm": 1.1793631315231323, "learning_rate": 1e-05, "loss": 0.2528, "step": 15210 }, { "epoch": 0.05215436595779677, "grad_norm": 1.1020557880401611, "learning_rate": 1e-05, "loss": 0.2438, "step": 15220 }, { "epoch": 0.052188632952512796, "grad_norm": 1.0566291809082031, "learning_rate": 1e-05, "loss": 0.2493, "step": 15230 }, { "epoch": 0.052222899947228826, "grad_norm": 1.0652698278427124, "learning_rate": 1e-05, "loss": 0.2385, "step": 15240 }, { "epoch": 0.052257166941944856, "grad_norm": 1.337236762046814, "learning_rate": 1e-05, "loss": 0.2532, "step": 15250 }, { "epoch": 0.052291433936660886, "grad_norm": 1.1121892929077148, "learning_rate": 1e-05, "loss": 0.2428, "step": 15260 }, { "epoch": 0.052325700931376916, "grad_norm": 1.1201363801956177, "learning_rate": 1e-05, "loss": 0.2477, "step": 15270 }, { "epoch": 0.052359967926092946, "grad_norm": 
1.1055474281311035, "learning_rate": 1e-05, "loss": 0.243, "step": 15280 }, { "epoch": 0.052394234920808976, "grad_norm": 1.1349745988845825, "learning_rate": 1e-05, "loss": 0.2441, "step": 15290 }, { "epoch": 0.052428501915525005, "grad_norm": 1.0891187191009521, "learning_rate": 1e-05, "loss": 0.2544, "step": 15300 }, { "epoch": 0.052462768910241035, "grad_norm": 1.0537917613983154, "learning_rate": 1e-05, "loss": 0.2492, "step": 15310 }, { "epoch": 0.052497035904957065, "grad_norm": 1.1704713106155396, "learning_rate": 1e-05, "loss": 0.267, "step": 15320 }, { "epoch": 0.052531302899673095, "grad_norm": 0.9952285289764404, "learning_rate": 1e-05, "loss": 0.2291, "step": 15330 }, { "epoch": 0.052565569894389125, "grad_norm": 0.9887141585350037, "learning_rate": 1e-05, "loss": 0.2507, "step": 15340 }, { "epoch": 0.052599836889105155, "grad_norm": 1.20647394657135, "learning_rate": 1e-05, "loss": 0.2512, "step": 15350 }, { "epoch": 0.052634103883821184, "grad_norm": 1.0504127740859985, "learning_rate": 1e-05, "loss": 0.2559, "step": 15360 }, { "epoch": 0.05266837087853721, "grad_norm": 1.010195255279541, "learning_rate": 1e-05, "loss": 0.2292, "step": 15370 }, { "epoch": 0.05270263787325324, "grad_norm": 1.0885406732559204, "learning_rate": 1e-05, "loss": 0.2244, "step": 15380 }, { "epoch": 0.05273690486796927, "grad_norm": 0.9946883916854858, "learning_rate": 1e-05, "loss": 0.2488, "step": 15390 }, { "epoch": 0.0527711718626853, "grad_norm": 1.163482666015625, "learning_rate": 1e-05, "loss": 0.2401, "step": 15400 }, { "epoch": 0.05280543885740133, "grad_norm": 1.0451022386550903, "learning_rate": 1e-05, "loss": 0.2324, "step": 15410 }, { "epoch": 0.05283970585211736, "grad_norm": 1.0925875902175903, "learning_rate": 1e-05, "loss": 0.241, "step": 15420 }, { "epoch": 0.052873972846833386, "grad_norm": 0.9957507252693176, "learning_rate": 1e-05, "loss": 0.2377, "step": 15430 }, { "epoch": 0.052908239841549416, "grad_norm": 1.1372512578964233, "learning_rate": 1e-05, 
"loss": 0.2565, "step": 15440 }, { "epoch": 0.052942506836265446, "grad_norm": 1.2347620725631714, "learning_rate": 1e-05, "loss": 0.2392, "step": 15450 }, { "epoch": 0.052976773830981476, "grad_norm": 1.0665175914764404, "learning_rate": 1e-05, "loss": 0.2433, "step": 15460 }, { "epoch": 0.053011040825697506, "grad_norm": 1.2257585525512695, "learning_rate": 1e-05, "loss": 0.2371, "step": 15470 }, { "epoch": 0.053045307820413536, "grad_norm": 1.1391757726669312, "learning_rate": 1e-05, "loss": 0.2368, "step": 15480 }, { "epoch": 0.053079574815129565, "grad_norm": 1.0963542461395264, "learning_rate": 1e-05, "loss": 0.2318, "step": 15490 }, { "epoch": 0.053113841809845595, "grad_norm": 1.0512635707855225, "learning_rate": 1e-05, "loss": 0.2445, "step": 15500 }, { "epoch": 0.053113841809845595, "eval_cer": 12.947609807726229, "eval_loss": 0.2476710081100464, "eval_normalized_cer": 9.192645883293366, "eval_runtime": 227.5054, "eval_samples_per_second": 2.25, "eval_steps_per_second": 0.035, "step": 15500 }, { "epoch": 0.053148108804561625, "grad_norm": 1.1827884912490845, "learning_rate": 1e-05, "loss": 0.2591, "step": 15510 }, { "epoch": 0.053182375799277655, "grad_norm": 1.1053601503372192, "learning_rate": 1e-05, "loss": 0.2313, "step": 15520 }, { "epoch": 0.05321664279399368, "grad_norm": 0.9013387560844421, "learning_rate": 1e-05, "loss": 0.2418, "step": 15530 }, { "epoch": 0.05325090978870971, "grad_norm": 1.2677943706512451, "learning_rate": 1e-05, "loss": 0.2374, "step": 15540 }, { "epoch": 0.05328517678342574, "grad_norm": 1.2304824590682983, "learning_rate": 1e-05, "loss": 0.2598, "step": 15550 }, { "epoch": 0.05331944377814177, "grad_norm": 1.0899723768234253, "learning_rate": 1e-05, "loss": 0.2278, "step": 15560 }, { "epoch": 0.0533537107728578, "grad_norm": 1.1334359645843506, "learning_rate": 1e-05, "loss": 0.2428, "step": 15570 }, { "epoch": 0.05338797776757383, "grad_norm": 0.9581900835037231, "learning_rate": 1e-05, "loss": 0.2353, "step": 15580 }, { 
"epoch": 0.05342224476228986, "grad_norm": 1.1402366161346436, "learning_rate": 1e-05, "loss": 0.2224, "step": 15590 }, { "epoch": 0.05345651175700589, "grad_norm": 1.0247036218643188, "learning_rate": 1e-05, "loss": 0.2364, "step": 15600 }, { "epoch": 0.05349077875172192, "grad_norm": 1.0679134130477905, "learning_rate": 1e-05, "loss": 0.2338, "step": 15610 }, { "epoch": 0.053525045746437946, "grad_norm": 1.0253273248672485, "learning_rate": 1e-05, "loss": 0.2338, "step": 15620 }, { "epoch": 0.053559312741153976, "grad_norm": 1.066657543182373, "learning_rate": 1e-05, "loss": 0.2321, "step": 15630 }, { "epoch": 0.053593579735870006, "grad_norm": 1.1637462377548218, "learning_rate": 1e-05, "loss": 0.2452, "step": 15640 }, { "epoch": 0.053627846730586036, "grad_norm": 1.0810452699661255, "learning_rate": 1e-05, "loss": 0.2385, "step": 15650 }, { "epoch": 0.053662113725302066, "grad_norm": 1.1184097528457642, "learning_rate": 1e-05, "loss": 0.2448, "step": 15660 }, { "epoch": 0.053696380720018096, "grad_norm": 0.9914514422416687, "learning_rate": 1e-05, "loss": 0.2246, "step": 15670 }, { "epoch": 0.053730647714734125, "grad_norm": 1.1371618509292603, "learning_rate": 1e-05, "loss": 0.2434, "step": 15680 }, { "epoch": 0.05376491470945015, "grad_norm": 1.0953712463378906, "learning_rate": 1e-05, "loss": 0.2414, "step": 15690 }, { "epoch": 0.05379918170416618, "grad_norm": 1.05868399143219, "learning_rate": 1e-05, "loss": 0.2533, "step": 15700 }, { "epoch": 0.05383344869888221, "grad_norm": 1.2226771116256714, "learning_rate": 1e-05, "loss": 0.2342, "step": 15710 }, { "epoch": 0.05386771569359824, "grad_norm": 1.1739540100097656, "learning_rate": 1e-05, "loss": 0.2335, "step": 15720 }, { "epoch": 0.05390198268831427, "grad_norm": 1.1677112579345703, "learning_rate": 1e-05, "loss": 0.2373, "step": 15730 }, { "epoch": 0.0539362496830303, "grad_norm": 1.040004849433899, "learning_rate": 1e-05, "loss": 0.2385, "step": 15740 }, { "epoch": 0.05397051667774633, "grad_norm": 
1.0462521314620972, "learning_rate": 1e-05, "loss": 0.2213, "step": 15750 }, { "epoch": 0.05400478367246236, "grad_norm": 1.1457821130752563, "learning_rate": 1e-05, "loss": 0.234, "step": 15760 }, { "epoch": 0.05403905066717839, "grad_norm": 1.1003904342651367, "learning_rate": 1e-05, "loss": 0.2378, "step": 15770 }, { "epoch": 0.05407331766189442, "grad_norm": 0.9855090975761414, "learning_rate": 1e-05, "loss": 0.2456, "step": 15780 }, { "epoch": 0.05410758465661045, "grad_norm": 0.9081568717956543, "learning_rate": 1e-05, "loss": 0.2283, "step": 15790 }, { "epoch": 0.05414185165132648, "grad_norm": 1.0517041683197021, "learning_rate": 1e-05, "loss": 0.241, "step": 15800 }, { "epoch": 0.054176118646042506, "grad_norm": 1.098231315612793, "learning_rate": 1e-05, "loss": 0.2364, "step": 15810 }, { "epoch": 0.054210385640758536, "grad_norm": 1.2353124618530273, "learning_rate": 1e-05, "loss": 0.223, "step": 15820 }, { "epoch": 0.054244652635474566, "grad_norm": 1.2910332679748535, "learning_rate": 1e-05, "loss": 0.2391, "step": 15830 }, { "epoch": 0.054278919630190596, "grad_norm": 0.9715086221694946, "learning_rate": 1e-05, "loss": 0.232, "step": 15840 }, { "epoch": 0.054313186624906626, "grad_norm": 0.9929107427597046, "learning_rate": 1e-05, "loss": 0.2425, "step": 15850 }, { "epoch": 0.05434745361962265, "grad_norm": 1.104021430015564, "learning_rate": 1e-05, "loss": 0.2242, "step": 15860 }, { "epoch": 0.05438172061433868, "grad_norm": 1.1469321250915527, "learning_rate": 1e-05, "loss": 0.2403, "step": 15870 }, { "epoch": 0.05441598760905471, "grad_norm": 1.158736228942871, "learning_rate": 1e-05, "loss": 0.2543, "step": 15880 }, { "epoch": 0.05445025460377074, "grad_norm": 1.034866213798523, "learning_rate": 1e-05, "loss": 0.2524, "step": 15890 }, { "epoch": 0.05448452159848677, "grad_norm": 1.0600367784500122, "learning_rate": 1e-05, "loss": 0.2434, "step": 15900 }, { "epoch": 0.0545187885932028, "grad_norm": 1.2206172943115234, "learning_rate": 1e-05, "loss": 
0.2419, "step": 15910 }, { "epoch": 0.05455305558791883, "grad_norm": 1.3504046201705933, "learning_rate": 1e-05, "loss": 0.2663, "step": 15920 }, { "epoch": 0.05458732258263486, "grad_norm": 0.9881328344345093, "learning_rate": 1e-05, "loss": 0.2507, "step": 15930 }, { "epoch": 0.05462158957735089, "grad_norm": 1.104619026184082, "learning_rate": 1e-05, "loss": 0.2552, "step": 15940 }, { "epoch": 0.05465585657206692, "grad_norm": 0.9734206795692444, "learning_rate": 1e-05, "loss": 0.2552, "step": 15950 }, { "epoch": 0.05469012356678295, "grad_norm": 1.0191655158996582, "learning_rate": 1e-05, "loss": 0.2564, "step": 15960 }, { "epoch": 0.05472439056149898, "grad_norm": 1.0736887454986572, "learning_rate": 1e-05, "loss": 0.235, "step": 15970 }, { "epoch": 0.05475865755621501, "grad_norm": 0.9910275936126709, "learning_rate": 1e-05, "loss": 0.2476, "step": 15980 }, { "epoch": 0.05479292455093104, "grad_norm": 1.2393155097961426, "learning_rate": 1e-05, "loss": 0.2613, "step": 15990 }, { "epoch": 0.05482719154564707, "grad_norm": 1.1756526231765747, "learning_rate": 1e-05, "loss": 0.2568, "step": 16000 }, { "epoch": 0.05482719154564707, "eval_cer": 13.300405715293703, "eval_loss": 0.2448866218328476, "eval_normalized_cer": 9.672262190247801, "eval_runtime": 228.492, "eval_samples_per_second": 2.241, "eval_steps_per_second": 0.035, "step": 16000 }, { "epoch": 0.054861458540363096, "grad_norm": 1.2805075645446777, "learning_rate": 1e-05, "loss": 0.2613, "step": 16010 }, { "epoch": 0.05489572553507912, "grad_norm": 1.01006019115448, "learning_rate": 1e-05, "loss": 0.2767, "step": 16020 }, { "epoch": 0.05492999252979515, "grad_norm": 1.2456789016723633, "learning_rate": 1e-05, "loss": 0.2519, "step": 16030 }, { "epoch": 0.05496425952451118, "grad_norm": 1.2959610223770142, "learning_rate": 1e-05, "loss": 0.2583, "step": 16040 }, { "epoch": 0.05499852651922721, "grad_norm": 1.1990777254104614, "learning_rate": 1e-05, "loss": 0.2465, "step": 16050 }, { "epoch": 
0.05503279351394324, "grad_norm": 1.0876649618148804, "learning_rate": 1e-05, "loss": 0.2602, "step": 16060 }, { "epoch": 0.05506706050865927, "grad_norm": 0.9991684556007385, "learning_rate": 1e-05, "loss": 0.2363, "step": 16070 }, { "epoch": 0.0551013275033753, "grad_norm": 1.1685731410980225, "learning_rate": 1e-05, "loss": 0.2489, "step": 16080 }, { "epoch": 0.05513559449809133, "grad_norm": 1.086849331855774, "learning_rate": 1e-05, "loss": 0.2525, "step": 16090 }, { "epoch": 0.05516986149280736, "grad_norm": 1.0985273122787476, "learning_rate": 1e-05, "loss": 0.2482, "step": 16100 }, { "epoch": 0.05520412848752339, "grad_norm": 1.112008810043335, "learning_rate": 1e-05, "loss": 0.244, "step": 16110 }, { "epoch": 0.05523839548223942, "grad_norm": 1.068474531173706, "learning_rate": 1e-05, "loss": 0.2349, "step": 16120 }, { "epoch": 0.05527266247695545, "grad_norm": 1.001871943473816, "learning_rate": 1e-05, "loss": 0.242, "step": 16130 }, { "epoch": 0.05530692947167148, "grad_norm": 1.1889883279800415, "learning_rate": 1e-05, "loss": 0.2293, "step": 16140 }, { "epoch": 0.05534119646638751, "grad_norm": 1.1890829801559448, "learning_rate": 1e-05, "loss": 0.2399, "step": 16150 }, { "epoch": 0.05537546346110354, "grad_norm": 1.0574150085449219, "learning_rate": 1e-05, "loss": 0.2379, "step": 16160 }, { "epoch": 0.05540973045581957, "grad_norm": 1.1458338499069214, "learning_rate": 1e-05, "loss": 0.2501, "step": 16170 }, { "epoch": 0.05544399745053559, "grad_norm": 1.0738166570663452, "learning_rate": 1e-05, "loss": 0.2292, "step": 16180 }, { "epoch": 0.05547826444525162, "grad_norm": 1.1803652048110962, "learning_rate": 1e-05, "loss": 0.2286, "step": 16190 }, { "epoch": 0.05551253143996765, "grad_norm": 1.02376127243042, "learning_rate": 1e-05, "loss": 0.2495, "step": 16200 }, { "epoch": 0.05554679843468368, "grad_norm": 0.9988088011741638, "learning_rate": 1e-05, "loss": 0.2191, "step": 16210 }, { "epoch": 0.05558106542939971, "grad_norm": 1.2596487998962402, 
"learning_rate": 1e-05, "loss": 0.2373, "step": 16220 }, { "epoch": 0.05561533242411574, "grad_norm": 1.1729799509048462, "learning_rate": 1e-05, "loss": 0.2355, "step": 16230 }, { "epoch": 0.05564959941883177, "grad_norm": 1.0790057182312012, "learning_rate": 1e-05, "loss": 0.2211, "step": 16240 }, { "epoch": 0.0556838664135478, "grad_norm": 1.044366717338562, "learning_rate": 1e-05, "loss": 0.226, "step": 16250 }, { "epoch": 0.05571813340826383, "grad_norm": 1.1206332445144653, "learning_rate": 1e-05, "loss": 0.2121, "step": 16260 }, { "epoch": 0.05575240040297986, "grad_norm": 1.0622124671936035, "learning_rate": 1e-05, "loss": 0.2179, "step": 16270 }, { "epoch": 0.05578666739769589, "grad_norm": 1.1448779106140137, "learning_rate": 1e-05, "loss": 0.2266, "step": 16280 }, { "epoch": 0.05582093439241192, "grad_norm": 0.9783304929733276, "learning_rate": 1e-05, "loss": 0.2254, "step": 16290 }, { "epoch": 0.05585520138712795, "grad_norm": 1.2645550966262817, "learning_rate": 1e-05, "loss": 0.2235, "step": 16300 }, { "epoch": 0.05588946838184398, "grad_norm": 1.1020660400390625, "learning_rate": 1e-05, "loss": 0.2363, "step": 16310 }, { "epoch": 0.05592373537656001, "grad_norm": 1.1464896202087402, "learning_rate": 1e-05, "loss": 0.2407, "step": 16320 }, { "epoch": 0.05595800237127604, "grad_norm": 1.2940075397491455, "learning_rate": 1e-05, "loss": 0.2315, "step": 16330 }, { "epoch": 0.05599226936599206, "grad_norm": 1.0869808197021484, "learning_rate": 1e-05, "loss": 0.2176, "step": 16340 }, { "epoch": 0.05602653636070809, "grad_norm": 1.025872826576233, "learning_rate": 1e-05, "loss": 0.2138, "step": 16350 }, { "epoch": 0.05606080335542412, "grad_norm": 1.0538456439971924, "learning_rate": 1e-05, "loss": 0.2204, "step": 16360 }, { "epoch": 0.05609507035014015, "grad_norm": 1.0765165090560913, "learning_rate": 1e-05, "loss": 0.2327, "step": 16370 }, { "epoch": 0.05612933734485618, "grad_norm": 1.1909908056259155, "learning_rate": 1e-05, "loss": 0.239, "step": 
16380 }, { "epoch": 0.05616360433957221, "grad_norm": 1.249849557876587, "learning_rate": 1e-05, "loss": 0.2318, "step": 16390 }, { "epoch": 0.05619787133428824, "grad_norm": 1.0325443744659424, "learning_rate": 1e-05, "loss": 0.2372, "step": 16400 }, { "epoch": 0.05623213832900427, "grad_norm": 2.3129172325134277, "learning_rate": 1e-05, "loss": 0.2334, "step": 16410 }, { "epoch": 0.0562664053237203, "grad_norm": 1.1196421384811401, "learning_rate": 1e-05, "loss": 0.2389, "step": 16420 }, { "epoch": 0.05630067231843633, "grad_norm": 1.170823335647583, "learning_rate": 1e-05, "loss": 0.2435, "step": 16430 }, { "epoch": 0.05633493931315236, "grad_norm": 1.2110240459442139, "learning_rate": 1e-05, "loss": 0.2499, "step": 16440 }, { "epoch": 0.05636920630786839, "grad_norm": 1.2487872838974, "learning_rate": 1e-05, "loss": 0.2556, "step": 16450 }, { "epoch": 0.05640347330258442, "grad_norm": 1.0886249542236328, "learning_rate": 1e-05, "loss": 0.2297, "step": 16460 }, { "epoch": 0.05643774029730045, "grad_norm": 1.274640679359436, "learning_rate": 1e-05, "loss": 0.236, "step": 16470 }, { "epoch": 0.05647200729201648, "grad_norm": 1.1046870946884155, "learning_rate": 1e-05, "loss": 0.2497, "step": 16480 }, { "epoch": 0.05650627428673251, "grad_norm": 1.0462591648101807, "learning_rate": 1e-05, "loss": 0.2298, "step": 16490 }, { "epoch": 0.05654054128144854, "grad_norm": 1.1058859825134277, "learning_rate": 1e-05, "loss": 0.2367, "step": 16500 }, { "epoch": 0.05654054128144854, "eval_cer": 13.247486329158582, "eval_loss": 0.24819624423980713, "eval_normalized_cer": 9.602318145483613, "eval_runtime": 228.0591, "eval_samples_per_second": 2.245, "eval_steps_per_second": 0.035, "step": 16500 }, { "epoch": 0.05657480827616456, "grad_norm": 1.1613426208496094, "learning_rate": 1e-05, "loss": 0.2318, "step": 16510 }, { "epoch": 0.05660907527088059, "grad_norm": 1.1319721937179565, "learning_rate": 1e-05, "loss": 0.2461, "step": 16520 }, { "epoch": 0.05664334226559662, 
"grad_norm": 1.2690738439559937, "learning_rate": 1e-05, "loss": 0.2392, "step": 16530 }, { "epoch": 0.05667760926031265, "grad_norm": 1.119575023651123, "learning_rate": 1e-05, "loss": 0.2467, "step": 16540 }, { "epoch": 0.05671187625502868, "grad_norm": 1.119841456413269, "learning_rate": 1e-05, "loss": 0.223, "step": 16550 }, { "epoch": 0.05674614324974471, "grad_norm": 1.0248748064041138, "learning_rate": 1e-05, "loss": 0.2392, "step": 16560 }, { "epoch": 0.05678041024446074, "grad_norm": 1.0252872705459595, "learning_rate": 1e-05, "loss": 0.2426, "step": 16570 }, { "epoch": 0.05681467723917677, "grad_norm": 1.1694291830062866, "learning_rate": 1e-05, "loss": 0.2078, "step": 16580 }, { "epoch": 0.0568489442338928, "grad_norm": 1.152016520500183, "learning_rate": 1e-05, "loss": 0.2423, "step": 16590 }, { "epoch": 0.05688321122860883, "grad_norm": 0.9418520927429199, "learning_rate": 1e-05, "loss": 0.2214, "step": 16600 }, { "epoch": 0.05691747822332486, "grad_norm": 1.1249022483825684, "learning_rate": 1e-05, "loss": 0.2222, "step": 16610 }, { "epoch": 0.05695174521804089, "grad_norm": 1.0502556562423706, "learning_rate": 1e-05, "loss": 0.2366, "step": 16620 }, { "epoch": 0.05698601221275692, "grad_norm": 1.0879663228988647, "learning_rate": 1e-05, "loss": 0.2253, "step": 16630 }, { "epoch": 0.05702027920747295, "grad_norm": 1.0701017379760742, "learning_rate": 1e-05, "loss": 0.2369, "step": 16640 }, { "epoch": 0.05705454620218898, "grad_norm": 1.0117312669754028, "learning_rate": 1e-05, "loss": 0.2419, "step": 16650 }, { "epoch": 0.05708881319690501, "grad_norm": 1.1599793434143066, "learning_rate": 1e-05, "loss": 0.2444, "step": 16660 }, { "epoch": 0.05712308019162103, "grad_norm": 1.1197978258132935, "learning_rate": 1e-05, "loss": 0.2388, "step": 16670 }, { "epoch": 0.05715734718633706, "grad_norm": 1.1201450824737549, "learning_rate": 1e-05, "loss": 0.2401, "step": 16680 }, { "epoch": 0.05719161418105309, "grad_norm": 1.0935217142105103, "learning_rate": 
1e-05, "loss": 0.2191, "step": 16690 }, { "epoch": 0.05722588117576912, "grad_norm": 1.237023949623108, "learning_rate": 1e-05, "loss": 0.2509, "step": 16700 }, { "epoch": 0.05726014817048515, "grad_norm": 1.0313085317611694, "learning_rate": 1e-05, "loss": 0.2271, "step": 16710 }, { "epoch": 0.05729441516520118, "grad_norm": 1.2026563882827759, "learning_rate": 1e-05, "loss": 0.2344, "step": 16720 }, { "epoch": 0.05732868215991721, "grad_norm": 1.2139136791229248, "learning_rate": 1e-05, "loss": 0.2425, "step": 16730 }, { "epoch": 0.05736294915463324, "grad_norm": 1.06145179271698, "learning_rate": 1e-05, "loss": 0.2547, "step": 16740 }, { "epoch": 0.05739721614934927, "grad_norm": 1.1620399951934814, "learning_rate": 1e-05, "loss": 0.228, "step": 16750 }, { "epoch": 0.0574314831440653, "grad_norm": 1.0586286783218384, "learning_rate": 1e-05, "loss": 0.2202, "step": 16760 }, { "epoch": 0.05746575013878133, "grad_norm": 1.0937540531158447, "learning_rate": 1e-05, "loss": 0.2415, "step": 16770 }, { "epoch": 0.05750001713349736, "grad_norm": 1.0289047956466675, "learning_rate": 1e-05, "loss": 0.2327, "step": 16780 }, { "epoch": 0.05753428412821339, "grad_norm": 1.0515446662902832, "learning_rate": 1e-05, "loss": 0.2306, "step": 16790 }, { "epoch": 0.05756855112292942, "grad_norm": 0.9734529852867126, "learning_rate": 1e-05, "loss": 0.2196, "step": 16800 }, { "epoch": 0.05760281811764545, "grad_norm": 1.0374795198440552, "learning_rate": 1e-05, "loss": 0.2263, "step": 16810 }, { "epoch": 0.05763708511236148, "grad_norm": 1.0560572147369385, "learning_rate": 1e-05, "loss": 0.2466, "step": 16820 }, { "epoch": 0.0576713521070775, "grad_norm": 0.9350127577781677, "learning_rate": 1e-05, "loss": 0.229, "step": 16830 }, { "epoch": 0.05770561910179353, "grad_norm": 1.180124282836914, "learning_rate": 1e-05, "loss": 0.2395, "step": 16840 }, { "epoch": 0.05773988609650956, "grad_norm": 1.17545747756958, "learning_rate": 1e-05, "loss": 0.2427, "step": 16850 }, { "epoch": 
0.05777415309122559, "grad_norm": 1.1822388172149658, "learning_rate": 1e-05, "loss": 0.2525, "step": 16860 }, { "epoch": 0.05780842008594162, "grad_norm": 1.0005474090576172, "learning_rate": 1e-05, "loss": 0.2215, "step": 16870 }, { "epoch": 0.05784268708065765, "grad_norm": 1.2070783376693726, "learning_rate": 1e-05, "loss": 0.2476, "step": 16880 }, { "epoch": 0.05787695407537368, "grad_norm": 1.3960411548614502, "learning_rate": 1e-05, "loss": 0.2535, "step": 16890 }, { "epoch": 0.05791122107008971, "grad_norm": 1.1246318817138672, "learning_rate": 1e-05, "loss": 0.2532, "step": 16900 }, { "epoch": 0.05794548806480574, "grad_norm": 1.042833685874939, "learning_rate": 1e-05, "loss": 0.2453, "step": 16910 }, { "epoch": 0.05797975505952177, "grad_norm": 1.1697344779968262, "learning_rate": 1e-05, "loss": 0.2396, "step": 16920 }, { "epoch": 0.0580140220542378, "grad_norm": 1.1674904823303223, "learning_rate": 1e-05, "loss": 0.2413, "step": 16930 }, { "epoch": 0.05804828904895383, "grad_norm": 1.2486639022827148, "learning_rate": 1e-05, "loss": 0.2397, "step": 16940 }, { "epoch": 0.05808255604366986, "grad_norm": 1.0194487571716309, "learning_rate": 1e-05, "loss": 0.2323, "step": 16950 }, { "epoch": 0.05811682303838589, "grad_norm": 1.1312390565872192, "learning_rate": 1e-05, "loss": 0.238, "step": 16960 }, { "epoch": 0.05815109003310192, "grad_norm": 1.0825895071029663, "learning_rate": 1e-05, "loss": 0.2523, "step": 16970 }, { "epoch": 0.05818535702781795, "grad_norm": 1.1746137142181396, "learning_rate": 1e-05, "loss": 0.2389, "step": 16980 }, { "epoch": 0.05821962402253397, "grad_norm": 1.1029731035232544, "learning_rate": 1e-05, "loss": 0.2347, "step": 16990 }, { "epoch": 0.05825389101725, "grad_norm": 1.1937546730041504, "learning_rate": 1e-05, "loss": 0.2492, "step": 17000 }, { "epoch": 0.05825389101725, "eval_cer": 12.621273593226318, "eval_loss": 0.24555271863937378, "eval_normalized_cer": 9.272581934452438, "eval_runtime": 227.9675, 
"eval_samples_per_second": 2.246, "eval_steps_per_second": 0.035, "step": 17000 }, { "epoch": 0.05828815801196603, "grad_norm": 1.1056500673294067, "learning_rate": 1e-05, "loss": 0.2267, "step": 17010 }, { "epoch": 0.05832242500668206, "grad_norm": 1.1719837188720703, "learning_rate": 1e-05, "loss": 0.2567, "step": 17020 }, { "epoch": 0.05835669200139809, "grad_norm": 1.0683894157409668, "learning_rate": 1e-05, "loss": 0.2185, "step": 17030 }, { "epoch": 0.05839095899611412, "grad_norm": 1.0233598947525024, "learning_rate": 1e-05, "loss": 0.2233, "step": 17040 }, { "epoch": 0.05842522599083015, "grad_norm": 0.9998723864555359, "learning_rate": 1e-05, "loss": 0.236, "step": 17050 }, { "epoch": 0.05845949298554618, "grad_norm": 1.1496772766113281, "learning_rate": 1e-05, "loss": 0.2409, "step": 17060 }, { "epoch": 0.05849375998026221, "grad_norm": 1.112373948097229, "learning_rate": 1e-05, "loss": 0.2435, "step": 17070 }, { "epoch": 0.05852802697497824, "grad_norm": 0.9641957879066467, "learning_rate": 1e-05, "loss": 0.2398, "step": 17080 }, { "epoch": 0.05856229396969427, "grad_norm": 1.1222591400146484, "learning_rate": 1e-05, "loss": 0.2295, "step": 17090 }, { "epoch": 0.0585965609644103, "grad_norm": 1.1070595979690552, "learning_rate": 1e-05, "loss": 0.2393, "step": 17100 }, { "epoch": 0.05863082795912633, "grad_norm": 1.2548716068267822, "learning_rate": 1e-05, "loss": 0.2446, "step": 17110 }, { "epoch": 0.05866509495384236, "grad_norm": 1.2774968147277832, "learning_rate": 1e-05, "loss": 0.2496, "step": 17120 }, { "epoch": 0.05869936194855839, "grad_norm": 1.0819348096847534, "learning_rate": 1e-05, "loss": 0.2413, "step": 17130 }, { "epoch": 0.05873362894327442, "grad_norm": 1.1262927055358887, "learning_rate": 1e-05, "loss": 0.2231, "step": 17140 }, { "epoch": 0.05876789593799045, "grad_norm": 1.0115349292755127, "learning_rate": 1e-05, "loss": 0.2367, "step": 17150 }, { "epoch": 0.05880216293270647, "grad_norm": 1.1211316585540771, "learning_rate": 1e-05, 
"loss": 0.2455, "step": 17160 }, { "epoch": 0.0588364299274225, "grad_norm": 1.213244080543518, "learning_rate": 1e-05, "loss": 0.2067, "step": 17170 }, { "epoch": 0.05887069692213853, "grad_norm": 1.1806918382644653, "learning_rate": 1e-05, "loss": 0.2359, "step": 17180 }, { "epoch": 0.05890496391685456, "grad_norm": 1.0859711170196533, "learning_rate": 1e-05, "loss": 0.2327, "step": 17190 }, { "epoch": 0.05893923091157059, "grad_norm": 1.084893822669983, "learning_rate": 1e-05, "loss": 0.2515, "step": 17200 }, { "epoch": 0.05897349790628662, "grad_norm": 1.1239533424377441, "learning_rate": 1e-05, "loss": 0.219, "step": 17210 }, { "epoch": 0.05900776490100265, "grad_norm": 0.9819833040237427, "learning_rate": 1e-05, "loss": 0.2283, "step": 17220 }, { "epoch": 0.05904203189571868, "grad_norm": 1.075325608253479, "learning_rate": 1e-05, "loss": 0.219, "step": 17230 }, { "epoch": 0.05907629889043471, "grad_norm": 1.141569972038269, "learning_rate": 1e-05, "loss": 0.2436, "step": 17240 }, { "epoch": 0.05911056588515074, "grad_norm": 1.0299127101898193, "learning_rate": 1e-05, "loss": 0.2275, "step": 17250 }, { "epoch": 0.05914483287986677, "grad_norm": 1.084275245666504, "learning_rate": 1e-05, "loss": 0.2119, "step": 17260 }, { "epoch": 0.0591790998745828, "grad_norm": 1.1073039770126343, "learning_rate": 1e-05, "loss": 0.2272, "step": 17270 }, { "epoch": 0.05921336686929883, "grad_norm": 1.2507880926132202, "learning_rate": 1e-05, "loss": 0.2328, "step": 17280 }, { "epoch": 0.05924763386401486, "grad_norm": 0.9464664459228516, "learning_rate": 1e-05, "loss": 0.2265, "step": 17290 }, { "epoch": 0.05928190085873089, "grad_norm": 1.1672245264053345, "learning_rate": 1e-05, "loss": 0.2426, "step": 17300 }, { "epoch": 0.05931616785344692, "grad_norm": 1.1788960695266724, "learning_rate": 1e-05, "loss": 0.2235, "step": 17310 }, { "epoch": 0.05935043484816294, "grad_norm": 1.2637144327163696, "learning_rate": 1e-05, "loss": 0.2304, "step": 17320 }, { "epoch": 
0.05938470184287897, "grad_norm": 1.0590788125991821, "learning_rate": 1e-05, "loss": 0.2486, "step": 17330 }, { "epoch": 0.059418968837595, "grad_norm": 0.9958922266960144, "learning_rate": 1e-05, "loss": 0.2322, "step": 17340 }, { "epoch": 0.05945323583231103, "grad_norm": 1.1164788007736206, "learning_rate": 1e-05, "loss": 0.2338, "step": 17350 }, { "epoch": 0.05948750282702706, "grad_norm": 1.0397776365280151, "learning_rate": 1e-05, "loss": 0.2293, "step": 17360 }, { "epoch": 0.05952176982174309, "grad_norm": 1.0913232564926147, "learning_rate": 1e-05, "loss": 0.2352, "step": 17370 }, { "epoch": 0.05955603681645912, "grad_norm": 1.1954469680786133, "learning_rate": 1e-05, "loss": 0.2473, "step": 17380 }, { "epoch": 0.05959030381117515, "grad_norm": 0.9494290947914124, "learning_rate": 1e-05, "loss": 0.2181, "step": 17390 }, { "epoch": 0.05962457080589118, "grad_norm": 0.9757588505744934, "learning_rate": 1e-05, "loss": 0.214, "step": 17400 }, { "epoch": 0.05965883780060721, "grad_norm": 1.0311897993087769, "learning_rate": 1e-05, "loss": 0.2359, "step": 17410 }, { "epoch": 0.05969310479532324, "grad_norm": 1.087958574295044, "learning_rate": 1e-05, "loss": 0.2268, "step": 17420 }, { "epoch": 0.05972737179003927, "grad_norm": 1.053572654724121, "learning_rate": 1e-05, "loss": 0.2295, "step": 17430 }, { "epoch": 0.0597616387847553, "grad_norm": 1.2911629676818848, "learning_rate": 1e-05, "loss": 0.2391, "step": 17440 }, { "epoch": 0.05979590577947133, "grad_norm": 1.1952745914459229, "learning_rate": 1e-05, "loss": 0.2498, "step": 17450 }, { "epoch": 0.05983017277418736, "grad_norm": 1.3044365644454956, "learning_rate": 1e-05, "loss": 0.2454, "step": 17460 }, { "epoch": 0.05986443976890339, "grad_norm": 0.9195178151130676, "learning_rate": 1e-05, "loss": 0.2312, "step": 17470 }, { "epoch": 0.059898706763619414, "grad_norm": 1.1249533891677856, "learning_rate": 1e-05, "loss": 0.2393, "step": 17480 }, { "epoch": 0.059932973758335444, "grad_norm": 
1.0563664436340332, "learning_rate": 1e-05, "loss": 0.2279, "step": 17490 }, { "epoch": 0.05996724075305147, "grad_norm": 1.069501519203186, "learning_rate": 1e-05, "loss": 0.2615, "step": 17500 }, { "epoch": 0.05996724075305147, "eval_cer": 12.744752160874933, "eval_loss": 0.24637901782989502, "eval_normalized_cer": 9.102717825739408, "eval_runtime": 227.2598, "eval_samples_per_second": 2.253, "eval_steps_per_second": 0.035, "step": 17500 }, { "epoch": 0.0600015077477675, "grad_norm": 1.0828664302825928, "learning_rate": 1e-05, "loss": 0.2585, "step": 17510 }, { "epoch": 0.06003577474248353, "grad_norm": 1.144832968711853, "learning_rate": 1e-05, "loss": 0.2707, "step": 17520 }, { "epoch": 0.06007004173719956, "grad_norm": 1.1772241592407227, "learning_rate": 1e-05, "loss": 0.2484, "step": 17530 }, { "epoch": 0.06010430873191559, "grad_norm": 1.0854761600494385, "learning_rate": 1e-05, "loss": 0.2543, "step": 17540 }, { "epoch": 0.06013857572663162, "grad_norm": 0.9568967223167419, "learning_rate": 1e-05, "loss": 0.2328, "step": 17550 }, { "epoch": 0.06017284272134765, "grad_norm": 1.1131020784378052, "learning_rate": 1e-05, "loss": 0.2502, "step": 17560 }, { "epoch": 0.06020710971606368, "grad_norm": 1.1434942483901978, "learning_rate": 1e-05, "loss": 0.2459, "step": 17570 }, { "epoch": 0.06024137671077971, "grad_norm": 1.0522421598434448, "learning_rate": 1e-05, "loss": 0.2276, "step": 17580 }, { "epoch": 0.06027564370549574, "grad_norm": 0.9989955425262451, "learning_rate": 1e-05, "loss": 0.237, "step": 17590 }, { "epoch": 0.06030991070021177, "grad_norm": 1.0393595695495605, "learning_rate": 1e-05, "loss": 0.2534, "step": 17600 }, { "epoch": 0.0603441776949278, "grad_norm": 1.1289571523666382, "learning_rate": 1e-05, "loss": 0.2408, "step": 17610 }, { "epoch": 0.06037844468964383, "grad_norm": 1.4361021518707275, "learning_rate": 1e-05, "loss": 0.2517, "step": 17620 }, { "epoch": 0.06041271168435986, "grad_norm": 1.1056772470474243, "learning_rate": 1e-05, 
"loss": 0.2496, "step": 17630 }, { "epoch": 0.060446978679075884, "grad_norm": 1.1014364957809448, "learning_rate": 1e-05, "loss": 0.2345, "step": 17640 }, { "epoch": 0.060481245673791914, "grad_norm": 1.040041208267212, "learning_rate": 1e-05, "loss": 0.2478, "step": 17650 }, { "epoch": 0.060515512668507944, "grad_norm": 1.0661914348602295, "learning_rate": 1e-05, "loss": 0.243, "step": 17660 }, { "epoch": 0.060549779663223974, "grad_norm": 1.0410475730895996, "learning_rate": 1e-05, "loss": 0.2423, "step": 17670 }, { "epoch": 0.060584046657940004, "grad_norm": 1.140838861465454, "learning_rate": 1e-05, "loss": 0.2258, "step": 17680 }, { "epoch": 0.06061831365265603, "grad_norm": 1.060164451599121, "learning_rate": 1e-05, "loss": 0.2318, "step": 17690 }, { "epoch": 0.06065258064737206, "grad_norm": 0.982888400554657, "learning_rate": 1e-05, "loss": 0.227, "step": 17700 }, { "epoch": 0.06068684764208809, "grad_norm": 1.038228988647461, "learning_rate": 1e-05, "loss": 0.2193, "step": 17710 }, { "epoch": 0.06072111463680412, "grad_norm": 1.1862796545028687, "learning_rate": 1e-05, "loss": 0.229, "step": 17720 }, { "epoch": 0.06075538163152015, "grad_norm": 1.1852322816848755, "learning_rate": 1e-05, "loss": 0.2319, "step": 17730 }, { "epoch": 0.06078964862623618, "grad_norm": 1.2296260595321655, "learning_rate": 1e-05, "loss": 0.228, "step": 17740 }, { "epoch": 0.06082391562095221, "grad_norm": 0.94486004114151, "learning_rate": 1e-05, "loss": 0.2152, "step": 17750 }, { "epoch": 0.06085818261566824, "grad_norm": 1.045202612876892, "learning_rate": 1e-05, "loss": 0.2366, "step": 17760 }, { "epoch": 0.06089244961038427, "grad_norm": 1.0080078840255737, "learning_rate": 1e-05, "loss": 0.2116, "step": 17770 }, { "epoch": 0.0609267166051003, "grad_norm": 1.0263158082962036, "learning_rate": 1e-05, "loss": 0.2189, "step": 17780 }, { "epoch": 0.06096098359981633, "grad_norm": 1.137341022491455, "learning_rate": 1e-05, "loss": 0.2388, "step": 17790 }, { "epoch": 
0.06099525059453236, "grad_norm": 1.1267235279083252, "learning_rate": 1e-05, "loss": 0.2309, "step": 17800 }, { "epoch": 0.061029517589248385, "grad_norm": 1.1590142250061035, "learning_rate": 1e-05, "loss": 0.215, "step": 17810 }, { "epoch": 0.061063784583964414, "grad_norm": 1.0661910772323608, "learning_rate": 1e-05, "loss": 0.2269, "step": 17820 }, { "epoch": 0.061098051578680444, "grad_norm": 1.178369402885437, "learning_rate": 1e-05, "loss": 0.238, "step": 17830 }, { "epoch": 0.061132318573396474, "grad_norm": 1.1601570844650269, "learning_rate": 1e-05, "loss": 0.2183, "step": 17840 }, { "epoch": 0.061166585568112504, "grad_norm": 1.098443627357483, "learning_rate": 1e-05, "loss": 0.2478, "step": 17850 }, { "epoch": 0.061200852562828534, "grad_norm": 1.3805477619171143, "learning_rate": 1e-05, "loss": 0.2497, "step": 17860 }, { "epoch": 0.061235119557544564, "grad_norm": 1.1133577823638916, "learning_rate": 1e-05, "loss": 0.2335, "step": 17870 }, { "epoch": 0.061269386552260593, "grad_norm": 1.1253570318222046, "learning_rate": 1e-05, "loss": 0.2199, "step": 17880 }, { "epoch": 0.06130365354697662, "grad_norm": 1.1934068202972412, "learning_rate": 1e-05, "loss": 0.2304, "step": 17890 }, { "epoch": 0.06133792054169265, "grad_norm": 1.1207107305526733, "learning_rate": 1e-05, "loss": 0.2419, "step": 17900 }, { "epoch": 0.06137218753640868, "grad_norm": 0.9266690015792847, "learning_rate": 1e-05, "loss": 0.234, "step": 17910 }, { "epoch": 0.06140645453112471, "grad_norm": 1.296251893043518, "learning_rate": 1e-05, "loss": 0.2322, "step": 17920 }, { "epoch": 0.06144072152584074, "grad_norm": 1.2122479677200317, "learning_rate": 1e-05, "loss": 0.2467, "step": 17930 }, { "epoch": 0.06147498852055677, "grad_norm": 1.2585302591323853, "learning_rate": 1e-05, "loss": 0.2653, "step": 17940 }, { "epoch": 0.0615092555152728, "grad_norm": 1.0809286832809448, "learning_rate": 1e-05, "loss": 0.231, "step": 17950 }, { "epoch": 0.06154352250998883, "grad_norm": 
1.1828172206878662, "learning_rate": 1e-05, "loss": 0.245, "step": 17960 }, { "epoch": 0.061577789504704855, "grad_norm": 1.2036045789718628, "learning_rate": 1e-05, "loss": 0.2543, "step": 17970 }, { "epoch": 0.061612056499420885, "grad_norm": 1.0810980796813965, "learning_rate": 1e-05, "loss": 0.2474, "step": 17980 }, { "epoch": 0.061646323494136915, "grad_norm": 1.0696234703063965, "learning_rate": 1e-05, "loss": 0.2277, "step": 17990 }, { "epoch": 0.061680590488852945, "grad_norm": 1.0389044284820557, "learning_rate": 1e-05, "loss": 0.2471, "step": 18000 }, { "epoch": 0.061680590488852945, "eval_cer": 13.08872817075322, "eval_loss": 0.24683812260627747, "eval_normalized_cer": 9.30255795363709, "eval_runtime": 227.6579, "eval_samples_per_second": 2.249, "eval_steps_per_second": 0.035, "step": 18000 }, { "epoch": 0.061714857483568974, "grad_norm": 1.071568250656128, "learning_rate": 1e-05, "loss": 0.2496, "step": 18010 }, { "epoch": 0.061749124478285004, "grad_norm": 1.141601800918579, "learning_rate": 1e-05, "loss": 0.2607, "step": 18020 }, { "epoch": 0.061783391473001034, "grad_norm": 1.1443320512771606, "learning_rate": 1e-05, "loss": 0.2493, "step": 18030 }, { "epoch": 0.061817658467717064, "grad_norm": 1.1623589992523193, "learning_rate": 1e-05, "loss": 0.2597, "step": 18040 }, { "epoch": 0.061851925462433094, "grad_norm": 1.1392161846160889, "learning_rate": 1e-05, "loss": 0.2445, "step": 18050 }, { "epoch": 0.061886192457149124, "grad_norm": 1.1709184646606445, "learning_rate": 1e-05, "loss": 0.2671, "step": 18060 }, { "epoch": 0.061920459451865154, "grad_norm": 1.2600747346878052, "learning_rate": 1e-05, "loss": 0.2624, "step": 18070 }, { "epoch": 0.06195472644658118, "grad_norm": 0.9713681936264038, "learning_rate": 1e-05, "loss": 0.2399, "step": 18080 }, { "epoch": 0.06198899344129721, "grad_norm": 1.1438798904418945, "learning_rate": 1e-05, "loss": 0.2584, "step": 18090 }, { "epoch": 0.06202326043601324, "grad_norm": 0.9961438775062561, 
"learning_rate": 1e-05, "loss": 0.251, "step": 18100 }, { "epoch": 0.06205752743072927, "grad_norm": 1.0935770273208618, "learning_rate": 1e-05, "loss": 0.2478, "step": 18110 }, { "epoch": 0.0620917944254453, "grad_norm": 1.1075562238693237, "learning_rate": 1e-05, "loss": 0.2538, "step": 18120 }, { "epoch": 0.062126061420161326, "grad_norm": 1.1211317777633667, "learning_rate": 1e-05, "loss": 0.2524, "step": 18130 }, { "epoch": 0.062160328414877355, "grad_norm": 1.1476017236709595, "learning_rate": 1e-05, "loss": 0.242, "step": 18140 }, { "epoch": 0.062194595409593385, "grad_norm": 1.217791199684143, "learning_rate": 1e-05, "loss": 0.258, "step": 18150 }, { "epoch": 0.062228862404309415, "grad_norm": 1.0843958854675293, "learning_rate": 1e-05, "loss": 0.2562, "step": 18160 }, { "epoch": 0.062263129399025445, "grad_norm": 0.9931436777114868, "learning_rate": 1e-05, "loss": 0.2478, "step": 18170 }, { "epoch": 0.062297396393741475, "grad_norm": 1.0133415460586548, "learning_rate": 1e-05, "loss": 0.2526, "step": 18180 }, { "epoch": 0.062331663388457505, "grad_norm": 1.2083938121795654, "learning_rate": 1e-05, "loss": 0.2604, "step": 18190 }, { "epoch": 0.062365930383173535, "grad_norm": 1.0791544914245605, "learning_rate": 1e-05, "loss": 0.2543, "step": 18200 }, { "epoch": 0.062400197377889564, "grad_norm": 1.117967128753662, "learning_rate": 1e-05, "loss": 0.2551, "step": 18210 }, { "epoch": 0.062434464372605594, "grad_norm": 1.1042882204055786, "learning_rate": 1e-05, "loss": 0.2492, "step": 18220 }, { "epoch": 0.062468731367321624, "grad_norm": 1.3039363622665405, "learning_rate": 1e-05, "loss": 0.2631, "step": 18230 }, { "epoch": 0.06250299836203765, "grad_norm": 1.123572587966919, "learning_rate": 1e-05, "loss": 0.2432, "step": 18240 }, { "epoch": 0.06253726535675368, "grad_norm": 1.1475626230239868, "learning_rate": 1e-05, "loss": 0.2355, "step": 18250 }, { "epoch": 0.06257153235146971, "grad_norm": 1.0993095636367798, "learning_rate": 1e-05, "loss": 0.257, 
"step": 18260 }, { "epoch": 0.06260579934618574, "grad_norm": 1.0722532272338867, "learning_rate": 1e-05, "loss": 0.2385, "step": 18270 }, { "epoch": 0.06264006634090177, "grad_norm": 1.051962971687317, "learning_rate": 1e-05, "loss": 0.2436, "step": 18280 }, { "epoch": 0.0626743333356178, "grad_norm": 1.1819945573806763, "learning_rate": 1e-05, "loss": 0.2556, "step": 18290 }, { "epoch": 0.06270860033033383, "grad_norm": 1.0978039503097534, "learning_rate": 1e-05, "loss": 0.2352, "step": 18300 }, { "epoch": 0.06274286732504986, "grad_norm": 1.1181819438934326, "learning_rate": 1e-05, "loss": 0.2395, "step": 18310 }, { "epoch": 0.06277713431976589, "grad_norm": 1.1631795167922974, "learning_rate": 1e-05, "loss": 0.2475, "step": 18320 }, { "epoch": 0.06281140131448192, "grad_norm": 1.160852313041687, "learning_rate": 1e-05, "loss": 0.2481, "step": 18330 }, { "epoch": 0.06284566830919795, "grad_norm": 0.9235048890113831, "learning_rate": 1e-05, "loss": 0.2289, "step": 18340 }, { "epoch": 0.06287993530391398, "grad_norm": 1.215062141418457, "learning_rate": 1e-05, "loss": 0.2366, "step": 18350 }, { "epoch": 0.06291420229863001, "grad_norm": 1.2124155759811401, "learning_rate": 1e-05, "loss": 0.2343, "step": 18360 }, { "epoch": 0.06294846929334603, "grad_norm": 1.0954176187515259, "learning_rate": 1e-05, "loss": 0.2468, "step": 18370 }, { "epoch": 0.06298273628806206, "grad_norm": 1.1772414445877075, "learning_rate": 1e-05, "loss": 0.2383, "step": 18380 }, { "epoch": 0.0630170032827781, "grad_norm": 1.0725330114364624, "learning_rate": 1e-05, "loss": 0.2434, "step": 18390 }, { "epoch": 0.06305127027749412, "grad_norm": 1.178328037261963, "learning_rate": 1e-05, "loss": 0.2377, "step": 18400 }, { "epoch": 0.06308553727221015, "grad_norm": 1.1152503490447998, "learning_rate": 1e-05, "loss": 0.2386, "step": 18410 }, { "epoch": 0.06311980426692618, "grad_norm": 1.273404836654663, "learning_rate": 1e-05, "loss": 0.2674, "step": 18420 }, { "epoch": 0.06315407126164221, 
"grad_norm": 1.1588289737701416, "learning_rate": 1e-05, "loss": 0.2365, "step": 18430 }, { "epoch": 0.06318833825635824, "grad_norm": 1.245696783065796, "learning_rate": 1e-05, "loss": 0.2304, "step": 18440 }, { "epoch": 0.06322260525107427, "grad_norm": 1.0686434507369995, "learning_rate": 1e-05, "loss": 0.2459, "step": 18450 }, { "epoch": 0.0632568722457903, "grad_norm": 1.135810375213623, "learning_rate": 1e-05, "loss": 0.237, "step": 18460 }, { "epoch": 0.06329113924050633, "grad_norm": 0.9825658202171326, "learning_rate": 1e-05, "loss": 0.2323, "step": 18470 }, { "epoch": 0.06332540623522236, "grad_norm": 0.9794711470603943, "learning_rate": 1e-05, "loss": 0.2259, "step": 18480 }, { "epoch": 0.06335967322993839, "grad_norm": 1.1655373573303223, "learning_rate": 1e-05, "loss": 0.24, "step": 18490 }, { "epoch": 0.06339394022465442, "grad_norm": 1.1974196434020996, "learning_rate": 1e-05, "loss": 0.2414, "step": 18500 }, { "epoch": 0.06339394022465442, "eval_cer": 12.885870523901923, "eval_loss": 0.24566347897052765, "eval_normalized_cer": 9.05275779376499, "eval_runtime": 227.2597, "eval_samples_per_second": 2.253, "eval_steps_per_second": 0.035, "step": 18500 }, { "epoch": 0.06342820721937045, "grad_norm": 1.1439276933670044, "learning_rate": 1e-05, "loss": 0.2357, "step": 18510 }, { "epoch": 0.06346247421408648, "grad_norm": 1.0722652673721313, "learning_rate": 1e-05, "loss": 0.2413, "step": 18520 }, { "epoch": 0.06349674120880251, "grad_norm": 1.071835994720459, "learning_rate": 1e-05, "loss": 0.215, "step": 18530 }, { "epoch": 0.06353100820351854, "grad_norm": 1.0905380249023438, "learning_rate": 1e-05, "loss": 0.2262, "step": 18540 }, { "epoch": 0.06356527519823456, "grad_norm": 0.9569874405860901, "learning_rate": 1e-05, "loss": 0.2111, "step": 18550 }, { "epoch": 0.0635995421929506, "grad_norm": 1.1387230157852173, "learning_rate": 1e-05, "loss": 0.2278, "step": 18560 }, { "epoch": 0.06363380918766662, "grad_norm": 1.1088684797286987, "learning_rate": 
1e-05, "loss": 0.236, "step": 18570 }, { "epoch": 0.06366807618238265, "grad_norm": 1.1542891263961792, "learning_rate": 1e-05, "loss": 0.2248, "step": 18580 }, { "epoch": 0.06370234317709868, "grad_norm": 1.0670695304870605, "learning_rate": 1e-05, "loss": 0.2258, "step": 18590 }, { "epoch": 0.06373661017181471, "grad_norm": 1.1448674201965332, "learning_rate": 1e-05, "loss": 0.2445, "step": 18600 }, { "epoch": 0.06377087716653074, "grad_norm": 1.283347487449646, "learning_rate": 1e-05, "loss": 0.2318, "step": 18610 }, { "epoch": 0.06380514416124677, "grad_norm": 1.2149467468261719, "learning_rate": 1e-05, "loss": 0.2383, "step": 18620 }, { "epoch": 0.0638394111559628, "grad_norm": 1.0885416269302368, "learning_rate": 1e-05, "loss": 0.2213, "step": 18630 }, { "epoch": 0.06387367815067883, "grad_norm": 1.1276935338974, "learning_rate": 1e-05, "loss": 0.2265, "step": 18640 }, { "epoch": 0.06390794514539486, "grad_norm": 1.15458345413208, "learning_rate": 1e-05, "loss": 0.2366, "step": 18650 }, { "epoch": 0.0639422121401109, "grad_norm": 1.1190539598464966, "learning_rate": 1e-05, "loss": 0.2243, "step": 18660 }, { "epoch": 0.06397647913482692, "grad_norm": 1.1128675937652588, "learning_rate": 1e-05, "loss": 0.2351, "step": 18670 }, { "epoch": 0.06401074612954295, "grad_norm": 1.0355507135391235, "learning_rate": 1e-05, "loss": 0.2245, "step": 18680 }, { "epoch": 0.06404501312425898, "grad_norm": 1.2043906450271606, "learning_rate": 1e-05, "loss": 0.2343, "step": 18690 }, { "epoch": 0.064079280118975, "grad_norm": 1.089877724647522, "learning_rate": 1e-05, "loss": 0.2363, "step": 18700 }, { "epoch": 0.06411354711369104, "grad_norm": 1.1575262546539307, "learning_rate": 1e-05, "loss": 0.2482, "step": 18710 }, { "epoch": 0.06414781410840706, "grad_norm": 1.0474443435668945, "learning_rate": 1e-05, "loss": 0.2422, "step": 18720 }, { "epoch": 0.0641820811031231, "grad_norm": 0.9865725040435791, "learning_rate": 1e-05, "loss": 0.2245, "step": 18730 }, { "epoch": 
0.06421634809783912, "grad_norm": 1.126033067703247, "learning_rate": 1e-05, "loss": 0.2336, "step": 18740 }, { "epoch": 0.06425061509255516, "grad_norm": 1.0604937076568604, "learning_rate": 1e-05, "loss": 0.2341, "step": 18750 }, { "epoch": 0.06428488208727118, "grad_norm": 1.0804321765899658, "learning_rate": 1e-05, "loss": 0.258, "step": 18760 }, { "epoch": 0.06431914908198721, "grad_norm": 1.0379873514175415, "learning_rate": 1e-05, "loss": 0.2503, "step": 18770 }, { "epoch": 0.06435341607670324, "grad_norm": 1.169416069984436, "learning_rate": 1e-05, "loss": 0.2207, "step": 18780 }, { "epoch": 0.06438768307141927, "grad_norm": 1.037367820739746, "learning_rate": 1e-05, "loss": 0.2359, "step": 18790 }, { "epoch": 0.0644219500661353, "grad_norm": 1.0716742277145386, "learning_rate": 1e-05, "loss": 0.2641, "step": 18800 }, { "epoch": 0.06445621706085133, "grad_norm": 1.097924828529358, "learning_rate": 1e-05, "loss": 0.255, "step": 18810 }, { "epoch": 0.06449048405556736, "grad_norm": 1.2333221435546875, "learning_rate": 1e-05, "loss": 0.2481, "step": 18820 }, { "epoch": 0.0645247510502834, "grad_norm": 1.1249961853027344, "learning_rate": 1e-05, "loss": 0.2337, "step": 18830 }, { "epoch": 0.06455901804499942, "grad_norm": 1.2257179021835327, "learning_rate": 1e-05, "loss": 0.2496, "step": 18840 }, { "epoch": 0.06459328503971545, "grad_norm": 1.141400694847107, "learning_rate": 1e-05, "loss": 0.2447, "step": 18850 }, { "epoch": 0.06462755203443148, "grad_norm": 0.9263083338737488, "learning_rate": 1e-05, "loss": 0.2446, "step": 18860 }, { "epoch": 0.0646618190291475, "grad_norm": 1.0573432445526123, "learning_rate": 1e-05, "loss": 0.2482, "step": 18870 }, { "epoch": 0.06469608602386354, "grad_norm": 1.2113983631134033, "learning_rate": 1e-05, "loss": 0.2459, "step": 18880 }, { "epoch": 0.06473035301857956, "grad_norm": 1.2218704223632812, "learning_rate": 1e-05, "loss": 0.2285, "step": 18890 }, { "epoch": 0.0647646200132956, "grad_norm": 1.0855586528778076, 
"learning_rate": 1e-05, "loss": 0.2393, "step": 18900 }, { "epoch": 0.06479888700801162, "grad_norm": 1.1320980787277222, "learning_rate": 1e-05, "loss": 0.2438, "step": 18910 }, { "epoch": 0.06483315400272766, "grad_norm": 1.2386951446533203, "learning_rate": 1e-05, "loss": 0.2554, "step": 18920 }, { "epoch": 0.06486742099744368, "grad_norm": 1.1133781671524048, "learning_rate": 1e-05, "loss": 0.2331, "step": 18930 }, { "epoch": 0.06490168799215972, "grad_norm": 1.149234652519226, "learning_rate": 1e-05, "loss": 0.2708, "step": 18940 }, { "epoch": 0.06493595498687574, "grad_norm": 1.2425532341003418, "learning_rate": 1e-05, "loss": 0.2443, "step": 18950 }, { "epoch": 0.06497022198159177, "grad_norm": 1.070039987564087, "learning_rate": 1e-05, "loss": 0.2586, "step": 18960 }, { "epoch": 0.0650044889763078, "grad_norm": 1.2911726236343384, "learning_rate": 1e-05, "loss": 0.2472, "step": 18970 }, { "epoch": 0.06503875597102383, "grad_norm": 1.2151503562927246, "learning_rate": 1e-05, "loss": 0.2594, "step": 18980 }, { "epoch": 0.06507302296573986, "grad_norm": 1.1766403913497925, "learning_rate": 1e-05, "loss": 0.2394, "step": 18990 }, { "epoch": 0.0651072899604559, "grad_norm": 1.0176496505737305, "learning_rate": 1e-05, "loss": 0.2448, "step": 19000 }, { "epoch": 0.0651072899604559, "eval_cer": 12.691832774739813, "eval_loss": 0.24545185267925262, "eval_normalized_cer": 9.082733812949641, "eval_runtime": 228.5313, "eval_samples_per_second": 2.24, "eval_steps_per_second": 0.035, "step": 19000 }, { "epoch": 0.06514155695517192, "grad_norm": 1.2762819528579712, "learning_rate": 1e-05, "loss": 0.2723, "step": 19010 }, { "epoch": 0.06517582394988795, "grad_norm": 1.2979320287704468, "learning_rate": 1e-05, "loss": 0.2491, "step": 19020 }, { "epoch": 0.06521009094460398, "grad_norm": 1.2459253072738647, "learning_rate": 1e-05, "loss": 0.242, "step": 19030 }, { "epoch": 0.06524435793932, "grad_norm": 1.130518913269043, "learning_rate": 1e-05, "loss": 0.2644, "step": 19040 
}, { "epoch": 0.06527862493403604, "grad_norm": 1.0955489873886108, "learning_rate": 1e-05, "loss": 0.255, "step": 19050 }, { "epoch": 0.06531289192875206, "grad_norm": 1.0841457843780518, "learning_rate": 1e-05, "loss": 0.2397, "step": 19060 }, { "epoch": 0.0653471589234681, "grad_norm": 1.0940138101577759, "learning_rate": 1e-05, "loss": 0.2474, "step": 19070 }, { "epoch": 0.06538142591818412, "grad_norm": 1.1498796939849854, "learning_rate": 1e-05, "loss": 0.2495, "step": 19080 }, { "epoch": 0.06541569291290016, "grad_norm": 1.0884509086608887, "learning_rate": 1e-05, "loss": 0.2384, "step": 19090 }, { "epoch": 0.06544995990761618, "grad_norm": 1.119750738143921, "learning_rate": 1e-05, "loss": 0.2509, "step": 19100 }, { "epoch": 0.06548422690233222, "grad_norm": 0.973027765750885, "learning_rate": 1e-05, "loss": 0.2466, "step": 19110 }, { "epoch": 0.06551849389704824, "grad_norm": 1.1478537321090698, "learning_rate": 1e-05, "loss": 0.2464, "step": 19120 }, { "epoch": 0.06555276089176428, "grad_norm": 1.2103520631790161, "learning_rate": 1e-05, "loss": 0.2445, "step": 19130 }, { "epoch": 0.0655870278864803, "grad_norm": 1.000458002090454, "learning_rate": 1e-05, "loss": 0.2498, "step": 19140 }, { "epoch": 0.06562129488119633, "grad_norm": 1.1135921478271484, "learning_rate": 1e-05, "loss": 0.2369, "step": 19150 }, { "epoch": 0.06565556187591236, "grad_norm": 1.0207141637802124, "learning_rate": 1e-05, "loss": 0.243, "step": 19160 }, { "epoch": 0.0656898288706284, "grad_norm": 1.1605093479156494, "learning_rate": 1e-05, "loss": 0.2366, "step": 19170 }, { "epoch": 0.06572409586534442, "grad_norm": 1.242141604423523, "learning_rate": 1e-05, "loss": 0.2397, "step": 19180 }, { "epoch": 0.06575836286006044, "grad_norm": 0.9129644632339478, "learning_rate": 1e-05, "loss": 0.2433, "step": 19190 }, { "epoch": 0.06579262985477648, "grad_norm": 0.9504500031471252, "learning_rate": 1e-05, "loss": 0.2429, "step": 19200 }, { "epoch": 0.0658268968494925, "grad_norm": 
1.2368412017822266, "learning_rate": 1e-05, "loss": 0.2406, "step": 19210 }, { "epoch": 0.06586116384420854, "grad_norm": 1.2066251039505005, "learning_rate": 1e-05, "loss": 0.2491, "step": 19220 }, { "epoch": 0.06589543083892456, "grad_norm": 1.1131211519241333, "learning_rate": 1e-05, "loss": 0.2605, "step": 19230 }, { "epoch": 0.0659296978336406, "grad_norm": 1.211876392364502, "learning_rate": 1e-05, "loss": 0.2627, "step": 19240 }, { "epoch": 0.06596396482835662, "grad_norm": 1.0012997388839722, "learning_rate": 1e-05, "loss": 0.2563, "step": 19250 }, { "epoch": 0.06599823182307266, "grad_norm": 1.2079874277114868, "learning_rate": 1e-05, "loss": 0.2423, "step": 19260 }, { "epoch": 0.06603249881778868, "grad_norm": 1.0952426195144653, "learning_rate": 1e-05, "loss": 0.2573, "step": 19270 }, { "epoch": 0.06606676581250472, "grad_norm": 0.9400359392166138, "learning_rate": 1e-05, "loss": 0.2516, "step": 19280 }, { "epoch": 0.06610103280722074, "grad_norm": 1.026219367980957, "learning_rate": 1e-05, "loss": 0.2436, "step": 19290 }, { "epoch": 0.06613529980193678, "grad_norm": 1.0502564907073975, "learning_rate": 1e-05, "loss": 0.2471, "step": 19300 }, { "epoch": 0.0661695667966528, "grad_norm": 1.2147547006607056, "learning_rate": 1e-05, "loss": 0.2358, "step": 19310 }, { "epoch": 0.06620383379136884, "grad_norm": 1.2542741298675537, "learning_rate": 1e-05, "loss": 0.2489, "step": 19320 }, { "epoch": 0.06623810078608486, "grad_norm": 1.031640887260437, "learning_rate": 1e-05, "loss": 0.2391, "step": 19330 }, { "epoch": 0.0662723677808009, "grad_norm": 1.2736186981201172, "learning_rate": 1e-05, "loss": 0.2391, "step": 19340 }, { "epoch": 0.06630663477551692, "grad_norm": 1.091873288154602, "learning_rate": 1e-05, "loss": 0.2407, "step": 19350 }, { "epoch": 0.06634090177023294, "grad_norm": 1.1229465007781982, "learning_rate": 1e-05, "loss": 0.2414, "step": 19360 }, { "epoch": 0.06637516876494898, "grad_norm": 1.2272335290908813, "learning_rate": 1e-05, "loss": 
0.247, "step": 19370 }, { "epoch": 0.066409435759665, "grad_norm": 1.063962697982788, "learning_rate": 1e-05, "loss": 0.241, "step": 19380 }, { "epoch": 0.06644370275438104, "grad_norm": 1.2155866622924805, "learning_rate": 1e-05, "loss": 0.252, "step": 19390 }, { "epoch": 0.06647796974909706, "grad_norm": 0.9977822303771973, "learning_rate": 1e-05, "loss": 0.2334, "step": 19400 }, { "epoch": 0.0665122367438131, "grad_norm": 1.1466742753982544, "learning_rate": 1e-05, "loss": 0.2434, "step": 19410 }, { "epoch": 0.06654650373852912, "grad_norm": 0.9723268747329712, "learning_rate": 1e-05, "loss": 0.245, "step": 19420 }, { "epoch": 0.06658077073324516, "grad_norm": 0.9507194757461548, "learning_rate": 1e-05, "loss": 0.2268, "step": 19430 }, { "epoch": 0.06661503772796118, "grad_norm": 1.1839369535446167, "learning_rate": 1e-05, "loss": 0.24, "step": 19440 }, { "epoch": 0.06664930472267722, "grad_norm": 1.1277563571929932, "learning_rate": 1e-05, "loss": 0.2448, "step": 19450 }, { "epoch": 0.06668357171739324, "grad_norm": 0.9276403188705444, "learning_rate": 1e-05, "loss": 0.2489, "step": 19460 }, { "epoch": 0.06671783871210928, "grad_norm": 0.9769749045372009, "learning_rate": 1e-05, "loss": 0.2361, "step": 19470 }, { "epoch": 0.0667521057068253, "grad_norm": 1.1307224035263062, "learning_rate": 1e-05, "loss": 0.2413, "step": 19480 }, { "epoch": 0.06678637270154134, "grad_norm": 1.0125936269760132, "learning_rate": 1e-05, "loss": 0.2496, "step": 19490 }, { "epoch": 0.06682063969625736, "grad_norm": 1.3682901859283447, "learning_rate": 1e-05, "loss": 0.2573, "step": 19500 }, { "epoch": 0.06682063969625736, "eval_cer": 12.436055741753396, "eval_loss": 0.24245715141296387, "eval_normalized_cer": 8.81294964028777, "eval_runtime": 227.5796, "eval_samples_per_second": 2.25, "eval_steps_per_second": 0.035, "step": 19500 }, { "epoch": 0.06685490669097338, "grad_norm": 1.3106615543365479, "learning_rate": 1e-05, "loss": 0.2473, "step": 19510 }, { "epoch": 
0.06688917368568942, "grad_norm": 1.1861129999160767, "learning_rate": 1e-05, "loss": 0.2478, "step": 19520 }, { "epoch": 0.06692344068040544, "grad_norm": 1.0658881664276123, "learning_rate": 1e-05, "loss": 0.2629, "step": 19530 }, { "epoch": 0.06695770767512148, "grad_norm": 1.3097602128982544, "learning_rate": 1e-05, "loss": 0.2332, "step": 19540 }, { "epoch": 0.0669919746698375, "grad_norm": 1.1135023832321167, "learning_rate": 1e-05, "loss": 0.2613, "step": 19550 }, { "epoch": 0.06702624166455354, "grad_norm": 1.2106831073760986, "learning_rate": 1e-05, "loss": 0.2518, "step": 19560 }, { "epoch": 0.06706050865926956, "grad_norm": 1.0989844799041748, "learning_rate": 1e-05, "loss": 0.2602, "step": 19570 }, { "epoch": 0.0670947756539856, "grad_norm": 1.0839687585830688, "learning_rate": 1e-05, "loss": 0.245, "step": 19580 }, { "epoch": 0.06712904264870162, "grad_norm": 1.249485731124878, "learning_rate": 1e-05, "loss": 0.251, "step": 19590 }, { "epoch": 0.06716330964341766, "grad_norm": 1.0540473461151123, "learning_rate": 1e-05, "loss": 0.2682, "step": 19600 }, { "epoch": 0.06719757663813368, "grad_norm": 1.1540120840072632, "learning_rate": 1e-05, "loss": 0.264, "step": 19610 }, { "epoch": 0.06723184363284972, "grad_norm": 1.2563894987106323, "learning_rate": 1e-05, "loss": 0.2627, "step": 19620 }, { "epoch": 0.06726611062756574, "grad_norm": 1.1335647106170654, "learning_rate": 1e-05, "loss": 0.2662, "step": 19630 }, { "epoch": 0.06730037762228178, "grad_norm": 1.0809390544891357, "learning_rate": 1e-05, "loss": 0.2477, "step": 19640 }, { "epoch": 0.0673346446169978, "grad_norm": 1.0609475374221802, "learning_rate": 1e-05, "loss": 0.2491, "step": 19650 }, { "epoch": 0.06736891161171384, "grad_norm": 1.2045155763626099, "learning_rate": 1e-05, "loss": 0.2586, "step": 19660 }, { "epoch": 0.06740317860642986, "grad_norm": 1.0101896524429321, "learning_rate": 1e-05, "loss": 0.2514, "step": 19670 }, { "epoch": 0.06743744560114588, "grad_norm": 1.070760726928711, 
"learning_rate": 1e-05, "loss": 0.2677, "step": 19680 }, { "epoch": 0.06747171259586192, "grad_norm": 1.1094374656677246, "learning_rate": 1e-05, "loss": 0.2601, "step": 19690 }, { "epoch": 0.06750597959057794, "grad_norm": 1.2062901258468628, "learning_rate": 1e-05, "loss": 0.2351, "step": 19700 }, { "epoch": 0.06754024658529398, "grad_norm": 1.2172263860702515, "learning_rate": 1e-05, "loss": 0.248, "step": 19710 }, { "epoch": 0.06757451358001, "grad_norm": 0.9713698625564575, "learning_rate": 1e-05, "loss": 0.2366, "step": 19720 }, { "epoch": 0.06760878057472604, "grad_norm": 1.046995997428894, "learning_rate": 1e-05, "loss": 0.2225, "step": 19730 }, { "epoch": 0.06764304756944206, "grad_norm": 1.002658724784851, "learning_rate": 1e-05, "loss": 0.2286, "step": 19740 }, { "epoch": 0.0676773145641581, "grad_norm": 1.072398066520691, "learning_rate": 1e-05, "loss": 0.232, "step": 19750 }, { "epoch": 0.06771158155887412, "grad_norm": 1.1962021589279175, "learning_rate": 1e-05, "loss": 0.2416, "step": 19760 }, { "epoch": 0.06774584855359016, "grad_norm": 1.19411039352417, "learning_rate": 1e-05, "loss": 0.2448, "step": 19770 }, { "epoch": 0.06778011554830618, "grad_norm": 1.0695290565490723, "learning_rate": 1e-05, "loss": 0.2273, "step": 19780 }, { "epoch": 0.06781438254302222, "grad_norm": 1.0701625347137451, "learning_rate": 1e-05, "loss": 0.2355, "step": 19790 }, { "epoch": 0.06784864953773824, "grad_norm": 1.1018661260604858, "learning_rate": 1e-05, "loss": 0.2301, "step": 19800 }, { "epoch": 0.06788291653245428, "grad_norm": 1.2072187662124634, "learning_rate": 1e-05, "loss": 0.2311, "step": 19810 }, { "epoch": 0.0679171835271703, "grad_norm": 1.2599499225616455, "learning_rate": 1e-05, "loss": 0.2327, "step": 19820 }, { "epoch": 0.06795145052188634, "grad_norm": 1.0052101612091064, "learning_rate": 1e-05, "loss": 0.2303, "step": 19830 }, { "epoch": 0.06798571751660236, "grad_norm": 1.1168869733810425, "learning_rate": 1e-05, "loss": 0.239, "step": 19840 }, { 
"epoch": 0.06801998451131838, "grad_norm": 1.166634440422058, "learning_rate": 1e-05, "loss": 0.2255, "step": 19850 }, { "epoch": 0.06805425150603442, "grad_norm": 1.0218929052352905, "learning_rate": 1e-05, "loss": 0.2244, "step": 19860 }, { "epoch": 0.06808851850075044, "grad_norm": 1.1705201864242554, "learning_rate": 1e-05, "loss": 0.2177, "step": 19870 }, { "epoch": 0.06812278549546648, "grad_norm": 0.9937231540679932, "learning_rate": 1e-05, "loss": 0.2384, "step": 19880 }, { "epoch": 0.0681570524901825, "grad_norm": 1.045133352279663, "learning_rate": 1e-05, "loss": 0.2208, "step": 19890 }, { "epoch": 0.06819131948489854, "grad_norm": 1.1548084020614624, "learning_rate": 1e-05, "loss": 0.2262, "step": 19900 }, { "epoch": 0.06822558647961456, "grad_norm": 1.2487162351608276, "learning_rate": 1e-05, "loss": 0.234, "step": 19910 }, { "epoch": 0.0682598534743306, "grad_norm": 1.0547343492507935, "learning_rate": 1e-05, "loss": 0.2337, "step": 19920 }, { "epoch": 0.06829412046904662, "grad_norm": 0.8701609373092651, "learning_rate": 1e-05, "loss": 0.2132, "step": 19930 }, { "epoch": 0.06832838746376266, "grad_norm": 1.0996358394622803, "learning_rate": 1e-05, "loss": 0.2201, "step": 19940 }, { "epoch": 0.06836265445847868, "grad_norm": 1.0274715423583984, "learning_rate": 1e-05, "loss": 0.221, "step": 19950 }, { "epoch": 0.06839692145319472, "grad_norm": 0.9839652180671692, "learning_rate": 1e-05, "loss": 0.2142, "step": 19960 }, { "epoch": 0.06843118844791074, "grad_norm": 1.121183156967163, "learning_rate": 1e-05, "loss": 0.2204, "step": 19970 }, { "epoch": 0.06846545544262678, "grad_norm": 1.0347726345062256, "learning_rate": 1e-05, "loss": 0.2388, "step": 19980 }, { "epoch": 0.0684997224373428, "grad_norm": 1.0449944734573364, "learning_rate": 1e-05, "loss": 0.2242, "step": 19990 }, { "epoch": 0.06853398943205882, "grad_norm": 1.0335705280303955, "learning_rate": 1e-05, "loss": 0.2323, "step": 20000 }, { "epoch": 0.06853398943205882, "eval_cer": 
13.097548068442405, "eval_loss": 0.2434115707874298, "eval_normalized_cer": 9.482414068745005, "eval_runtime": 242.1003, "eval_samples_per_second": 2.115, "eval_steps_per_second": 0.033, "step": 20000 }, { "epoch": 0.06856825642677486, "grad_norm": 1.1393816471099854, "learning_rate": 1e-05, "loss": 0.2257, "step": 20010 }, { "epoch": 0.06860252342149088, "grad_norm": 1.2440309524536133, "learning_rate": 1e-05, "loss": 0.2045, "step": 20020 }, { "epoch": 0.06863679041620692, "grad_norm": 1.2834079265594482, "learning_rate": 1e-05, "loss": 0.2312, "step": 20030 }, { "epoch": 0.06867105741092294, "grad_norm": 1.0295487642288208, "learning_rate": 1e-05, "loss": 0.2288, "step": 20040 }, { "epoch": 0.06870532440563898, "grad_norm": 1.0935391187667847, "learning_rate": 1e-05, "loss": 0.2191, "step": 20050 }, { "epoch": 0.068739591400355, "grad_norm": 1.2545008659362793, "learning_rate": 1e-05, "loss": 0.2204, "step": 20060 }, { "epoch": 0.06877385839507104, "grad_norm": 1.0660568475723267, "learning_rate": 1e-05, "loss": 0.2214, "step": 20070 }, { "epoch": 0.06880812538978706, "grad_norm": 1.0966823101043701, "learning_rate": 1e-05, "loss": 0.2148, "step": 20080 }, { "epoch": 0.0688423923845031, "grad_norm": 1.1360703706741333, "learning_rate": 1e-05, "loss": 0.2145, "step": 20090 }, { "epoch": 0.06887665937921912, "grad_norm": 1.0804933309555054, "learning_rate": 1e-05, "loss": 0.2256, "step": 20100 }, { "epoch": 0.06891092637393516, "grad_norm": 1.2052994966506958, "learning_rate": 1e-05, "loss": 0.2188, "step": 20110 }, { "epoch": 0.06894519336865118, "grad_norm": 1.2262095212936401, "learning_rate": 1e-05, "loss": 0.2283, "step": 20120 }, { "epoch": 0.06897946036336722, "grad_norm": 1.2067344188690186, "learning_rate": 1e-05, "loss": 0.2107, "step": 20130 }, { "epoch": 0.06901372735808324, "grad_norm": 1.1655231714248657, "learning_rate": 1e-05, "loss": 0.2097, "step": 20140 }, { "epoch": 0.06904799435279928, "grad_norm": 1.1139166355133057, "learning_rate": 1e-05, 
"loss": 0.2079, "step": 20150 }, { "epoch": 0.0690822613475153, "grad_norm": 1.0625183582305908, "learning_rate": 1e-05, "loss": 0.2097, "step": 20160 }, { "epoch": 0.06911652834223132, "grad_norm": 1.0871198177337646, "learning_rate": 1e-05, "loss": 0.2041, "step": 20170 }, { "epoch": 0.06915079533694736, "grad_norm": 1.1072641611099243, "learning_rate": 1e-05, "loss": 0.2178, "step": 20180 }, { "epoch": 0.06918506233166338, "grad_norm": 1.1643656492233276, "learning_rate": 1e-05, "loss": 0.2264, "step": 20190 }, { "epoch": 0.06921932932637942, "grad_norm": 1.0108036994934082, "learning_rate": 1e-05, "loss": 0.2069, "step": 20200 }, { "epoch": 0.06925359632109544, "grad_norm": 1.1025501489639282, "learning_rate": 1e-05, "loss": 0.2146, "step": 20210 }, { "epoch": 0.06928786331581148, "grad_norm": 1.2304129600524902, "learning_rate": 1e-05, "loss": 0.2333, "step": 20220 }, { "epoch": 0.0693221303105275, "grad_norm": 1.2792093753814697, "learning_rate": 1e-05, "loss": 0.2453, "step": 20230 }, { "epoch": 0.06935639730524354, "grad_norm": 0.9987597465515137, "learning_rate": 1e-05, "loss": 0.2484, "step": 20240 }, { "epoch": 0.06939066429995956, "grad_norm": 1.2693513631820679, "learning_rate": 1e-05, "loss": 0.2439, "step": 20250 }, { "epoch": 0.0694249312946756, "grad_norm": 1.2504794597625732, "learning_rate": 1e-05, "loss": 0.2478, "step": 20260 }, { "epoch": 0.06945919828939162, "grad_norm": 1.1483858823776245, "learning_rate": 1e-05, "loss": 0.2615, "step": 20270 }, { "epoch": 0.06949346528410766, "grad_norm": 1.1267510652542114, "learning_rate": 1e-05, "loss": 0.2492, "step": 20280 }, { "epoch": 0.06952773227882368, "grad_norm": 1.0131007432937622, "learning_rate": 1e-05, "loss": 0.2426, "step": 20290 }, { "epoch": 0.06956199927353972, "grad_norm": 1.1513702869415283, "learning_rate": 1e-05, "loss": 0.2473, "step": 20300 }, { "epoch": 0.06959626626825574, "grad_norm": 1.0932912826538086, "learning_rate": 1e-05, "loss": 0.2444, "step": 20310 }, { "epoch": 
0.06963053326297178, "grad_norm": 1.1496869325637817, "learning_rate": 1e-05, "loss": 0.2543, "step": 20320 }, { "epoch": 0.0696648002576878, "grad_norm": 1.0396265983581543, "learning_rate": 1e-05, "loss": 0.2528, "step": 20330 }, { "epoch": 0.06969906725240382, "grad_norm": 1.2144852876663208, "learning_rate": 1e-05, "loss": 0.263, "step": 20340 }, { "epoch": 0.06973333424711986, "grad_norm": 1.0001530647277832, "learning_rate": 1e-05, "loss": 0.2411, "step": 20350 }, { "epoch": 0.06976760124183588, "grad_norm": 1.1693116426467896, "learning_rate": 1e-05, "loss": 0.2643, "step": 20360 }, { "epoch": 0.06980186823655192, "grad_norm": 1.1341391801834106, "learning_rate": 1e-05, "loss": 0.2432, "step": 20370 }, { "epoch": 0.06983613523126794, "grad_norm": 1.0701422691345215, "learning_rate": 1e-05, "loss": 0.2526, "step": 20380 }, { "epoch": 0.06987040222598398, "grad_norm": 1.1302570104599, "learning_rate": 1e-05, "loss": 0.2557, "step": 20390 }, { "epoch": 0.0699046692207, "grad_norm": 0.940927267074585, "learning_rate": 1e-05, "loss": 0.2379, "step": 20400 }, { "epoch": 0.06993893621541604, "grad_norm": 1.210973858833313, "learning_rate": 1e-05, "loss": 0.2565, "step": 20410 }, { "epoch": 0.06997320321013206, "grad_norm": 1.1883857250213623, "learning_rate": 1e-05, "loss": 0.2688, "step": 20420 }, { "epoch": 0.0700074702048481, "grad_norm": 1.0678610801696777, "learning_rate": 1e-05, "loss": 0.2414, "step": 20430 }, { "epoch": 0.07004173719956412, "grad_norm": 1.0714175701141357, "learning_rate": 1e-05, "loss": 0.2607, "step": 20440 }, { "epoch": 0.07007600419428016, "grad_norm": 1.171006441116333, "learning_rate": 1e-05, "loss": 0.2542, "step": 20450 }, { "epoch": 0.07011027118899618, "grad_norm": 1.1522200107574463, "learning_rate": 1e-05, "loss": 0.2412, "step": 20460 }, { "epoch": 0.07014453818371222, "grad_norm": 1.2820340394973755, "learning_rate": 1e-05, "loss": 0.2648, "step": 20470 }, { "epoch": 0.07017880517842824, "grad_norm": 1.191225290298462, 
"learning_rate": 1e-05, "loss": 0.2448, "step": 20480 }, { "epoch": 0.07021307217314426, "grad_norm": 1.1028372049331665, "learning_rate": 1e-05, "loss": 0.2448, "step": 20490 }, { "epoch": 0.0702473391678603, "grad_norm": 1.0639230012893677, "learning_rate": 1e-05, "loss": 0.2435, "step": 20500 }, { "epoch": 0.0702473391678603, "eval_cer": 12.136179220321045, "eval_loss": 0.2431151568889618, "eval_normalized_cer": 8.593125499600319, "eval_runtime": 226.8501, "eval_samples_per_second": 2.257, "eval_steps_per_second": 0.035, "step": 20500 }, { "epoch": 0.07028160616257632, "grad_norm": 1.1122690439224243, "learning_rate": 1e-05, "loss": 0.2462, "step": 20510 }, { "epoch": 0.07031587315729236, "grad_norm": 1.1983146667480469, "learning_rate": 1e-05, "loss": 0.2684, "step": 20520 }, { "epoch": 0.07035014015200838, "grad_norm": 1.0748060941696167, "learning_rate": 1e-05, "loss": 0.2446, "step": 20530 }, { "epoch": 0.07038440714672442, "grad_norm": 1.0608043670654297, "learning_rate": 1e-05, "loss": 0.2461, "step": 20540 }, { "epoch": 0.07041867414144044, "grad_norm": 1.0185233354568481, "learning_rate": 1e-05, "loss": 0.2558, "step": 20550 }, { "epoch": 0.07045294113615648, "grad_norm": 1.0136014223098755, "learning_rate": 1e-05, "loss": 0.237, "step": 20560 }, { "epoch": 0.0704872081308725, "grad_norm": 1.0096406936645508, "learning_rate": 1e-05, "loss": 0.257, "step": 20570 }, { "epoch": 0.07052147512558854, "grad_norm": 1.0398333072662354, "learning_rate": 1e-05, "loss": 0.2586, "step": 20580 }, { "epoch": 0.07055574212030456, "grad_norm": 1.0201914310455322, "learning_rate": 1e-05, "loss": 0.2369, "step": 20590 }, { "epoch": 0.0705900091150206, "grad_norm": 1.2178388833999634, "learning_rate": 1e-05, "loss": 0.2593, "step": 20600 }, { "epoch": 0.07062427610973662, "grad_norm": 1.120365858078003, "learning_rate": 1e-05, "loss": 0.2616, "step": 20610 }, { "epoch": 0.07065854310445266, "grad_norm": 1.3457388877868652, "learning_rate": 1e-05, "loss": 0.2561, "step": 
20620 }, { "epoch": 0.07069281009916868, "grad_norm": 1.004683256149292, "learning_rate": 1e-05, "loss": 0.2421, "step": 20630 }, { "epoch": 0.07072707709388472, "grad_norm": 1.122775912284851, "learning_rate": 1e-05, "loss": 0.236, "step": 20640 }, { "epoch": 0.07076134408860074, "grad_norm": 1.1064289808273315, "learning_rate": 1e-05, "loss": 0.2383, "step": 20650 }, { "epoch": 0.07079561108331676, "grad_norm": 1.0714755058288574, "learning_rate": 1e-05, "loss": 0.2376, "step": 20660 }, { "epoch": 0.0708298780780328, "grad_norm": 1.0543241500854492, "learning_rate": 1e-05, "loss": 0.2575, "step": 20670 }, { "epoch": 0.07086414507274882, "grad_norm": 1.0677913427352905, "learning_rate": 1e-05, "loss": 0.2505, "step": 20680 }, { "epoch": 0.07089841206746486, "grad_norm": 0.9517323970794678, "learning_rate": 1e-05, "loss": 0.2273, "step": 20690 }, { "epoch": 0.07093267906218088, "grad_norm": 1.2108800411224365, "learning_rate": 1e-05, "loss": 0.2436, "step": 20700 }, { "epoch": 0.07096694605689692, "grad_norm": 1.1150835752487183, "learning_rate": 1e-05, "loss": 0.2434, "step": 20710 }, { "epoch": 0.07100121305161294, "grad_norm": 1.1059845685958862, "learning_rate": 1e-05, "loss": 0.2603, "step": 20720 }, { "epoch": 0.07103548004632898, "grad_norm": 1.2442775964736938, "learning_rate": 1e-05, "loss": 0.2608, "step": 20730 }, { "epoch": 0.071069747041045, "grad_norm": 1.2519625425338745, "learning_rate": 1e-05, "loss": 0.2354, "step": 20740 }, { "epoch": 0.07110401403576104, "grad_norm": 1.049765706062317, "learning_rate": 1e-05, "loss": 0.2397, "step": 20750 }, { "epoch": 0.07113828103047706, "grad_norm": 1.1388698816299438, "learning_rate": 1e-05, "loss": 0.235, "step": 20760 }, { "epoch": 0.0711725480251931, "grad_norm": 1.265163779258728, "learning_rate": 1e-05, "loss": 0.2259, "step": 20770 }, { "epoch": 0.07120681501990912, "grad_norm": 1.1405965089797974, "learning_rate": 1e-05, "loss": 0.2256, "step": 20780 }, { "epoch": 0.07124108201462516, "grad_norm": 
1.0761326551437378, "learning_rate": 1e-05, "loss": 0.2207, "step": 20790 }, { "epoch": 0.07127534900934118, "grad_norm": 0.958968460559845, "learning_rate": 1e-05, "loss": 0.2206, "step": 20800 }, { "epoch": 0.0713096160040572, "grad_norm": 1.0726120471954346, "learning_rate": 1e-05, "loss": 0.2268, "step": 20810 }, { "epoch": 0.07134388299877324, "grad_norm": 1.0687642097473145, "learning_rate": 1e-05, "loss": 0.2205, "step": 20820 }, { "epoch": 0.07137814999348926, "grad_norm": 1.1116247177124023, "learning_rate": 1e-05, "loss": 0.2343, "step": 20830 }, { "epoch": 0.0714124169882053, "grad_norm": 1.1039427518844604, "learning_rate": 1e-05, "loss": 0.2401, "step": 20840 }, { "epoch": 0.07144668398292132, "grad_norm": 1.1759549379348755, "learning_rate": 1e-05, "loss": 0.2346, "step": 20850 }, { "epoch": 0.07148095097763736, "grad_norm": 1.1680679321289062, "learning_rate": 1e-05, "loss": 0.2225, "step": 20860 }, { "epoch": 0.07151521797235338, "grad_norm": 1.0508878231048584, "learning_rate": 1e-05, "loss": 0.2326, "step": 20870 }, { "epoch": 0.07154948496706942, "grad_norm": 1.0902260541915894, "learning_rate": 1e-05, "loss": 0.2208, "step": 20880 }, { "epoch": 0.07158375196178544, "grad_norm": 1.2788329124450684, "learning_rate": 1e-05, "loss": 0.2138, "step": 20890 }, { "epoch": 0.07161801895650148, "grad_norm": 1.062601089477539, "learning_rate": 1e-05, "loss": 0.223, "step": 20900 }, { "epoch": 0.0716522859512175, "grad_norm": 1.0100822448730469, "learning_rate": 1e-05, "loss": 0.2252, "step": 20910 }, { "epoch": 0.07168655294593354, "grad_norm": 1.2648316621780396, "learning_rate": 1e-05, "loss": 0.2331, "step": 20920 }, { "epoch": 0.07172081994064956, "grad_norm": 1.2350354194641113, "learning_rate": 1e-05, "loss": 0.2405, "step": 20930 }, { "epoch": 0.0717550869353656, "grad_norm": 1.0435272455215454, "learning_rate": 1e-05, "loss": 0.2181, "step": 20940 }, { "epoch": 0.07178935393008162, "grad_norm": 0.9889156818389893, "learning_rate": 1e-05, "loss": 
0.229, "step": 20950 }, { "epoch": 0.07182362092479766, "grad_norm": 1.131627082824707, "learning_rate": 1e-05, "loss": 0.2493, "step": 20960 }, { "epoch": 0.07185788791951368, "grad_norm": 1.1382976770401, "learning_rate": 1e-05, "loss": 0.2149, "step": 20970 }, { "epoch": 0.0718921549142297, "grad_norm": 1.1330755949020386, "learning_rate": 1e-05, "loss": 0.2281, "step": 20980 }, { "epoch": 0.07192642190894574, "grad_norm": 1.165480375289917, "learning_rate": 1e-05, "loss": 0.2376, "step": 20990 }, { "epoch": 0.07196068890366177, "grad_norm": 1.2060060501098633, "learning_rate": 1e-05, "loss": 0.2231, "step": 21000 }, { "epoch": 0.07196068890366177, "eval_cer": 12.956429705415417, "eval_loss": 0.2448483556509018, "eval_normalized_cer": 9.502398081534773, "eval_runtime": 227.7694, "eval_samples_per_second": 2.248, "eval_steps_per_second": 0.035, "step": 21000 }, { "epoch": 0.0719949558983778, "grad_norm": 1.4561773538589478, "learning_rate": 1e-05, "loss": 0.2405, "step": 21010 }, { "epoch": 0.07202922289309382, "grad_norm": 1.0566471815109253, "learning_rate": 1e-05, "loss": 0.2223, "step": 21020 }, { "epoch": 0.07206348988780986, "grad_norm": 1.1329096555709839, "learning_rate": 1e-05, "loss": 0.245, "step": 21030 }, { "epoch": 0.07209775688252588, "grad_norm": 1.1300888061523438, "learning_rate": 1e-05, "loss": 0.2312, "step": 21040 }, { "epoch": 0.07213202387724192, "grad_norm": 0.9974931478500366, "learning_rate": 1e-05, "loss": 0.2148, "step": 21050 }, { "epoch": 0.07216629087195794, "grad_norm": 1.1847643852233887, "learning_rate": 1e-05, "loss": 0.2357, "step": 21060 }, { "epoch": 0.07220055786667398, "grad_norm": 1.0534117221832275, "learning_rate": 1e-05, "loss": 0.2469, "step": 21070 }, { "epoch": 0.07223482486139, "grad_norm": 1.1248983144760132, "learning_rate": 1e-05, "loss": 0.2222, "step": 21080 }, { "epoch": 0.07226909185610604, "grad_norm": 0.9976629614830017, "learning_rate": 1e-05, "loss": 0.2374, "step": 21090 }, { "epoch": 
0.07230335885082206, "grad_norm": 1.0231010913848877, "learning_rate": 1e-05, "loss": 0.2341, "step": 21100 }, { "epoch": 0.0723376258455381, "grad_norm": 1.1117150783538818, "learning_rate": 1e-05, "loss": 0.2159, "step": 21110 }, { "epoch": 0.07237189284025412, "grad_norm": 0.9355179071426392, "learning_rate": 1e-05, "loss": 0.2196, "step": 21120 }, { "epoch": 0.07240615983497016, "grad_norm": 1.1800999641418457, "learning_rate": 1e-05, "loss": 0.224, "step": 21130 }, { "epoch": 0.07244042682968618, "grad_norm": 1.061677098274231, "learning_rate": 1e-05, "loss": 0.2348, "step": 21140 }, { "epoch": 0.0724746938244022, "grad_norm": 1.0895639657974243, "learning_rate": 1e-05, "loss": 0.2364, "step": 21150 }, { "epoch": 0.07250896081911824, "grad_norm": 1.0691415071487427, "learning_rate": 1e-05, "loss": 0.2219, "step": 21160 }, { "epoch": 0.07254322781383427, "grad_norm": 1.1175389289855957, "learning_rate": 1e-05, "loss": 0.2223, "step": 21170 }, { "epoch": 0.0725774948085503, "grad_norm": 1.064145565032959, "learning_rate": 1e-05, "loss": 0.226, "step": 21180 }, { "epoch": 0.07261176180326633, "grad_norm": 1.0937738418579102, "learning_rate": 1e-05, "loss": 0.2292, "step": 21190 }, { "epoch": 0.07264602879798236, "grad_norm": 1.106714129447937, "learning_rate": 1e-05, "loss": 0.2164, "step": 21200 }, { "epoch": 0.07268029579269839, "grad_norm": 0.9919803738594055, "learning_rate": 1e-05, "loss": 0.2361, "step": 21210 }, { "epoch": 0.07271456278741442, "grad_norm": 1.055652141571045, "learning_rate": 1e-05, "loss": 0.2194, "step": 21220 }, { "epoch": 0.07274882978213044, "grad_norm": 1.098995327949524, "learning_rate": 1e-05, "loss": 0.2339, "step": 21230 }, { "epoch": 0.07278309677684648, "grad_norm": 1.0142837762832642, "learning_rate": 1e-05, "loss": 0.2222, "step": 21240 }, { "epoch": 0.0728173637715625, "grad_norm": 1.1160062551498413, "learning_rate": 1e-05, "loss": 0.2358, "step": 21250 }, { "epoch": 0.07285163076627854, "grad_norm": 1.0952587127685547, 
"learning_rate": 1e-05, "loss": 0.2329, "step": 21260 }, { "epoch": 0.07288589776099456, "grad_norm": null, "learning_rate": 1e-05, "loss": 0.2513, "step": 21270 }, { "epoch": 0.0729201647557106, "grad_norm": 1.2418158054351807, "learning_rate": 1e-05, "loss": 0.2421, "step": 21280 }, { "epoch": 0.07295443175042662, "grad_norm": 1.184396505355835, "learning_rate": 1e-05, "loss": 0.2604, "step": 21290 }, { "epoch": 0.07298869874514265, "grad_norm": 1.0499835014343262, "learning_rate": 1e-05, "loss": 0.2575, "step": 21300 }, { "epoch": 0.07302296573985868, "grad_norm": 1.371702790260315, "learning_rate": 1e-05, "loss": 0.2452, "step": 21310 }, { "epoch": 0.0730572327345747, "grad_norm": 1.0391615629196167, "learning_rate": 1e-05, "loss": 0.2492, "step": 21320 }, { "epoch": 0.07309149972929074, "grad_norm": 1.1651289463043213, "learning_rate": 1e-05, "loss": 0.2413, "step": 21330 }, { "epoch": 0.07312576672400677, "grad_norm": 1.1316466331481934, "learning_rate": 1e-05, "loss": 0.2438, "step": 21340 }, { "epoch": 0.0731600337187228, "grad_norm": 1.0570766925811768, "learning_rate": 1e-05, "loss": 0.2494, "step": 21350 }, { "epoch": 0.07319430071343883, "grad_norm": 1.0197632312774658, "learning_rate": 1e-05, "loss": 0.2429, "step": 21360 }, { "epoch": 0.07322856770815486, "grad_norm": 1.2528513669967651, "learning_rate": 1e-05, "loss": 0.2415, "step": 21370 }, { "epoch": 0.07326283470287089, "grad_norm": 1.3180269002914429, "learning_rate": 1e-05, "loss": 0.2618, "step": 21380 }, { "epoch": 0.07329710169758692, "grad_norm": 1.0602507591247559, "learning_rate": 1e-05, "loss": 0.2586, "step": 21390 }, { "epoch": 0.07333136869230295, "grad_norm": 0.9718620181083679, "learning_rate": 1e-05, "loss": 0.2519, "step": 21400 }, { "epoch": 0.07336563568701898, "grad_norm": 1.1401866674423218, "learning_rate": 1e-05, "loss": 0.2645, "step": 21410 }, { "epoch": 0.073399902681735, "grad_norm": 1.2167593240737915, "learning_rate": 1e-05, "loss": 0.2361, "step": 21420 }, { 
"epoch": 0.07343416967645104, "grad_norm": 1.2634971141815186, "learning_rate": 1e-05, "loss": 0.2551, "step": 21430 }, { "epoch": 0.07346843667116706, "grad_norm": 1.0504337549209595, "learning_rate": 1e-05, "loss": 0.2408, "step": 21440 }, { "epoch": 0.0735027036658831, "grad_norm": 1.1132304668426514, "learning_rate": 1e-05, "loss": 0.2534, "step": 21450 }, { "epoch": 0.07353697066059912, "grad_norm": 1.2953345775604248, "learning_rate": 1e-05, "loss": 0.241, "step": 21460 }, { "epoch": 0.07357123765531515, "grad_norm": 0.9960566759109497, "learning_rate": 1e-05, "loss": 0.2418, "step": 21470 }, { "epoch": 0.07360550465003118, "grad_norm": 1.1548551321029663, "learning_rate": 1e-05, "loss": 0.2709, "step": 21480 }, { "epoch": 0.0736397716447472, "grad_norm": 1.068936824798584, "learning_rate": 1e-05, "loss": 0.242, "step": 21490 }, { "epoch": 0.07367403863946324, "grad_norm": 1.0421861410140991, "learning_rate": 1e-05, "loss": 0.249, "step": 21500 }, { "epoch": 0.07367403863946324, "eval_cer": 12.524254718645263, "eval_loss": 0.2399865686893463, "eval_normalized_cer": 8.962829736211031, "eval_runtime": 227.9979, "eval_samples_per_second": 2.246, "eval_steps_per_second": 0.035, "step": 21500 }, { "epoch": 0.07370830563417927, "grad_norm": 1.3439817428588867, "learning_rate": 1e-05, "loss": 0.2592, "step": 21510 }, { "epoch": 0.0737425726288953, "grad_norm": 1.2546987533569336, "learning_rate": 1e-05, "loss": 0.2451, "step": 21520 }, { "epoch": 0.07377683962361133, "grad_norm": 0.9786490797996521, "learning_rate": 1e-05, "loss": 0.2273, "step": 21530 }, { "epoch": 0.07381110661832736, "grad_norm": 1.1565337181091309, "learning_rate": 1e-05, "loss": 0.2455, "step": 21540 }, { "epoch": 0.07384537361304339, "grad_norm": 1.2526615858078003, "learning_rate": 1e-05, "loss": 0.2445, "step": 21550 }, { "epoch": 0.07387964060775942, "grad_norm": 1.2073777914047241, "learning_rate": 1e-05, "loss": 0.2536, "step": 21560 }, { "epoch": 0.07391390760247545, "grad_norm": 
1.0333545207977295, "learning_rate": 1e-05, "loss": 0.2433, "step": 21570 }, { "epoch": 0.07394817459719148, "grad_norm": 0.9934136867523193, "learning_rate": 1e-05, "loss": 0.2608, "step": 21580 }, { "epoch": 0.0739824415919075, "grad_norm": 1.152581810951233, "learning_rate": 1e-05, "loss": 0.2449, "step": 21590 }, { "epoch": 0.07401670858662354, "grad_norm": 1.0302467346191406, "learning_rate": 1e-05, "loss": 0.2542, "step": 21600 }, { "epoch": 0.07405097558133956, "grad_norm": 1.1744670867919922, "learning_rate": 1e-05, "loss": 0.2503, "step": 21610 }, { "epoch": 0.0740852425760556, "grad_norm": 1.1475284099578857, "learning_rate": 1e-05, "loss": 0.2571, "step": 21620 }, { "epoch": 0.07411950957077162, "grad_norm": 1.157768964767456, "learning_rate": 1e-05, "loss": 0.2336, "step": 21630 }, { "epoch": 0.07415377656548765, "grad_norm": 1.1291224956512451, "learning_rate": 1e-05, "loss": 0.2516, "step": 21640 }, { "epoch": 0.07418804356020368, "grad_norm": 1.1889537572860718, "learning_rate": 1e-05, "loss": 0.2428, "step": 21650 }, { "epoch": 0.07422231055491971, "grad_norm": 1.0830391645431519, "learning_rate": 1e-05, "loss": 0.2469, "step": 21660 }, { "epoch": 0.07425657754963574, "grad_norm": 1.0361846685409546, "learning_rate": 1e-05, "loss": 0.2397, "step": 21670 }, { "epoch": 0.07429084454435177, "grad_norm": 1.008002519607544, "learning_rate": 1e-05, "loss": 0.2401, "step": 21680 }, { "epoch": 0.0743251115390678, "grad_norm": 1.2176759243011475, "learning_rate": 1e-05, "loss": 0.2562, "step": 21690 }, { "epoch": 0.07435937853378383, "grad_norm": 1.2259795665740967, "learning_rate": 1e-05, "loss": 0.2623, "step": 21700 }, { "epoch": 0.07439364552849986, "grad_norm": 1.1422061920166016, "learning_rate": 1e-05, "loss": 0.2732, "step": 21710 }, { "epoch": 0.07442791252321589, "grad_norm": 1.1725839376449585, "learning_rate": 1e-05, "loss": 0.2431, "step": 21720 }, { "epoch": 0.07446217951793192, "grad_norm": 1.026131510734558, "learning_rate": 1e-05, "loss": 
0.244, "step": 21730 }, { "epoch": 0.07449644651264795, "grad_norm": 1.1673903465270996, "learning_rate": 1e-05, "loss": 0.263, "step": 21740 }, { "epoch": 0.07453071350736398, "grad_norm": 1.0802525281906128, "learning_rate": 1e-05, "loss": 0.2496, "step": 21750 }, { "epoch": 0.07456498050208, "grad_norm": 1.0692561864852905, "learning_rate": 1e-05, "loss": 0.2452, "step": 21760 }, { "epoch": 0.07459924749679604, "grad_norm": 0.9135819673538208, "learning_rate": 1e-05, "loss": 0.2359, "step": 21770 }, { "epoch": 0.07463351449151207, "grad_norm": 0.9052550792694092, "learning_rate": 1e-05, "loss": 0.2331, "step": 21780 }, { "epoch": 0.07466778148622809, "grad_norm": 1.1125128269195557, "learning_rate": 1e-05, "loss": 0.2355, "step": 21790 }, { "epoch": 0.07470204848094412, "grad_norm": 1.2584770917892456, "learning_rate": 1e-05, "loss": 0.2487, "step": 21800 }, { "epoch": 0.07473631547566015, "grad_norm": 1.0602110624313354, "learning_rate": 1e-05, "loss": 0.2262, "step": 21810 }, { "epoch": 0.07477058247037618, "grad_norm": 1.0684539079666138, "learning_rate": 1e-05, "loss": 0.2142, "step": 21820 }, { "epoch": 0.07480484946509221, "grad_norm": 1.0720919370651245, "learning_rate": 1e-05, "loss": 0.2363, "step": 21830 }, { "epoch": 0.07483911645980824, "grad_norm": 0.9669879078865051, "learning_rate": 1e-05, "loss": 0.2338, "step": 21840 }, { "epoch": 0.07487338345452427, "grad_norm": 1.19837486743927, "learning_rate": 1e-05, "loss": 0.2487, "step": 21850 }, { "epoch": 0.0749076504492403, "grad_norm": 0.9803608059883118, "learning_rate": 1e-05, "loss": 0.2447, "step": 21860 }, { "epoch": 0.07494191744395633, "grad_norm": 1.0210012197494507, "learning_rate": 1e-05, "loss": 0.2314, "step": 21870 }, { "epoch": 0.07497618443867236, "grad_norm": 1.0393648147583008, "learning_rate": 1e-05, "loss": 0.2362, "step": 21880 }, { "epoch": 0.07501045143338839, "grad_norm": 1.1650710105895996, "learning_rate": 1e-05, "loss": 0.2245, "step": 21890 }, { "epoch": 
0.07504471842810442, "grad_norm": 1.2479090690612793, "learning_rate": 1e-05, "loss": 0.2394, "step": 21900 }, { "epoch": 0.07507898542282045, "grad_norm": 1.105844497680664, "learning_rate": 1e-05, "loss": 0.2489, "step": 21910 }, { "epoch": 0.07511325241753648, "grad_norm": 1.1276116371154785, "learning_rate": 1e-05, "loss": 0.2398, "step": 21920 }, { "epoch": 0.0751475194122525, "grad_norm": 0.9943282604217529, "learning_rate": 1e-05, "loss": 0.2308, "step": 21930 }, { "epoch": 0.07518178640696854, "grad_norm": 0.9971685409545898, "learning_rate": 1e-05, "loss": 0.2191, "step": 21940 }, { "epoch": 0.07521605340168457, "grad_norm": 1.0626705884933472, "learning_rate": 1e-05, "loss": 0.2355, "step": 21950 }, { "epoch": 0.07525032039640059, "grad_norm": 1.1923019886016846, "learning_rate": 1e-05, "loss": 0.2165, "step": 21960 }, { "epoch": 0.07528458739111663, "grad_norm": 1.1665087938308716, "learning_rate": 1e-05, "loss": 0.2422, "step": 21970 }, { "epoch": 0.07531885438583265, "grad_norm": 1.1135221719741821, "learning_rate": 1e-05, "loss": 0.229, "step": 21980 }, { "epoch": 0.07535312138054868, "grad_norm": 1.035752296447754, "learning_rate": 1e-05, "loss": 0.2399, "step": 21990 }, { "epoch": 0.07538738837526471, "grad_norm": 1.0547956228256226, "learning_rate": 1e-05, "loss": 0.2174, "step": 22000 }, { "epoch": 0.07538738837526471, "eval_cer": 12.762391956253309, "eval_loss": 0.24175649881362915, "eval_normalized_cer": 9.012789768185451, "eval_runtime": 226.9242, "eval_samples_per_second": 2.256, "eval_steps_per_second": 0.035, "step": 22000 }, { "epoch": 0.07542165536998074, "grad_norm": 1.011830449104309, "learning_rate": 1e-05, "loss": 0.2245, "step": 22010 }, { "epoch": 0.07545592236469677, "grad_norm": 1.245556354522705, "learning_rate": 1e-05, "loss": 0.2369, "step": 22020 }, { "epoch": 0.0754901893594128, "grad_norm": 1.0645912885665894, "learning_rate": 1e-05, "loss": 0.2611, "step": 22030 }, { "epoch": 0.07552445635412883, "grad_norm": 
1.173473596572876, "learning_rate": 1e-05, "loss": 0.2485, "step": 22040 }, { "epoch": 0.07555872334884486, "grad_norm": 1.1239567995071411, "learning_rate": 1e-05, "loss": 0.2523, "step": 22050 }, { "epoch": 0.07559299034356089, "grad_norm": 1.2006217241287231, "learning_rate": 1e-05, "loss": 0.2411, "step": 22060 }, { "epoch": 0.07562725733827692, "grad_norm": 1.0894744396209717, "learning_rate": 1e-05, "loss": 0.2497, "step": 22070 }, { "epoch": 0.07566152433299295, "grad_norm": 1.3402432203292847, "learning_rate": 1e-05, "loss": 0.2547, "step": 22080 }, { "epoch": 0.07569579132770898, "grad_norm": 1.247170090675354, "learning_rate": 1e-05, "loss": 0.2631, "step": 22090 }, { "epoch": 0.075730058322425, "grad_norm": 1.1156800985336304, "learning_rate": 1e-05, "loss": 0.2396, "step": 22100 }, { "epoch": 0.07576432531714104, "grad_norm": 1.0527523756027222, "learning_rate": 1e-05, "loss": 0.2524, "step": 22110 }, { "epoch": 0.07579859231185707, "grad_norm": 0.9769160151481628, "learning_rate": 1e-05, "loss": 0.2461, "step": 22120 }, { "epoch": 0.07583285930657309, "grad_norm": 1.066941261291504, "learning_rate": 1e-05, "loss": 0.2575, "step": 22130 }, { "epoch": 0.07586712630128913, "grad_norm": 1.226346492767334, "learning_rate": 1e-05, "loss": 0.2542, "step": 22140 }, { "epoch": 0.07590139329600515, "grad_norm": 0.9813686609268188, "learning_rate": 1e-05, "loss": 0.2478, "step": 22150 }, { "epoch": 0.07593566029072119, "grad_norm": 1.127979040145874, "learning_rate": 1e-05, "loss": 0.2589, "step": 22160 }, { "epoch": 0.07596992728543721, "grad_norm": 1.195841908454895, "learning_rate": 1e-05, "loss": 0.2605, "step": 22170 }, { "epoch": 0.07600419428015324, "grad_norm": 1.137445092201233, "learning_rate": 1e-05, "loss": 0.2423, "step": 22180 }, { "epoch": 0.07603846127486927, "grad_norm": 0.9853975176811218, "learning_rate": 1e-05, "loss": 0.246, "step": 22190 }, { "epoch": 0.0760727282695853, "grad_norm": 1.0872706174850464, "learning_rate": 1e-05, "loss": 
0.2588, "step": 22200 }, { "epoch": 0.07610699526430133, "grad_norm": 1.1870428323745728, "learning_rate": 1e-05, "loss": 0.2482, "step": 22210 }, { "epoch": 0.07614126225901736, "grad_norm": 1.1714868545532227, "learning_rate": 1e-05, "loss": 0.2361, "step": 22220 }, { "epoch": 0.07617552925373339, "grad_norm": 1.1878902912139893, "learning_rate": 1e-05, "loss": 0.2532, "step": 22230 }, { "epoch": 0.07620979624844942, "grad_norm": 1.0626158714294434, "learning_rate": 1e-05, "loss": 0.2499, "step": 22240 }, { "epoch": 0.07624406324316545, "grad_norm": 0.9548189640045166, "learning_rate": 1e-05, "loss": 0.2515, "step": 22250 }, { "epoch": 0.07627833023788148, "grad_norm": 1.018364429473877, "learning_rate": 1e-05, "loss": 0.2523, "step": 22260 }, { "epoch": 0.0763125972325975, "grad_norm": 1.1226998567581177, "learning_rate": 1e-05, "loss": 0.2328, "step": 22270 }, { "epoch": 0.07634686422731353, "grad_norm": 1.0202254056930542, "learning_rate": 1e-05, "loss": 0.2356, "step": 22280 }, { "epoch": 0.07638113122202957, "grad_norm": 1.1206306219100952, "learning_rate": 1e-05, "loss": 0.231, "step": 22290 }, { "epoch": 0.07641539821674559, "grad_norm": 1.131699562072754, "learning_rate": 1e-05, "loss": 0.2415, "step": 22300 }, { "epoch": 0.07644966521146163, "grad_norm": 1.0941540002822876, "learning_rate": 1e-05, "loss": 0.2312, "step": 22310 }, { "epoch": 0.07648393220617765, "grad_norm": 1.0358905792236328, "learning_rate": 1e-05, "loss": 0.2464, "step": 22320 }, { "epoch": 0.07651819920089369, "grad_norm": 1.0331768989562988, "learning_rate": 1e-05, "loss": 0.239, "step": 22330 }, { "epoch": 0.07655246619560971, "grad_norm": 1.0920426845550537, "learning_rate": 1e-05, "loss": 0.2489, "step": 22340 }, { "epoch": 0.07658673319032575, "grad_norm": 1.1783214807510376, "learning_rate": 1e-05, "loss": 0.2335, "step": 22350 }, { "epoch": 0.07662100018504177, "grad_norm": 1.2217092514038086, "learning_rate": 1e-05, "loss": 0.2247, "step": 22360 }, { "epoch": 
0.0766552671797578, "grad_norm": 1.1125669479370117, "learning_rate": 1e-05, "loss": 0.2493, "step": 22370 }, { "epoch": 0.07668953417447383, "grad_norm": 0.9087139964103699, "learning_rate": 1e-05, "loss": 0.2345, "step": 22380 }, { "epoch": 0.07672380116918986, "grad_norm": 1.099518060684204, "learning_rate": 1e-05, "loss": 0.2524, "step": 22390 }, { "epoch": 0.07675806816390589, "grad_norm": 1.0696626901626587, "learning_rate": 1e-05, "loss": 0.2172, "step": 22400 }, { "epoch": 0.07679233515862192, "grad_norm": 1.045519471168518, "learning_rate": 1e-05, "loss": 0.2295, "step": 22410 }, { "epoch": 0.07682660215333795, "grad_norm": 1.1601028442382812, "learning_rate": 1e-05, "loss": 0.253, "step": 22420 }, { "epoch": 0.07686086914805398, "grad_norm": 1.1112728118896484, "learning_rate": 1e-05, "loss": 0.2328, "step": 22430 }, { "epoch": 0.07689513614277001, "grad_norm": 1.0132625102996826, "learning_rate": 1e-05, "loss": 0.2302, "step": 22440 }, { "epoch": 0.07692940313748603, "grad_norm": 0.9999580383300781, "learning_rate": 1e-05, "loss": 0.2466, "step": 22450 }, { "epoch": 0.07696367013220207, "grad_norm": 1.157254934310913, "learning_rate": 1e-05, "loss": 0.2282, "step": 22460 }, { "epoch": 0.07699793712691809, "grad_norm": 1.195129632949829, "learning_rate": 1e-05, "loss": 0.2334, "step": 22470 }, { "epoch": 0.07703220412163413, "grad_norm": 1.1293895244598389, "learning_rate": 1e-05, "loss": 0.2447, "step": 22480 }, { "epoch": 0.07706647111635015, "grad_norm": 1.0318418741226196, "learning_rate": 1e-05, "loss": 0.2348, "step": 22490 }, { "epoch": 0.07710073811106619, "grad_norm": 1.0450618267059326, "learning_rate": 1e-05, "loss": 0.2459, "step": 22500 }, { "epoch": 0.07710073811106619, "eval_cer": 12.638913388604692, "eval_loss": 0.2446923851966858, "eval_normalized_cer": 9.082733812949641, "eval_runtime": 228.122, "eval_samples_per_second": 2.244, "eval_steps_per_second": 0.035, "step": 22500 }, { "epoch": 0.07713500510578221, "grad_norm": 
1.0523881912231445, "learning_rate": 1e-05, "loss": 0.2564, "step": 22510 }, { "epoch": 0.07716927210049825, "grad_norm": 1.209961175918579, "learning_rate": 1e-05, "loss": 0.2204, "step": 22520 }, { "epoch": 0.07720353909521427, "grad_norm": 1.1583831310272217, "learning_rate": 1e-05, "loss": 0.2551, "step": 22530 }, { "epoch": 0.0772378060899303, "grad_norm": 1.4498618841171265, "learning_rate": 1e-05, "loss": 0.2528, "step": 22540 }, { "epoch": 0.07727207308464633, "grad_norm": 1.215000867843628, "learning_rate": 1e-05, "loss": 0.2464, "step": 22550 }, { "epoch": 0.07730634007936237, "grad_norm": 1.0598031282424927, "learning_rate": 1e-05, "loss": 0.2394, "step": 22560 }, { "epoch": 0.07734060707407839, "grad_norm": 1.1213382482528687, "learning_rate": 1e-05, "loss": 0.2566, "step": 22570 }, { "epoch": 0.07737487406879442, "grad_norm": 1.1188169717788696, "learning_rate": 1e-05, "loss": 0.2395, "step": 22580 }, { "epoch": 0.07740914106351045, "grad_norm": 1.0692533254623413, "learning_rate": 1e-05, "loss": 0.2376, "step": 22590 }, { "epoch": 0.07744340805822647, "grad_norm": 1.131622076034546, "learning_rate": 1e-05, "loss": 0.2414, "step": 22600 }, { "epoch": 0.07747767505294251, "grad_norm": 1.1728034019470215, "learning_rate": 1e-05, "loss": 0.2375, "step": 22610 }, { "epoch": 0.07751194204765853, "grad_norm": 0.9095053672790527, "learning_rate": 1e-05, "loss": 0.2347, "step": 22620 }, { "epoch": 0.07754620904237457, "grad_norm": 1.0425827503204346, "learning_rate": 1e-05, "loss": 0.2456, "step": 22630 }, { "epoch": 0.07758047603709059, "grad_norm": 1.189103364944458, "learning_rate": 1e-05, "loss": 0.2496, "step": 22640 }, { "epoch": 0.07761474303180663, "grad_norm": 1.1523323059082031, "learning_rate": 1e-05, "loss": 0.2463, "step": 22650 }, { "epoch": 0.07764901002652265, "grad_norm": 1.1360780000686646, "learning_rate": 1e-05, "loss": 0.2367, "step": 22660 }, { "epoch": 0.07768327702123869, "grad_norm": 1.0910000801086426, "learning_rate": 1e-05, "loss": 
0.2428, "step": 22670 }, { "epoch": 0.07771754401595471, "grad_norm": 1.10030198097229, "learning_rate": 1e-05, "loss": 0.2441, "step": 22680 }, { "epoch": 0.07775181101067075, "grad_norm": 1.3179526329040527, "learning_rate": 1e-05, "loss": 0.2558, "step": 22690 }, { "epoch": 0.07778607800538677, "grad_norm": 1.0743062496185303, "learning_rate": 1e-05, "loss": 0.2352, "step": 22700 }, { "epoch": 0.0778203450001028, "grad_norm": 0.9239439368247986, "learning_rate": 1e-05, "loss": 0.2409, "step": 22710 }, { "epoch": 0.07785461199481883, "grad_norm": 1.1279993057250977, "learning_rate": 1e-05, "loss": 0.2573, "step": 22720 }, { "epoch": 0.07788887898953487, "grad_norm": 1.0824002027511597, "learning_rate": 1e-05, "loss": 0.2463, "step": 22730 }, { "epoch": 0.07792314598425089, "grad_norm": 1.1521611213684082, "learning_rate": 1e-05, "loss": 0.2311, "step": 22740 }, { "epoch": 0.07795741297896693, "grad_norm": 1.1287387609481812, "learning_rate": 1e-05, "loss": 0.2437, "step": 22750 }, { "epoch": 0.07799167997368295, "grad_norm": 1.250704050064087, "learning_rate": 1e-05, "loss": 0.2394, "step": 22760 }, { "epoch": 0.07802594696839897, "grad_norm": 1.2648214101791382, "learning_rate": 1e-05, "loss": 0.2433, "step": 22770 }, { "epoch": 0.07806021396311501, "grad_norm": 1.1684668064117432, "learning_rate": 1e-05, "loss": 0.2375, "step": 22780 }, { "epoch": 0.07809448095783103, "grad_norm": 1.0133503675460815, "learning_rate": 1e-05, "loss": 0.2426, "step": 22790 }, { "epoch": 0.07812874795254707, "grad_norm": 1.0540106296539307, "learning_rate": 1e-05, "loss": 0.2172, "step": 22800 }, { "epoch": 0.07816301494726309, "grad_norm": 0.9576539993286133, "learning_rate": 1e-05, "loss": 0.2228, "step": 22810 }, { "epoch": 0.07819728194197913, "grad_norm": 1.2314975261688232, "learning_rate": 1e-05, "loss": 0.2274, "step": 22820 }, { "epoch": 0.07823154893669515, "grad_norm": 1.1600431203842163, "learning_rate": 1e-05, "loss": 0.2212, "step": 22830 }, { "epoch": 
0.07826581593141119, "grad_norm": 1.0336724519729614, "learning_rate": 1e-05, "loss": 0.2157, "step": 22840 }, { "epoch": 0.07830008292612721, "grad_norm": 1.209795355796814, "learning_rate": 1e-05, "loss": 0.2208, "step": 22850 }, { "epoch": 0.07833434992084325, "grad_norm": 1.052100419998169, "learning_rate": 1e-05, "loss": 0.2151, "step": 22860 }, { "epoch": 0.07836861691555927, "grad_norm": 1.0897157192230225, "learning_rate": 1e-05, "loss": 0.2235, "step": 22870 }, { "epoch": 0.0784028839102753, "grad_norm": 1.0175033807754517, "learning_rate": 1e-05, "loss": 0.2226, "step": 22880 }, { "epoch": 0.07843715090499133, "grad_norm": 1.3162877559661865, "learning_rate": 1e-05, "loss": 0.2323, "step": 22890 }, { "epoch": 0.07847141789970737, "grad_norm": 1.1104750633239746, "learning_rate": 1e-05, "loss": 0.2202, "step": 22900 }, { "epoch": 0.07850568489442339, "grad_norm": 1.1699038743972778, "learning_rate": 1e-05, "loss": 0.2262, "step": 22910 }, { "epoch": 0.07853995188913943, "grad_norm": 1.2070201635360718, "learning_rate": 1e-05, "loss": 0.2369, "step": 22920 }, { "epoch": 0.07857421888385545, "grad_norm": 1.0363352298736572, "learning_rate": 1e-05, "loss": 0.208, "step": 22930 }, { "epoch": 0.07860848587857147, "grad_norm": 1.0242831707000732, "learning_rate": 1e-05, "loss": 0.1995, "step": 22940 }, { "epoch": 0.07864275287328751, "grad_norm": 0.9983187317848206, "learning_rate": 1e-05, "loss": 0.2241, "step": 22950 }, { "epoch": 0.07867701986800353, "grad_norm": 1.2646509408950806, "learning_rate": 1e-05, "loss": 0.22, "step": 22960 }, { "epoch": 0.07871128686271957, "grad_norm": 1.0720429420471191, "learning_rate": 1e-05, "loss": 0.2157, "step": 22970 }, { "epoch": 0.07874555385743559, "grad_norm": 1.047745943069458, "learning_rate": 1e-05, "loss": 0.2166, "step": 22980 }, { "epoch": 0.07877982085215163, "grad_norm": 0.9465416073799133, "learning_rate": 1e-05, "loss": 0.2211, "step": 22990 }, { "epoch": 0.07881408784686765, "grad_norm": 1.141426920890808, 
"learning_rate": 1e-05, "loss": 0.2442, "step": 23000 }, { "epoch": 0.07881408784686765, "eval_cer": 12.444875639442582, "eval_loss": 0.24720345437526703, "eval_normalized_cer": 8.76298960831335, "eval_runtime": 227.2823, "eval_samples_per_second": 2.253, "eval_steps_per_second": 0.035, "step": 23000 }, { "epoch": 0.07884835484158369, "grad_norm": 1.0129847526550293, "learning_rate": 1e-05, "loss": 0.2215, "step": 23010 }, { "epoch": 0.07888262183629971, "grad_norm": 1.0782142877578735, "learning_rate": 1e-05, "loss": 0.2159, "step": 23020 }, { "epoch": 0.07891688883101575, "grad_norm": 1.0121062994003296, "learning_rate": 1e-05, "loss": 0.2124, "step": 23030 }, { "epoch": 0.07895115582573177, "grad_norm": 1.1070152521133423, "learning_rate": 1e-05, "loss": 0.2446, "step": 23040 }, { "epoch": 0.0789854228204478, "grad_norm": 1.2437150478363037, "learning_rate": 1e-05, "loss": 0.2444, "step": 23050 }, { "epoch": 0.07901968981516383, "grad_norm": 1.1498879194259644, "learning_rate": 1e-05, "loss": 0.2443, "step": 23060 }, { "epoch": 0.07905395680987987, "grad_norm": 1.145849347114563, "learning_rate": 1e-05, "loss": 0.2538, "step": 23070 }, { "epoch": 0.07908822380459589, "grad_norm": 1.1981453895568848, "learning_rate": 1e-05, "loss": 0.2478, "step": 23080 }, { "epoch": 0.07912249079931191, "grad_norm": 1.113627552986145, "learning_rate": 1e-05, "loss": 0.2401, "step": 23090 }, { "epoch": 0.07915675779402795, "grad_norm": 1.152208685874939, "learning_rate": 1e-05, "loss": 0.242, "step": 23100 }, { "epoch": 0.07919102478874397, "grad_norm": 1.2640796899795532, "learning_rate": 1e-05, "loss": 0.2674, "step": 23110 }, { "epoch": 0.07922529178346001, "grad_norm": 1.1089024543762207, "learning_rate": 1e-05, "loss": 0.2374, "step": 23120 }, { "epoch": 0.07925955877817603, "grad_norm": 1.1184459924697876, "learning_rate": 1e-05, "loss": 0.2436, "step": 23130 }, { "epoch": 0.07929382577289207, "grad_norm": 1.1597626209259033, "learning_rate": 1e-05, "loss": 0.2502, "step": 
23140 }, { "epoch": 0.07932809276760809, "grad_norm": 0.9689920544624329, "learning_rate": 1e-05, "loss": 0.2369, "step": 23150 }, { "epoch": 0.07936235976232413, "grad_norm": 1.0698407888412476, "learning_rate": 1e-05, "loss": 0.2347, "step": 23160 }, { "epoch": 0.07939662675704015, "grad_norm": 1.1528737545013428, "learning_rate": 1e-05, "loss": 0.2539, "step": 23170 }, { "epoch": 0.07943089375175619, "grad_norm": 1.134964108467102, "learning_rate": 1e-05, "loss": 0.2557, "step": 23180 }, { "epoch": 0.07946516074647221, "grad_norm": 1.1838676929473877, "learning_rate": 1e-05, "loss": 0.2489, "step": 23190 }, { "epoch": 0.07949942774118825, "grad_norm": 1.007856011390686, "learning_rate": 1e-05, "loss": 0.2426, "step": 23200 }, { "epoch": 0.07953369473590427, "grad_norm": 1.0157620906829834, "learning_rate": 1e-05, "loss": 0.2569, "step": 23210 }, { "epoch": 0.0795679617306203, "grad_norm": 1.1552293300628662, "learning_rate": 1e-05, "loss": 0.2655, "step": 23220 }, { "epoch": 0.07960222872533633, "grad_norm": 1.0644792318344116, "learning_rate": 1e-05, "loss": 0.2413, "step": 23230 }, { "epoch": 0.07963649572005237, "grad_norm": 1.0649666786193848, "learning_rate": 1e-05, "loss": 0.2397, "step": 23240 }, { "epoch": 0.07967076271476839, "grad_norm": 1.0416626930236816, "learning_rate": 1e-05, "loss": 0.2419, "step": 23250 }, { "epoch": 0.07970502970948441, "grad_norm": 1.0674875974655151, "learning_rate": 1e-05, "loss": 0.2472, "step": 23260 }, { "epoch": 0.07973929670420045, "grad_norm": 1.0134835243225098, "learning_rate": 1e-05, "loss": 0.2306, "step": 23270 }, { "epoch": 0.07977356369891647, "grad_norm": 1.0753681659698486, "learning_rate": 1e-05, "loss": 0.2363, "step": 23280 }, { "epoch": 0.07980783069363251, "grad_norm": 1.0712649822235107, "learning_rate": 1e-05, "loss": 0.2321, "step": 23290 }, { "epoch": 0.07984209768834853, "grad_norm": 0.9887093305587769, "learning_rate": 1e-05, "loss": 0.2194, "step": 23300 }, { "epoch": 0.07987636468306457, 
"grad_norm": 1.050628900527954, "learning_rate": 1e-05, "loss": 0.2413, "step": 23310 }, { "epoch": 0.07991063167778059, "grad_norm": 1.0947946310043335, "learning_rate": 1e-05, "loss": 0.2116, "step": 23320 }, { "epoch": 0.07994489867249663, "grad_norm": 0.9869987368583679, "learning_rate": 1e-05, "loss": 0.2355, "step": 23330 }, { "epoch": 0.07997916566721265, "grad_norm": 0.9494191408157349, "learning_rate": 1e-05, "loss": 0.2268, "step": 23340 }, { "epoch": 0.08001343266192869, "grad_norm": 1.2696855068206787, "learning_rate": 1e-05, "loss": 0.2252, "step": 23350 }, { "epoch": 0.08004769965664471, "grad_norm": 1.2440532445907593, "learning_rate": 1e-05, "loss": 0.2424, "step": 23360 }, { "epoch": 0.08008196665136075, "grad_norm": 1.074804425239563, "learning_rate": 1e-05, "loss": 0.2259, "step": 23370 }, { "epoch": 0.08011623364607677, "grad_norm": 1.2430806159973145, "learning_rate": 1e-05, "loss": 0.2393, "step": 23380 }, { "epoch": 0.08015050064079281, "grad_norm": 1.1680575609207153, "learning_rate": 1e-05, "loss": 0.2316, "step": 23390 }, { "epoch": 0.08018476763550883, "grad_norm": 1.1257153749465942, "learning_rate": 1e-05, "loss": 0.2395, "step": 23400 }, { "epoch": 0.08021903463022487, "grad_norm": 1.1165947914123535, "learning_rate": 1e-05, "loss": 0.2368, "step": 23410 }, { "epoch": 0.08025330162494089, "grad_norm": 1.2653453350067139, "learning_rate": 1e-05, "loss": 0.235, "step": 23420 }, { "epoch": 0.08028756861965691, "grad_norm": 1.064660906791687, "learning_rate": 1e-05, "loss": 0.2463, "step": 23430 }, { "epoch": 0.08032183561437295, "grad_norm": 1.0772967338562012, "learning_rate": 1e-05, "loss": 0.2351, "step": 23440 }, { "epoch": 0.08035610260908897, "grad_norm": 1.1206378936767578, "learning_rate": 1e-05, "loss": 0.2302, "step": 23450 }, { "epoch": 0.08039036960380501, "grad_norm": 0.9457486271858215, "learning_rate": 1e-05, "loss": 0.2412, "step": 23460 }, { "epoch": 0.08042463659852103, "grad_norm": 1.0531508922576904, "learning_rate": 
1e-05, "loss": 0.245, "step": 23470 }, { "epoch": 0.08045890359323707, "grad_norm": 1.0849158763885498, "learning_rate": 1e-05, "loss": 0.2377, "step": 23480 }, { "epoch": 0.08049317058795309, "grad_norm": 1.1281490325927734, "learning_rate": 1e-05, "loss": 0.2565, "step": 23490 }, { "epoch": 0.08052743758266913, "grad_norm": 1.111106276512146, "learning_rate": 1e-05, "loss": 0.2303, "step": 23500 }, { "epoch": 0.08052743758266913, "eval_cer": 12.180278708766979, "eval_loss": 0.24189412593841553, "eval_normalized_cer": 8.733013589128696, "eval_runtime": 228.3161, "eval_samples_per_second": 2.243, "eval_steps_per_second": 0.035, "step": 23500 }, { "epoch": 0.08056170457738515, "grad_norm": 1.0803686380386353, "learning_rate": 1e-05, "loss": 0.217, "step": 23510 }, { "epoch": 0.08059597157210119, "grad_norm": 1.0238206386566162, "learning_rate": 1e-05, "loss": 0.2235, "step": 23520 }, { "epoch": 0.08063023856681721, "grad_norm": 1.2258280515670776, "learning_rate": 1e-05, "loss": 0.2355, "step": 23530 }, { "epoch": 0.08066450556153325, "grad_norm": 1.0921289920806885, "learning_rate": 1e-05, "loss": 0.2277, "step": 23540 }, { "epoch": 0.08069877255624927, "grad_norm": 0.9965620636940002, "learning_rate": 1e-05, "loss": 0.2344, "step": 23550 }, { "epoch": 0.08073303955096531, "grad_norm": 1.0104345083236694, "learning_rate": 1e-05, "loss": 0.2327, "step": 23560 }, { "epoch": 0.08076730654568133, "grad_norm": 1.0309514999389648, "learning_rate": 1e-05, "loss": 0.2498, "step": 23570 }, { "epoch": 0.08080157354039735, "grad_norm": 0.9816809892654419, "learning_rate": 1e-05, "loss": 0.236, "step": 23580 }, { "epoch": 0.08083584053511339, "grad_norm": 0.9675121903419495, "learning_rate": 1e-05, "loss": 0.2193, "step": 23590 }, { "epoch": 0.08087010752982941, "grad_norm": 0.9846246838569641, "learning_rate": 1e-05, "loss": 0.2478, "step": 23600 }, { "epoch": 0.08090437452454545, "grad_norm": 1.154410719871521, "learning_rate": 1e-05, "loss": 0.2506, "step": 23610 }, { 
"epoch": 0.08093864151926147, "grad_norm": 1.148653268814087, "learning_rate": 1e-05, "loss": 0.2418, "step": 23620 }, { "epoch": 0.08097290851397751, "grad_norm": 1.1022738218307495, "learning_rate": 1e-05, "loss": 0.2348, "step": 23630 }, { "epoch": 0.08100717550869353, "grad_norm": 1.2160323858261108, "learning_rate": 1e-05, "loss": 0.2563, "step": 23640 }, { "epoch": 0.08104144250340957, "grad_norm": 1.0583524703979492, "learning_rate": 1e-05, "loss": 0.2244, "step": 23650 }, { "epoch": 0.08107570949812559, "grad_norm": 1.0356570482254028, "learning_rate": 1e-05, "loss": 0.242, "step": 23660 }, { "epoch": 0.08110997649284163, "grad_norm": 1.0162861347198486, "learning_rate": 1e-05, "loss": 0.2675, "step": 23670 }, { "epoch": 0.08114424348755765, "grad_norm": 1.1000967025756836, "learning_rate": 1e-05, "loss": 0.2326, "step": 23680 }, { "epoch": 0.08117851048227369, "grad_norm": 1.1410856246948242, "learning_rate": 1e-05, "loss": 0.2464, "step": 23690 }, { "epoch": 0.08121277747698971, "grad_norm": 1.1000715494155884, "learning_rate": 1e-05, "loss": 0.256, "step": 23700 }, { "epoch": 0.08124704447170575, "grad_norm": 1.1132537126541138, "learning_rate": 1e-05, "loss": 0.2416, "step": 23710 }, { "epoch": 0.08128131146642177, "grad_norm": 1.1105347871780396, "learning_rate": 1e-05, "loss": 0.2533, "step": 23720 }, { "epoch": 0.08131557846113781, "grad_norm": 1.1548352241516113, "learning_rate": 1e-05, "loss": 0.2426, "step": 23730 }, { "epoch": 0.08134984545585383, "grad_norm": 1.0594673156738281, "learning_rate": 1e-05, "loss": 0.2435, "step": 23740 }, { "epoch": 0.08138411245056985, "grad_norm": 1.050610065460205, "learning_rate": 1e-05, "loss": 0.244, "step": 23750 }, { "epoch": 0.08141837944528589, "grad_norm": 1.1562016010284424, "learning_rate": 1e-05, "loss": 0.2439, "step": 23760 }, { "epoch": 0.08145264644000191, "grad_norm": 0.9773512482643127, "learning_rate": 1e-05, "loss": 0.2409, "step": 23770 }, { "epoch": 0.08148691343471795, "grad_norm": 
1.160895586013794, "learning_rate": 1e-05, "loss": 0.256, "step": 23780 }, { "epoch": 0.08152118042943397, "grad_norm": 1.017398476600647, "learning_rate": 1e-05, "loss": 0.2429, "step": 23790 }, { "epoch": 0.08155544742415001, "grad_norm": 1.0898586511611938, "learning_rate": 1e-05, "loss": 0.2478, "step": 23800 }, { "epoch": 0.08158971441886603, "grad_norm": 1.2312469482421875, "learning_rate": 1e-05, "loss": 0.2474, "step": 23810 }, { "epoch": 0.08162398141358207, "grad_norm": 1.0122491121292114, "learning_rate": 1e-05, "loss": 0.2463, "step": 23820 }, { "epoch": 0.08165824840829809, "grad_norm": 1.2574313879013062, "learning_rate": 1e-05, "loss": 0.2382, "step": 23830 }, { "epoch": 0.08169251540301413, "grad_norm": 1.0722161531448364, "learning_rate": 1e-05, "loss": 0.2337, "step": 23840 }, { "epoch": 0.08172678239773015, "grad_norm": 1.1575268507003784, "learning_rate": 1e-05, "loss": 0.2497, "step": 23850 }, { "epoch": 0.08176104939244619, "grad_norm": 1.1297662258148193, "learning_rate": 1e-05, "loss": 0.2474, "step": 23860 }, { "epoch": 0.08179531638716221, "grad_norm": 0.9983540773391724, "learning_rate": 1e-05, "loss": 0.2477, "step": 23870 }, { "epoch": 0.08182958338187825, "grad_norm": 0.9470457434654236, "learning_rate": 1e-05, "loss": 0.2475, "step": 23880 }, { "epoch": 0.08186385037659427, "grad_norm": 1.1658211946487427, "learning_rate": 1e-05, "loss": 0.2746, "step": 23890 }, { "epoch": 0.0818981173713103, "grad_norm": 1.1910369396209717, "learning_rate": 1e-05, "loss": 0.2558, "step": 23900 }, { "epoch": 0.08193238436602633, "grad_norm": 1.0597240924835205, "learning_rate": 1e-05, "loss": 0.2325, "step": 23910 }, { "epoch": 0.08196665136074235, "grad_norm": 1.0382158756256104, "learning_rate": 1e-05, "loss": 0.2413, "step": 23920 }, { "epoch": 0.08200091835545839, "grad_norm": 1.107107400894165, "learning_rate": 1e-05, "loss": 0.2505, "step": 23930 }, { "epoch": 0.08203518535017441, "grad_norm": 1.1651002168655396, "learning_rate": 1e-05, "loss": 
0.2391, "step": 23940 }, { "epoch": 0.08206945234489045, "grad_norm": 1.079775333404541, "learning_rate": 1e-05, "loss": 0.2651, "step": 23950 }, { "epoch": 0.08210371933960647, "grad_norm": 0.9570854902267456, "learning_rate": 1e-05, "loss": 0.2501, "step": 23960 }, { "epoch": 0.08213798633432251, "grad_norm": 1.0310214757919312, "learning_rate": 1e-05, "loss": 0.2486, "step": 23970 }, { "epoch": 0.08217225332903853, "grad_norm": 1.045013189315796, "learning_rate": 1e-05, "loss": 0.2494, "step": 23980 }, { "epoch": 0.08220652032375457, "grad_norm": 1.0111870765686035, "learning_rate": 1e-05, "loss": 0.2707, "step": 23990 }, { "epoch": 0.08224078731847059, "grad_norm": 1.1841882467269897, "learning_rate": 1e-05, "loss": 0.2536, "step": 24000 }, { "epoch": 0.08224078731847059, "eval_cer": 12.621273593226318, "eval_loss": 0.24069419503211975, "eval_normalized_cer": 8.93285371702638, "eval_runtime": 226.7251, "eval_samples_per_second": 2.258, "eval_steps_per_second": 0.035, "step": 24000 }, { "epoch": 0.08227505431318663, "grad_norm": 1.059084177017212, "learning_rate": 1e-05, "loss": 0.2416, "step": 24010 }, { "epoch": 0.08230932130790265, "grad_norm": 1.10452401638031, "learning_rate": 1e-05, "loss": 0.2486, "step": 24020 }, { "epoch": 0.08234358830261869, "grad_norm": 1.1392238140106201, "learning_rate": 1e-05, "loss": 0.2594, "step": 24030 }, { "epoch": 0.08237785529733471, "grad_norm": 1.1955091953277588, "learning_rate": 1e-05, "loss": 0.2607, "step": 24040 }, { "epoch": 0.08241212229205075, "grad_norm": 0.9922770857810974, "learning_rate": 1e-05, "loss": 0.2393, "step": 24050 }, { "epoch": 0.08244638928676677, "grad_norm": 1.0853592157363892, "learning_rate": 1e-05, "loss": 0.253, "step": 24060 }, { "epoch": 0.0824806562814828, "grad_norm": 1.1436183452606201, "learning_rate": 1e-05, "loss": 0.2404, "step": 24070 }, { "epoch": 0.08251492327619883, "grad_norm": 1.0038506984710693, "learning_rate": 1e-05, "loss": 0.2452, "step": 24080 }, { "epoch": 
0.08254919027091485, "grad_norm": 1.1340346336364746, "learning_rate": 1e-05, "loss": 0.2395, "step": 24090 }, { "epoch": 0.08258345726563089, "grad_norm": 1.2182914018630981, "learning_rate": 1e-05, "loss": 0.2409, "step": 24100 }, { "epoch": 0.08261772426034691, "grad_norm": 1.1426423788070679, "learning_rate": 1e-05, "loss": 0.2406, "step": 24110 }, { "epoch": 0.08265199125506295, "grad_norm": 0.9850037097930908, "learning_rate": 1e-05, "loss": 0.2216, "step": 24120 }, { "epoch": 0.08268625824977897, "grad_norm": 1.177119255065918, "learning_rate": 1e-05, "loss": 0.2389, "step": 24130 }, { "epoch": 0.08272052524449501, "grad_norm": 1.1432584524154663, "learning_rate": 1e-05, "loss": 0.2545, "step": 24140 }, { "epoch": 0.08275479223921103, "grad_norm": 1.0530070066452026, "learning_rate": 1e-05, "loss": 0.2511, "step": 24150 }, { "epoch": 0.08278905923392707, "grad_norm": 1.0670721530914307, "learning_rate": 1e-05, "loss": 0.2393, "step": 24160 }, { "epoch": 0.0828233262286431, "grad_norm": 1.0654528141021729, "learning_rate": 1e-05, "loss": 0.2453, "step": 24170 }, { "epoch": 0.08285759322335913, "grad_norm": 1.0833876132965088, "learning_rate": 1e-05, "loss": 0.2427, "step": 24180 }, { "epoch": 0.08289186021807515, "grad_norm": 1.203636646270752, "learning_rate": 1e-05, "loss": 0.2445, "step": 24190 }, { "epoch": 0.08292612721279119, "grad_norm": 1.0569822788238525, "learning_rate": 1e-05, "loss": 0.2527, "step": 24200 }, { "epoch": 0.08296039420750721, "grad_norm": 1.0819365978240967, "learning_rate": 1e-05, "loss": 0.2491, "step": 24210 }, { "epoch": 0.08299466120222325, "grad_norm": 0.956221878528595, "learning_rate": 1e-05, "loss": 0.2411, "step": 24220 }, { "epoch": 0.08302892819693927, "grad_norm": 1.1548758745193481, "learning_rate": 1e-05, "loss": 0.2466, "step": 24230 }, { "epoch": 0.0830631951916553, "grad_norm": 0.9903671741485596, "learning_rate": 1e-05, "loss": 0.2483, "step": 24240 }, { "epoch": 0.08309746218637133, "grad_norm": 
1.0387729406356812, "learning_rate": 1e-05, "loss": 0.2372, "step": 24250 }, { "epoch": 0.08313172918108735, "grad_norm": 1.15946626663208, "learning_rate": 1e-05, "loss": 0.2499, "step": 24260 }, { "epoch": 0.08316599617580339, "grad_norm": 1.1431846618652344, "learning_rate": 1e-05, "loss": 0.2379, "step": 24270 }, { "epoch": 0.08320026317051941, "grad_norm": 1.1871981620788574, "learning_rate": 1e-05, "loss": 0.2366, "step": 24280 }, { "epoch": 0.08323453016523545, "grad_norm": 1.285339117050171, "learning_rate": 1e-05, "loss": 0.2408, "step": 24290 }, { "epoch": 0.08326879715995147, "grad_norm": 0.9946316480636597, "learning_rate": 1e-05, "loss": 0.2258, "step": 24300 }, { "epoch": 0.08330306415466751, "grad_norm": 1.0830858945846558, "learning_rate": 1e-05, "loss": 0.243, "step": 24310 }, { "epoch": 0.08333733114938353, "grad_norm": 1.1845496892929077, "learning_rate": 1e-05, "loss": 0.2384, "step": 24320 }, { "epoch": 0.08337159814409957, "grad_norm": 0.9513915777206421, "learning_rate": 1e-05, "loss": 0.247, "step": 24330 }, { "epoch": 0.0834058651388156, "grad_norm": 1.101516604423523, "learning_rate": 1e-05, "loss": 0.2642, "step": 24340 }, { "epoch": 0.08344013213353163, "grad_norm": 1.2533254623413086, "learning_rate": 1e-05, "loss": 0.2462, "step": 24350 }, { "epoch": 0.08347439912824765, "grad_norm": 1.1113494634628296, "learning_rate": 1e-05, "loss": 0.2542, "step": 24360 }, { "epoch": 0.08350866612296369, "grad_norm": 1.0338751077651978, "learning_rate": 1e-05, "loss": 0.2364, "step": 24370 }, { "epoch": 0.08354293311767971, "grad_norm": 1.3761131763458252, "learning_rate": 1e-05, "loss": 0.2484, "step": 24380 }, { "epoch": 0.08357720011239574, "grad_norm": 1.1954588890075684, "learning_rate": 1e-05, "loss": 0.2618, "step": 24390 }, { "epoch": 0.08361146710711177, "grad_norm": 1.1829007863998413, "learning_rate": 1e-05, "loss": 0.2477, "step": 24400 }, { "epoch": 0.0836457341018278, "grad_norm": 1.0606454610824585, "learning_rate": 1e-05, "loss": 
0.2591, "step": 24410 }, { "epoch": 0.08368000109654383, "grad_norm": 1.1648555994033813, "learning_rate": 1e-05, "loss": 0.2381, "step": 24420 }, { "epoch": 0.08371426809125986, "grad_norm": 1.1213902235031128, "learning_rate": 1e-05, "loss": 0.2335, "step": 24430 }, { "epoch": 0.08374853508597589, "grad_norm": 1.0329564809799194, "learning_rate": 1e-05, "loss": 0.2505, "step": 24440 }, { "epoch": 0.08378280208069191, "grad_norm": 1.0963842868804932, "learning_rate": 1e-05, "loss": 0.2465, "step": 24450 }, { "epoch": 0.08381706907540795, "grad_norm": 1.1725986003875732, "learning_rate": 1e-05, "loss": 0.2599, "step": 24460 }, { "epoch": 0.08385133607012397, "grad_norm": 1.1225870847702026, "learning_rate": 1e-05, "loss": 0.2319, "step": 24470 }, { "epoch": 0.08388560306484001, "grad_norm": 1.0629470348358154, "learning_rate": 1e-05, "loss": 0.2599, "step": 24480 }, { "epoch": 0.08391987005955603, "grad_norm": 1.012823462486267, "learning_rate": 1e-05, "loss": 0.2225, "step": 24490 }, { "epoch": 0.08395413705427207, "grad_norm": 1.0204377174377441, "learning_rate": 1e-05, "loss": 0.2512, "step": 24500 }, { "epoch": 0.08395413705427207, "eval_cer": 12.515434820956076, "eval_loss": 0.2421187460422516, "eval_normalized_cer": 8.792965627498003, "eval_runtime": 226.7851, "eval_samples_per_second": 2.258, "eval_steps_per_second": 0.035, "step": 24500 } ], "logging_steps": 10, "max_steps": 291826, "num_input_tokens_seen": 0, "num_train_epochs": 9223372036854775807, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 4.96759625220096e+21, "train_batch_size": 128, "trial_name": null, "trial_params": null }