diff --git "a/checkpoint-8500/trainer_state.json" "b/checkpoint-8500/trainer_state.json" new file mode 100644--- /dev/null +++ "b/checkpoint-8500/trainer_state.json" @@ -0,0 +1,6153 @@ +{ + "best_metric": 8.912869704236611, + "best_model_checkpoint": "kotoba_v2_enc_logs_epoch2_2/checkpoint-4000", + "epoch": 0.029126945508625, + "eval_steps": 500, + "global_step": 8500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 3.4266994716029415e-05, + "grad_norm": 1.0561553239822388, + "learning_rate": 1e-05, + "loss": 0.2361, + "step": 10 + }, + { + "epoch": 6.853398943205883e-05, + "grad_norm": 1.1626238822937012, + "learning_rate": 1e-05, + "loss": 0.2265, + "step": 20 + }, + { + "epoch": 0.00010280098414808825, + "grad_norm": 0.9845689535140991, + "learning_rate": 1e-05, + "loss": 0.2279, + "step": 30 + }, + { + "epoch": 0.00013706797886411766, + "grad_norm": 1.142356276512146, + "learning_rate": 1e-05, + "loss": 0.2382, + "step": 40 + }, + { + "epoch": 0.00017133497358014707, + "grad_norm": 1.0053240060806274, + "learning_rate": 1e-05, + "loss": 0.2473, + "step": 50 + }, + { + "epoch": 0.0002056019682961765, + "grad_norm": 1.1098105907440186, + "learning_rate": 1e-05, + "loss": 0.2438, + "step": 60 + }, + { + "epoch": 0.0002398689630122059, + "grad_norm": 1.191983699798584, + "learning_rate": 1e-05, + "loss": 0.2293, + "step": 70 + }, + { + "epoch": 0.0002741359577282353, + "grad_norm": 1.1295104026794434, + "learning_rate": 1e-05, + "loss": 0.2362, + "step": 80 + }, + { + "epoch": 0.0003084029524442647, + "grad_norm": 1.037972092628479, + "learning_rate": 1e-05, + "loss": 0.2455, + "step": 90 + }, + { + "epoch": 0.00034266994716029413, + "grad_norm": 1.1975648403167725, + "learning_rate": 1e-05, + "loss": 0.2459, + "step": 100 + }, + { + "epoch": 0.00037693694187632354, + "grad_norm": 1.0676342248916626, + "learning_rate": 1e-05, + "loss": 0.2271, + "step": 110 + }, + { + "epoch": 
0.000411203936592353, + "grad_norm": 1.0749495029449463, + "learning_rate": 1e-05, + "loss": 0.2417, + "step": 120 + }, + { + "epoch": 0.0004454709313083824, + "grad_norm": 1.094260811805725, + "learning_rate": 1e-05, + "loss": 0.2354, + "step": 130 + }, + { + "epoch": 0.0004797379260244118, + "grad_norm": 1.0395853519439697, + "learning_rate": 1e-05, + "loss": 0.2381, + "step": 140 + }, + { + "epoch": 0.0005140049207404412, + "grad_norm": 1.2008885145187378, + "learning_rate": 1e-05, + "loss": 0.2354, + "step": 150 + }, + { + "epoch": 0.0005482719154564706, + "grad_norm": 1.0647832155227661, + "learning_rate": 1e-05, + "loss": 0.2321, + "step": 160 + }, + { + "epoch": 0.0005825389101725, + "grad_norm": 1.327071189880371, + "learning_rate": 1e-05, + "loss": 0.238, + "step": 170 + }, + { + "epoch": 0.0006168059048885295, + "grad_norm": 1.1184055805206299, + "learning_rate": 1e-05, + "loss": 0.2242, + "step": 180 + }, + { + "epoch": 0.0006510728996045589, + "grad_norm": 1.2512784004211426, + "learning_rate": 1e-05, + "loss": 0.2437, + "step": 190 + }, + { + "epoch": 0.0006853398943205883, + "grad_norm": 1.0614465475082397, + "learning_rate": 1e-05, + "loss": 0.2382, + "step": 200 + }, + { + "epoch": 0.0007196068890366177, + "grad_norm": 1.0607149600982666, + "learning_rate": 1e-05, + "loss": 0.2381, + "step": 210 + }, + { + "epoch": 0.0007538738837526471, + "grad_norm": 1.0422028303146362, + "learning_rate": 1e-05, + "loss": 0.2294, + "step": 220 + }, + { + "epoch": 0.0007881408784686765, + "grad_norm": 1.0162984132766724, + "learning_rate": 1e-05, + "loss": 0.2275, + "step": 230 + }, + { + "epoch": 0.000822407873184706, + "grad_norm": 1.1085543632507324, + "learning_rate": 1e-05, + "loss": 0.2161, + "step": 240 + }, + { + "epoch": 0.0008566748679007354, + "grad_norm": 1.1854636669158936, + "learning_rate": 1e-05, + "loss": 0.2382, + "step": 250 + }, + { + "epoch": 0.0008909418626167648, + "grad_norm": 1.40137779712677, + "learning_rate": 1e-05, + "loss": 0.2579, + 
"step": 260 + }, + { + "epoch": 0.0009252088573327942, + "grad_norm": 1.0814112424850464, + "learning_rate": 1e-05, + "loss": 0.2612, + "step": 270 + }, + { + "epoch": 0.0009594758520488236, + "grad_norm": 1.083736538887024, + "learning_rate": 1e-05, + "loss": 0.2711, + "step": 280 + }, + { + "epoch": 0.000993742846764853, + "grad_norm": 1.0861411094665527, + "learning_rate": 1e-05, + "loss": 0.2642, + "step": 290 + }, + { + "epoch": 0.0010280098414808825, + "grad_norm": 1.1141265630722046, + "learning_rate": 1e-05, + "loss": 0.2585, + "step": 300 + }, + { + "epoch": 0.0010622768361969119, + "grad_norm": 1.326241374015808, + "learning_rate": 1e-05, + "loss": 0.2858, + "step": 310 + }, + { + "epoch": 0.0010965438309129413, + "grad_norm": 1.393750786781311, + "learning_rate": 1e-05, + "loss": 0.2635, + "step": 320 + }, + { + "epoch": 0.0011308108256289707, + "grad_norm": 1.0851459503173828, + "learning_rate": 1e-05, + "loss": 0.2565, + "step": 330 + }, + { + "epoch": 0.001165077820345, + "grad_norm": 1.2323757410049438, + "learning_rate": 1e-05, + "loss": 0.2465, + "step": 340 + }, + { + "epoch": 0.0011993448150610295, + "grad_norm": 1.376953125, + "learning_rate": 1e-05, + "loss": 0.2671, + "step": 350 + }, + { + "epoch": 0.001233611809777059, + "grad_norm": 1.084592580795288, + "learning_rate": 1e-05, + "loss": 0.2643, + "step": 360 + }, + { + "epoch": 0.0012678788044930883, + "grad_norm": 1.2907005548477173, + "learning_rate": 1e-05, + "loss": 0.2584, + "step": 370 + }, + { + "epoch": 0.0013021457992091177, + "grad_norm": 1.0698130130767822, + "learning_rate": 1e-05, + "loss": 0.2526, + "step": 380 + }, + { + "epoch": 0.0013364127939251471, + "grad_norm": 1.1399807929992676, + "learning_rate": 1e-05, + "loss": 0.2759, + "step": 390 + }, + { + "epoch": 0.0013706797886411765, + "grad_norm": 1.1480791568756104, + "learning_rate": 1e-05, + "loss": 0.2499, + "step": 400 + }, + { + "epoch": 0.001404946783357206, + "grad_norm": 1.3095237016677856, + "learning_rate": 
1e-05, + "loss": 0.2536, + "step": 410 + }, + { + "epoch": 0.0014392137780732353, + "grad_norm": 1.068246841430664, + "learning_rate": 1e-05, + "loss": 0.2604, + "step": 420 + }, + { + "epoch": 0.0014734807727892648, + "grad_norm": 1.2310419082641602, + "learning_rate": 1e-05, + "loss": 0.2632, + "step": 430 + }, + { + "epoch": 0.0015077477675052942, + "grad_norm": 1.161867380142212, + "learning_rate": 1e-05, + "loss": 0.2584, + "step": 440 + }, + { + "epoch": 0.0015420147622213236, + "grad_norm": 1.1461217403411865, + "learning_rate": 1e-05, + "loss": 0.2592, + "step": 450 + }, + { + "epoch": 0.001576281756937353, + "grad_norm": 1.3006030321121216, + "learning_rate": 1e-05, + "loss": 0.2607, + "step": 460 + }, + { + "epoch": 0.0016105487516533824, + "grad_norm": 1.1223125457763672, + "learning_rate": 1e-05, + "loss": 0.2433, + "step": 470 + }, + { + "epoch": 0.001644815746369412, + "grad_norm": 1.2909380197525024, + "learning_rate": 1e-05, + "loss": 0.2693, + "step": 480 + }, + { + "epoch": 0.0016790827410854414, + "grad_norm": 1.2270597219467163, + "learning_rate": 1e-05, + "loss": 0.2661, + "step": 490 + }, + { + "epoch": 0.0017133497358014708, + "grad_norm": 1.1439770460128784, + "learning_rate": 1e-05, + "loss": 0.2517, + "step": 500 + }, + { + "epoch": 0.0017133497358014708, + "eval_cer": 13.0358087846181, + "eval_loss": 0.25224336981773376, + "eval_normalized_cer": 9.4224620303757, + "eval_runtime": 227.2174, + "eval_samples_per_second": 2.253, + "eval_steps_per_second": 0.035, + "step": 500 + }, + { + "epoch": 0.0017476167305175002, + "grad_norm": 1.1377454996109009, + "learning_rate": 1e-05, + "loss": 0.2579, + "step": 510 + }, + { + "epoch": 0.0017818837252335296, + "grad_norm": 1.2096498012542725, + "learning_rate": 1e-05, + "loss": 0.2727, + "step": 520 + }, + { + "epoch": 0.001816150719949559, + "grad_norm": 1.187213659286499, + "learning_rate": 1e-05, + "loss": 0.2562, + "step": 530 + }, + { + "epoch": 0.0018504177146655885, + "grad_norm": 
0.969393253326416, + "learning_rate": 1e-05, + "loss": 0.2378, + "step": 540 + }, + { + "epoch": 0.0018846847093816179, + "grad_norm": 0.9745528697967529, + "learning_rate": 1e-05, + "loss": 0.2774, + "step": 550 + }, + { + "epoch": 0.0019189517040976473, + "grad_norm": 1.0725352764129639, + "learning_rate": 1e-05, + "loss": 0.2541, + "step": 560 + }, + { + "epoch": 0.0019532186988136767, + "grad_norm": 1.217871904373169, + "learning_rate": 1e-05, + "loss": 0.2395, + "step": 570 + }, + { + "epoch": 0.001987485693529706, + "grad_norm": 1.3582627773284912, + "learning_rate": 1e-05, + "loss": 0.2594, + "step": 580 + }, + { + "epoch": 0.0020217526882457355, + "grad_norm": 1.2415379285812378, + "learning_rate": 1e-05, + "loss": 0.2582, + "step": 590 + }, + { + "epoch": 0.002056019682961765, + "grad_norm": 0.9810131192207336, + "learning_rate": 1e-05, + "loss": 0.2284, + "step": 600 + }, + { + "epoch": 0.0020902866776777943, + "grad_norm": 0.9806564450263977, + "learning_rate": 1e-05, + "loss": 0.2688, + "step": 610 + }, + { + "epoch": 0.0021245536723938237, + "grad_norm": 1.2755467891693115, + "learning_rate": 1e-05, + "loss": 0.2591, + "step": 620 + }, + { + "epoch": 0.002158820667109853, + "grad_norm": 0.9300326704978943, + "learning_rate": 1e-05, + "loss": 0.2444, + "step": 630 + }, + { + "epoch": 0.0021930876618258825, + "grad_norm": 1.1276524066925049, + "learning_rate": 1e-05, + "loss": 0.236, + "step": 640 + }, + { + "epoch": 0.002227354656541912, + "grad_norm": 1.1786876916885376, + "learning_rate": 1e-05, + "loss": 0.2443, + "step": 650 + }, + { + "epoch": 0.0022616216512579414, + "grad_norm": 1.1702712774276733, + "learning_rate": 1e-05, + "loss": 0.2627, + "step": 660 + }, + { + "epoch": 0.0022958886459739708, + "grad_norm": 1.2837899923324585, + "learning_rate": 1e-05, + "loss": 0.2378, + "step": 670 + }, + { + "epoch": 0.00233015564069, + "grad_norm": 1.0623608827590942, + "learning_rate": 1e-05, + "loss": 0.2491, + "step": 680 + }, + { + "epoch": 
0.0023644226354060296, + "grad_norm": 1.1288243532180786, + "learning_rate": 1e-05, + "loss": 0.2773, + "step": 690 + }, + { + "epoch": 0.002398689630122059, + "grad_norm": 1.0192692279815674, + "learning_rate": 1e-05, + "loss": 0.2492, + "step": 700 + }, + { + "epoch": 0.0024329566248380884, + "grad_norm": 1.2274680137634277, + "learning_rate": 1e-05, + "loss": 0.2345, + "step": 710 + }, + { + "epoch": 0.002467223619554118, + "grad_norm": 1.240645170211792, + "learning_rate": 1e-05, + "loss": 0.2624, + "step": 720 + }, + { + "epoch": 0.002501490614270147, + "grad_norm": 1.0681366920471191, + "learning_rate": 1e-05, + "loss": 0.2553, + "step": 730 + }, + { + "epoch": 0.0025357576089861766, + "grad_norm": 1.0161867141723633, + "learning_rate": 1e-05, + "loss": 0.2547, + "step": 740 + }, + { + "epoch": 0.002570024603702206, + "grad_norm": 1.2384017705917358, + "learning_rate": 1e-05, + "loss": 0.2449, + "step": 750 + }, + { + "epoch": 0.0026042915984182354, + "grad_norm": 1.1739261150360107, + "learning_rate": 1e-05, + "loss": 0.2523, + "step": 760 + }, + { + "epoch": 0.002638558593134265, + "grad_norm": 1.0396535396575928, + "learning_rate": 1e-05, + "loss": 0.2535, + "step": 770 + }, + { + "epoch": 0.0026728255878502943, + "grad_norm": 1.14767324924469, + "learning_rate": 1e-05, + "loss": 0.2594, + "step": 780 + }, + { + "epoch": 0.0027070925825663237, + "grad_norm": 1.1783303022384644, + "learning_rate": 1e-05, + "loss": 0.2546, + "step": 790 + }, + { + "epoch": 0.002741359577282353, + "grad_norm": 1.1065645217895508, + "learning_rate": 1e-05, + "loss": 0.2547, + "step": 800 + }, + { + "epoch": 0.0027756265719983825, + "grad_norm": 1.256645917892456, + "learning_rate": 1e-05, + "loss": 0.2548, + "step": 810 + }, + { + "epoch": 0.002809893566714412, + "grad_norm": 1.058158278465271, + "learning_rate": 1e-05, + "loss": 0.257, + "step": 820 + }, + { + "epoch": 0.0028441605614304413, + "grad_norm": 1.0647656917572021, + "learning_rate": 1e-05, + "loss": 0.2479, + 
"step": 830 + }, + { + "epoch": 0.0028784275561464707, + "grad_norm": 1.1984691619873047, + "learning_rate": 1e-05, + "loss": 0.2503, + "step": 840 + }, + { + "epoch": 0.0029126945508625, + "grad_norm": 1.1380070447921753, + "learning_rate": 1e-05, + "loss": 0.245, + "step": 850 + }, + { + "epoch": 0.0029469615455785295, + "grad_norm": 1.2131065130233765, + "learning_rate": 1e-05, + "loss": 0.242, + "step": 860 + }, + { + "epoch": 0.002981228540294559, + "grad_norm": 1.1822234392166138, + "learning_rate": 1e-05, + "loss": 0.2613, + "step": 870 + }, + { + "epoch": 0.0030154955350105883, + "grad_norm": 1.0591018199920654, + "learning_rate": 1e-05, + "loss": 0.2654, + "step": 880 + }, + { + "epoch": 0.0030497625297266177, + "grad_norm": 1.2318428754806519, + "learning_rate": 1e-05, + "loss": 0.2525, + "step": 890 + }, + { + "epoch": 0.003084029524442647, + "grad_norm": 1.0146839618682861, + "learning_rate": 1e-05, + "loss": 0.2609, + "step": 900 + }, + { + "epoch": 0.0031182965191586766, + "grad_norm": 1.1508561372756958, + "learning_rate": 1e-05, + "loss": 0.2541, + "step": 910 + }, + { + "epoch": 0.003152563513874706, + "grad_norm": 1.1494849920272827, + "learning_rate": 1e-05, + "loss": 0.2461, + "step": 920 + }, + { + "epoch": 0.0031868305085907354, + "grad_norm": 1.2423807382583618, + "learning_rate": 1e-05, + "loss": 0.2573, + "step": 930 + }, + { + "epoch": 0.0032210975033067648, + "grad_norm": 1.2714438438415527, + "learning_rate": 1e-05, + "loss": 0.2545, + "step": 940 + }, + { + "epoch": 0.0032553644980227946, + "grad_norm": 1.2088007926940918, + "learning_rate": 1e-05, + "loss": 0.2773, + "step": 950 + }, + { + "epoch": 0.003289631492738824, + "grad_norm": 1.0737963914871216, + "learning_rate": 1e-05, + "loss": 0.2495, + "step": 960 + }, + { + "epoch": 0.0033238984874548534, + "grad_norm": 1.0942472219467163, + "learning_rate": 1e-05, + "loss": 0.2401, + "step": 970 + }, + { + "epoch": 0.003358165482170883, + "grad_norm": 1.1282986402511597, + 
"learning_rate": 1e-05, + "loss": 0.2638, + "step": 980 + }, + { + "epoch": 0.0033924324768869123, + "grad_norm": 1.0762425661087036, + "learning_rate": 1e-05, + "loss": 0.2619, + "step": 990 + }, + { + "epoch": 0.0034266994716029417, + "grad_norm": 1.09200119972229, + "learning_rate": 1e-05, + "loss": 0.2464, + "step": 1000 + }, + { + "epoch": 0.0034266994716029417, + "eval_cer": 13.80313988357735, + "eval_loss": 0.25397512316703796, + "eval_normalized_cer": 9.952038369304557, + "eval_runtime": 227.5088, + "eval_samples_per_second": 2.25, + "eval_steps_per_second": 0.035, + "step": 1000 + }, + { + "epoch": 0.003460966466318971, + "grad_norm": 0.9681844711303711, + "learning_rate": 1e-05, + "loss": 0.2567, + "step": 1010 + }, + { + "epoch": 0.0034952334610350005, + "grad_norm": 1.0064711570739746, + "learning_rate": 1e-05, + "loss": 0.2514, + "step": 1020 + }, + { + "epoch": 0.00352950045575103, + "grad_norm": 1.190294623374939, + "learning_rate": 1e-05, + "loss": 0.2654, + "step": 1030 + }, + { + "epoch": 0.0035637674504670593, + "grad_norm": 1.332492709159851, + "learning_rate": 1e-05, + "loss": 0.2725, + "step": 1040 + }, + { + "epoch": 0.0035980344451830887, + "grad_norm": 1.1110397577285767, + "learning_rate": 1e-05, + "loss": 0.2504, + "step": 1050 + }, + { + "epoch": 0.003632301439899118, + "grad_norm": 1.2327215671539307, + "learning_rate": 1e-05, + "loss": 0.2733, + "step": 1060 + }, + { + "epoch": 0.0036665684346151475, + "grad_norm": 1.1694815158843994, + "learning_rate": 1e-05, + "loss": 0.2611, + "step": 1070 + }, + { + "epoch": 0.003700835429331177, + "grad_norm": 1.212570309638977, + "learning_rate": 1e-05, + "loss": 0.2556, + "step": 1080 + }, + { + "epoch": 0.0037351024240472063, + "grad_norm": 1.1467297077178955, + "learning_rate": 1e-05, + "loss": 0.2485, + "step": 1090 + }, + { + "epoch": 0.0037693694187632357, + "grad_norm": 0.9628469347953796, + "learning_rate": 1e-05, + "loss": 0.2523, + "step": 1100 + }, + { + "epoch": 0.003803636413479265, 
+ "grad_norm": 1.1593494415283203, + "learning_rate": 1e-05, + "loss": 0.2635, + "step": 1110 + }, + { + "epoch": 0.0038379034081952946, + "grad_norm": 1.1376386880874634, + "learning_rate": 1e-05, + "loss": 0.2504, + "step": 1120 + }, + { + "epoch": 0.003872170402911324, + "grad_norm": 1.129338026046753, + "learning_rate": 1e-05, + "loss": 0.2601, + "step": 1130 + }, + { + "epoch": 0.003906437397627353, + "grad_norm": 1.0889575481414795, + "learning_rate": 1e-05, + "loss": 0.2455, + "step": 1140 + }, + { + "epoch": 0.003940704392343382, + "grad_norm": 1.1437270641326904, + "learning_rate": 1e-05, + "loss": 0.253, + "step": 1150 + }, + { + "epoch": 0.003974971387059412, + "grad_norm": 1.0283392667770386, + "learning_rate": 1e-05, + "loss": 0.2507, + "step": 1160 + }, + { + "epoch": 0.004009238381775441, + "grad_norm": 1.130747675895691, + "learning_rate": 1e-05, + "loss": 0.2715, + "step": 1170 + }, + { + "epoch": 0.004043505376491471, + "grad_norm": 1.3483778238296509, + "learning_rate": 1e-05, + "loss": 0.2742, + "step": 1180 + }, + { + "epoch": 0.0040777723712075, + "grad_norm": 1.0879924297332764, + "learning_rate": 1e-05, + "loss": 0.2641, + "step": 1190 + }, + { + "epoch": 0.00411203936592353, + "grad_norm": 1.1242927312850952, + "learning_rate": 1e-05, + "loss": 0.2586, + "step": 1200 + }, + { + "epoch": 0.004146306360639559, + "grad_norm": 1.0185858011245728, + "learning_rate": 1e-05, + "loss": 0.2465, + "step": 1210 + }, + { + "epoch": 0.004180573355355589, + "grad_norm": 0.9555259943008423, + "learning_rate": 1e-05, + "loss": 0.2528, + "step": 1220 + }, + { + "epoch": 0.004214840350071618, + "grad_norm": 1.210371971130371, + "learning_rate": 1e-05, + "loss": 0.2613, + "step": 1230 + }, + { + "epoch": 0.0042491073447876474, + "grad_norm": 1.1261368989944458, + "learning_rate": 1e-05, + "loss": 0.2551, + "step": 1240 + }, + { + "epoch": 0.004283374339503676, + "grad_norm": 1.2142603397369385, + "learning_rate": 1e-05, + "loss": 0.264, + "step": 1250 + }, + 
{ + "epoch": 0.004317641334219706, + "grad_norm": 1.057758092880249, + "learning_rate": 1e-05, + "loss": 0.2587, + "step": 1260 + }, + { + "epoch": 0.004351908328935736, + "grad_norm": 1.0871245861053467, + "learning_rate": 1e-05, + "loss": 0.2549, + "step": 1270 + }, + { + "epoch": 0.004386175323651765, + "grad_norm": 1.1214648485183716, + "learning_rate": 1e-05, + "loss": 0.2582, + "step": 1280 + }, + { + "epoch": 0.004420442318367795, + "grad_norm": 1.0265707969665527, + "learning_rate": 1e-05, + "loss": 0.2123, + "step": 1290 + }, + { + "epoch": 0.004454709313083824, + "grad_norm": 1.1180216073989868, + "learning_rate": 1e-05, + "loss": 0.2245, + "step": 1300 + }, + { + "epoch": 0.004488976307799854, + "grad_norm": 1.028238296508789, + "learning_rate": 1e-05, + "loss": 0.2118, + "step": 1310 + }, + { + "epoch": 0.004523243302515883, + "grad_norm": 1.0321682691574097, + "learning_rate": 1e-05, + "loss": 0.2196, + "step": 1320 + }, + { + "epoch": 0.0045575102972319126, + "grad_norm": 1.1180269718170166, + "learning_rate": 1e-05, + "loss": 0.2403, + "step": 1330 + }, + { + "epoch": 0.0045917772919479415, + "grad_norm": 1.079560399055481, + "learning_rate": 1e-05, + "loss": 0.2309, + "step": 1340 + }, + { + "epoch": 0.004626044286663971, + "grad_norm": 1.0062284469604492, + "learning_rate": 1e-05, + "loss": 0.228, + "step": 1350 + }, + { + "epoch": 0.00466031128138, + "grad_norm": 1.1098395586013794, + "learning_rate": 1e-05, + "loss": 0.2435, + "step": 1360 + }, + { + "epoch": 0.00469457827609603, + "grad_norm": 1.0619688034057617, + "learning_rate": 1e-05, + "loss": 0.2342, + "step": 1370 + }, + { + "epoch": 0.004728845270812059, + "grad_norm": 1.1943925619125366, + "learning_rate": 1e-05, + "loss": 0.2315, + "step": 1380 + }, + { + "epoch": 0.004763112265528089, + "grad_norm": 1.0958552360534668, + "learning_rate": 1e-05, + "loss": 0.2379, + "step": 1390 + }, + { + "epoch": 0.004797379260244118, + "grad_norm": 1.0984197854995728, + "learning_rate": 1e-05, + 
"loss": 0.2208, + "step": 1400 + }, + { + "epoch": 0.004831646254960148, + "grad_norm": 1.0741859674453735, + "learning_rate": 1e-05, + "loss": 0.2378, + "step": 1410 + }, + { + "epoch": 0.004865913249676177, + "grad_norm": 1.1457058191299438, + "learning_rate": 1e-05, + "loss": 0.2516, + "step": 1420 + }, + { + "epoch": 0.004900180244392207, + "grad_norm": 0.9849014282226562, + "learning_rate": 1e-05, + "loss": 0.2406, + "step": 1430 + }, + { + "epoch": 0.004934447239108236, + "grad_norm": 1.1174912452697754, + "learning_rate": 1e-05, + "loss": 0.2122, + "step": 1440 + }, + { + "epoch": 0.0049687142338242654, + "grad_norm": 1.0292854309082031, + "learning_rate": 1e-05, + "loss": 0.2349, + "step": 1450 + }, + { + "epoch": 0.005002981228540294, + "grad_norm": 1.0343785285949707, + "learning_rate": 1e-05, + "loss": 0.2158, + "step": 1460 + }, + { + "epoch": 0.005037248223256324, + "grad_norm": 1.1178008317947388, + "learning_rate": 1e-05, + "loss": 0.2264, + "step": 1470 + }, + { + "epoch": 0.005071515217972353, + "grad_norm": 1.0238450765609741, + "learning_rate": 1e-05, + "loss": 0.2287, + "step": 1480 + }, + { + "epoch": 0.005105782212688383, + "grad_norm": 1.1728886365890503, + "learning_rate": 1e-05, + "loss": 0.2373, + "step": 1490 + }, + { + "epoch": 0.005140049207404412, + "grad_norm": 1.227034091949463, + "learning_rate": 1e-05, + "loss": 0.222, + "step": 1500 + }, + { + "epoch": 0.005140049207404412, + "eval_cer": 13.150467454577527, + "eval_loss": 0.25801682472229004, + "eval_normalized_cer": 9.452438049560353, + "eval_runtime": 227.9378, + "eval_samples_per_second": 2.246, + "eval_steps_per_second": 0.035, + "step": 1500 + }, + { + "epoch": 0.005174316202120442, + "grad_norm": 1.0703920125961304, + "learning_rate": 1e-05, + "loss": 0.2156, + "step": 1510 + }, + { + "epoch": 0.005208583196836471, + "grad_norm": 1.1343841552734375, + "learning_rate": 1e-05, + "loss": 0.2126, + "step": 1520 + }, + { + "epoch": 0.005242850191552501, + "grad_norm": 
1.1743741035461426, + "learning_rate": 1e-05, + "loss": 0.2491, + "step": 1530 + }, + { + "epoch": 0.00527711718626853, + "grad_norm": 1.1476744413375854, + "learning_rate": 1e-05, + "loss": 0.236, + "step": 1540 + }, + { + "epoch": 0.0053113841809845595, + "grad_norm": 1.0899590253829956, + "learning_rate": 1e-05, + "loss": 0.2361, + "step": 1550 + }, + { + "epoch": 0.0053456511757005885, + "grad_norm": 1.0281250476837158, + "learning_rate": 1e-05, + "loss": 0.2226, + "step": 1560 + }, + { + "epoch": 0.005379918170416618, + "grad_norm": 0.9932867884635925, + "learning_rate": 1e-05, + "loss": 0.2301, + "step": 1570 + }, + { + "epoch": 0.005414185165132647, + "grad_norm": 1.1992309093475342, + "learning_rate": 1e-05, + "loss": 0.2179, + "step": 1580 + }, + { + "epoch": 0.005448452159848677, + "grad_norm": 1.0017774105072021, + "learning_rate": 1e-05, + "loss": 0.2244, + "step": 1590 + }, + { + "epoch": 0.005482719154564706, + "grad_norm": 1.0827686786651611, + "learning_rate": 1e-05, + "loss": 0.2313, + "step": 1600 + }, + { + "epoch": 0.005516986149280736, + "grad_norm": 1.2260409593582153, + "learning_rate": 1e-05, + "loss": 0.229, + "step": 1610 + }, + { + "epoch": 0.005551253143996765, + "grad_norm": 1.2530804872512817, + "learning_rate": 1e-05, + "loss": 0.2437, + "step": 1620 + }, + { + "epoch": 0.005585520138712795, + "grad_norm": 1.068452000617981, + "learning_rate": 1e-05, + "loss": 0.2138, + "step": 1630 + }, + { + "epoch": 0.005619787133428824, + "grad_norm": 1.3108712434768677, + "learning_rate": 1e-05, + "loss": 0.2284, + "step": 1640 + }, + { + "epoch": 0.005654054128144854, + "grad_norm": 1.0919209718704224, + "learning_rate": 1e-05, + "loss": 0.213, + "step": 1650 + }, + { + "epoch": 0.005688321122860883, + "grad_norm": 1.1530914306640625, + "learning_rate": 1e-05, + "loss": 0.2292, + "step": 1660 + }, + { + "epoch": 0.005722588117576912, + "grad_norm": 1.084028959274292, + "learning_rate": 1e-05, + "loss": 0.2393, + "step": 1670 + }, + { + "epoch": 
0.005756855112292941, + "grad_norm": 1.247847557067871, + "learning_rate": 1e-05, + "loss": 0.2452, + "step": 1680 + }, + { + "epoch": 0.005791122107008971, + "grad_norm": 1.03806734085083, + "learning_rate": 1e-05, + "loss": 0.2317, + "step": 1690 + }, + { + "epoch": 0.005825389101725, + "grad_norm": 1.1643092632293701, + "learning_rate": 1e-05, + "loss": 0.2348, + "step": 1700 + }, + { + "epoch": 0.00585965609644103, + "grad_norm": 1.1066207885742188, + "learning_rate": 1e-05, + "loss": 0.2348, + "step": 1710 + }, + { + "epoch": 0.005893923091157059, + "grad_norm": 1.1813760995864868, + "learning_rate": 1e-05, + "loss": 0.2295, + "step": 1720 + }, + { + "epoch": 0.005928190085873089, + "grad_norm": 1.1444518566131592, + "learning_rate": 1e-05, + "loss": 0.2101, + "step": 1730 + }, + { + "epoch": 0.005962457080589118, + "grad_norm": 1.1485129594802856, + "learning_rate": 1e-05, + "loss": 0.2397, + "step": 1740 + }, + { + "epoch": 0.005996724075305148, + "grad_norm": 1.1813607215881348, + "learning_rate": 1e-05, + "loss": 0.231, + "step": 1750 + }, + { + "epoch": 0.006030991070021177, + "grad_norm": 1.4075005054473877, + "learning_rate": 1e-05, + "loss": 0.2306, + "step": 1760 + }, + { + "epoch": 0.0060652580647372065, + "grad_norm": 1.2183804512023926, + "learning_rate": 1e-05, + "loss": 0.2227, + "step": 1770 + }, + { + "epoch": 0.0060995250594532355, + "grad_norm": 1.3654927015304565, + "learning_rate": 1e-05, + "loss": 0.2341, + "step": 1780 + }, + { + "epoch": 0.006133792054169265, + "grad_norm": 1.2806668281555176, + "learning_rate": 1e-05, + "loss": 0.2226, + "step": 1790 + }, + { + "epoch": 0.006168059048885294, + "grad_norm": 1.2949618101119995, + "learning_rate": 1e-05, + "loss": 0.2698, + "step": 1800 + }, + { + "epoch": 0.006202326043601324, + "grad_norm": 1.3080159425735474, + "learning_rate": 1e-05, + "loss": 0.2691, + "step": 1810 + }, + { + "epoch": 0.006236593038317353, + "grad_norm": 1.1831908226013184, + "learning_rate": 1e-05, + "loss": 0.2644, 
+ "step": 1820 + }, + { + "epoch": 0.006270860033033383, + "grad_norm": 1.1216965913772583, + "learning_rate": 1e-05, + "loss": 0.2582, + "step": 1830 + }, + { + "epoch": 0.006305127027749412, + "grad_norm": 1.1943161487579346, + "learning_rate": 1e-05, + "loss": 0.2769, + "step": 1840 + }, + { + "epoch": 0.006339394022465442, + "grad_norm": 1.0856040716171265, + "learning_rate": 1e-05, + "loss": 0.2526, + "step": 1850 + }, + { + "epoch": 0.006373661017181471, + "grad_norm": 1.1100040674209595, + "learning_rate": 1e-05, + "loss": 0.2576, + "step": 1860 + }, + { + "epoch": 0.006407928011897501, + "grad_norm": 1.3369051218032837, + "learning_rate": 1e-05, + "loss": 0.2684, + "step": 1870 + }, + { + "epoch": 0.0064421950066135296, + "grad_norm": 1.158797264099121, + "learning_rate": 1e-05, + "loss": 0.2474, + "step": 1880 + }, + { + "epoch": 0.006476462001329559, + "grad_norm": 1.1821873188018799, + "learning_rate": 1e-05, + "loss": 0.272, + "step": 1890 + }, + { + "epoch": 0.006510728996045589, + "grad_norm": 1.0739686489105225, + "learning_rate": 1e-05, + "loss": 0.2798, + "step": 1900 + }, + { + "epoch": 0.006544995990761618, + "grad_norm": 1.0639653205871582, + "learning_rate": 1e-05, + "loss": 0.2682, + "step": 1910 + }, + { + "epoch": 0.006579262985477648, + "grad_norm": 1.2149512767791748, + "learning_rate": 1e-05, + "loss": 0.2586, + "step": 1920 + }, + { + "epoch": 0.006613529980193677, + "grad_norm": 1.1057014465332031, + "learning_rate": 1e-05, + "loss": 0.2719, + "step": 1930 + }, + { + "epoch": 0.006647796974909707, + "grad_norm": 1.0929185152053833, + "learning_rate": 1e-05, + "loss": 0.2703, + "step": 1940 + }, + { + "epoch": 0.006682063969625736, + "grad_norm": 1.0322917699813843, + "learning_rate": 1e-05, + "loss": 0.2477, + "step": 1950 + }, + { + "epoch": 0.006716330964341766, + "grad_norm": 1.2460272312164307, + "learning_rate": 1e-05, + "loss": 0.2816, + "step": 1960 + }, + { + "epoch": 0.006750597959057795, + "grad_norm": 1.2049859762191772, + 
"learning_rate": 1e-05, + "loss": 0.2648, + "step": 1970 + }, + { + "epoch": 0.0067848649537738245, + "grad_norm": 1.1182633638381958, + "learning_rate": 1e-05, + "loss": 0.2549, + "step": 1980 + }, + { + "epoch": 0.0068191319484898535, + "grad_norm": 1.1514990329742432, + "learning_rate": 1e-05, + "loss": 0.2695, + "step": 1990 + }, + { + "epoch": 0.006853398943205883, + "grad_norm": 1.0150858163833618, + "learning_rate": 1e-05, + "loss": 0.2532, + "step": 2000 + }, + { + "epoch": 0.006853398943205883, + "eval_cer": 13.565002645969306, + "eval_loss": 0.2523655593395233, + "eval_normalized_cer": 9.942046362909672, + "eval_runtime": 226.5571, + "eval_samples_per_second": 2.26, + "eval_steps_per_second": 0.035, + "step": 2000 + }, + { + "epoch": 0.006887665937921912, + "grad_norm": 1.0476700067520142, + "learning_rate": 1e-05, + "loss": 0.2555, + "step": 2010 + }, + { + "epoch": 0.006921932932637942, + "grad_norm": 1.1178691387176514, + "learning_rate": 1e-05, + "loss": 0.2489, + "step": 2020 + }, + { + "epoch": 0.006956199927353971, + "grad_norm": 1.2596313953399658, + "learning_rate": 1e-05, + "loss": 0.2884, + "step": 2030 + }, + { + "epoch": 0.006990466922070001, + "grad_norm": 1.1929702758789062, + "learning_rate": 1e-05, + "loss": 0.262, + "step": 2040 + }, + { + "epoch": 0.00702473391678603, + "grad_norm": 1.1269497871398926, + "learning_rate": 1e-05, + "loss": 0.2758, + "step": 2050 + }, + { + "epoch": 0.00705900091150206, + "grad_norm": 1.1495511531829834, + "learning_rate": 1e-05, + "loss": 0.2668, + "step": 2060 + }, + { + "epoch": 0.007093267906218089, + "grad_norm": 1.0648061037063599, + "learning_rate": 1e-05, + "loss": 0.2548, + "step": 2070 + }, + { + "epoch": 0.007127534900934119, + "grad_norm": 1.3193435668945312, + "learning_rate": 1e-05, + "loss": 0.2743, + "step": 2080 + }, + { + "epoch": 0.0071618018956501476, + "grad_norm": 1.2877907752990723, + "learning_rate": 1e-05, + "loss": 0.248, + "step": 2090 + }, + { + "epoch": 0.007196068890366177, + 
"grad_norm": 1.2012474536895752, + "learning_rate": 1e-05, + "loss": 0.2662, + "step": 2100 + }, + { + "epoch": 0.007230335885082206, + "grad_norm": 1.1491566896438599, + "learning_rate": 1e-05, + "loss": 0.2666, + "step": 2110 + }, + { + "epoch": 0.007264602879798236, + "grad_norm": 1.1861019134521484, + "learning_rate": 1e-05, + "loss": 0.2618, + "step": 2120 + }, + { + "epoch": 0.007298869874514265, + "grad_norm": 1.123963713645935, + "learning_rate": 1e-05, + "loss": 0.2646, + "step": 2130 + }, + { + "epoch": 0.007333136869230295, + "grad_norm": 1.2697441577911377, + "learning_rate": 1e-05, + "loss": 0.2713, + "step": 2140 + }, + { + "epoch": 0.007367403863946324, + "grad_norm": 0.9741083383560181, + "learning_rate": 1e-05, + "loss": 0.2463, + "step": 2150 + }, + { + "epoch": 0.007401670858662354, + "grad_norm": 1.0292670726776123, + "learning_rate": 1e-05, + "loss": 0.2542, + "step": 2160 + }, + { + "epoch": 0.007435937853378383, + "grad_norm": 1.0958001613616943, + "learning_rate": 1e-05, + "loss": 0.2463, + "step": 2170 + }, + { + "epoch": 0.007470204848094413, + "grad_norm": 1.166869044303894, + "learning_rate": 1e-05, + "loss": 0.2454, + "step": 2180 + }, + { + "epoch": 0.007504471842810442, + "grad_norm": 1.2552424669265747, + "learning_rate": 1e-05, + "loss": 0.2498, + "step": 2190 + }, + { + "epoch": 0.0075387388375264715, + "grad_norm": 1.1589868068695068, + "learning_rate": 1e-05, + "loss": 0.2659, + "step": 2200 + }, + { + "epoch": 0.0075730058322425004, + "grad_norm": 1.1640287637710571, + "learning_rate": 1e-05, + "loss": 0.257, + "step": 2210 + }, + { + "epoch": 0.00760727282695853, + "grad_norm": 1.0953587293624878, + "learning_rate": 1e-05, + "loss": 0.2444, + "step": 2220 + }, + { + "epoch": 0.007641539821674559, + "grad_norm": 1.2174441814422607, + "learning_rate": 1e-05, + "loss": 0.2626, + "step": 2230 + }, + { + "epoch": 0.007675806816390589, + "grad_norm": 1.1194220781326294, + "learning_rate": 1e-05, + "loss": 0.241, + "step": 2240 + }, + 
{ + "epoch": 0.007710073811106618, + "grad_norm": 1.0677419900894165, + "learning_rate": 1e-05, + "loss": 0.2718, + "step": 2250 + }, + { + "epoch": 0.007744340805822648, + "grad_norm": 1.0956069231033325, + "learning_rate": 1e-05, + "loss": 0.2493, + "step": 2260 + }, + { + "epoch": 0.007778607800538677, + "grad_norm": 1.1772819757461548, + "learning_rate": 1e-05, + "loss": 0.2614, + "step": 2270 + }, + { + "epoch": 0.007812874795254707, + "grad_norm": 1.0341110229492188, + "learning_rate": 1e-05, + "loss": 0.2488, + "step": 2280 + }, + { + "epoch": 0.007847141789970737, + "grad_norm": 1.174186110496521, + "learning_rate": 1e-05, + "loss": 0.2542, + "step": 2290 + }, + { + "epoch": 0.007881408784686765, + "grad_norm": 0.9867792725563049, + "learning_rate": 1e-05, + "loss": 0.2582, + "step": 2300 + }, + { + "epoch": 0.007915675779402795, + "grad_norm": 1.1443661451339722, + "learning_rate": 1e-05, + "loss": 0.2331, + "step": 2310 + }, + { + "epoch": 0.007949942774118824, + "grad_norm": 1.117896318435669, + "learning_rate": 1e-05, + "loss": 0.2277, + "step": 2320 + }, + { + "epoch": 0.007984209768834854, + "grad_norm": 1.13510000705719, + "learning_rate": 1e-05, + "loss": 0.2137, + "step": 2330 + }, + { + "epoch": 0.008018476763550882, + "grad_norm": 0.9749162793159485, + "learning_rate": 1e-05, + "loss": 0.2161, + "step": 2340 + }, + { + "epoch": 0.008052743758266912, + "grad_norm": 1.1519534587860107, + "learning_rate": 1e-05, + "loss": 0.2254, + "step": 2350 + }, + { + "epoch": 0.008087010752982942, + "grad_norm": 1.0861778259277344, + "learning_rate": 1e-05, + "loss": 0.2153, + "step": 2360 + }, + { + "epoch": 0.008121277747698972, + "grad_norm": 1.0184444189071655, + "learning_rate": 1e-05, + "loss": 0.2066, + "step": 2370 + }, + { + "epoch": 0.008155544742415, + "grad_norm": 1.0581239461898804, + "learning_rate": 1e-05, + "loss": 0.2243, + "step": 2380 + }, + { + "epoch": 0.00818981173713103, + "grad_norm": 0.9954540729522705, + "learning_rate": 1e-05, + 
"loss": 0.2171, + "step": 2390 + }, + { + "epoch": 0.00822407873184706, + "grad_norm": 1.121960163116455, + "learning_rate": 1e-05, + "loss": 0.2216, + "step": 2400 + }, + { + "epoch": 0.00825834572656309, + "grad_norm": 1.097725510597229, + "learning_rate": 1e-05, + "loss": 0.2142, + "step": 2410 + }, + { + "epoch": 0.008292612721279118, + "grad_norm": 1.0566459894180298, + "learning_rate": 1e-05, + "loss": 0.2272, + "step": 2420 + }, + { + "epoch": 0.008326879715995147, + "grad_norm": 1.0077927112579346, + "learning_rate": 1e-05, + "loss": 0.211, + "step": 2430 + }, + { + "epoch": 0.008361146710711177, + "grad_norm": 1.176035761833191, + "learning_rate": 1e-05, + "loss": 0.2125, + "step": 2440 + }, + { + "epoch": 0.008395413705427207, + "grad_norm": 1.0064568519592285, + "learning_rate": 1e-05, + "loss": 0.2066, + "step": 2450 + }, + { + "epoch": 0.008429680700143235, + "grad_norm": 1.1852171421051025, + "learning_rate": 1e-05, + "loss": 0.2087, + "step": 2460 + }, + { + "epoch": 0.008463947694859265, + "grad_norm": 0.9580971002578735, + "learning_rate": 1e-05, + "loss": 0.2172, + "step": 2470 + }, + { + "epoch": 0.008498214689575295, + "grad_norm": 1.1230813264846802, + "learning_rate": 1e-05, + "loss": 0.2104, + "step": 2480 + }, + { + "epoch": 0.008532481684291325, + "grad_norm": 1.1891340017318726, + "learning_rate": 1e-05, + "loss": 0.229, + "step": 2490 + }, + { + "epoch": 0.008566748679007353, + "grad_norm": 1.2579045295715332, + "learning_rate": 1e-05, + "loss": 0.2109, + "step": 2500 + }, + { + "epoch": 0.008566748679007353, + "eval_cer": 13.300405715293703, + "eval_loss": 0.26059621572494507, + "eval_normalized_cer": 9.502398081534773, + "eval_runtime": 226.5522, + "eval_samples_per_second": 2.26, + "eval_steps_per_second": 0.035, + "step": 2500 + }, + { + "epoch": 0.008601015673723383, + "grad_norm": 1.0522507429122925, + "learning_rate": 1e-05, + "loss": 0.2154, + "step": 2510 + }, + { + "epoch": 0.008635282668439413, + "grad_norm": 
1.0875492095947266, + "learning_rate": 1e-05, + "loss": 0.2251, + "step": 2520 + }, + { + "epoch": 0.008669549663155442, + "grad_norm": 1.0868346691131592, + "learning_rate": 1e-05, + "loss": 0.2086, + "step": 2530 + }, + { + "epoch": 0.008703816657871472, + "grad_norm": 1.0993175506591797, + "learning_rate": 1e-05, + "loss": 0.205, + "step": 2540 + }, + { + "epoch": 0.0087380836525875, + "grad_norm": 1.0495941638946533, + "learning_rate": 1e-05, + "loss": 0.2135, + "step": 2550 + }, + { + "epoch": 0.00877235064730353, + "grad_norm": 1.0326807498931885, + "learning_rate": 1e-05, + "loss": 0.2105, + "step": 2560 + }, + { + "epoch": 0.00880661764201956, + "grad_norm": 1.0804367065429688, + "learning_rate": 1e-05, + "loss": 0.2438, + "step": 2570 + }, + { + "epoch": 0.00884088463673559, + "grad_norm": 1.0738023519515991, + "learning_rate": 1e-05, + "loss": 0.2537, + "step": 2580 + }, + { + "epoch": 0.008875151631451618, + "grad_norm": 1.1695871353149414, + "learning_rate": 1e-05, + "loss": 0.2518, + "step": 2590 + }, + { + "epoch": 0.008909418626167648, + "grad_norm": 1.155653476715088, + "learning_rate": 1e-05, + "loss": 0.2592, + "step": 2600 + }, + { + "epoch": 0.008943685620883678, + "grad_norm": 1.1516027450561523, + "learning_rate": 1e-05, + "loss": 0.2387, + "step": 2610 + }, + { + "epoch": 0.008977952615599707, + "grad_norm": 1.2618260383605957, + "learning_rate": 1e-05, + "loss": 0.2638, + "step": 2620 + }, + { + "epoch": 0.009012219610315736, + "grad_norm": 1.2422987222671509, + "learning_rate": 1e-05, + "loss": 0.2459, + "step": 2630 + }, + { + "epoch": 0.009046486605031765, + "grad_norm": 1.1460082530975342, + "learning_rate": 1e-05, + "loss": 0.2509, + "step": 2640 + }, + { + "epoch": 0.009080753599747795, + "grad_norm": 1.2502261400222778, + "learning_rate": 1e-05, + "loss": 0.2595, + "step": 2650 + }, + { + "epoch": 0.009115020594463825, + "grad_norm": 1.139840006828308, + "learning_rate": 1e-05, + "loss": 0.255, + "step": 2660 + }, + { + "epoch": 
0.009149287589179853, + "grad_norm": 1.3247896432876587, + "learning_rate": 1e-05, + "loss": 0.2721, + "step": 2670 + }, + { + "epoch": 0.009183554583895883, + "grad_norm": 1.1355103254318237, + "learning_rate": 1e-05, + "loss": 0.2604, + "step": 2680 + }, + { + "epoch": 0.009217821578611913, + "grad_norm": 1.106541633605957, + "learning_rate": 1e-05, + "loss": 0.2374, + "step": 2690 + }, + { + "epoch": 0.009252088573327943, + "grad_norm": 1.2375975847244263, + "learning_rate": 1e-05, + "loss": 0.2719, + "step": 2700 + }, + { + "epoch": 0.00928635556804397, + "grad_norm": 1.1048275232315063, + "learning_rate": 1e-05, + "loss": 0.2791, + "step": 2710 + }, + { + "epoch": 0.00932062256276, + "grad_norm": 0.9889766573905945, + "learning_rate": 1e-05, + "loss": 0.2457, + "step": 2720 + }, + { + "epoch": 0.00935488955747603, + "grad_norm": 1.1566202640533447, + "learning_rate": 1e-05, + "loss": 0.252, + "step": 2730 + }, + { + "epoch": 0.00938915655219206, + "grad_norm": 1.1586074829101562, + "learning_rate": 1e-05, + "loss": 0.2517, + "step": 2740 + }, + { + "epoch": 0.009423423546908088, + "grad_norm": 0.990419328212738, + "learning_rate": 1e-05, + "loss": 0.2572, + "step": 2750 + }, + { + "epoch": 0.009457690541624118, + "grad_norm": 1.1101089715957642, + "learning_rate": 1e-05, + "loss": 0.2525, + "step": 2760 + }, + { + "epoch": 0.009491957536340148, + "grad_norm": 1.0488269329071045, + "learning_rate": 1e-05, + "loss": 0.2452, + "step": 2770 + }, + { + "epoch": 0.009526224531056178, + "grad_norm": 1.1127737760543823, + "learning_rate": 1e-05, + "loss": 0.2578, + "step": 2780 + }, + { + "epoch": 0.009560491525772206, + "grad_norm": 1.2353262901306152, + "learning_rate": 1e-05, + "loss": 0.2412, + "step": 2790 + }, + { + "epoch": 0.009594758520488236, + "grad_norm": 1.1262571811676025, + "learning_rate": 1e-05, + "loss": 0.2438, + "step": 2800 + }, + { + "epoch": 0.009629025515204266, + "grad_norm": 1.294323205947876, + "learning_rate": 1e-05, + "loss": 0.2512, + 
"step": 2810 + }, + { + "epoch": 0.009663292509920296, + "grad_norm": 1.0706703662872314, + "learning_rate": 1e-05, + "loss": 0.2595, + "step": 2820 + }, + { + "epoch": 0.009697559504636324, + "grad_norm": 1.0089077949523926, + "learning_rate": 1e-05, + "loss": 0.2522, + "step": 2830 + }, + { + "epoch": 0.009731826499352354, + "grad_norm": 0.9697763323783875, + "learning_rate": 1e-05, + "loss": 0.2684, + "step": 2840 + }, + { + "epoch": 0.009766093494068383, + "grad_norm": 1.1122509241104126, + "learning_rate": 1e-05, + "loss": 0.2629, + "step": 2850 + }, + { + "epoch": 0.009800360488784413, + "grad_norm": 1.0381057262420654, + "learning_rate": 1e-05, + "loss": 0.2482, + "step": 2860 + }, + { + "epoch": 0.009834627483500441, + "grad_norm": 1.126947045326233, + "learning_rate": 1e-05, + "loss": 0.2674, + "step": 2870 + }, + { + "epoch": 0.009868894478216471, + "grad_norm": 1.0714973211288452, + "learning_rate": 1e-05, + "loss": 0.2634, + "step": 2880 + }, + { + "epoch": 0.009903161472932501, + "grad_norm": 1.0942039489746094, + "learning_rate": 1e-05, + "loss": 0.2751, + "step": 2890 + }, + { + "epoch": 0.009937428467648531, + "grad_norm": 1.1503955125808716, + "learning_rate": 1e-05, + "loss": 0.272, + "step": 2900 + }, + { + "epoch": 0.009971695462364559, + "grad_norm": 1.1912988424301147, + "learning_rate": 1e-05, + "loss": 0.2645, + "step": 2910 + }, + { + "epoch": 0.010005962457080589, + "grad_norm": 1.0941249132156372, + "learning_rate": 1e-05, + "loss": 0.2531, + "step": 2920 + }, + { + "epoch": 0.010040229451796619, + "grad_norm": 1.2545968294143677, + "learning_rate": 1e-05, + "loss": 0.2562, + "step": 2930 + }, + { + "epoch": 0.010074496446512649, + "grad_norm": 1.3605022430419922, + "learning_rate": 1e-05, + "loss": 0.2601, + "step": 2940 + }, + { + "epoch": 0.010108763441228677, + "grad_norm": 1.0911775827407837, + "learning_rate": 1e-05, + "loss": 0.2605, + "step": 2950 + }, + { + "epoch": 0.010143030435944706, + "grad_norm": 1.133867859840393, + 
"learning_rate": 1e-05, + "loss": 0.2554, + "step": 2960 + }, + { + "epoch": 0.010177297430660736, + "grad_norm": 1.2511764764785767, + "learning_rate": 1e-05, + "loss": 0.2658, + "step": 2970 + }, + { + "epoch": 0.010211564425376766, + "grad_norm": 1.1705303192138672, + "learning_rate": 1e-05, + "loss": 0.2737, + "step": 2980 + }, + { + "epoch": 0.010245831420092794, + "grad_norm": 1.132071614265442, + "learning_rate": 1e-05, + "loss": 0.2665, + "step": 2990 + }, + { + "epoch": 0.010280098414808824, + "grad_norm": 1.2301791906356812, + "learning_rate": 1e-05, + "loss": 0.2645, + "step": 3000 + }, + { + "epoch": 0.010280098414808824, + "eval_cer": 12.938789910037043, + "eval_loss": 0.2511608302593231, + "eval_normalized_cer": 9.152677857713828, + "eval_runtime": 227.4553, + "eval_samples_per_second": 2.251, + "eval_steps_per_second": 0.035, + "step": 3000 + }, + { + "epoch": 0.010314365409524854, + "grad_norm": 1.1527032852172852, + "learning_rate": 1e-05, + "loss": 0.2508, + "step": 3010 + }, + { + "epoch": 0.010348632404240884, + "grad_norm": 1.1162952184677124, + "learning_rate": 1e-05, + "loss": 0.2728, + "step": 3020 + }, + { + "epoch": 0.010382899398956912, + "grad_norm": 1.062084436416626, + "learning_rate": 1e-05, + "loss": 0.2496, + "step": 3030 + }, + { + "epoch": 0.010417166393672942, + "grad_norm": 1.1536457538604736, + "learning_rate": 1e-05, + "loss": 0.2633, + "step": 3040 + }, + { + "epoch": 0.010451433388388972, + "grad_norm": 1.2096189260482788, + "learning_rate": 1e-05, + "loss": 0.2498, + "step": 3050 + }, + { + "epoch": 0.010485700383105001, + "grad_norm": 0.9950299263000488, + "learning_rate": 1e-05, + "loss": 0.246, + "step": 3060 + }, + { + "epoch": 0.01051996737782103, + "grad_norm": 1.0628243684768677, + "learning_rate": 1e-05, + "loss": 0.2544, + "step": 3070 + }, + { + "epoch": 0.01055423437253706, + "grad_norm": 1.042555570602417, + "learning_rate": 1e-05, + "loss": 0.2401, + "step": 3080 + }, + { + "epoch": 0.01058850136725309, + 
"grad_norm": 1.22646164894104, + "learning_rate": 1e-05, + "loss": 0.2503, + "step": 3090 + }, + { + "epoch": 0.010622768361969119, + "grad_norm": 1.0862691402435303, + "learning_rate": 1e-05, + "loss": 0.2508, + "step": 3100 + }, + { + "epoch": 0.010657035356685147, + "grad_norm": 1.148868203163147, + "learning_rate": 1e-05, + "loss": 0.2526, + "step": 3110 + }, + { + "epoch": 0.010691302351401177, + "grad_norm": 1.1677169799804688, + "learning_rate": 1e-05, + "loss": 0.2481, + "step": 3120 + }, + { + "epoch": 0.010725569346117207, + "grad_norm": 0.990696132183075, + "learning_rate": 1e-05, + "loss": 0.2421, + "step": 3130 + }, + { + "epoch": 0.010759836340833237, + "grad_norm": 1.2869263887405396, + "learning_rate": 1e-05, + "loss": 0.2463, + "step": 3140 + }, + { + "epoch": 0.010794103335549265, + "grad_norm": 1.0741721391677856, + "learning_rate": 1e-05, + "loss": 0.2617, + "step": 3150 + }, + { + "epoch": 0.010828370330265295, + "grad_norm": 1.103102445602417, + "learning_rate": 1e-05, + "loss": 0.2442, + "step": 3160 + }, + { + "epoch": 0.010862637324981324, + "grad_norm": 1.2562378644943237, + "learning_rate": 1e-05, + "loss": 0.2589, + "step": 3170 + }, + { + "epoch": 0.010896904319697354, + "grad_norm": 1.2153191566467285, + "learning_rate": 1e-05, + "loss": 0.2417, + "step": 3180 + }, + { + "epoch": 0.010931171314413384, + "grad_norm": 1.0507330894470215, + "learning_rate": 1e-05, + "loss": 0.2607, + "step": 3190 + }, + { + "epoch": 0.010965438309129412, + "grad_norm": 1.1882787942886353, + "learning_rate": 1e-05, + "loss": 0.2469, + "step": 3200 + }, + { + "epoch": 0.010999705303845442, + "grad_norm": 1.1394702196121216, + "learning_rate": 1e-05, + "loss": 0.2574, + "step": 3210 + }, + { + "epoch": 0.011033972298561472, + "grad_norm": 1.2482614517211914, + "learning_rate": 1e-05, + "loss": 0.2456, + "step": 3220 + }, + { + "epoch": 0.011068239293277502, + "grad_norm": 1.0362995862960815, + "learning_rate": 1e-05, + "loss": 0.2589, + "step": 3230 + }, + { 
+ "epoch": 0.01110250628799353, + "grad_norm": 1.1730456352233887, + "learning_rate": 1e-05, + "loss": 0.2497, + "step": 3240 + }, + { + "epoch": 0.01113677328270956, + "grad_norm": 1.1563142538070679, + "learning_rate": 1e-05, + "loss": 0.2439, + "step": 3250 + }, + { + "epoch": 0.01117104027742559, + "grad_norm": 1.1030769348144531, + "learning_rate": 1e-05, + "loss": 0.2671, + "step": 3260 + }, + { + "epoch": 0.01120530727214162, + "grad_norm": 1.1719223260879517, + "learning_rate": 1e-05, + "loss": 0.2501, + "step": 3270 + }, + { + "epoch": 0.011239574266857648, + "grad_norm": 1.1840440034866333, + "learning_rate": 1e-05, + "loss": 0.2643, + "step": 3280 + }, + { + "epoch": 0.011273841261573677, + "grad_norm": 1.1928170919418335, + "learning_rate": 1e-05, + "loss": 0.2629, + "step": 3290 + }, + { + "epoch": 0.011308108256289707, + "grad_norm": 1.0311812162399292, + "learning_rate": 1e-05, + "loss": 0.2552, + "step": 3300 + }, + { + "epoch": 0.011342375251005737, + "grad_norm": 1.1625889539718628, + "learning_rate": 1e-05, + "loss": 0.2561, + "step": 3310 + }, + { + "epoch": 0.011376642245721765, + "grad_norm": 1.0287625789642334, + "learning_rate": 1e-05, + "loss": 0.2341, + "step": 3320 + }, + { + "epoch": 0.011410909240437795, + "grad_norm": 1.1310815811157227, + "learning_rate": 1e-05, + "loss": 0.2554, + "step": 3330 + }, + { + "epoch": 0.011445176235153825, + "grad_norm": 1.1266168355941772, + "learning_rate": 1e-05, + "loss": 0.234, + "step": 3340 + }, + { + "epoch": 0.011479443229869855, + "grad_norm": 1.1979014873504639, + "learning_rate": 1e-05, + "loss": 0.2559, + "step": 3350 + }, + { + "epoch": 0.011513710224585883, + "grad_norm": 1.0378515720367432, + "learning_rate": 1e-05, + "loss": 0.2502, + "step": 3360 + }, + { + "epoch": 0.011547977219301913, + "grad_norm": 1.1832512617111206, + "learning_rate": 1e-05, + "loss": 0.236, + "step": 3370 + }, + { + "epoch": 0.011582244214017942, + "grad_norm": 0.9605569839477539, + "learning_rate": 1e-05, + 
"loss": 0.2349, + "step": 3380 + }, + { + "epoch": 0.011616511208733972, + "grad_norm": 1.0463056564331055, + "learning_rate": 1e-05, + "loss": 0.2328, + "step": 3390 + }, + { + "epoch": 0.01165077820345, + "grad_norm": 1.1021932363510132, + "learning_rate": 1e-05, + "loss": 0.2383, + "step": 3400 + }, + { + "epoch": 0.01168504519816603, + "grad_norm": 1.040493130683899, + "learning_rate": 1e-05, + "loss": 0.2374, + "step": 3410 + }, + { + "epoch": 0.01171931219288206, + "grad_norm": 1.1483063697814941, + "learning_rate": 1e-05, + "loss": 0.2398, + "step": 3420 + }, + { + "epoch": 0.01175357918759809, + "grad_norm": 1.0316531658172607, + "learning_rate": 1e-05, + "loss": 0.2329, + "step": 3430 + }, + { + "epoch": 0.011787846182314118, + "grad_norm": 1.1677886247634888, + "learning_rate": 1e-05, + "loss": 0.2493, + "step": 3440 + }, + { + "epoch": 0.011822113177030148, + "grad_norm": 1.2078930139541626, + "learning_rate": 1e-05, + "loss": 0.2337, + "step": 3450 + }, + { + "epoch": 0.011856380171746178, + "grad_norm": 1.178202509880066, + "learning_rate": 1e-05, + "loss": 0.239, + "step": 3460 + }, + { + "epoch": 0.011890647166462208, + "grad_norm": 1.0453248023986816, + "learning_rate": 1e-05, + "loss": 0.2233, + "step": 3470 + }, + { + "epoch": 0.011924914161178236, + "grad_norm": 1.0171067714691162, + "learning_rate": 1e-05, + "loss": 0.2338, + "step": 3480 + }, + { + "epoch": 0.011959181155894266, + "grad_norm": 1.051792860031128, + "learning_rate": 1e-05, + "loss": 0.2394, + "step": 3490 + }, + { + "epoch": 0.011993448150610295, + "grad_norm": 1.1237847805023193, + "learning_rate": 1e-05, + "loss": 0.2428, + "step": 3500 + }, + { + "epoch": 0.011993448150610295, + "eval_cer": 13.071088375374845, + "eval_loss": 0.25454944372177124, + "eval_normalized_cer": 9.542366107114308, + "eval_runtime": 228.9468, + "eval_samples_per_second": 2.236, + "eval_steps_per_second": 0.035, + "step": 3500 + }, + { + "epoch": 0.012027715145326325, + "grad_norm": 1.1366350650787354, + 
"learning_rate": 1e-05, + "loss": 0.2353, + "step": 3510 + }, + { + "epoch": 0.012061982140042353, + "grad_norm": 1.136927604675293, + "learning_rate": 1e-05, + "loss": 0.2358, + "step": 3520 + }, + { + "epoch": 0.012096249134758383, + "grad_norm": 1.1875656843185425, + "learning_rate": 1e-05, + "loss": 0.2305, + "step": 3530 + }, + { + "epoch": 0.012130516129474413, + "grad_norm": 1.2016057968139648, + "learning_rate": 1e-05, + "loss": 0.2435, + "step": 3540 + }, + { + "epoch": 0.012164783124190443, + "grad_norm": 1.209622859954834, + "learning_rate": 1e-05, + "loss": 0.2361, + "step": 3550 + }, + { + "epoch": 0.012199050118906471, + "grad_norm": 1.0696970224380493, + "learning_rate": 1e-05, + "loss": 0.2385, + "step": 3560 + }, + { + "epoch": 0.0122333171136225, + "grad_norm": 1.2674167156219482, + "learning_rate": 1e-05, + "loss": 0.243, + "step": 3570 + }, + { + "epoch": 0.01226758410833853, + "grad_norm": 1.2928141355514526, + "learning_rate": 1e-05, + "loss": 0.2491, + "step": 3580 + }, + { + "epoch": 0.01230185110305456, + "grad_norm": 1.0642272233963013, + "learning_rate": 1e-05, + "loss": 0.2356, + "step": 3590 + }, + { + "epoch": 0.012336118097770589, + "grad_norm": 1.0935972929000854, + "learning_rate": 1e-05, + "loss": 0.2389, + "step": 3600 + }, + { + "epoch": 0.012370385092486618, + "grad_norm": 1.180668830871582, + "learning_rate": 1e-05, + "loss": 0.2409, + "step": 3610 + }, + { + "epoch": 0.012404652087202648, + "grad_norm": 1.2312487363815308, + "learning_rate": 1e-05, + "loss": 0.2478, + "step": 3620 + }, + { + "epoch": 0.012438919081918678, + "grad_norm": 0.947522759437561, + "learning_rate": 1e-05, + "loss": 0.2281, + "step": 3630 + }, + { + "epoch": 0.012473186076634706, + "grad_norm": 1.0618727207183838, + "learning_rate": 1e-05, + "loss": 0.2423, + "step": 3640 + }, + { + "epoch": 0.012507453071350736, + "grad_norm": 1.0766098499298096, + "learning_rate": 1e-05, + "loss": 0.2364, + "step": 3650 + }, + { + "epoch": 0.012541720066066766, + 
"grad_norm": 1.1174747943878174, + "learning_rate": 1e-05, + "loss": 0.238, + "step": 3660 + }, + { + "epoch": 0.012575987060782796, + "grad_norm": 1.1940118074417114, + "learning_rate": 1e-05, + "loss": 0.2212, + "step": 3670 + }, + { + "epoch": 0.012610254055498824, + "grad_norm": 1.1407246589660645, + "learning_rate": 1e-05, + "loss": 0.2423, + "step": 3680 + }, + { + "epoch": 0.012644521050214854, + "grad_norm": 1.2646050453186035, + "learning_rate": 1e-05, + "loss": 0.2252, + "step": 3690 + }, + { + "epoch": 0.012678788044930884, + "grad_norm": 1.130337119102478, + "learning_rate": 1e-05, + "loss": 0.2131, + "step": 3700 + }, + { + "epoch": 0.012713055039646913, + "grad_norm": 1.1432557106018066, + "learning_rate": 1e-05, + "loss": 0.2386, + "step": 3710 + }, + { + "epoch": 0.012747322034362941, + "grad_norm": 1.1370545625686646, + "learning_rate": 1e-05, + "loss": 0.2347, + "step": 3720 + }, + { + "epoch": 0.012781589029078971, + "grad_norm": 1.3126403093338013, + "learning_rate": 1e-05, + "loss": 0.2159, + "step": 3730 + }, + { + "epoch": 0.012815856023795001, + "grad_norm": 1.2375295162200928, + "learning_rate": 1e-05, + "loss": 0.2275, + "step": 3740 + }, + { + "epoch": 0.012850123018511031, + "grad_norm": 1.0877372026443481, + "learning_rate": 1e-05, + "loss": 0.2201, + "step": 3750 + }, + { + "epoch": 0.012884390013227059, + "grad_norm": 1.1122978925704956, + "learning_rate": 1e-05, + "loss": 0.229, + "step": 3760 + }, + { + "epoch": 0.012918657007943089, + "grad_norm": 1.0270159244537354, + "learning_rate": 1e-05, + "loss": 0.2313, + "step": 3770 + }, + { + "epoch": 0.012952924002659119, + "grad_norm": 1.1370947360992432, + "learning_rate": 1e-05, + "loss": 0.229, + "step": 3780 + }, + { + "epoch": 0.012987190997375149, + "grad_norm": 1.2888813018798828, + "learning_rate": 1e-05, + "loss": 0.2384, + "step": 3790 + }, + { + "epoch": 0.013021457992091178, + "grad_norm": 1.2443634271621704, + "learning_rate": 1e-05, + "loss": 0.2218, + "step": 3800 + }, + 
{ + "epoch": 0.013055724986807207, + "grad_norm": 1.1919447183609009, + "learning_rate": 1e-05, + "loss": 0.2277, + "step": 3810 + }, + { + "epoch": 0.013089991981523236, + "grad_norm": 1.140600562095642, + "learning_rate": 1e-05, + "loss": 0.2317, + "step": 3820 + }, + { + "epoch": 0.013124258976239266, + "grad_norm": 1.074697494506836, + "learning_rate": 1e-05, + "loss": 0.2273, + "step": 3830 + }, + { + "epoch": 0.013158525970955296, + "grad_norm": 1.1003391742706299, + "learning_rate": 1e-05, + "loss": 0.2217, + "step": 3840 + }, + { + "epoch": 0.013192792965671324, + "grad_norm": 1.1427338123321533, + "learning_rate": 1e-05, + "loss": 0.2377, + "step": 3850 + }, + { + "epoch": 0.013227059960387354, + "grad_norm": 1.0806514024734497, + "learning_rate": 1e-05, + "loss": 0.2332, + "step": 3860 + }, + { + "epoch": 0.013261326955103384, + "grad_norm": 1.1547067165374756, + "learning_rate": 1e-05, + "loss": 0.2306, + "step": 3870 + }, + { + "epoch": 0.013295593949819414, + "grad_norm": 1.2483099699020386, + "learning_rate": 1e-05, + "loss": 0.2166, + "step": 3880 + }, + { + "epoch": 0.013329860944535442, + "grad_norm": 1.096939206123352, + "learning_rate": 1e-05, + "loss": 0.2253, + "step": 3890 + }, + { + "epoch": 0.013364127939251472, + "grad_norm": 1.1876115798950195, + "learning_rate": 1e-05, + "loss": 0.2377, + "step": 3900 + }, + { + "epoch": 0.013398394933967502, + "grad_norm": 1.1380902528762817, + "learning_rate": 1e-05, + "loss": 0.2256, + "step": 3910 + }, + { + "epoch": 0.013432661928683531, + "grad_norm": 1.0738089084625244, + "learning_rate": 1e-05, + "loss": 0.2307, + "step": 3920 + }, + { + "epoch": 0.01346692892339956, + "grad_norm": 1.0351170301437378, + "learning_rate": 1e-05, + "loss": 0.2296, + "step": 3930 + }, + { + "epoch": 0.01350119591811559, + "grad_norm": 1.2752678394317627, + "learning_rate": 1e-05, + "loss": 0.2462, + "step": 3940 + }, + { + "epoch": 0.01353546291283162, + "grad_norm": 1.2618532180786133, + "learning_rate": 1e-05, + 
"loss": 0.2364, + "step": 3950 + }, + { + "epoch": 0.013569729907547649, + "grad_norm": 1.1907076835632324, + "learning_rate": 1e-05, + "loss": 0.2397, + "step": 3960 + }, + { + "epoch": 0.013603996902263677, + "grad_norm": 0.9435076117515564, + "learning_rate": 1e-05, + "loss": 0.2391, + "step": 3970 + }, + { + "epoch": 0.013638263896979707, + "grad_norm": 1.0608407258987427, + "learning_rate": 1e-05, + "loss": 0.2241, + "step": 3980 + }, + { + "epoch": 0.013672530891695737, + "grad_norm": 1.0729584693908691, + "learning_rate": 1e-05, + "loss": 0.2237, + "step": 3990 + }, + { + "epoch": 0.013706797886411767, + "grad_norm": 1.2006182670593262, + "learning_rate": 1e-05, + "loss": 0.2386, + "step": 4000 + }, + { + "epoch": 0.013706797886411767, + "eval_cer": 12.594813900158758, + "eval_loss": 0.25156331062316895, + "eval_normalized_cer": 8.912869704236611, + "eval_runtime": 228.7977, + "eval_samples_per_second": 2.238, + "eval_steps_per_second": 0.035, + "step": 4000 + }, + { + "epoch": 0.013741064881127795, + "grad_norm": 1.2020457983016968, + "learning_rate": 1e-05, + "loss": 0.2318, + "step": 4010 + }, + { + "epoch": 0.013775331875843825, + "grad_norm": 1.0251790285110474, + "learning_rate": 1e-05, + "loss": 0.248, + "step": 4020 + }, + { + "epoch": 0.013809598870559854, + "grad_norm": 1.160437822341919, + "learning_rate": 1e-05, + "loss": 0.2385, + "step": 4030 + }, + { + "epoch": 0.013843865865275884, + "grad_norm": 1.025770664215088, + "learning_rate": 1e-05, + "loss": 0.2293, + "step": 4040 + }, + { + "epoch": 0.013878132859991912, + "grad_norm": 1.111954689025879, + "learning_rate": 1e-05, + "loss": 0.2377, + "step": 4050 + }, + { + "epoch": 0.013912399854707942, + "grad_norm": 1.0644809007644653, + "learning_rate": 1e-05, + "loss": 0.2195, + "step": 4060 + }, + { + "epoch": 0.013946666849423972, + "grad_norm": 1.2926712036132812, + "learning_rate": 1e-05, + "loss": 0.2508, + "step": 4070 + }, + { + "epoch": 0.013980933844140002, + "grad_norm": 
1.2169601917266846, + "learning_rate": 1e-05, + "loss": 0.2401, + "step": 4080 + }, + { + "epoch": 0.01401520083885603, + "grad_norm": 1.1396681070327759, + "learning_rate": 1e-05, + "loss": 0.2305, + "step": 4090 + }, + { + "epoch": 0.01404946783357206, + "grad_norm": 1.2242721319198608, + "learning_rate": 1e-05, + "loss": 0.2301, + "step": 4100 + }, + { + "epoch": 0.01408373482828809, + "grad_norm": 1.195324420928955, + "learning_rate": 1e-05, + "loss": 0.2368, + "step": 4110 + }, + { + "epoch": 0.01411800182300412, + "grad_norm": 1.2345412969589233, + "learning_rate": 1e-05, + "loss": 0.2301, + "step": 4120 + }, + { + "epoch": 0.014152268817720148, + "grad_norm": 1.1502156257629395, + "learning_rate": 1e-05, + "loss": 0.2327, + "step": 4130 + }, + { + "epoch": 0.014186535812436177, + "grad_norm": 1.2128121852874756, + "learning_rate": 1e-05, + "loss": 0.2458, + "step": 4140 + }, + { + "epoch": 0.014220802807152207, + "grad_norm": 1.2618858814239502, + "learning_rate": 1e-05, + "loss": 0.231, + "step": 4150 + }, + { + "epoch": 0.014255069801868237, + "grad_norm": 1.0879299640655518, + "learning_rate": 1e-05, + "loss": 0.2302, + "step": 4160 + }, + { + "epoch": 0.014289336796584265, + "grad_norm": 0.9794358015060425, + "learning_rate": 1e-05, + "loss": 0.239, + "step": 4170 + }, + { + "epoch": 0.014323603791300295, + "grad_norm": 1.1454006433486938, + "learning_rate": 1e-05, + "loss": 0.2328, + "step": 4180 + }, + { + "epoch": 0.014357870786016325, + "grad_norm": 1.223686933517456, + "learning_rate": 1e-05, + "loss": 0.2211, + "step": 4190 + }, + { + "epoch": 0.014392137780732355, + "grad_norm": 1.1423155069351196, + "learning_rate": 1e-05, + "loss": 0.2391, + "step": 4200 + }, + { + "epoch": 0.014426404775448383, + "grad_norm": 1.1027394533157349, + "learning_rate": 1e-05, + "loss": 0.2279, + "step": 4210 + }, + { + "epoch": 0.014460671770164413, + "grad_norm": 1.1777397394180298, + "learning_rate": 1e-05, + "loss": 0.2293, + "step": 4220 + }, + { + "epoch": 
0.014494938764880443, + "grad_norm": 1.01688551902771, + "learning_rate": 1e-05, + "loss": 0.2275, + "step": 4230 + }, + { + "epoch": 0.014529205759596472, + "grad_norm": 1.1520488262176514, + "learning_rate": 1e-05, + "loss": 0.2301, + "step": 4240 + }, + { + "epoch": 0.0145634727543125, + "grad_norm": 1.2820484638214111, + "learning_rate": 1e-05, + "loss": 0.2205, + "step": 4250 + }, + { + "epoch": 0.01459773974902853, + "grad_norm": 1.169291377067566, + "learning_rate": 1e-05, + "loss": 0.2389, + "step": 4260 + }, + { + "epoch": 0.01463200674374456, + "grad_norm": 1.1135886907577515, + "learning_rate": 1e-05, + "loss": 0.2384, + "step": 4270 + }, + { + "epoch": 0.01466627373846059, + "grad_norm": 1.0846205949783325, + "learning_rate": 1e-05, + "loss": 0.223, + "step": 4280 + }, + { + "epoch": 0.014700540733176618, + "grad_norm": 0.981488049030304, + "learning_rate": 1e-05, + "loss": 0.2092, + "step": 4290 + }, + { + "epoch": 0.014734807727892648, + "grad_norm": 1.0437407493591309, + "learning_rate": 1e-05, + "loss": 0.2293, + "step": 4300 + }, + { + "epoch": 0.014769074722608678, + "grad_norm": 1.005792260169983, + "learning_rate": 1e-05, + "loss": 0.2286, + "step": 4310 + }, + { + "epoch": 0.014803341717324708, + "grad_norm": 1.1903142929077148, + "learning_rate": 1e-05, + "loss": 0.231, + "step": 4320 + }, + { + "epoch": 0.014837608712040736, + "grad_norm": 1.1308993101119995, + "learning_rate": 1e-05, + "loss": 0.2458, + "step": 4330 + }, + { + "epoch": 0.014871875706756766, + "grad_norm": 1.0948210954666138, + "learning_rate": 1e-05, + "loss": 0.213, + "step": 4340 + }, + { + "epoch": 0.014906142701472795, + "grad_norm": 1.2674663066864014, + "learning_rate": 1e-05, + "loss": 0.2432, + "step": 4350 + }, + { + "epoch": 0.014940409696188825, + "grad_norm": 1.4228485822677612, + "learning_rate": 1e-05, + "loss": 0.2491, + "step": 4360 + }, + { + "epoch": 0.014974676690904853, + "grad_norm": 1.1533160209655762, + "learning_rate": 1e-05, + "loss": 0.2485, + 
"step": 4370 + }, + { + "epoch": 0.015008943685620883, + "grad_norm": 1.1454424858093262, + "learning_rate": 1e-05, + "loss": 0.2635, + "step": 4380 + }, + { + "epoch": 0.015043210680336913, + "grad_norm": 1.2944281101226807, + "learning_rate": 1e-05, + "loss": 0.2651, + "step": 4390 + }, + { + "epoch": 0.015077477675052943, + "grad_norm": 1.2148584127426147, + "learning_rate": 1e-05, + "loss": 0.2694, + "step": 4400 + }, + { + "epoch": 0.015111744669768971, + "grad_norm": 1.091282844543457, + "learning_rate": 1e-05, + "loss": 0.2672, + "step": 4410 + }, + { + "epoch": 0.015146011664485001, + "grad_norm": 1.2254445552825928, + "learning_rate": 1e-05, + "loss": 0.2583, + "step": 4420 + }, + { + "epoch": 0.01518027865920103, + "grad_norm": 1.367516279220581, + "learning_rate": 1e-05, + "loss": 0.2586, + "step": 4430 + }, + { + "epoch": 0.01521454565391706, + "grad_norm": 1.1858383417129517, + "learning_rate": 1e-05, + "loss": 0.2764, + "step": 4440 + }, + { + "epoch": 0.01524881264863309, + "grad_norm": 1.1331857442855835, + "learning_rate": 1e-05, + "loss": 0.2577, + "step": 4450 + }, + { + "epoch": 0.015283079643349119, + "grad_norm": 1.2343239784240723, + "learning_rate": 1e-05, + "loss": 0.2661, + "step": 4460 + }, + { + "epoch": 0.015317346638065148, + "grad_norm": 1.0893656015396118, + "learning_rate": 1e-05, + "loss": 0.2538, + "step": 4470 + }, + { + "epoch": 0.015351613632781178, + "grad_norm": 1.1467857360839844, + "learning_rate": 1e-05, + "loss": 0.2496, + "step": 4480 + }, + { + "epoch": 0.015385880627497208, + "grad_norm": 1.2753335237503052, + "learning_rate": 1e-05, + "loss": 0.2797, + "step": 4490 + }, + { + "epoch": 0.015420147622213236, + "grad_norm": 1.1355762481689453, + "learning_rate": 1e-05, + "loss": 0.2672, + "step": 4500 + }, + { + "epoch": 0.015420147622213236, + "eval_cer": 13.159287352266713, + "eval_loss": 0.24996142089366913, + "eval_normalized_cer": 9.59232613908873, + "eval_runtime": 228.0477, + "eval_samples_per_second": 2.245, + 
"eval_steps_per_second": 0.035, + "step": 4500 + }, + { + "epoch": 0.015454414616929266, + "grad_norm": 1.2256762981414795, + "learning_rate": 1e-05, + "loss": 0.2662, + "step": 4510 + }, + { + "epoch": 0.015488681611645296, + "grad_norm": 1.0631389617919922, + "learning_rate": 1e-05, + "loss": 0.2596, + "step": 4520 + }, + { + "epoch": 0.015522948606361326, + "grad_norm": 1.0759390592575073, + "learning_rate": 1e-05, + "loss": 0.2553, + "step": 4530 + }, + { + "epoch": 0.015557215601077354, + "grad_norm": 1.1867231130599976, + "learning_rate": 1e-05, + "loss": 0.2498, + "step": 4540 + }, + { + "epoch": 0.015591482595793384, + "grad_norm": 1.1203633546829224, + "learning_rate": 1e-05, + "loss": 0.2732, + "step": 4550 + }, + { + "epoch": 0.015625749590509413, + "grad_norm": 1.1223920583724976, + "learning_rate": 1e-05, + "loss": 0.2535, + "step": 4560 + }, + { + "epoch": 0.015660016585225443, + "grad_norm": 1.066497564315796, + "learning_rate": 1e-05, + "loss": 0.2456, + "step": 4570 + }, + { + "epoch": 0.015694283579941473, + "grad_norm": 1.2520133256912231, + "learning_rate": 1e-05, + "loss": 0.2558, + "step": 4580 + }, + { + "epoch": 0.015728550574657503, + "grad_norm": 1.3602423667907715, + "learning_rate": 1e-05, + "loss": 0.2698, + "step": 4590 + }, + { + "epoch": 0.01576281756937353, + "grad_norm": 1.1748729944229126, + "learning_rate": 1e-05, + "loss": 0.2621, + "step": 4600 + }, + { + "epoch": 0.01579708456408956, + "grad_norm": 0.9431802034378052, + "learning_rate": 1e-05, + "loss": 0.2433, + "step": 4610 + }, + { + "epoch": 0.01583135155880559, + "grad_norm": 1.0146753787994385, + "learning_rate": 1e-05, + "loss": 0.239, + "step": 4620 + }, + { + "epoch": 0.01586561855352162, + "grad_norm": 1.1340891122817993, + "learning_rate": 1e-05, + "loss": 0.2437, + "step": 4630 + }, + { + "epoch": 0.01589988554823765, + "grad_norm": 1.1456454992294312, + "learning_rate": 1e-05, + "loss": 0.2307, + "step": 4640 + }, + { + "epoch": 0.01593415254295368, + "grad_norm": 
1.1026827096939087, + "learning_rate": 1e-05, + "loss": 0.2295, + "step": 4650 + }, + { + "epoch": 0.01596841953766971, + "grad_norm": 1.2215088605880737, + "learning_rate": 1e-05, + "loss": 0.245, + "step": 4660 + }, + { + "epoch": 0.01600268653238574, + "grad_norm": 1.1760615110397339, + "learning_rate": 1e-05, + "loss": 0.2461, + "step": 4670 + }, + { + "epoch": 0.016036953527101765, + "grad_norm": 1.1690876483917236, + "learning_rate": 1e-05, + "loss": 0.2282, + "step": 4680 + }, + { + "epoch": 0.016071220521817794, + "grad_norm": 1.182026743888855, + "learning_rate": 1e-05, + "loss": 0.2351, + "step": 4690 + }, + { + "epoch": 0.016105487516533824, + "grad_norm": 1.0182474851608276, + "learning_rate": 1e-05, + "loss": 0.2284, + "step": 4700 + }, + { + "epoch": 0.016139754511249854, + "grad_norm": 1.2531431913375854, + "learning_rate": 1e-05, + "loss": 0.244, + "step": 4710 + }, + { + "epoch": 0.016174021505965884, + "grad_norm": 0.9633692502975464, + "learning_rate": 1e-05, + "loss": 0.2297, + "step": 4720 + }, + { + "epoch": 0.016208288500681914, + "grad_norm": 1.1144667863845825, + "learning_rate": 1e-05, + "loss": 0.2475, + "step": 4730 + }, + { + "epoch": 0.016242555495397944, + "grad_norm": 1.0768555402755737, + "learning_rate": 1e-05, + "loss": 0.2216, + "step": 4740 + }, + { + "epoch": 0.016276822490113974, + "grad_norm": 1.2052035331726074, + "learning_rate": 1e-05, + "loss": 0.2278, + "step": 4750 + }, + { + "epoch": 0.01631108948483, + "grad_norm": 1.0291496515274048, + "learning_rate": 1e-05, + "loss": 0.2226, + "step": 4760 + }, + { + "epoch": 0.01634535647954603, + "grad_norm": 1.2100346088409424, + "learning_rate": 1e-05, + "loss": 0.2278, + "step": 4770 + }, + { + "epoch": 0.01637962347426206, + "grad_norm": 1.214861273765564, + "learning_rate": 1e-05, + "loss": 0.2313, + "step": 4780 + }, + { + "epoch": 0.01641389046897809, + "grad_norm": 1.137210726737976, + "learning_rate": 1e-05, + "loss": 0.2235, + "step": 4790 + }, + { + "epoch": 
0.01644815746369412, + "grad_norm": 1.046673059463501, + "learning_rate": 1e-05, + "loss": 0.2231, + "step": 4800 + }, + { + "epoch": 0.01648242445841015, + "grad_norm": 1.08164644241333, + "learning_rate": 1e-05, + "loss": 0.2235, + "step": 4810 + }, + { + "epoch": 0.01651669145312618, + "grad_norm": 1.1432491540908813, + "learning_rate": 1e-05, + "loss": 0.246, + "step": 4820 + }, + { + "epoch": 0.01655095844784221, + "grad_norm": 1.1684173345565796, + "learning_rate": 1e-05, + "loss": 0.218, + "step": 4830 + }, + { + "epoch": 0.016585225442558235, + "grad_norm": 1.0895615816116333, + "learning_rate": 1e-05, + "loss": 0.2109, + "step": 4840 + }, + { + "epoch": 0.016619492437274265, + "grad_norm": 1.1505770683288574, + "learning_rate": 1e-05, + "loss": 0.2283, + "step": 4850 + }, + { + "epoch": 0.016653759431990295, + "grad_norm": 1.3385730981826782, + "learning_rate": 1e-05, + "loss": 0.2344, + "step": 4860 + }, + { + "epoch": 0.016688026426706325, + "grad_norm": 1.109035611152649, + "learning_rate": 1e-05, + "loss": 0.2558, + "step": 4870 + }, + { + "epoch": 0.016722293421422355, + "grad_norm": 1.1834880113601685, + "learning_rate": 1e-05, + "loss": 0.2247, + "step": 4880 + }, + { + "epoch": 0.016756560416138384, + "grad_norm": 1.2369152307510376, + "learning_rate": 1e-05, + "loss": 0.2449, + "step": 4890 + }, + { + "epoch": 0.016790827410854414, + "grad_norm": 1.131173014640808, + "learning_rate": 1e-05, + "loss": 0.2458, + "step": 4900 + }, + { + "epoch": 0.016825094405570444, + "grad_norm": 1.1100351810455322, + "learning_rate": 1e-05, + "loss": 0.2523, + "step": 4910 + }, + { + "epoch": 0.01685936140028647, + "grad_norm": 1.1857340335845947, + "learning_rate": 1e-05, + "loss": 0.2523, + "step": 4920 + }, + { + "epoch": 0.0168936283950025, + "grad_norm": 1.1568819284439087, + "learning_rate": 1e-05, + "loss": 0.2549, + "step": 4930 + }, + { + "epoch": 0.01692789538971853, + "grad_norm": 1.104872465133667, + "learning_rate": 1e-05, + "loss": 0.2449, + "step": 
4940 + }, + { + "epoch": 0.01696216238443456, + "grad_norm": 1.0907660722732544, + "learning_rate": 1e-05, + "loss": 0.2496, + "step": 4950 + }, + { + "epoch": 0.01699642937915059, + "grad_norm": 1.1100903749465942, + "learning_rate": 1e-05, + "loss": 0.239, + "step": 4960 + }, + { + "epoch": 0.01703069637386662, + "grad_norm": 1.141200065612793, + "learning_rate": 1e-05, + "loss": 0.2459, + "step": 4970 + }, + { + "epoch": 0.01706496336858265, + "grad_norm": 1.2853361368179321, + "learning_rate": 1e-05, + "loss": 0.2452, + "step": 4980 + }, + { + "epoch": 0.01709923036329868, + "grad_norm": 1.1542645692825317, + "learning_rate": 1e-05, + "loss": 0.2635, + "step": 4990 + }, + { + "epoch": 0.017133497358014706, + "grad_norm": 1.2022640705108643, + "learning_rate": 1e-05, + "loss": 0.2371, + "step": 5000 + }, + { + "epoch": 0.017133497358014706, + "eval_cer": 12.92115011465867, + "eval_loss": 0.2521001100540161, + "eval_normalized_cer": 9.30255795363709, + "eval_runtime": 227.4868, + "eval_samples_per_second": 2.251, + "eval_steps_per_second": 0.035, + "step": 5000 + }, + { + "epoch": 0.017167764352730736, + "grad_norm": 1.0765001773834229, + "learning_rate": 1e-05, + "loss": 0.2455, + "step": 5010 + }, + { + "epoch": 0.017202031347446765, + "grad_norm": 1.0711493492126465, + "learning_rate": 1e-05, + "loss": 0.2422, + "step": 5020 + }, + { + "epoch": 0.017236298342162795, + "grad_norm": 1.0719484090805054, + "learning_rate": 1e-05, + "loss": 0.2531, + "step": 5030 + }, + { + "epoch": 0.017270565336878825, + "grad_norm": 1.1884721517562866, + "learning_rate": 1e-05, + "loss": 0.2508, + "step": 5040 + }, + { + "epoch": 0.017304832331594855, + "grad_norm": 1.068827509880066, + "learning_rate": 1e-05, + "loss": 0.2474, + "step": 5050 + }, + { + "epoch": 0.017339099326310885, + "grad_norm": 1.1308655738830566, + "learning_rate": 1e-05, + "loss": 0.2627, + "step": 5060 + }, + { + "epoch": 0.017373366321026915, + "grad_norm": 1.1527314186096191, + "learning_rate": 1e-05, + 
"loss": 0.2535, + "step": 5070 + }, + { + "epoch": 0.017407633315742944, + "grad_norm": 1.1800657510757446, + "learning_rate": 1e-05, + "loss": 0.2587, + "step": 5080 + }, + { + "epoch": 0.01744190031045897, + "grad_norm": 1.095189094543457, + "learning_rate": 1e-05, + "loss": 0.2424, + "step": 5090 + }, + { + "epoch": 0.017476167305175, + "grad_norm": 1.109617829322815, + "learning_rate": 1e-05, + "loss": 0.2543, + "step": 5100 + }, + { + "epoch": 0.01751043429989103, + "grad_norm": 1.2110544443130493, + "learning_rate": 1e-05, + "loss": 0.2687, + "step": 5110 + }, + { + "epoch": 0.01754470129460706, + "grad_norm": 1.0466723442077637, + "learning_rate": 1e-05, + "loss": 0.2424, + "step": 5120 + }, + { + "epoch": 0.01757896828932309, + "grad_norm": 1.2060648202896118, + "learning_rate": 1e-05, + "loss": 0.2337, + "step": 5130 + }, + { + "epoch": 0.01761323528403912, + "grad_norm": 1.203142762184143, + "learning_rate": 1e-05, + "loss": 0.2556, + "step": 5140 + }, + { + "epoch": 0.01764750227875515, + "grad_norm": 1.0751283168792725, + "learning_rate": 1e-05, + "loss": 0.2235, + "step": 5150 + }, + { + "epoch": 0.01768176927347118, + "grad_norm": 1.1377781629562378, + "learning_rate": 1e-05, + "loss": 0.2448, + "step": 5160 + }, + { + "epoch": 0.017716036268187206, + "grad_norm": 1.147454023361206, + "learning_rate": 1e-05, + "loss": 0.2172, + "step": 5170 + }, + { + "epoch": 0.017750303262903236, + "grad_norm": 1.129897952079773, + "learning_rate": 1e-05, + "loss": 0.2418, + "step": 5180 + }, + { + "epoch": 0.017784570257619266, + "grad_norm": 1.1261131763458252, + "learning_rate": 1e-05, + "loss": 0.2328, + "step": 5190 + }, + { + "epoch": 0.017818837252335296, + "grad_norm": 1.0794824361801147, + "learning_rate": 1e-05, + "loss": 0.2546, + "step": 5200 + }, + { + "epoch": 0.017853104247051325, + "grad_norm": 1.1870142221450806, + "learning_rate": 1e-05, + "loss": 0.249, + "step": 5210 + }, + { + "epoch": 0.017887371241767355, + "grad_norm": 1.0414400100708008, + 
"learning_rate": 1e-05, + "loss": 0.2285, + "step": 5220 + }, + { + "epoch": 0.017921638236483385, + "grad_norm": 1.173405647277832, + "learning_rate": 1e-05, + "loss": 0.2529, + "step": 5230 + }, + { + "epoch": 0.017955905231199415, + "grad_norm": 1.039650797843933, + "learning_rate": 1e-05, + "loss": 0.2321, + "step": 5240 + }, + { + "epoch": 0.01799017222591544, + "grad_norm": 1.0359266996383667, + "learning_rate": 1e-05, + "loss": 0.2433, + "step": 5250 + }, + { + "epoch": 0.01802443922063147, + "grad_norm": 1.0630840063095093, + "learning_rate": 1e-05, + "loss": 0.2117, + "step": 5260 + }, + { + "epoch": 0.0180587062153475, + "grad_norm": 1.0937180519104004, + "learning_rate": 1e-05, + "loss": 0.2454, + "step": 5270 + }, + { + "epoch": 0.01809297321006353, + "grad_norm": 1.1015993356704712, + "learning_rate": 1e-05, + "loss": 0.238, + "step": 5280 + }, + { + "epoch": 0.01812724020477956, + "grad_norm": 1.060584545135498, + "learning_rate": 1e-05, + "loss": 0.2475, + "step": 5290 + }, + { + "epoch": 0.01816150719949559, + "grad_norm": 1.1389795541763306, + "learning_rate": 1e-05, + "loss": 0.233, + "step": 5300 + }, + { + "epoch": 0.01819577419421162, + "grad_norm": 1.0018917322158813, + "learning_rate": 1e-05, + "loss": 0.2453, + "step": 5310 + }, + { + "epoch": 0.01823004118892765, + "grad_norm": 1.0546092987060547, + "learning_rate": 1e-05, + "loss": 0.2333, + "step": 5320 + }, + { + "epoch": 0.018264308183643677, + "grad_norm": 1.1121848821640015, + "learning_rate": 1e-05, + "loss": 0.2317, + "step": 5330 + }, + { + "epoch": 0.018298575178359706, + "grad_norm": 1.1613191366195679, + "learning_rate": 1e-05, + "loss": 0.2549, + "step": 5340 + }, + { + "epoch": 0.018332842173075736, + "grad_norm": 1.1250524520874023, + "learning_rate": 1e-05, + "loss": 0.2471, + "step": 5350 + }, + { + "epoch": 0.018367109167791766, + "grad_norm": 1.0905226469039917, + "learning_rate": 1e-05, + "loss": 0.229, + "step": 5360 + }, + { + "epoch": 0.018401376162507796, + 
"grad_norm": 0.9885173439979553, + "learning_rate": 1e-05, + "loss": 0.2542, + "step": 5370 + }, + { + "epoch": 0.018435643157223826, + "grad_norm": 1.288758635520935, + "learning_rate": 1e-05, + "loss": 0.2472, + "step": 5380 + }, + { + "epoch": 0.018469910151939856, + "grad_norm": 1.2433462142944336, + "learning_rate": 1e-05, + "loss": 0.2427, + "step": 5390 + }, + { + "epoch": 0.018504177146655885, + "grad_norm": 1.2367336750030518, + "learning_rate": 1e-05, + "loss": 0.2511, + "step": 5400 + }, + { + "epoch": 0.018538444141371912, + "grad_norm": 1.1871395111083984, + "learning_rate": 1e-05, + "loss": 0.2276, + "step": 5410 + }, + { + "epoch": 0.01857271113608794, + "grad_norm": 0.9569379091262817, + "learning_rate": 1e-05, + "loss": 0.2475, + "step": 5420 + }, + { + "epoch": 0.01860697813080397, + "grad_norm": 1.1487014293670654, + "learning_rate": 1e-05, + "loss": 0.2295, + "step": 5430 + }, + { + "epoch": 0.01864124512552, + "grad_norm": 1.0800844430923462, + "learning_rate": 1e-05, + "loss": 0.2247, + "step": 5440 + }, + { + "epoch": 0.01867551212023603, + "grad_norm": 1.1834380626678467, + "learning_rate": 1e-05, + "loss": 0.226, + "step": 5450 + }, + { + "epoch": 0.01870977911495206, + "grad_norm": 1.0035191774368286, + "learning_rate": 1e-05, + "loss": 0.2414, + "step": 5460 + }, + { + "epoch": 0.01874404610966809, + "grad_norm": 1.0685466527938843, + "learning_rate": 1e-05, + "loss": 0.2449, + "step": 5470 + }, + { + "epoch": 0.01877831310438412, + "grad_norm": 1.1921565532684326, + "learning_rate": 1e-05, + "loss": 0.2419, + "step": 5480 + }, + { + "epoch": 0.018812580099100147, + "grad_norm": 1.1201281547546387, + "learning_rate": 1e-05, + "loss": 0.255, + "step": 5490 + }, + { + "epoch": 0.018846847093816177, + "grad_norm": 1.1162866353988647, + "learning_rate": 1e-05, + "loss": 0.2426, + "step": 5500 + }, + { + "epoch": 0.018846847093816177, + "eval_cer": 13.238666431469396, + "eval_loss": 0.25262224674224854, + "eval_normalized_cer": 
9.562350119904076, + "eval_runtime": 229.0802, + "eval_samples_per_second": 2.235, + "eval_steps_per_second": 0.035, + "step": 5500 + }, + { + "epoch": 0.018881114088532207, + "grad_norm": 1.0215845108032227, + "learning_rate": 1e-05, + "loss": 0.2368, + "step": 5510 + }, + { + "epoch": 0.018915381083248237, + "grad_norm": 1.0062447786331177, + "learning_rate": 1e-05, + "loss": 0.2308, + "step": 5520 + }, + { + "epoch": 0.018949648077964266, + "grad_norm": 1.223649024963379, + "learning_rate": 1e-05, + "loss": 0.2409, + "step": 5530 + }, + { + "epoch": 0.018983915072680296, + "grad_norm": 1.2076172828674316, + "learning_rate": 1e-05, + "loss": 0.2236, + "step": 5540 + }, + { + "epoch": 0.019018182067396326, + "grad_norm": 1.154416561126709, + "learning_rate": 1e-05, + "loss": 0.2419, + "step": 5550 + }, + { + "epoch": 0.019052449062112356, + "grad_norm": 1.284858226776123, + "learning_rate": 1e-05, + "loss": 0.2321, + "step": 5560 + }, + { + "epoch": 0.019086716056828382, + "grad_norm": 1.0406948328018188, + "learning_rate": 1e-05, + "loss": 0.2485, + "step": 5570 + }, + { + "epoch": 0.019120983051544412, + "grad_norm": 1.1980571746826172, + "learning_rate": 1e-05, + "loss": 0.2274, + "step": 5580 + }, + { + "epoch": 0.019155250046260442, + "grad_norm": 1.073560357093811, + "learning_rate": 1e-05, + "loss": 0.2498, + "step": 5590 + }, + { + "epoch": 0.019189517040976472, + "grad_norm": 1.0982617139816284, + "learning_rate": 1e-05, + "loss": 0.2391, + "step": 5600 + }, + { + "epoch": 0.019223784035692502, + "grad_norm": 1.015085220336914, + "learning_rate": 1e-05, + "loss": 0.2269, + "step": 5610 + }, + { + "epoch": 0.01925805103040853, + "grad_norm": 1.238585352897644, + "learning_rate": 1e-05, + "loss": 0.2428, + "step": 5620 + }, + { + "epoch": 0.01929231802512456, + "grad_norm": 1.3326079845428467, + "learning_rate": 1e-05, + "loss": 0.25, + "step": 5630 + }, + { + "epoch": 0.01932658501984059, + "grad_norm": 1.1263608932495117, + "learning_rate": 1e-05, + 
"loss": 0.234, + "step": 5640 + }, + { + "epoch": 0.019360852014556618, + "grad_norm": 1.083595633506775, + "learning_rate": 1e-05, + "loss": 0.2504, + "step": 5650 + }, + { + "epoch": 0.019395119009272647, + "grad_norm": 1.0787022113800049, + "learning_rate": 1e-05, + "loss": 0.2248, + "step": 5660 + }, + { + "epoch": 0.019429386003988677, + "grad_norm": 1.312565803527832, + "learning_rate": 1e-05, + "loss": 0.263, + "step": 5670 + }, + { + "epoch": 0.019463652998704707, + "grad_norm": 1.0305407047271729, + "learning_rate": 1e-05, + "loss": 0.2358, + "step": 5680 + }, + { + "epoch": 0.019497919993420737, + "grad_norm": 1.0905306339263916, + "learning_rate": 1e-05, + "loss": 0.2358, + "step": 5690 + }, + { + "epoch": 0.019532186988136767, + "grad_norm": 1.1105730533599854, + "learning_rate": 1e-05, + "loss": 0.2371, + "step": 5700 + }, + { + "epoch": 0.019566453982852797, + "grad_norm": 1.1664555072784424, + "learning_rate": 1e-05, + "loss": 0.244, + "step": 5710 + }, + { + "epoch": 0.019600720977568827, + "grad_norm": 1.0702719688415527, + "learning_rate": 1e-05, + "loss": 0.2305, + "step": 5720 + }, + { + "epoch": 0.019634987972284856, + "grad_norm": 1.0736626386642456, + "learning_rate": 1e-05, + "loss": 0.2406, + "step": 5730 + }, + { + "epoch": 0.019669254967000883, + "grad_norm": 1.0510461330413818, + "learning_rate": 1e-05, + "loss": 0.2335, + "step": 5740 + }, + { + "epoch": 0.019703521961716913, + "grad_norm": 1.0435370206832886, + "learning_rate": 1e-05, + "loss": 0.2211, + "step": 5750 + }, + { + "epoch": 0.019737788956432942, + "grad_norm": 1.2461049556732178, + "learning_rate": 1e-05, + "loss": 0.2188, + "step": 5760 + }, + { + "epoch": 0.019772055951148972, + "grad_norm": 1.0351046323776245, + "learning_rate": 1e-05, + "loss": 0.2269, + "step": 5770 + }, + { + "epoch": 0.019806322945865002, + "grad_norm": 1.124671459197998, + "learning_rate": 1e-05, + "loss": 0.2284, + "step": 5780 + }, + { + "epoch": 0.019840589940581032, + "grad_norm": 
1.145488977432251, + "learning_rate": 1e-05, + "loss": 0.2415, + "step": 5790 + }, + { + "epoch": 0.019874856935297062, + "grad_norm": 1.1410046815872192, + "learning_rate": 1e-05, + "loss": 0.2296, + "step": 5800 + }, + { + "epoch": 0.01990912393001309, + "grad_norm": 1.2782517671585083, + "learning_rate": 1e-05, + "loss": 0.2367, + "step": 5810 + }, + { + "epoch": 0.019943390924729118, + "grad_norm": 1.204562783241272, + "learning_rate": 1e-05, + "loss": 0.2289, + "step": 5820 + }, + { + "epoch": 0.019977657919445148, + "grad_norm": 1.1141811609268188, + "learning_rate": 1e-05, + "loss": 0.2223, + "step": 5830 + }, + { + "epoch": 0.020011924914161178, + "grad_norm": 1.1790316104888916, + "learning_rate": 1e-05, + "loss": 0.2308, + "step": 5840 + }, + { + "epoch": 0.020046191908877208, + "grad_norm": 1.0944266319274902, + "learning_rate": 1e-05, + "loss": 0.2366, + "step": 5850 + }, + { + "epoch": 0.020080458903593237, + "grad_norm": 1.0892263650894165, + "learning_rate": 1e-05, + "loss": 0.2384, + "step": 5860 + }, + { + "epoch": 0.020114725898309267, + "grad_norm": 1.1419873237609863, + "learning_rate": 1e-05, + "loss": 0.2414, + "step": 5870 + }, + { + "epoch": 0.020148992893025297, + "grad_norm": 1.2230783700942993, + "learning_rate": 1e-05, + "loss": 0.2394, + "step": 5880 + }, + { + "epoch": 0.020183259887741327, + "grad_norm": 1.1309173107147217, + "learning_rate": 1e-05, + "loss": 0.2561, + "step": 5890 + }, + { + "epoch": 0.020217526882457353, + "grad_norm": 1.2405802011489868, + "learning_rate": 1e-05, + "loss": 0.259, + "step": 5900 + }, + { + "epoch": 0.020251793877173383, + "grad_norm": 1.2853388786315918, + "learning_rate": 1e-05, + "loss": 0.2668, + "step": 5910 + }, + { + "epoch": 0.020286060871889413, + "grad_norm": 1.299046277999878, + "learning_rate": 1e-05, + "loss": 0.251, + "step": 5920 + }, + { + "epoch": 0.020320327866605443, + "grad_norm": 1.142052173614502, + "learning_rate": 1e-05, + "loss": 0.2655, + "step": 5930 + }, + { + "epoch": 
0.020354594861321473, + "grad_norm": 1.3770766258239746, + "learning_rate": 1e-05, + "loss": 0.2508, + "step": 5940 + }, + { + "epoch": 0.020388861856037502, + "grad_norm": 1.1458237171173096, + "learning_rate": 1e-05, + "loss": 0.2742, + "step": 5950 + }, + { + "epoch": 0.020423128850753532, + "grad_norm": 1.3130786418914795, + "learning_rate": 1e-05, + "loss": 0.2514, + "step": 5960 + }, + { + "epoch": 0.020457395845469562, + "grad_norm": 1.2816088199615479, + "learning_rate": 1e-05, + "loss": 0.2593, + "step": 5970 + }, + { + "epoch": 0.02049166284018559, + "grad_norm": 1.0405460596084595, + "learning_rate": 1e-05, + "loss": 0.2608, + "step": 5980 + }, + { + "epoch": 0.02052592983490162, + "grad_norm": 1.2035329341888428, + "learning_rate": 1e-05, + "loss": 0.2558, + "step": 5990 + }, + { + "epoch": 0.020560196829617648, + "grad_norm": 1.0495450496673584, + "learning_rate": 1e-05, + "loss": 0.2468, + "step": 6000 + }, + { + "epoch": 0.020560196829617648, + "eval_cer": 13.079908273064033, + "eval_loss": 0.2540421485900879, + "eval_normalized_cer": 9.292565947242206, + "eval_runtime": 227.4153, + "eval_samples_per_second": 2.251, + "eval_steps_per_second": 0.035, + "step": 6000 + }, + { + "epoch": 0.020594463824333678, + "grad_norm": 1.1614056825637817, + "learning_rate": 1e-05, + "loss": 0.2527, + "step": 6010 + }, + { + "epoch": 0.020628730819049708, + "grad_norm": 1.1835705041885376, + "learning_rate": 1e-05, + "loss": 0.2592, + "step": 6020 + }, + { + "epoch": 0.020662997813765738, + "grad_norm": 1.1335136890411377, + "learning_rate": 1e-05, + "loss": 0.2727, + "step": 6030 + }, + { + "epoch": 0.020697264808481768, + "grad_norm": 1.052079439163208, + "learning_rate": 1e-05, + "loss": 0.2514, + "step": 6040 + }, + { + "epoch": 0.020731531803197797, + "grad_norm": 1.096330165863037, + "learning_rate": 1e-05, + "loss": 0.2684, + "step": 6050 + }, + { + "epoch": 0.020765798797913824, + "grad_norm": 1.2359880208969116, + "learning_rate": 1e-05, + "loss": 0.2638, + 
"step": 6060 + }, + { + "epoch": 0.020800065792629854, + "grad_norm": 1.2259430885314941, + "learning_rate": 1e-05, + "loss": 0.2488, + "step": 6070 + }, + { + "epoch": 0.020834332787345883, + "grad_norm": 1.0531619787216187, + "learning_rate": 1e-05, + "loss": 0.2584, + "step": 6080 + }, + { + "epoch": 0.020868599782061913, + "grad_norm": 1.1754058599472046, + "learning_rate": 1e-05, + "loss": 0.254, + "step": 6090 + }, + { + "epoch": 0.020902866776777943, + "grad_norm": 1.0922538042068481, + "learning_rate": 1e-05, + "loss": 0.2522, + "step": 6100 + }, + { + "epoch": 0.020937133771493973, + "grad_norm": 1.1970179080963135, + "learning_rate": 1e-05, + "loss": 0.267, + "step": 6110 + }, + { + "epoch": 0.020971400766210003, + "grad_norm": 1.2625236511230469, + "learning_rate": 1e-05, + "loss": 0.2379, + "step": 6120 + }, + { + "epoch": 0.021005667760926033, + "grad_norm": 1.152846336364746, + "learning_rate": 1e-05, + "loss": 0.2429, + "step": 6130 + }, + { + "epoch": 0.02103993475564206, + "grad_norm": 1.1184160709381104, + "learning_rate": 1e-05, + "loss": 0.2566, + "step": 6140 + }, + { + "epoch": 0.02107420175035809, + "grad_norm": 1.1153484582901, + "learning_rate": 1e-05, + "loss": 0.2583, + "step": 6150 + }, + { + "epoch": 0.02110846874507412, + "grad_norm": 1.2822504043579102, + "learning_rate": 1e-05, + "loss": 0.2535, + "step": 6160 + }, + { + "epoch": 0.02114273573979015, + "grad_norm": 1.1332992315292358, + "learning_rate": 1e-05, + "loss": 0.2799, + "step": 6170 + }, + { + "epoch": 0.02117700273450618, + "grad_norm": 1.0284112691879272, + "learning_rate": 1e-05, + "loss": 0.2458, + "step": 6180 + }, + { + "epoch": 0.02121126972922221, + "grad_norm": 1.1097975969314575, + "learning_rate": 1e-05, + "loss": 0.2513, + "step": 6190 + }, + { + "epoch": 0.021245536723938238, + "grad_norm": 1.168990969657898, + "learning_rate": 1e-05, + "loss": 0.2843, + "step": 6200 + }, + { + "epoch": 0.021279803718654268, + "grad_norm": 0.9956926107406616, + "learning_rate": 
1e-05, + "loss": 0.247, + "step": 6210 + }, + { + "epoch": 0.021314070713370294, + "grad_norm": 1.2191492319107056, + "learning_rate": 1e-05, + "loss": 0.2608, + "step": 6220 + }, + { + "epoch": 0.021348337708086324, + "grad_norm": 1.0872688293457031, + "learning_rate": 1e-05, + "loss": 0.2463, + "step": 6230 + }, + { + "epoch": 0.021382604702802354, + "grad_norm": 1.0746614933013916, + "learning_rate": 1e-05, + "loss": 0.244, + "step": 6240 + }, + { + "epoch": 0.021416871697518384, + "grad_norm": 1.1560328006744385, + "learning_rate": 1e-05, + "loss": 0.2639, + "step": 6250 + }, + { + "epoch": 0.021451138692234414, + "grad_norm": 1.1529641151428223, + "learning_rate": 1e-05, + "loss": 0.2585, + "step": 6260 + }, + { + "epoch": 0.021485405686950444, + "grad_norm": 1.0708386898040771, + "learning_rate": 1e-05, + "loss": 0.2669, + "step": 6270 + }, + { + "epoch": 0.021519672681666473, + "grad_norm": 1.208079218864441, + "learning_rate": 1e-05, + "loss": 0.2436, + "step": 6280 + }, + { + "epoch": 0.021553939676382503, + "grad_norm": 1.1871508359909058, + "learning_rate": 1e-05, + "loss": 0.2655, + "step": 6290 + }, + { + "epoch": 0.02158820667109853, + "grad_norm": 1.0997953414916992, + "learning_rate": 1e-05, + "loss": 0.2578, + "step": 6300 + }, + { + "epoch": 0.02162247366581456, + "grad_norm": 1.2404417991638184, + "learning_rate": 1e-05, + "loss": 0.2726, + "step": 6310 + }, + { + "epoch": 0.02165674066053059, + "grad_norm": 1.1724058389663696, + "learning_rate": 1e-05, + "loss": 0.2611, + "step": 6320 + }, + { + "epoch": 0.02169100765524662, + "grad_norm": 1.124932885169983, + "learning_rate": 1e-05, + "loss": 0.2582, + "step": 6330 + }, + { + "epoch": 0.02172527464996265, + "grad_norm": 1.129584550857544, + "learning_rate": 1e-05, + "loss": 0.2651, + "step": 6340 + }, + { + "epoch": 0.02175954164467868, + "grad_norm": 1.1869479417800903, + "learning_rate": 1e-05, + "loss": 0.2451, + "step": 6350 + }, + { + "epoch": 0.02179380863939471, + "grad_norm": 
1.1753504276275635, + "learning_rate": 1e-05, + "loss": 0.2509, + "step": 6360 + }, + { + "epoch": 0.02182807563411074, + "grad_norm": 1.1704761981964111, + "learning_rate": 1e-05, + "loss": 0.2614, + "step": 6370 + }, + { + "epoch": 0.02186234262882677, + "grad_norm": 1.347970724105835, + "learning_rate": 1e-05, + "loss": 0.253, + "step": 6380 + }, + { + "epoch": 0.021896609623542795, + "grad_norm": 1.0677597522735596, + "learning_rate": 1e-05, + "loss": 0.2539, + "step": 6390 + }, + { + "epoch": 0.021930876618258825, + "grad_norm": 1.1567541360855103, + "learning_rate": 1e-05, + "loss": 0.2621, + "step": 6400 + }, + { + "epoch": 0.021965143612974854, + "grad_norm": 1.1231553554534912, + "learning_rate": 1e-05, + "loss": 0.2453, + "step": 6410 + }, + { + "epoch": 0.021999410607690884, + "grad_norm": 1.0485198497772217, + "learning_rate": 1e-05, + "loss": 0.2503, + "step": 6420 + }, + { + "epoch": 0.022033677602406914, + "grad_norm": 1.12228262424469, + "learning_rate": 1e-05, + "loss": 0.2488, + "step": 6430 + }, + { + "epoch": 0.022067944597122944, + "grad_norm": 1.2610136270523071, + "learning_rate": 1e-05, + "loss": 0.2445, + "step": 6440 + }, + { + "epoch": 0.022102211591838974, + "grad_norm": 0.9546436071395874, + "learning_rate": 1e-05, + "loss": 0.226, + "step": 6450 + }, + { + "epoch": 0.022136478586555004, + "grad_norm": 1.3363466262817383, + "learning_rate": 1e-05, + "loss": 0.2489, + "step": 6460 + }, + { + "epoch": 0.02217074558127103, + "grad_norm": 1.1454704999923706, + "learning_rate": 1e-05, + "loss": 0.2434, + "step": 6470 + }, + { + "epoch": 0.02220501257598706, + "grad_norm": 1.1578549146652222, + "learning_rate": 1e-05, + "loss": 0.2549, + "step": 6480 + }, + { + "epoch": 0.02223927957070309, + "grad_norm": 1.096081018447876, + "learning_rate": 1e-05, + "loss": 0.2472, + "step": 6490 + }, + { + "epoch": 0.02227354656541912, + "grad_norm": 1.2388731241226196, + "learning_rate": 1e-05, + "loss": 0.2457, + "step": 6500 + }, + { + "epoch": 
0.02227354656541912, + "eval_cer": 12.929970012347859, + "eval_loss": 0.24839338660240173, + "eval_normalized_cer": 9.242605915267786, + "eval_runtime": 227.6401, + "eval_samples_per_second": 2.249, + "eval_steps_per_second": 0.035, + "step": 6500 + }, + { + "epoch": 0.02230781356013515, + "grad_norm": 1.0306715965270996, + "learning_rate": 1e-05, + "loss": 0.2393, + "step": 6510 + }, + { + "epoch": 0.02234208055485118, + "grad_norm": 1.1339504718780518, + "learning_rate": 1e-05, + "loss": 0.2563, + "step": 6520 + }, + { + "epoch": 0.02237634754956721, + "grad_norm": 0.912266731262207, + "learning_rate": 1e-05, + "loss": 0.2465, + "step": 6530 + }, + { + "epoch": 0.02241061454428324, + "grad_norm": 1.1917020082473755, + "learning_rate": 1e-05, + "loss": 0.2395, + "step": 6540 + }, + { + "epoch": 0.022444881538999265, + "grad_norm": 1.248515248298645, + "learning_rate": 1e-05, + "loss": 0.2479, + "step": 6550 + }, + { + "epoch": 0.022479148533715295, + "grad_norm": 1.180799961090088, + "learning_rate": 1e-05, + "loss": 0.2616, + "step": 6560 + }, + { + "epoch": 0.022513415528431325, + "grad_norm": 1.0700205564498901, + "learning_rate": 1e-05, + "loss": 0.2401, + "step": 6570 + }, + { + "epoch": 0.022547682523147355, + "grad_norm": 1.1814614534378052, + "learning_rate": 1e-05, + "loss": 0.2471, + "step": 6580 + }, + { + "epoch": 0.022581949517863385, + "grad_norm": 1.3973134756088257, + "learning_rate": 1e-05, + "loss": 0.2383, + "step": 6590 + }, + { + "epoch": 0.022616216512579414, + "grad_norm": 1.244265079498291, + "learning_rate": 1e-05, + "loss": 0.2548, + "step": 6600 + }, + { + "epoch": 0.022650483507295444, + "grad_norm": 1.1685833930969238, + "learning_rate": 1e-05, + "loss": 0.2499, + "step": 6610 + }, + { + "epoch": 0.022684750502011474, + "grad_norm": 1.1566667556762695, + "learning_rate": 1e-05, + "loss": 0.2443, + "step": 6620 + }, + { + "epoch": 0.0227190174967275, + "grad_norm": 1.0241929292678833, + "learning_rate": 1e-05, + "loss": 0.2412, + 
"step": 6630 + }, + { + "epoch": 0.02275328449144353, + "grad_norm": 1.0359474420547485, + "learning_rate": 1e-05, + "loss": 0.2374, + "step": 6640 + }, + { + "epoch": 0.02278755148615956, + "grad_norm": 1.040810227394104, + "learning_rate": 1e-05, + "loss": 0.2254, + "step": 6650 + }, + { + "epoch": 0.02282181848087559, + "grad_norm": 1.0343252420425415, + "learning_rate": 1e-05, + "loss": 0.2366, + "step": 6660 + }, + { + "epoch": 0.02285608547559162, + "grad_norm": 1.052739143371582, + "learning_rate": 1e-05, + "loss": 0.2273, + "step": 6670 + }, + { + "epoch": 0.02289035247030765, + "grad_norm": 1.0414966344833374, + "learning_rate": 1e-05, + "loss": 0.2082, + "step": 6680 + }, + { + "epoch": 0.02292461946502368, + "grad_norm": 1.2340532541275024, + "learning_rate": 1e-05, + "loss": 0.2241, + "step": 6690 + }, + { + "epoch": 0.02295888645973971, + "grad_norm": 0.9693310260772705, + "learning_rate": 1e-05, + "loss": 0.2322, + "step": 6700 + }, + { + "epoch": 0.022993153454455736, + "grad_norm": 1.103025197982788, + "learning_rate": 1e-05, + "loss": 0.236, + "step": 6710 + }, + { + "epoch": 0.023027420449171766, + "grad_norm": 1.119689702987671, + "learning_rate": 1e-05, + "loss": 0.214, + "step": 6720 + }, + { + "epoch": 0.023061687443887795, + "grad_norm": 0.93172287940979, + "learning_rate": 1e-05, + "loss": 0.2094, + "step": 6730 + }, + { + "epoch": 0.023095954438603825, + "grad_norm": 1.0207446813583374, + "learning_rate": 1e-05, + "loss": 0.2238, + "step": 6740 + }, + { + "epoch": 0.023130221433319855, + "grad_norm": 1.200201392173767, + "learning_rate": 1e-05, + "loss": 0.218, + "step": 6750 + }, + { + "epoch": 0.023164488428035885, + "grad_norm": 1.1485291719436646, + "learning_rate": 1e-05, + "loss": 0.2314, + "step": 6760 + }, + { + "epoch": 0.023198755422751915, + "grad_norm": 1.2236285209655762, + "learning_rate": 1e-05, + "loss": 0.2326, + "step": 6770 + }, + { + "epoch": 0.023233022417467945, + "grad_norm": 1.1756523847579956, + "learning_rate": 
1e-05, + "loss": 0.2122, + "step": 6780 + }, + { + "epoch": 0.02326728941218397, + "grad_norm": 1.0356839895248413, + "learning_rate": 1e-05, + "loss": 0.2078, + "step": 6790 + }, + { + "epoch": 0.0233015564069, + "grad_norm": 1.1896883249282837, + "learning_rate": 1e-05, + "loss": 0.2072, + "step": 6800 + }, + { + "epoch": 0.02333582340161603, + "grad_norm": 1.1080976724624634, + "learning_rate": 1e-05, + "loss": 0.2127, + "step": 6810 + }, + { + "epoch": 0.02337009039633206, + "grad_norm": 1.128263235092163, + "learning_rate": 1e-05, + "loss": 0.2282, + "step": 6820 + }, + { + "epoch": 0.02340435739104809, + "grad_norm": 1.0398188829421997, + "learning_rate": 1e-05, + "loss": 0.2095, + "step": 6830 + }, + { + "epoch": 0.02343862438576412, + "grad_norm": 1.1791975498199463, + "learning_rate": 1e-05, + "loss": 0.2216, + "step": 6840 + }, + { + "epoch": 0.02347289138048015, + "grad_norm": 1.1444710493087769, + "learning_rate": 1e-05, + "loss": 0.2447, + "step": 6850 + }, + { + "epoch": 0.02350715837519618, + "grad_norm": 1.136607050895691, + "learning_rate": 1e-05, + "loss": 0.2093, + "step": 6860 + }, + { + "epoch": 0.023541425369912206, + "grad_norm": 1.0915231704711914, + "learning_rate": 1e-05, + "loss": 0.2128, + "step": 6870 + }, + { + "epoch": 0.023575692364628236, + "grad_norm": 1.0416276454925537, + "learning_rate": 1e-05, + "loss": 0.2092, + "step": 6880 + }, + { + "epoch": 0.023609959359344266, + "grad_norm": 1.3693732023239136, + "learning_rate": 1e-05, + "loss": 0.2137, + "step": 6890 + }, + { + "epoch": 0.023644226354060296, + "grad_norm": 1.1747677326202393, + "learning_rate": 1e-05, + "loss": 0.2215, + "step": 6900 + }, + { + "epoch": 0.023678493348776326, + "grad_norm": 1.1593588590621948, + "learning_rate": 1e-05, + "loss": 0.2234, + "step": 6910 + }, + { + "epoch": 0.023712760343492355, + "grad_norm": 1.2322016954421997, + "learning_rate": 1e-05, + "loss": 0.2437, + "step": 6920 + }, + { + "epoch": 0.023747027338208385, + "grad_norm": 
1.167648196220398, + "learning_rate": 1e-05, + "loss": 0.2461, + "step": 6930 + }, + { + "epoch": 0.023781294332924415, + "grad_norm": 1.0984666347503662, + "learning_rate": 1e-05, + "loss": 0.2584, + "step": 6940 + }, + { + "epoch": 0.023815561327640445, + "grad_norm": 1.1234291791915894, + "learning_rate": 1e-05, + "loss": 0.2532, + "step": 6950 + }, + { + "epoch": 0.02384982832235647, + "grad_norm": 1.2158063650131226, + "learning_rate": 1e-05, + "loss": 0.2567, + "step": 6960 + }, + { + "epoch": 0.0238840953170725, + "grad_norm": 1.0958101749420166, + "learning_rate": 1e-05, + "loss": 0.2387, + "step": 6970 + }, + { + "epoch": 0.02391836231178853, + "grad_norm": 1.1536844968795776, + "learning_rate": 1e-05, + "loss": 0.2712, + "step": 6980 + }, + { + "epoch": 0.02395262930650456, + "grad_norm": 1.2437007427215576, + "learning_rate": 1e-05, + "loss": 0.2563, + "step": 6990 + }, + { + "epoch": 0.02398689630122059, + "grad_norm": 1.0884592533111572, + "learning_rate": 1e-05, + "loss": 0.2379, + "step": 7000 + }, + { + "epoch": 0.02398689630122059, + "eval_cer": 13.079908273064033, + "eval_loss": 0.2514401376247406, + "eval_normalized_cer": 9.622302158273381, + "eval_runtime": 227.6705, + "eval_samples_per_second": 2.249, + "eval_steps_per_second": 0.035, + "step": 7000 + }, + { + "epoch": 0.02402116329593662, + "grad_norm": 1.2332980632781982, + "learning_rate": 1e-05, + "loss": 0.2543, + "step": 7010 + }, + { + "epoch": 0.02405543029065265, + "grad_norm": 1.1041260957717896, + "learning_rate": 1e-05, + "loss": 0.2663, + "step": 7020 + }, + { + "epoch": 0.02408969728536868, + "grad_norm": 1.1479183435440063, + "learning_rate": 1e-05, + "loss": 0.2528, + "step": 7030 + }, + { + "epoch": 0.024123964280084707, + "grad_norm": 1.103766918182373, + "learning_rate": 1e-05, + "loss": 0.2336, + "step": 7040 + }, + { + "epoch": 0.024158231274800736, + "grad_norm": 1.238996148109436, + "learning_rate": 1e-05, + "loss": 0.2436, + "step": 7050 + }, + { + "epoch": 
0.024192498269516766, + "grad_norm": 1.2652095556259155, + "learning_rate": 1e-05, + "loss": 0.2464, + "step": 7060 + }, + { + "epoch": 0.024226765264232796, + "grad_norm": 1.180665373802185, + "learning_rate": 1e-05, + "loss": 0.2541, + "step": 7070 + }, + { + "epoch": 0.024261032258948826, + "grad_norm": 1.1601506471633911, + "learning_rate": 1e-05, + "loss": 0.2508, + "step": 7080 + }, + { + "epoch": 0.024295299253664856, + "grad_norm": 1.257034420967102, + "learning_rate": 1e-05, + "loss": 0.2446, + "step": 7090 + }, + { + "epoch": 0.024329566248380886, + "grad_norm": 1.0813285112380981, + "learning_rate": 1e-05, + "loss": 0.2546, + "step": 7100 + }, + { + "epoch": 0.024363833243096916, + "grad_norm": 1.1124157905578613, + "learning_rate": 1e-05, + "loss": 0.2379, + "step": 7110 + }, + { + "epoch": 0.024398100237812942, + "grad_norm": 1.0615211725234985, + "learning_rate": 1e-05, + "loss": 0.253, + "step": 7120 + }, + { + "epoch": 0.024432367232528972, + "grad_norm": 1.185677409172058, + "learning_rate": 1e-05, + "loss": 0.2383, + "step": 7130 + }, + { + "epoch": 0.024466634227245, + "grad_norm": 1.1810061931610107, + "learning_rate": 1e-05, + "loss": 0.2603, + "step": 7140 + }, + { + "epoch": 0.02450090122196103, + "grad_norm": 1.155860424041748, + "learning_rate": 1e-05, + "loss": 0.2434, + "step": 7150 + }, + { + "epoch": 0.02453516821667706, + "grad_norm": 1.113008737564087, + "learning_rate": 1e-05, + "loss": 0.2529, + "step": 7160 + }, + { + "epoch": 0.02456943521139309, + "grad_norm": 1.1276872158050537, + "learning_rate": 1e-05, + "loss": 0.2265, + "step": 7170 + }, + { + "epoch": 0.02460370220610912, + "grad_norm": 1.149792194366455, + "learning_rate": 1e-05, + "loss": 0.2349, + "step": 7180 + }, + { + "epoch": 0.02463796920082515, + "grad_norm": 1.1619532108306885, + "learning_rate": 1e-05, + "loss": 0.2336, + "step": 7190 + }, + { + "epoch": 0.024672236195541177, + "grad_norm": 1.0760303735733032, + "learning_rate": 1e-05, + "loss": 0.2315, + "step": 
7200 + }, + { + "epoch": 0.024706503190257207, + "grad_norm": 1.2807782888412476, + "learning_rate": 1e-05, + "loss": 0.2382, + "step": 7210 + }, + { + "epoch": 0.024740770184973237, + "grad_norm": 1.0910037755966187, + "learning_rate": 1e-05, + "loss": 0.2333, + "step": 7220 + }, + { + "epoch": 0.024775037179689267, + "grad_norm": 1.2938390970230103, + "learning_rate": 1e-05, + "loss": 0.2147, + "step": 7230 + }, + { + "epoch": 0.024809304174405297, + "grad_norm": 1.185542106628418, + "learning_rate": 1e-05, + "loss": 0.2232, + "step": 7240 + }, + { + "epoch": 0.024843571169121326, + "grad_norm": 1.0598995685577393, + "learning_rate": 1e-05, + "loss": 0.2278, + "step": 7250 + }, + { + "epoch": 0.024877838163837356, + "grad_norm": 1.1860477924346924, + "learning_rate": 1e-05, + "loss": 0.2179, + "step": 7260 + }, + { + "epoch": 0.024912105158553386, + "grad_norm": 1.1935844421386719, + "learning_rate": 1e-05, + "loss": 0.238, + "step": 7270 + }, + { + "epoch": 0.024946372153269412, + "grad_norm": 1.0449039936065674, + "learning_rate": 1e-05, + "loss": 0.2307, + "step": 7280 + }, + { + "epoch": 0.024980639147985442, + "grad_norm": 1.0651369094848633, + "learning_rate": 1e-05, + "loss": 0.2379, + "step": 7290 + }, + { + "epoch": 0.025014906142701472, + "grad_norm": 1.0416852235794067, + "learning_rate": 1e-05, + "loss": 0.2208, + "step": 7300 + }, + { + "epoch": 0.025049173137417502, + "grad_norm": 1.0064860582351685, + "learning_rate": 1e-05, + "loss": 0.2227, + "step": 7310 + }, + { + "epoch": 0.025083440132133532, + "grad_norm": 1.0357342958450317, + "learning_rate": 1e-05, + "loss": 0.22, + "step": 7320 + }, + { + "epoch": 0.02511770712684956, + "grad_norm": 1.019918441772461, + "learning_rate": 1e-05, + "loss": 0.2396, + "step": 7330 + }, + { + "epoch": 0.02515197412156559, + "grad_norm": 1.0327798128128052, + "learning_rate": 1e-05, + "loss": 0.2118, + "step": 7340 + }, + { + "epoch": 0.02518624111628162, + "grad_norm": 0.9973874092102051, + "learning_rate": 
1e-05, + "loss": 0.2275, + "step": 7350 + }, + { + "epoch": 0.025220508110997648, + "grad_norm": 1.093544840812683, + "learning_rate": 1e-05, + "loss": 0.2214, + "step": 7360 + }, + { + "epoch": 0.025254775105713678, + "grad_norm": 1.118829369544983, + "learning_rate": 1e-05, + "loss": 0.237, + "step": 7370 + }, + { + "epoch": 0.025289042100429707, + "grad_norm": 1.2009224891662598, + "learning_rate": 1e-05, + "loss": 0.2447, + "step": 7380 + }, + { + "epoch": 0.025323309095145737, + "grad_norm": 1.1427584886550903, + "learning_rate": 1e-05, + "loss": 0.234, + "step": 7390 + }, + { + "epoch": 0.025357576089861767, + "grad_norm": 0.9685842394828796, + "learning_rate": 1e-05, + "loss": 0.2231, + "step": 7400 + }, + { + "epoch": 0.025391843084577797, + "grad_norm": 1.165501356124878, + "learning_rate": 1e-05, + "loss": 0.2139, + "step": 7410 + }, + { + "epoch": 0.025426110079293827, + "grad_norm": 1.4023411273956299, + "learning_rate": 1e-05, + "loss": 0.236, + "step": 7420 + }, + { + "epoch": 0.025460377074009857, + "grad_norm": 1.218546748161316, + "learning_rate": 1e-05, + "loss": 0.2433, + "step": 7430 + }, + { + "epoch": 0.025494644068725883, + "grad_norm": 1.4930671453475952, + "learning_rate": 1e-05, + "loss": 0.2466, + "step": 7440 + }, + { + "epoch": 0.025528911063441913, + "grad_norm": 1.145317554473877, + "learning_rate": 1e-05, + "loss": 0.2535, + "step": 7450 + }, + { + "epoch": 0.025563178058157943, + "grad_norm": 1.2366299629211426, + "learning_rate": 1e-05, + "loss": 0.2606, + "step": 7460 + }, + { + "epoch": 0.025597445052873972, + "grad_norm": 1.0542744398117065, + "learning_rate": 1e-05, + "loss": 0.2493, + "step": 7470 + }, + { + "epoch": 0.025631712047590002, + "grad_norm": 1.2272337675094604, + "learning_rate": 1e-05, + "loss": 0.2537, + "step": 7480 + }, + { + "epoch": 0.025665979042306032, + "grad_norm": 1.169912576675415, + "learning_rate": 1e-05, + "loss": 0.2581, + "step": 7490 + }, + { + "epoch": 0.025700246037022062, + "grad_norm": 
1.1997913122177124, + "learning_rate": 1e-05, + "loss": 0.2547, + "step": 7500 + }, + { + "epoch": 0.025700246037022062, + "eval_cer": 12.859410830834362, + "eval_loss": 0.2470153123140335, + "eval_normalized_cer": 9.162669864108713, + "eval_runtime": 227.7782, + "eval_samples_per_second": 2.248, + "eval_steps_per_second": 0.035, + "step": 7500 + }, + { + "epoch": 0.025734513031738092, + "grad_norm": 1.0920944213867188, + "learning_rate": 1e-05, + "loss": 0.2498, + "step": 7510 + }, + { + "epoch": 0.025768780026454118, + "grad_norm": 1.349660038948059, + "learning_rate": 1e-05, + "loss": 0.2591, + "step": 7520 + }, + { + "epoch": 0.025803047021170148, + "grad_norm": 1.0097490549087524, + "learning_rate": 1e-05, + "loss": 0.2514, + "step": 7530 + }, + { + "epoch": 0.025837314015886178, + "grad_norm": 1.118241548538208, + "learning_rate": 1e-05, + "loss": 0.2603, + "step": 7540 + }, + { + "epoch": 0.025871581010602208, + "grad_norm": 1.078802466392517, + "learning_rate": 1e-05, + "loss": 0.2532, + "step": 7550 + }, + { + "epoch": 0.025905848005318238, + "grad_norm": 1.0794482231140137, + "learning_rate": 1e-05, + "loss": 0.2521, + "step": 7560 + }, + { + "epoch": 0.025940115000034267, + "grad_norm": 1.130106806755066, + "learning_rate": 1e-05, + "loss": 0.2574, + "step": 7570 + }, + { + "epoch": 0.025974381994750297, + "grad_norm": 1.112724781036377, + "learning_rate": 1e-05, + "loss": 0.253, + "step": 7580 + }, + { + "epoch": 0.026008648989466327, + "grad_norm": 1.2646088600158691, + "learning_rate": 1e-05, + "loss": 0.2548, + "step": 7590 + }, + { + "epoch": 0.026042915984182357, + "grad_norm": 1.1961979866027832, + "learning_rate": 1e-05, + "loss": 0.2548, + "step": 7600 + }, + { + "epoch": 0.026077182978898383, + "grad_norm": 1.2568695545196533, + "learning_rate": 1e-05, + "loss": 0.245, + "step": 7610 + }, + { + "epoch": 0.026111449973614413, + "grad_norm": 1.0233054161071777, + "learning_rate": 1e-05, + "loss": 0.2429, + "step": 7620 + }, + { + "epoch": 
0.026145716968330443, + "grad_norm": 1.4355731010437012, + "learning_rate": 1e-05, + "loss": 0.2623, + "step": 7630 + }, + { + "epoch": 0.026179983963046473, + "grad_norm": 0.9781149625778198, + "learning_rate": 1e-05, + "loss": 0.2436, + "step": 7640 + }, + { + "epoch": 0.026214250957762503, + "grad_norm": 1.085255742073059, + "learning_rate": 1e-05, + "loss": 0.2475, + "step": 7650 + }, + { + "epoch": 0.026248517952478533, + "grad_norm": 1.0647081136703491, + "learning_rate": 1e-05, + "loss": 0.2596, + "step": 7660 + }, + { + "epoch": 0.026282784947194562, + "grad_norm": 1.3411939144134521, + "learning_rate": 1e-05, + "loss": 0.2444, + "step": 7670 + }, + { + "epoch": 0.026317051941910592, + "grad_norm": 1.0778676271438599, + "learning_rate": 1e-05, + "loss": 0.2499, + "step": 7680 + }, + { + "epoch": 0.02635131893662662, + "grad_norm": 1.1606541872024536, + "learning_rate": 1e-05, + "loss": 0.2537, + "step": 7690 + }, + { + "epoch": 0.02638558593134265, + "grad_norm": 1.0706511735916138, + "learning_rate": 1e-05, + "loss": 0.2324, + "step": 7700 + }, + { + "epoch": 0.02641985292605868, + "grad_norm": 1.2074836492538452, + "learning_rate": 1e-05, + "loss": 0.2487, + "step": 7710 + }, + { + "epoch": 0.026454119920774708, + "grad_norm": 1.0147804021835327, + "learning_rate": 1e-05, + "loss": 0.2202, + "step": 7720 + }, + { + "epoch": 0.026488386915490738, + "grad_norm": 1.1806961297988892, + "learning_rate": 1e-05, + "loss": 0.2464, + "step": 7730 + }, + { + "epoch": 0.026522653910206768, + "grad_norm": 1.1552751064300537, + "learning_rate": 1e-05, + "loss": 0.2244, + "step": 7740 + }, + { + "epoch": 0.026556920904922798, + "grad_norm": 1.115871548652649, + "learning_rate": 1e-05, + "loss": 0.2389, + "step": 7750 + }, + { + "epoch": 0.026591187899638827, + "grad_norm": 1.0924640893936157, + "learning_rate": 1e-05, + "loss": 0.2237, + "step": 7760 + }, + { + "epoch": 0.026625454894354854, + "grad_norm": 1.021644115447998, + "learning_rate": 1e-05, + "loss": 0.2257, 
+ "step": 7770 + }, + { + "epoch": 0.026659721889070884, + "grad_norm": 1.1757131814956665, + "learning_rate": 1e-05, + "loss": 0.2278, + "step": 7780 + }, + { + "epoch": 0.026693988883786914, + "grad_norm": 1.1914074420928955, + "learning_rate": 1e-05, + "loss": 0.2266, + "step": 7790 + }, + { + "epoch": 0.026728255878502943, + "grad_norm": 1.0416505336761475, + "learning_rate": 1e-05, + "loss": 0.2273, + "step": 7800 + }, + { + "epoch": 0.026762522873218973, + "grad_norm": 1.0241059064865112, + "learning_rate": 1e-05, + "loss": 0.2342, + "step": 7810 + }, + { + "epoch": 0.026796789867935003, + "grad_norm": 1.133334994316101, + "learning_rate": 1e-05, + "loss": 0.2303, + "step": 7820 + }, + { + "epoch": 0.026831056862651033, + "grad_norm": 1.1711792945861816, + "learning_rate": 1e-05, + "loss": 0.2333, + "step": 7830 + }, + { + "epoch": 0.026865323857367063, + "grad_norm": 1.1120338439941406, + "learning_rate": 1e-05, + "loss": 0.2474, + "step": 7840 + }, + { + "epoch": 0.02689959085208309, + "grad_norm": 1.1995311975479126, + "learning_rate": 1e-05, + "loss": 0.2472, + "step": 7850 + }, + { + "epoch": 0.02693385784679912, + "grad_norm": 1.1725718975067139, + "learning_rate": 1e-05, + "loss": 0.2361, + "step": 7860 + }, + { + "epoch": 0.02696812484151515, + "grad_norm": 0.9564438462257385, + "learning_rate": 1e-05, + "loss": 0.2266, + "step": 7870 + }, + { + "epoch": 0.02700239183623118, + "grad_norm": 1.140692114830017, + "learning_rate": 1e-05, + "loss": 0.2319, + "step": 7880 + }, + { + "epoch": 0.02703665883094721, + "grad_norm": 1.0812654495239258, + "learning_rate": 1e-05, + "loss": 0.2434, + "step": 7890 + }, + { + "epoch": 0.02707092582566324, + "grad_norm": 1.179500937461853, + "learning_rate": 1e-05, + "loss": 0.2191, + "step": 7900 + }, + { + "epoch": 0.027105192820379268, + "grad_norm": 1.1073647737503052, + "learning_rate": 1e-05, + "loss": 0.2315, + "step": 7910 + }, + { + "epoch": 0.027139459815095298, + "grad_norm": 1.093070387840271, + 
"learning_rate": 1e-05, + "loss": 0.2256, + "step": 7920 + }, + { + "epoch": 0.027173726809811324, + "grad_norm": 1.2253212928771973, + "learning_rate": 1e-05, + "loss": 0.2413, + "step": 7930 + }, + { + "epoch": 0.027207993804527354, + "grad_norm": 1.1531736850738525, + "learning_rate": 1e-05, + "loss": 0.2514, + "step": 7940 + }, + { + "epoch": 0.027242260799243384, + "grad_norm": 1.0366076231002808, + "learning_rate": 1e-05, + "loss": 0.2475, + "step": 7950 + }, + { + "epoch": 0.027276527793959414, + "grad_norm": 1.1657369136810303, + "learning_rate": 1e-05, + "loss": 0.2475, + "step": 7960 + }, + { + "epoch": 0.027310794788675444, + "grad_norm": 1.3050105571746826, + "learning_rate": 1e-05, + "loss": 0.2704, + "step": 7970 + }, + { + "epoch": 0.027345061783391474, + "grad_norm": 1.1378298997879028, + "learning_rate": 1e-05, + "loss": 0.2481, + "step": 7980 + }, + { + "epoch": 0.027379328778107503, + "grad_norm": 1.1434043645858765, + "learning_rate": 1e-05, + "loss": 0.2671, + "step": 7990 + }, + { + "epoch": 0.027413595772823533, + "grad_norm": 1.0899518728256226, + "learning_rate": 1e-05, + "loss": 0.2573, + "step": 8000 + }, + { + "epoch": 0.027413595772823533, + "eval_cer": 12.903510319280295, + "eval_loss": 0.2475583553314209, + "eval_normalized_cer": 9.362509992006395, + "eval_runtime": 228.4278, + "eval_samples_per_second": 2.241, + "eval_steps_per_second": 0.035, + "step": 8000 + }, + { + "epoch": 0.02744786276753956, + "grad_norm": 0.970212459564209, + "learning_rate": 1e-05, + "loss": 0.238, + "step": 8010 + }, + { + "epoch": 0.02748212976225559, + "grad_norm": 1.0460257530212402, + "learning_rate": 1e-05, + "loss": 0.2606, + "step": 8020 + }, + { + "epoch": 0.02751639675697162, + "grad_norm": 1.116742491722107, + "learning_rate": 1e-05, + "loss": 0.2571, + "step": 8030 + }, + { + "epoch": 0.02755066375168765, + "grad_norm": 1.2562140226364136, + "learning_rate": 1e-05, + "loss": 0.2561, + "step": 8040 + }, + { + "epoch": 0.02758493074640368, + 
"grad_norm": 1.2499713897705078, + "learning_rate": 1e-05, + "loss": 0.2683, + "step": 8050 + }, + { + "epoch": 0.02761919774111971, + "grad_norm": 1.151715874671936, + "learning_rate": 1e-05, + "loss": 0.2463, + "step": 8060 + }, + { + "epoch": 0.02765346473583574, + "grad_norm": 1.2527892589569092, + "learning_rate": 1e-05, + "loss": 0.261, + "step": 8070 + }, + { + "epoch": 0.02768773173055177, + "grad_norm": 1.1776025295257568, + "learning_rate": 1e-05, + "loss": 0.2616, + "step": 8080 + }, + { + "epoch": 0.027721998725267795, + "grad_norm": 1.1632285118103027, + "learning_rate": 1e-05, + "loss": 0.2508, + "step": 8090 + }, + { + "epoch": 0.027756265719983825, + "grad_norm": 1.3266422748565674, + "learning_rate": 1e-05, + "loss": 0.2667, + "step": 8100 + }, + { + "epoch": 0.027790532714699855, + "grad_norm": 1.240424633026123, + "learning_rate": 1e-05, + "loss": 0.2582, + "step": 8110 + }, + { + "epoch": 0.027824799709415884, + "grad_norm": 1.1874525547027588, + "learning_rate": 1e-05, + "loss": 0.2505, + "step": 8120 + }, + { + "epoch": 0.027859066704131914, + "grad_norm": 1.0850279331207275, + "learning_rate": 1e-05, + "loss": 0.2556, + "step": 8130 + }, + { + "epoch": 0.027893333698847944, + "grad_norm": 1.203342318534851, + "learning_rate": 1e-05, + "loss": 0.2526, + "step": 8140 + }, + { + "epoch": 0.027927600693563974, + "grad_norm": 0.9685319066047668, + "learning_rate": 1e-05, + "loss": 0.2614, + "step": 8150 + }, + { + "epoch": 0.027961867688280004, + "grad_norm": 1.020749807357788, + "learning_rate": 1e-05, + "loss": 0.2763, + "step": 8160 + }, + { + "epoch": 0.02799613468299603, + "grad_norm": 1.1530399322509766, + "learning_rate": 1e-05, + "loss": 0.2544, + "step": 8170 + }, + { + "epoch": 0.02803040167771206, + "grad_norm": 1.0800687074661255, + "learning_rate": 1e-05, + "loss": 0.2628, + "step": 8180 + }, + { + "epoch": 0.02806466867242809, + "grad_norm": 1.1825618743896484, + "learning_rate": 1e-05, + "loss": 0.2524, + "step": 8190 + }, + { + 
"epoch": 0.02809893566714412, + "grad_norm": 1.176870346069336, + "learning_rate": 1e-05, + "loss": 0.2401, + "step": 8200 + }, + { + "epoch": 0.02813320266186015, + "grad_norm": 1.19720458984375, + "learning_rate": 1e-05, + "loss": 0.2801, + "step": 8210 + }, + { + "epoch": 0.02816746965657618, + "grad_norm": 1.0634618997573853, + "learning_rate": 1e-05, + "loss": 0.2607, + "step": 8220 + }, + { + "epoch": 0.02820173665129221, + "grad_norm": 1.1780894994735718, + "learning_rate": 1e-05, + "loss": 0.2558, + "step": 8230 + }, + { + "epoch": 0.02823600364600824, + "grad_norm": 1.18949294090271, + "learning_rate": 1e-05, + "loss": 0.2432, + "step": 8240 + }, + { + "epoch": 0.02827027064072427, + "grad_norm": 1.3350197076797485, + "learning_rate": 1e-05, + "loss": 0.2644, + "step": 8250 + }, + { + "epoch": 0.028304537635440295, + "grad_norm": 1.1507694721221924, + "learning_rate": 1e-05, + "loss": 0.254, + "step": 8260 + }, + { + "epoch": 0.028338804630156325, + "grad_norm": 1.0806615352630615, + "learning_rate": 1e-05, + "loss": 0.2479, + "step": 8270 + }, + { + "epoch": 0.028373071624872355, + "grad_norm": 1.1201471090316772, + "learning_rate": 1e-05, + "loss": 0.2553, + "step": 8280 + }, + { + "epoch": 0.028407338619588385, + "grad_norm": 1.0681666135787964, + "learning_rate": 1e-05, + "loss": 0.258, + "step": 8290 + }, + { + "epoch": 0.028441605614304415, + "grad_norm": 1.0958445072174072, + "learning_rate": 1e-05, + "loss": 0.2502, + "step": 8300 + }, + { + "epoch": 0.028475872609020444, + "grad_norm": 1.165635585784912, + "learning_rate": 1e-05, + "loss": 0.2642, + "step": 8310 + }, + { + "epoch": 0.028510139603736474, + "grad_norm": 0.9674690961837769, + "learning_rate": 1e-05, + "loss": 0.2385, + "step": 8320 + }, + { + "epoch": 0.028544406598452504, + "grad_norm": 1.239996314048767, + "learning_rate": 1e-05, + "loss": 0.2706, + "step": 8330 + }, + { + "epoch": 0.02857867359316853, + "grad_norm": 1.0063962936401367, + "learning_rate": 1e-05, + "loss": 0.2448, + 
"step": 8340 + }, + { + "epoch": 0.02861294058788456, + "grad_norm": 1.0466179847717285, + "learning_rate": 1e-05, + "loss": 0.2452, + "step": 8350 + }, + { + "epoch": 0.02864720758260059, + "grad_norm": null, + "learning_rate": 1e-05, + "loss": 0.2595, + "step": 8360 + }, + { + "epoch": 0.02868147457731662, + "grad_norm": 1.1461595296859741, + "learning_rate": 1e-05, + "loss": 0.2515, + "step": 8370 + }, + { + "epoch": 0.02871574157203265, + "grad_norm": 1.2697845697402954, + "learning_rate": 1e-05, + "loss": 0.2641, + "step": 8380 + }, + { + "epoch": 0.02875000856674868, + "grad_norm": 1.2665945291519165, + "learning_rate": 1e-05, + "loss": 0.2613, + "step": 8390 + }, + { + "epoch": 0.02878427556146471, + "grad_norm": 1.1350281238555908, + "learning_rate": 1e-05, + "loss": 0.2524, + "step": 8400 + }, + { + "epoch": 0.02881854255618074, + "grad_norm": 1.0341808795928955, + "learning_rate": 1e-05, + "loss": 0.2466, + "step": 8410 + }, + { + "epoch": 0.028852809550896766, + "grad_norm": 1.1108484268188477, + "learning_rate": 1e-05, + "loss": 0.2471, + "step": 8420 + }, + { + "epoch": 0.028887076545612796, + "grad_norm": 1.059414029121399, + "learning_rate": 1e-05, + "loss": 0.2695, + "step": 8430 + }, + { + "epoch": 0.028921343540328825, + "grad_norm": 1.0888679027557373, + "learning_rate": 1e-05, + "loss": 0.2683, + "step": 8440 + }, + { + "epoch": 0.028955610535044855, + "grad_norm": 1.1649068593978882, + "learning_rate": 1e-05, + "loss": 0.2485, + "step": 8450 + }, + { + "epoch": 0.028989877529760885, + "grad_norm": 1.218563199043274, + "learning_rate": 1e-05, + "loss": 0.2456, + "step": 8460 + }, + { + "epoch": 0.029024144524476915, + "grad_norm": 1.3558833599090576, + "learning_rate": 1e-05, + "loss": 0.2517, + "step": 8470 + }, + { + "epoch": 0.029058411519192945, + "grad_norm": 1.2579597234725952, + "learning_rate": 1e-05, + "loss": 0.2516, + "step": 8480 + }, + { + "epoch": 0.029092678513908975, + "grad_norm": 1.185253381729126, + "learning_rate": 1e-05, 
+ "loss": 0.2475, + "step": 8490 + }, + { + "epoch": 0.029126945508625, + "grad_norm": 1.1937752962112427, + "learning_rate": 1e-05, + "loss": 0.2654, + "step": 8500 + }, + { + "epoch": 0.029126945508625, + "eval_cer": 12.89469042159111, + "eval_loss": 0.2503049969673157, + "eval_normalized_cer": 8.952837729816148, + "eval_runtime": 229.0216, + "eval_samples_per_second": 2.236, + "eval_steps_per_second": 0.035, + "step": 8500 + } + ], + "logging_steps": 10, + "max_steps": 291826, + "num_input_tokens_seen": 0, + "num_train_epochs": 9223372036854775807, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.72345176096768e+21, + "train_batch_size": 128, + "trial_name": null, + "trial_params": null +}