{ "best_metric": 9.4224620303757, "best_model_checkpoint": "kotoba_v2_enc_logs_epoch2_2/checkpoint-500", "epoch": 0.006853398943205883, "eval_steps": 500, "global_step": 2000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 3.4266994716029415e-05, "grad_norm": 1.0561553239822388, "learning_rate": 1e-05, "loss": 0.2361, "step": 10 }, { "epoch": 6.853398943205883e-05, "grad_norm": 1.1626238822937012, "learning_rate": 1e-05, "loss": 0.2265, "step": 20 }, { "epoch": 0.00010280098414808825, "grad_norm": 0.9845689535140991, "learning_rate": 1e-05, "loss": 0.2279, "step": 30 }, { "epoch": 0.00013706797886411766, "grad_norm": 1.142356276512146, "learning_rate": 1e-05, "loss": 0.2382, "step": 40 }, { "epoch": 0.00017133497358014707, "grad_norm": 1.0053240060806274, "learning_rate": 1e-05, "loss": 0.2473, "step": 50 }, { "epoch": 0.0002056019682961765, "grad_norm": 1.1098105907440186, "learning_rate": 1e-05, "loss": 0.2438, "step": 60 }, { "epoch": 0.0002398689630122059, "grad_norm": 1.191983699798584, "learning_rate": 1e-05, "loss": 0.2293, "step": 70 }, { "epoch": 0.0002741359577282353, "grad_norm": 1.1295104026794434, "learning_rate": 1e-05, "loss": 0.2362, "step": 80 }, { "epoch": 0.0003084029524442647, "grad_norm": 1.037972092628479, "learning_rate": 1e-05, "loss": 0.2455, "step": 90 }, { "epoch": 0.00034266994716029413, "grad_norm": 1.1975648403167725, "learning_rate": 1e-05, "loss": 0.2459, "step": 100 }, { "epoch": 0.00037693694187632354, "grad_norm": 1.0676342248916626, "learning_rate": 1e-05, "loss": 0.2271, "step": 110 }, { "epoch": 0.000411203936592353, "grad_norm": 1.0749495029449463, "learning_rate": 1e-05, "loss": 0.2417, "step": 120 }, { "epoch": 0.0004454709313083824, "grad_norm": 1.094260811805725, "learning_rate": 1e-05, "loss": 0.2354, "step": 130 }, { "epoch": 0.0004797379260244118, "grad_norm": 1.0395853519439697, "learning_rate": 1e-05, "loss": 0.2381, "step": 140 }, { "epoch": 0.0005140049207404412, "grad_norm": 1.2008885145187378, "learning_rate": 1e-05, "loss": 0.2354, "step": 150 }, { "epoch": 0.0005482719154564706, "grad_norm": 1.0647832155227661, "learning_rate": 1e-05, "loss": 0.2321, "step": 160 }, { "epoch": 0.0005825389101725, "grad_norm": 1.327071189880371, "learning_rate": 1e-05, "loss": 0.238, "step": 170 }, { "epoch": 0.0006168059048885295, "grad_norm": 1.1184055805206299, "learning_rate": 1e-05, "loss": 0.2242, "step": 180 }, { "epoch": 0.0006510728996045589, "grad_norm": 1.2512784004211426, "learning_rate": 1e-05, "loss": 0.2437, "step": 190 }, { "epoch": 0.0006853398943205883, "grad_norm": 1.0614465475082397, "learning_rate": 1e-05, "loss": 0.2382, "step": 200 }, { "epoch": 0.0007196068890366177, "grad_norm": 1.0607149600982666, "learning_rate": 1e-05, "loss": 0.2381, "step": 210 }, { "epoch": 0.0007538738837526471, "grad_norm": 1.0422028303146362, "learning_rate": 1e-05, "loss": 0.2294, "step": 220 }, { "epoch": 0.0007881408784686765, "grad_norm": 1.0162984132766724, "learning_rate": 1e-05, "loss": 0.2275, "step": 230 }, { "epoch": 0.000822407873184706, "grad_norm": 1.1085543632507324, "learning_rate": 1e-05, "loss": 0.2161, "step": 240 }, { "epoch": 0.0008566748679007354, "grad_norm": 1.1854636669158936, "learning_rate": 1e-05, "loss": 0.2382, "step": 250 }, { "epoch": 0.0008909418626167648, "grad_norm": 1.40137779712677, "learning_rate": 1e-05, "loss": 0.2579, "step": 260 }, { "epoch": 0.0009252088573327942, "grad_norm": 1.0814112424850464, "learning_rate": 1e-05, "loss": 0.2612, "step": 270 }, { "epoch": 0.0009594758520488236, "grad_norm": 1.083736538887024, "learning_rate": 1e-05, "loss": 0.2711, "step": 280 }, { "epoch": 0.000993742846764853, "grad_norm": 1.0861411094665527, "learning_rate": 1e-05, "loss": 0.2642, "step": 290 }, { "epoch": 0.0010280098414808825, "grad_norm": 1.1141265630722046, "learning_rate": 1e-05, "loss": 0.2585, "step": 300 }, { "epoch": 0.0010622768361969119, "grad_norm": 1.326241374015808, "learning_rate": 1e-05, "loss": 0.2858, "step": 310 }, { "epoch": 0.0010965438309129413, "grad_norm": 1.393750786781311, "learning_rate": 1e-05, "loss": 0.2635, "step": 320 }, { "epoch": 0.0011308108256289707, "grad_norm": 1.0851459503173828, "learning_rate": 1e-05, "loss": 0.2565, "step": 330 }, { "epoch": 0.001165077820345, "grad_norm": 1.2323757410049438, "learning_rate": 1e-05, "loss": 0.2465, "step": 340 }, { "epoch": 0.0011993448150610295, "grad_norm": 1.376953125, "learning_rate": 1e-05, "loss": 0.2671, "step": 350 }, { "epoch": 0.001233611809777059, "grad_norm": 1.084592580795288, "learning_rate": 1e-05, "loss": 0.2643, "step": 360 }, { "epoch": 0.0012678788044930883, "grad_norm": 1.2907005548477173, "learning_rate": 1e-05, "loss": 0.2584, "step": 370 }, { "epoch": 0.0013021457992091177, "grad_norm": 1.0698130130767822, "learning_rate": 1e-05, "loss": 0.2526, "step": 380 }, { "epoch": 0.0013364127939251471, "grad_norm": 1.1399807929992676, "learning_rate": 1e-05, "loss": 0.2759, "step": 390 }, { "epoch": 0.0013706797886411765, "grad_norm": 1.1480791568756104, "learning_rate": 1e-05, "loss": 0.2499, "step": 400 }, { "epoch": 0.001404946783357206, "grad_norm": 1.3095237016677856, "learning_rate": 1e-05, "loss": 0.2536, "step": 410 }, { "epoch": 0.0014392137780732353, "grad_norm": 1.068246841430664, "learning_rate": 1e-05, "loss": 0.2604, "step": 420 }, { "epoch": 0.0014734807727892648, "grad_norm": 1.2310419082641602, "learning_rate": 1e-05, "loss": 0.2632, "step": 430 }, { "epoch": 0.0015077477675052942, "grad_norm": 1.161867380142212, "learning_rate": 1e-05, "loss": 0.2584, "step": 440 }, { "epoch": 0.0015420147622213236, "grad_norm": 1.1461217403411865, "learning_rate": 1e-05, "loss": 0.2592, "step": 450 }, { "epoch": 0.001576281756937353, "grad_norm": 1.3006030321121216, "learning_rate": 1e-05, "loss": 0.2607, "step": 460 }, { "epoch": 0.0016105487516533824, "grad_norm": 1.1223125457763672, "learning_rate": 1e-05, "loss": 0.2433, "step": 470 }, { "epoch": 0.001644815746369412, "grad_norm": 1.2909380197525024, "learning_rate": 1e-05, "loss": 0.2693, "step": 480 }, { "epoch": 0.0016790827410854414, "grad_norm": 1.2270597219467163, "learning_rate": 1e-05, "loss": 0.2661, "step": 490 }, { "epoch": 0.0017133497358014708, "grad_norm": 1.1439770460128784, "learning_rate": 1e-05, "loss": 0.2517, "step": 500 }, { "epoch": 0.0017133497358014708, "eval_cer": 13.0358087846181, "eval_loss": 0.25224336981773376, "eval_normalized_cer": 9.4224620303757, "eval_runtime": 227.2174, "eval_samples_per_second": 2.253, "eval_steps_per_second": 0.035, "step": 500 }, { "epoch": 0.0017476167305175002, "grad_norm": 1.1377454996109009, "learning_rate": 1e-05, "loss": 0.2579, "step": 510 }, { "epoch": 0.0017818837252335296, "grad_norm": 1.2096498012542725, "learning_rate": 1e-05, "loss": 0.2727, "step": 520 }, { "epoch": 0.001816150719949559, "grad_norm": 1.187213659286499, "learning_rate": 1e-05, "loss": 0.2562, "step": 530 }, { "epoch": 0.0018504177146655885, "grad_norm": 0.969393253326416, "learning_rate": 1e-05, "loss": 0.2378, "step": 540 }, { "epoch": 0.0018846847093816179, "grad_norm": 0.9745528697967529, "learning_rate": 1e-05, "loss": 0.2774, "step": 550 }, { "epoch": 0.0019189517040976473, "grad_norm": 1.0725352764129639, "learning_rate": 1e-05, "loss": 0.2541, "step": 560 }, { "epoch": 0.0019532186988136767, "grad_norm": 1.217871904373169, "learning_rate": 1e-05, "loss": 0.2395, "step": 570 }, { "epoch": 0.001987485693529706, "grad_norm": 1.3582627773284912, "learning_rate": 1e-05, "loss": 0.2594, "step": 580 }, { "epoch": 0.0020217526882457355, "grad_norm": 1.2415379285812378, "learning_rate": 1e-05, "loss": 0.2582, "step": 590 }, { "epoch": 0.002056019682961765, "grad_norm": 0.9810131192207336, "learning_rate": 1e-05, "loss": 0.2284, "step": 600 }, { "epoch": 0.0020902866776777943, "grad_norm": 0.9806564450263977, "learning_rate": 1e-05, "loss": 0.2688, "step": 610 }, { "epoch": 0.0021245536723938237, "grad_norm": 1.2755467891693115, "learning_rate": 1e-05, "loss": 0.2591, "step": 620 }, { "epoch": 0.002158820667109853, "grad_norm": 0.9300326704978943, "learning_rate": 1e-05, "loss": 0.2444, "step": 630 }, { "epoch": 0.0021930876618258825, "grad_norm": 1.1276524066925049, "learning_rate": 1e-05, "loss": 0.236, "step": 640 }, { "epoch": 0.002227354656541912, "grad_norm": 1.1786876916885376, "learning_rate": 1e-05, "loss": 0.2443, "step": 650 }, { "epoch": 0.0022616216512579414, "grad_norm": 1.1702712774276733, "learning_rate": 1e-05, "loss": 0.2627, "step": 660 }, { "epoch": 0.0022958886459739708, "grad_norm": 1.2837899923324585, "learning_rate": 1e-05, "loss": 0.2378, "step": 670 }, { "epoch": 0.00233015564069, "grad_norm": 1.0623608827590942, "learning_rate": 1e-05, "loss": 0.2491, "step": 680 }, { "epoch": 0.0023644226354060296, "grad_norm": 1.1288243532180786, "learning_rate": 1e-05, "loss": 0.2773, "step": 690 }, { "epoch": 0.002398689630122059, "grad_norm": 1.0192692279815674, "learning_rate": 1e-05, "loss": 0.2492, "step": 700 }, { "epoch": 0.0024329566248380884, "grad_norm": 1.2274680137634277, "learning_rate": 1e-05, "loss": 0.2345, "step": 710 }, { "epoch": 0.002467223619554118, "grad_norm": 1.240645170211792, "learning_rate": 1e-05, "loss": 0.2624, "step": 720 }, { "epoch": 0.002501490614270147, "grad_norm": 1.0681366920471191, "learning_rate": 1e-05, "loss": 0.2553, "step": 730 }, { "epoch": 0.0025357576089861766, "grad_norm": 1.0161867141723633, "learning_rate": 1e-05, "loss": 0.2547, "step": 740 }, { "epoch": 0.002570024603702206, "grad_norm": 1.2384017705917358, "learning_rate": 1e-05, "loss": 0.2449, "step": 750 }, { "epoch": 0.0026042915984182354, "grad_norm": 1.1739261150360107, "learning_rate": 1e-05, "loss": 0.2523, "step": 760 }, { "epoch": 0.002638558593134265, "grad_norm": 1.0396535396575928, "learning_rate": 1e-05, "loss": 0.2535, "step": 770 }, { "epoch": 0.0026728255878502943, "grad_norm": 1.14767324924469, "learning_rate": 1e-05, "loss": 0.2594, "step": 780 }, { "epoch": 0.0027070925825663237, "grad_norm": 1.1783303022384644, "learning_rate": 1e-05, "loss": 0.2546, "step": 790 }, { "epoch": 0.002741359577282353, "grad_norm": 1.1065645217895508, "learning_rate": 1e-05, "loss": 0.2547, "step": 800 }, { "epoch": 0.0027756265719983825, "grad_norm": 1.256645917892456, "learning_rate": 1e-05, "loss": 0.2548, "step": 810 }, { "epoch": 0.002809893566714412, "grad_norm": 1.058158278465271, "learning_rate": 1e-05, "loss": 0.257, "step": 820 }, { "epoch": 0.0028441605614304413, "grad_norm": 1.0647656917572021, "learning_rate": 1e-05, "loss": 0.2479, "step": 830 }, { "epoch": 0.0028784275561464707, "grad_norm": 1.1984691619873047, "learning_rate": 1e-05, "loss": 0.2503, "step": 840 }, { "epoch": 0.0029126945508625, "grad_norm": 1.1380070447921753, "learning_rate": 1e-05, "loss": 0.245, "step": 850 }, { "epoch": 0.0029469615455785295, "grad_norm": 1.2131065130233765, "learning_rate": 1e-05, "loss": 0.242, "step": 860 }, { "epoch": 0.002981228540294559, "grad_norm": 1.1822234392166138, "learning_rate": 1e-05, "loss": 0.2613, "step": 870 }, { "epoch": 0.0030154955350105883, "grad_norm": 1.0591018199920654, "learning_rate": 1e-05, "loss": 0.2654, "step": 880 }, { "epoch": 0.0030497625297266177, "grad_norm": 1.2318428754806519, "learning_rate": 1e-05, "loss": 0.2525, "step": 890 }, { "epoch": 0.003084029524442647, "grad_norm": 1.0146839618682861, "learning_rate": 1e-05, "loss": 0.2609, "step": 900 }, { "epoch": 0.0031182965191586766, "grad_norm": 1.1508561372756958, "learning_rate": 1e-05, "loss": 0.2541, "step": 910 }, { "epoch": 0.003152563513874706, "grad_norm": 1.1494849920272827, "learning_rate": 1e-05, "loss": 0.2461, "step": 920 }, { "epoch": 0.0031868305085907354, "grad_norm": 1.2423807382583618, "learning_rate": 1e-05, "loss": 0.2573, "step": 930 }, { "epoch": 0.0032210975033067648, "grad_norm": 1.2714438438415527, "learning_rate": 1e-05, "loss": 0.2545, "step": 940 }, { "epoch": 0.0032553644980227946, "grad_norm": 1.2088007926940918, "learning_rate": 1e-05, "loss": 0.2773, "step": 950 }, { "epoch": 0.003289631492738824, "grad_norm": 1.0737963914871216, "learning_rate": 1e-05, "loss": 0.2495, "step": 960 }, { "epoch": 0.0033238984874548534, "grad_norm": 1.0942472219467163, "learning_rate": 1e-05, "loss": 0.2401, "step": 970 }, { "epoch": 0.003358165482170883, "grad_norm": 1.1282986402511597, "learning_rate": 1e-05, "loss": 0.2638, "step": 980 }, { "epoch": 0.0033924324768869123, "grad_norm": 1.0762425661087036, "learning_rate": 1e-05, "loss": 0.2619, "step": 990 }, { "epoch": 0.0034266994716029417, "grad_norm": 1.09200119972229, "learning_rate": 1e-05, "loss": 0.2464, "step": 1000 }, { "epoch": 0.0034266994716029417, "eval_cer": 13.80313988357735, "eval_loss": 0.25397512316703796, "eval_normalized_cer": 9.952038369304557, "eval_runtime": 227.5088, "eval_samples_per_second": 2.25, "eval_steps_per_second": 0.035, "step": 1000 }, { "epoch": 0.003460966466318971, "grad_norm": 0.9681844711303711, "learning_rate": 1e-05, "loss": 0.2567, "step": 1010 }, { "epoch": 0.0034952334610350005, "grad_norm": 1.0064711570739746, "learning_rate": 1e-05, "loss": 0.2514, "step": 1020 }, { "epoch": 0.00352950045575103, "grad_norm": 1.190294623374939, "learning_rate": 1e-05, "loss": 0.2654, "step": 1030 }, { "epoch": 0.0035637674504670593, "grad_norm": 1.332492709159851, "learning_rate": 1e-05, "loss": 0.2725, "step": 1040 }, { "epoch": 0.0035980344451830887, "grad_norm": 1.1110397577285767, "learning_rate": 1e-05, "loss": 0.2504, "step": 1050 }, { "epoch": 0.003632301439899118, "grad_norm": 1.2327215671539307, "learning_rate": 1e-05, "loss": 0.2733, "step": 1060 }, { "epoch": 0.0036665684346151475, "grad_norm": 1.1694815158843994, "learning_rate": 1e-05, "loss": 0.2611, "step": 1070 }, { "epoch": 0.003700835429331177, "grad_norm": 1.212570309638977, "learning_rate": 1e-05, "loss": 0.2556, "step": 1080 }, { "epoch": 0.0037351024240472063, "grad_norm": 1.1467297077178955, "learning_rate": 1e-05, "loss": 0.2485, "step": 1090 }, { "epoch": 0.0037693694187632357, "grad_norm": 0.9628469347953796, "learning_rate": 1e-05, "loss": 0.2523, "step": 1100 }, { "epoch": 0.003803636413479265, "grad_norm": 1.1593494415283203, "learning_rate": 1e-05, "loss": 0.2635, "step": 1110 }, { "epoch": 0.0038379034081952946, "grad_norm": 1.1376386880874634, "learning_rate": 1e-05, "loss": 0.2504, "step": 1120 }, { "epoch": 0.003872170402911324, "grad_norm": 1.129338026046753, "learning_rate": 1e-05, "loss": 0.2601, "step": 1130 }, { "epoch": 0.003906437397627353, "grad_norm": 1.0889575481414795, "learning_rate": 1e-05, "loss": 0.2455, "step": 1140 }, { "epoch": 0.003940704392343382, "grad_norm": 1.1437270641326904, "learning_rate": 1e-05, "loss": 0.253, "step": 1150 }, { "epoch": 0.003974971387059412, "grad_norm": 1.0283392667770386, "learning_rate": 1e-05, "loss": 0.2507, "step": 1160 }, { "epoch": 0.004009238381775441, "grad_norm": 1.130747675895691, "learning_rate": 1e-05, "loss": 0.2715, "step": 1170 }, { "epoch": 0.004043505376491471, "grad_norm": 1.3483778238296509, "learning_rate": 1e-05, "loss": 0.2742, "step": 1180 }, { "epoch": 0.0040777723712075, "grad_norm": 1.0879924297332764, "learning_rate": 1e-05, "loss": 0.2641, "step": 1190 }, { "epoch": 0.00411203936592353, "grad_norm": 1.1242927312850952, "learning_rate": 1e-05, "loss": 0.2586, "step": 1200 }, { "epoch": 0.004146306360639559, "grad_norm": 1.0185858011245728, "learning_rate": 1e-05, "loss": 0.2465, "step": 1210 }, { "epoch": 0.004180573355355589, "grad_norm": 0.9555259943008423, "learning_rate": 1e-05, "loss": 0.2528, "step": 1220 }, { "epoch": 0.004214840350071618, "grad_norm": 1.210371971130371, "learning_rate": 1e-05, "loss": 0.2613, "step": 1230 }, { "epoch": 0.0042491073447876474, "grad_norm": 1.1261368989944458, "learning_rate": 1e-05, "loss": 0.2551, "step": 1240 }, { "epoch": 0.004283374339503676, "grad_norm": 1.2142603397369385, "learning_rate": 1e-05, "loss": 0.264, "step": 1250 }, { "epoch": 0.004317641334219706, "grad_norm": 1.057758092880249, "learning_rate": 1e-05, "loss": 0.2587, "step": 1260 }, { "epoch": 0.004351908328935736, "grad_norm": 1.0871245861053467, "learning_rate": 1e-05, "loss": 0.2549, "step": 1270 }, { "epoch": 0.004386175323651765, "grad_norm": 1.1214648485183716, "learning_rate": 1e-05, "loss": 0.2582, "step": 1280 }, { "epoch": 0.004420442318367795, "grad_norm": 1.0265707969665527, "learning_rate": 1e-05, "loss": 0.2123, "step": 1290 }, { "epoch": 0.004454709313083824, "grad_norm": 1.1180216073989868, "learning_rate": 1e-05, "loss": 0.2245, "step": 1300 }, { "epoch": 0.004488976307799854, "grad_norm": 1.028238296508789, "learning_rate": 1e-05, "loss": 0.2118, "step": 1310 }, { "epoch": 0.004523243302515883, "grad_norm": 1.0321682691574097, "learning_rate": 1e-05, "loss": 0.2196, "step": 1320 }, { "epoch": 0.0045575102972319126, "grad_norm": 1.1180269718170166, "learning_rate": 1e-05, "loss": 0.2403, "step": 1330 }, { "epoch": 0.0045917772919479415, "grad_norm": 1.079560399055481, "learning_rate": 1e-05, "loss": 0.2309, "step": 1340 }, { "epoch": 0.004626044286663971, "grad_norm": 1.0062284469604492, "learning_rate": 1e-05, "loss": 0.228, "step": 1350 }, { "epoch": 0.00466031128138, "grad_norm": 1.1098395586013794, "learning_rate": 1e-05, "loss": 0.2435, "step": 1360 }, { "epoch": 0.00469457827609603, "grad_norm": 1.0619688034057617, "learning_rate": 1e-05, "loss": 0.2342, "step": 1370 }, { "epoch": 0.004728845270812059, "grad_norm": 1.1943925619125366, "learning_rate": 1e-05, "loss": 0.2315, "step": 1380 }, { "epoch": 0.004763112265528089, "grad_norm": 1.0958552360534668, "learning_rate": 1e-05, "loss": 0.2379, "step": 1390 }, { "epoch": 0.004797379260244118, "grad_norm": 1.0984197854995728, "learning_rate": 1e-05, "loss": 0.2208, "step": 1400 }, { "epoch": 0.004831646254960148, "grad_norm": 1.0741859674453735, "learning_rate": 1e-05, "loss": 0.2378, "step": 1410 }, { "epoch": 0.004865913249676177, "grad_norm": 1.1457058191299438, "learning_rate": 1e-05, "loss": 0.2516, "step": 1420 }, { "epoch": 0.004900180244392207, "grad_norm": 0.9849014282226562, "learning_rate": 1e-05, "loss": 0.2406, "step": 1430 }, { "epoch": 0.004934447239108236, "grad_norm": 1.1174912452697754, "learning_rate": 1e-05, "loss": 0.2122, "step": 1440 }, { "epoch": 0.0049687142338242654, "grad_norm": 1.0292854309082031, "learning_rate": 1e-05, "loss": 0.2349, "step": 1450 }, { "epoch": 0.005002981228540294, "grad_norm": 1.0343785285949707, "learning_rate": 1e-05, "loss": 0.2158, "step": 1460 }, { "epoch": 0.005037248223256324, "grad_norm": 1.1178008317947388, "learning_rate": 1e-05, "loss": 0.2264, "step": 1470 }, { "epoch": 0.005071515217972353, "grad_norm": 1.0238450765609741, "learning_rate": 1e-05, "loss": 0.2287, "step": 1480 }, { "epoch": 0.005105782212688383, "grad_norm": 1.1728886365890503, "learning_rate": 1e-05, "loss": 0.2373, "step": 1490 }, { "epoch": 0.005140049207404412, "grad_norm": 1.227034091949463, "learning_rate": 1e-05, "loss": 0.222, "step": 1500 }, { "epoch": 0.005140049207404412, "eval_cer": 13.150467454577527, "eval_loss": 0.25801682472229004, "eval_normalized_cer": 9.452438049560353, "eval_runtime": 227.9378, "eval_samples_per_second": 2.246, "eval_steps_per_second": 0.035, "step": 1500 }, { "epoch": 0.005174316202120442, "grad_norm": 1.0703920125961304, "learning_rate": 1e-05, "loss": 0.2156, "step": 1510 }, { "epoch": 0.005208583196836471, "grad_norm": 1.1343841552734375, "learning_rate": 1e-05, "loss": 0.2126, "step": 1520 }, { "epoch": 0.005242850191552501, "grad_norm": 1.1743741035461426, "learning_rate": 1e-05, "loss": 0.2491, "step": 1530 }, { "epoch": 0.00527711718626853, "grad_norm": 1.1476744413375854, "learning_rate": 1e-05, "loss": 0.236, "step": 1540 }, { "epoch": 0.0053113841809845595, "grad_norm": 1.0899590253829956, "learning_rate": 1e-05, "loss": 0.2361, "step": 1550 }, { "epoch": 0.0053456511757005885, "grad_norm": 1.0281250476837158, "learning_rate": 1e-05, "loss": 0.2226, "step": 1560 }, { "epoch": 0.005379918170416618, "grad_norm": 0.9932867884635925, "learning_rate": 1e-05, "loss": 0.2301, "step": 1570 }, { "epoch": 0.005414185165132647, "grad_norm": 1.1992309093475342, "learning_rate": 1e-05, "loss": 0.2179, "step": 1580 }, { "epoch": 0.005448452159848677, "grad_norm": 1.0017774105072021, "learning_rate": 1e-05, "loss": 0.2244, "step": 1590 }, { "epoch": 0.005482719154564706, "grad_norm": 1.0827686786651611, "learning_rate": 1e-05, "loss": 0.2313, "step": 1600 }, { "epoch": 0.005516986149280736, "grad_norm": 1.2260409593582153, "learning_rate": 1e-05, "loss": 0.229, "step": 1610 }, { "epoch": 0.005551253143996765, "grad_norm": 1.2530804872512817, "learning_rate": 1e-05, "loss": 0.2437, "step": 1620 }, { "epoch": 0.005585520138712795, "grad_norm": 1.068452000617981, "learning_rate": 1e-05, "loss": 0.2138, "step": 1630 }, { "epoch": 0.005619787133428824, "grad_norm": 1.3108712434768677, "learning_rate": 1e-05, "loss": 0.2284, "step": 1640 }, { "epoch": 0.005654054128144854, "grad_norm": 1.0919209718704224, "learning_rate": 1e-05, "loss": 0.213, "step": 1650 }, { "epoch": 0.005688321122860883, "grad_norm": 1.1530914306640625, "learning_rate": 1e-05, "loss": 0.2292, "step": 1660 }, { "epoch": 0.005722588117576912, "grad_norm": 1.084028959274292, "learning_rate": 1e-05, "loss": 0.2393, "step": 1670 }, { "epoch": 0.005756855112292941, "grad_norm": 1.247847557067871, "learning_rate": 1e-05, "loss": 0.2452, "step": 1680 }, { "epoch": 0.005791122107008971, "grad_norm": 1.03806734085083, "learning_rate": 1e-05, "loss": 0.2317, "step": 1690 }, { "epoch": 0.005825389101725, "grad_norm": 1.1643092632293701, "learning_rate": 1e-05, "loss": 0.2348, "step": 1700 }, { "epoch": 0.00585965609644103, "grad_norm": 1.1066207885742188, "learning_rate": 1e-05, "loss": 0.2348, "step": 1710 }, { "epoch": 0.005893923091157059, "grad_norm": 1.1813760995864868, "learning_rate": 1e-05, "loss": 0.2295, "step": 1720 }, { "epoch": 0.005928190085873089, "grad_norm": 1.1444518566131592, "learning_rate": 1e-05, "loss": 0.2101, "step": 1730 }, { "epoch": 0.005962457080589118, "grad_norm": 1.1485129594802856, "learning_rate": 1e-05, "loss": 0.2397, "step": 1740 }, { "epoch": 0.005996724075305148, "grad_norm": 1.1813607215881348, "learning_rate": 1e-05, "loss": 0.231, "step": 1750 }, { "epoch": 0.006030991070021177, "grad_norm": 1.4075005054473877, "learning_rate": 1e-05, "loss": 0.2306, "step": 1760 }, { "epoch": 0.0060652580647372065, "grad_norm": 1.2183804512023926, "learning_rate": 1e-05, "loss": 0.2227, "step": 1770 }, { "epoch": 0.0060995250594532355, "grad_norm": 1.3654927015304565, "learning_rate": 1e-05, "loss": 0.2341, "step": 1780 }, { "epoch": 0.006133792054169265, "grad_norm": 1.2806668281555176, "learning_rate": 1e-05, "loss": 0.2226, "step": 1790 }, { "epoch": 0.006168059048885294, "grad_norm": 1.2949618101119995, "learning_rate": 1e-05, "loss": 0.2698, "step": 1800 }, { "epoch": 0.006202326043601324, "grad_norm": 1.3080159425735474, "learning_rate": 1e-05, "loss": 0.2691, "step": 1810 }, { "epoch": 0.006236593038317353, "grad_norm": 1.1831908226013184, "learning_rate": 1e-05, "loss": 0.2644, "step": 1820 }, { "epoch": 0.006270860033033383, "grad_norm": 1.1216965913772583, "learning_rate": 1e-05, "loss": 0.2582, "step": 1830 }, { "epoch": 0.006305127027749412, "grad_norm": 1.1943161487579346, "learning_rate": 1e-05, "loss": 0.2769, "step": 1840 }, { "epoch": 0.006339394022465442, "grad_norm": 1.0856040716171265, "learning_rate": 1e-05, "loss": 0.2526, "step": 1850 }, { "epoch": 0.006373661017181471, "grad_norm": 1.1100040674209595, "learning_rate": 1e-05, "loss": 0.2576, "step": 1860 }, { "epoch": 0.006407928011897501, "grad_norm": 1.3369051218032837, "learning_rate": 1e-05, "loss": 0.2684, "step": 1870 }, { "epoch": 0.0064421950066135296, "grad_norm": 1.158797264099121, "learning_rate": 1e-05, "loss": 0.2474, "step": 1880 }, { "epoch": 0.006476462001329559, "grad_norm": 1.1821873188018799, "learning_rate": 1e-05, "loss": 0.272, "step": 1890 }, { "epoch": 0.006510728996045589, "grad_norm": 1.0739686489105225, "learning_rate": 1e-05, "loss": 0.2798, "step": 1900 }, { "epoch": 0.006544995990761618, "grad_norm": 1.0639653205871582, "learning_rate": 1e-05, "loss": 0.2682, "step": 1910 }, { "epoch": 0.006579262985477648, "grad_norm": 1.2149512767791748, "learning_rate": 1e-05, "loss": 0.2586, "step": 1920 }, { "epoch": 0.006613529980193677, "grad_norm": 1.1057014465332031, "learning_rate": 1e-05, "loss": 0.2719, "step": 1930 }, { "epoch": 0.006647796974909707, "grad_norm": 1.0929185152053833, "learning_rate": 1e-05, "loss": 0.2703, "step": 1940 }, { "epoch": 0.006682063969625736, "grad_norm": 1.0322917699813843, "learning_rate": 1e-05, "loss": 0.2477, "step": 1950 }, { "epoch": 0.006716330964341766, "grad_norm": 1.2460272312164307, "learning_rate": 1e-05, "loss": 0.2816, "step": 1960 }, { "epoch": 0.006750597959057795, "grad_norm": 1.2049859762191772, "learning_rate": 1e-05, "loss": 0.2648, "step": 1970 }, { "epoch": 0.0067848649537738245, "grad_norm": 1.1182633638381958, "learning_rate": 1e-05, "loss": 0.2549, "step": 1980 }, { "epoch": 0.0068191319484898535, "grad_norm": 1.1514990329742432, "learning_rate": 1e-05, "loss": 0.2695, "step": 1990 }, { "epoch": 0.006853398943205883, "grad_norm": 1.0150858163833618, "learning_rate": 1e-05, "loss": 0.2532, "step": 2000 }, { "epoch": 0.006853398943205883, "eval_cer": 13.565002645969306, "eval_loss": 0.2523655593395233, "eval_normalized_cer": 9.942046362909672, "eval_runtime": 226.5571, "eval_samples_per_second": 2.26, "eval_steps_per_second": 0.035, "step": 2000 } ], "logging_steps": 10, "max_steps": 291826, "num_input_tokens_seen": 0, "num_train_epochs": 9223372036854775807, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 4.0551806140416e+20, "train_batch_size": 128, "trial_name": null, "trial_params": null }