{
  "best_metric": 8.912869704236611,
  "best_model_checkpoint": "kotoba_v2_enc_logs_epoch2_2/checkpoint-4000",
  "epoch": 0.04626044286663971,
  "eval_steps": 500,
  "global_step": 13500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 3.4266994716029415e-05, "grad_norm": 1.0561553239822388, "learning_rate": 1e-05, "loss": 0.2361, "step": 10 },
    { "epoch": 6.853398943205883e-05, "grad_norm": 1.1626238822937012, "learning_rate": 1e-05, "loss": 0.2265, "step": 20 },
    { "epoch": 0.00010280098414808825, "grad_norm": 0.9845689535140991, "learning_rate": 1e-05, "loss": 0.2279, "step": 30 },
    { "epoch": 0.00013706797886411766, "grad_norm": 1.142356276512146, "learning_rate": 1e-05, "loss": 0.2382, "step": 40 },
    { "epoch": 0.00017133497358014707, "grad_norm": 1.0053240060806274, "learning_rate": 1e-05, "loss": 0.2473, "step": 50 },
    { "epoch": 0.0002056019682961765, "grad_norm": 1.1098105907440186, "learning_rate": 1e-05, "loss": 0.2438, "step": 60 },
    { "epoch": 0.0002398689630122059, "grad_norm": 1.191983699798584, "learning_rate": 1e-05, "loss": 0.2293, "step": 70 },
    { "epoch": 0.0002741359577282353, "grad_norm": 1.1295104026794434, "learning_rate": 1e-05, "loss": 0.2362, "step": 80 },
    { "epoch": 0.0003084029524442647, "grad_norm": 1.037972092628479, "learning_rate": 1e-05, "loss": 0.2455, "step": 90 },
    { "epoch": 0.00034266994716029413, "grad_norm": 1.1975648403167725, "learning_rate": 1e-05, "loss": 0.2459, "step": 100 },
    { "epoch": 0.00037693694187632354, "grad_norm": 1.0676342248916626, "learning_rate": 1e-05, "loss": 0.2271, "step": 110 },
    { "epoch": 0.000411203936592353, "grad_norm": 1.0749495029449463, "learning_rate": 1e-05, "loss": 0.2417, "step": 120 },
    { "epoch": 0.0004454709313083824, "grad_norm": 1.094260811805725, "learning_rate": 1e-05, "loss": 0.2354, "step": 130 },
    { "epoch": 0.0004797379260244118, "grad_norm": 1.0395853519439697, "learning_rate": 1e-05, "loss": 0.2381, "step": 140 },
    { "epoch": 0.0005140049207404412, "grad_norm": 1.2008885145187378, "learning_rate": 1e-05, "loss": 0.2354, "step": 150 },
    { "epoch": 0.0005482719154564706, "grad_norm": 1.0647832155227661, "learning_rate": 1e-05, "loss": 0.2321, "step": 160 },
    { "epoch": 0.0005825389101725, "grad_norm": 1.327071189880371, "learning_rate": 1e-05, "loss": 0.238, "step": 170 },
    { "epoch": 0.0006168059048885295, "grad_norm": 1.1184055805206299, "learning_rate": 1e-05, "loss": 0.2242, "step": 180 },
    { "epoch": 0.0006510728996045589, "grad_norm": 1.2512784004211426, "learning_rate": 1e-05, "loss": 0.2437, "step": 190 },
    { "epoch": 0.0006853398943205883, "grad_norm": 1.0614465475082397, "learning_rate": 1e-05, "loss": 0.2382, "step": 200 },
    { "epoch": 0.0007196068890366177, "grad_norm": 1.0607149600982666, "learning_rate": 1e-05, "loss": 0.2381, "step": 210 },
    { "epoch": 0.0007538738837526471, "grad_norm": 1.0422028303146362, "learning_rate": 1e-05, "loss": 0.2294, "step": 220 },
    { "epoch": 0.0007881408784686765, "grad_norm": 1.0162984132766724, "learning_rate": 1e-05, "loss": 0.2275, "step": 230 },
    { "epoch": 0.000822407873184706, "grad_norm": 1.1085543632507324, "learning_rate": 1e-05, "loss": 0.2161, "step": 240 },
    { "epoch": 0.0008566748679007354, "grad_norm": 1.1854636669158936, "learning_rate": 1e-05, "loss": 0.2382, "step": 250 },
    { "epoch": 0.0008909418626167648, "grad_norm": 1.40137779712677, "learning_rate": 1e-05, "loss": 0.2579, "step": 260 },
    { "epoch": 0.0009252088573327942, "grad_norm": 1.0814112424850464, "learning_rate": 1e-05, "loss": 0.2612, "step": 270 },
    { "epoch": 0.0009594758520488236, "grad_norm": 1.083736538887024, "learning_rate": 1e-05, "loss": 0.2711, "step": 280 },
    { "epoch": 0.000993742846764853, "grad_norm": 1.0861411094665527, "learning_rate": 1e-05, "loss": 0.2642, "step": 290 },
    { "epoch": 0.0010280098414808825, "grad_norm": 1.1141265630722046, "learning_rate": 1e-05, "loss": 0.2585, "step": 300 },
    { "epoch": 0.0010622768361969119, "grad_norm": 1.326241374015808, "learning_rate": 1e-05, "loss": 0.2858, "step": 310 },
    { "epoch": 0.0010965438309129413, "grad_norm": 1.393750786781311, "learning_rate": 1e-05, "loss": 0.2635, "step": 320 },
    { "epoch": 0.0011308108256289707, "grad_norm": 1.0851459503173828, "learning_rate": 1e-05, "loss": 0.2565, "step": 330 },
    { "epoch": 0.001165077820345, "grad_norm": 1.2323757410049438, "learning_rate": 1e-05, "loss": 0.2465, "step": 340 },
    { "epoch": 0.0011993448150610295, "grad_norm": 1.376953125, "learning_rate": 1e-05, "loss": 0.2671, "step": 350 },
    { "epoch": 0.001233611809777059, "grad_norm": 1.084592580795288, "learning_rate": 1e-05, "loss": 0.2643, "step": 360 },
    { "epoch": 0.0012678788044930883, "grad_norm": 1.2907005548477173, "learning_rate": 1e-05, "loss": 0.2584, "step": 370 },
    { "epoch": 0.0013021457992091177, "grad_norm": 1.0698130130767822, "learning_rate": 1e-05, "loss": 0.2526, "step": 380 },
    { "epoch": 0.0013364127939251471, "grad_norm": 1.1399807929992676, "learning_rate": 1e-05, "loss": 0.2759, "step": 390 },
    { "epoch": 0.0013706797886411765, "grad_norm": 1.1480791568756104, "learning_rate": 1e-05, "loss": 0.2499, "step": 400 },
    { "epoch": 0.001404946783357206, "grad_norm": 1.3095237016677856, "learning_rate": 1e-05, "loss": 0.2536, "step": 410 },
    { "epoch": 0.0014392137780732353, "grad_norm": 1.068246841430664, "learning_rate": 1e-05, "loss": 0.2604, "step": 420 },
    { "epoch": 0.0014734807727892648, "grad_norm": 1.2310419082641602, "learning_rate": 1e-05, "loss": 0.2632, "step": 430 },
    { "epoch": 0.0015077477675052942, "grad_norm": 1.161867380142212, "learning_rate": 1e-05, "loss": 0.2584, "step": 440 },
    { "epoch": 0.0015420147622213236, "grad_norm": 1.1461217403411865, "learning_rate": 1e-05, "loss": 0.2592, "step": 450 },
    { "epoch": 0.001576281756937353, "grad_norm": 1.3006030321121216, "learning_rate": 1e-05, "loss": 0.2607, "step": 460 },
    { "epoch": 0.0016105487516533824, "grad_norm": 1.1223125457763672, "learning_rate": 1e-05, "loss": 0.2433, "step": 470 },
    { "epoch": 0.001644815746369412, "grad_norm": 1.2909380197525024, "learning_rate": 1e-05, "loss": 0.2693, "step": 480 },
    { "epoch": 0.0016790827410854414, "grad_norm": 1.2270597219467163, "learning_rate": 1e-05, "loss": 0.2661, "step": 490 },
    { "epoch": 0.0017133497358014708, "grad_norm": 1.1439770460128784, "learning_rate": 1e-05, "loss": 0.2517, "step": 500 },
    { "epoch": 0.0017133497358014708, "eval_cer": 13.0358087846181, "eval_loss": 0.25224336981773376, "eval_normalized_cer": 9.4224620303757, "eval_runtime": 227.2174, "eval_samples_per_second": 2.253, "eval_steps_per_second": 0.035, "step": 500 },
    { "epoch": 0.0017476167305175002, "grad_norm": 1.1377454996109009, "learning_rate": 1e-05, "loss": 0.2579, "step": 510 },
    { "epoch": 0.0017818837252335296, "grad_norm": 1.2096498012542725, "learning_rate": 1e-05, "loss": 0.2727, "step": 520 },
    { "epoch": 0.001816150719949559, "grad_norm": 1.187213659286499, "learning_rate": 1e-05, "loss": 0.2562, "step": 530 },
    { "epoch": 0.0018504177146655885, "grad_norm": 0.969393253326416, "learning_rate": 1e-05, "loss": 0.2378, "step": 540 },
    { "epoch": 0.0018846847093816179, "grad_norm": 0.9745528697967529, "learning_rate": 1e-05, "loss": 0.2774, "step": 550 },
    { "epoch": 0.0019189517040976473, "grad_norm": 1.0725352764129639, "learning_rate": 1e-05, "loss": 0.2541, "step": 560 },
    { "epoch": 0.0019532186988136767, "grad_norm": 1.217871904373169, "learning_rate": 1e-05, "loss": 0.2395, "step": 570 },
    { "epoch": 0.001987485693529706, "grad_norm": 1.3582627773284912, "learning_rate": 1e-05, "loss": 0.2594, "step": 580 },
    { "epoch": 0.0020217526882457355, "grad_norm": 1.2415379285812378, "learning_rate": 1e-05, "loss": 0.2582, "step": 590 },
    { "epoch": 0.002056019682961765, "grad_norm": 0.9810131192207336, "learning_rate": 1e-05, "loss": 0.2284, "step": 600 },
    { "epoch": 0.0020902866776777943, "grad_norm": 0.9806564450263977, "learning_rate": 1e-05, "loss": 0.2688, "step": 610 },
    { "epoch": 0.0021245536723938237, "grad_norm": 1.2755467891693115, "learning_rate": 1e-05, "loss": 0.2591, "step": 620 },
    { "epoch": 0.002158820667109853, "grad_norm": 0.9300326704978943, "learning_rate": 1e-05, "loss": 0.2444, "step": 630 },
    { "epoch": 0.0021930876618258825, "grad_norm": 1.1276524066925049, "learning_rate": 1e-05, "loss": 0.236, "step": 640 },
    { "epoch": 0.002227354656541912, "grad_norm": 1.1786876916885376, "learning_rate": 1e-05, "loss": 0.2443, "step": 650 },
    { "epoch": 0.0022616216512579414, "grad_norm": 1.1702712774276733, "learning_rate": 1e-05, "loss": 0.2627, "step": 660 },
    { "epoch": 0.0022958886459739708, "grad_norm": 1.2837899923324585, "learning_rate": 1e-05, "loss": 0.2378, "step": 670 },
    { "epoch": 0.00233015564069, "grad_norm": 1.0623608827590942, "learning_rate": 1e-05, "loss": 0.2491, "step": 680 },
    { "epoch": 0.0023644226354060296, "grad_norm": 1.1288243532180786, "learning_rate": 1e-05, "loss": 0.2773, "step": 690 },
    { "epoch": 0.002398689630122059, "grad_norm": 1.0192692279815674, "learning_rate": 1e-05, "loss": 0.2492, "step": 700 },
    { "epoch": 0.0024329566248380884, "grad_norm": 1.2274680137634277, "learning_rate": 1e-05, "loss": 0.2345, "step": 710 },
    { "epoch": 0.002467223619554118, "grad_norm": 1.240645170211792, "learning_rate": 1e-05, "loss": 0.2624, "step": 720 },
    { "epoch": 0.002501490614270147, "grad_norm": 1.0681366920471191, "learning_rate": 1e-05, "loss": 0.2553, "step": 730 },
    { "epoch": 0.0025357576089861766, "grad_norm": 1.0161867141723633, "learning_rate": 1e-05, "loss": 0.2547, "step": 740 },
    { "epoch": 0.002570024603702206, "grad_norm": 1.2384017705917358, "learning_rate": 1e-05, "loss": 0.2449, "step": 750 },
    { "epoch": 0.0026042915984182354, "grad_norm": 1.1739261150360107, "learning_rate": 1e-05, "loss": 0.2523, "step": 760 },
    { "epoch": 0.002638558593134265, "grad_norm": 1.0396535396575928, "learning_rate": 1e-05, "loss": 0.2535, "step": 770 },
    { "epoch": 0.0026728255878502943, "grad_norm": 1.14767324924469, "learning_rate": 1e-05, "loss": 0.2594, "step": 780 },
    { "epoch": 0.0027070925825663237, "grad_norm": 1.1783303022384644, "learning_rate": 1e-05, "loss": 0.2546, "step": 790 },
    { "epoch": 0.002741359577282353, "grad_norm": 1.1065645217895508, "learning_rate": 1e-05, "loss": 0.2547, "step": 800 },
    { "epoch": 0.0027756265719983825, "grad_norm": 1.256645917892456, "learning_rate": 1e-05, "loss": 0.2548, "step": 810 },
    { "epoch": 0.002809893566714412, "grad_norm": 1.058158278465271, "learning_rate": 1e-05, "loss": 0.257, "step": 820 },
    { "epoch": 0.0028441605614304413, "grad_norm": 1.0647656917572021, "learning_rate": 1e-05, "loss": 0.2479, "step": 830 },
    { "epoch": 0.0028784275561464707, "grad_norm": 1.1984691619873047, "learning_rate": 1e-05, "loss": 0.2503, "step": 840 },
    { "epoch": 0.0029126945508625, "grad_norm": 1.1380070447921753, "learning_rate": 1e-05, "loss": 0.245, "step": 850 },
    { "epoch": 0.0029469615455785295, "grad_norm": 1.2131065130233765, "learning_rate": 1e-05, "loss": 0.242, "step": 860 },
    { "epoch": 0.002981228540294559, "grad_norm": 1.1822234392166138, "learning_rate": 1e-05, "loss": 0.2613, "step": 870 },
    { "epoch": 0.0030154955350105883, "grad_norm": 1.0591018199920654, "learning_rate": 1e-05, "loss": 0.2654, "step": 880 },
    { "epoch": 0.0030497625297266177, "grad_norm": 1.2318428754806519, "learning_rate": 1e-05, "loss": 0.2525, "step": 890 },
    { "epoch": 0.003084029524442647, "grad_norm": 1.0146839618682861, "learning_rate": 1e-05, "loss": 0.2609, "step": 900 },
    { "epoch": 0.0031182965191586766, "grad_norm": 1.1508561372756958, "learning_rate": 1e-05, "loss": 0.2541, "step": 910 },
    { "epoch": 0.003152563513874706, "grad_norm": 1.1494849920272827, "learning_rate": 1e-05, "loss": 0.2461, "step": 920 },
    { "epoch": 0.0031868305085907354, "grad_norm": 1.2423807382583618, "learning_rate": 1e-05, "loss": 0.2573, "step": 930 },
    { "epoch": 0.0032210975033067648, "grad_norm": 1.2714438438415527, "learning_rate": 1e-05, "loss": 0.2545, "step": 940 },
    { "epoch": 0.0032553644980227946, "grad_norm": 1.2088007926940918, "learning_rate": 1e-05, "loss": 0.2773, "step": 950 },
    { "epoch": 0.003289631492738824, "grad_norm": 1.0737963914871216, "learning_rate": 1e-05, "loss": 0.2495, "step": 960 },
    { "epoch": 0.0033238984874548534, "grad_norm": 1.0942472219467163, "learning_rate": 1e-05, "loss": 0.2401, "step": 970 },
    { "epoch": 0.003358165482170883, "grad_norm": 1.1282986402511597, "learning_rate": 1e-05, "loss": 0.2638, "step": 980 },
    { "epoch": 0.0033924324768869123, "grad_norm": 1.0762425661087036, "learning_rate": 1e-05, "loss": 0.2619, "step": 990 },
    { "epoch": 0.0034266994716029417, "grad_norm": 1.09200119972229, "learning_rate": 1e-05, "loss": 0.2464, "step": 1000 },
    { "epoch": 0.0034266994716029417, "eval_cer": 13.80313988357735, "eval_loss": 0.25397512316703796, "eval_normalized_cer": 9.952038369304557, "eval_runtime": 227.5088, "eval_samples_per_second": 2.25, "eval_steps_per_second": 0.035, "step": 1000 },
    { "epoch": 0.003460966466318971, "grad_norm": 0.9681844711303711, "learning_rate": 1e-05, "loss": 0.2567, "step": 1010 },
    { "epoch": 0.0034952334610350005, "grad_norm": 1.0064711570739746, "learning_rate": 1e-05, "loss": 0.2514, "step": 1020 },
    { "epoch": 0.00352950045575103, "grad_norm": 1.190294623374939, "learning_rate": 1e-05, "loss": 0.2654, "step": 1030 },
    { "epoch": 0.0035637674504670593, "grad_norm": 1.332492709159851, "learning_rate": 1e-05, "loss": 0.2725, "step": 1040 },
    { "epoch": 0.0035980344451830887, "grad_norm": 1.1110397577285767, "learning_rate": 1e-05, "loss": 0.2504, "step": 1050 },
    { "epoch": 0.003632301439899118, "grad_norm": 1.2327215671539307, "learning_rate": 1e-05, "loss": 0.2733, "step": 1060 },
    { "epoch": 0.0036665684346151475, "grad_norm": 1.1694815158843994, "learning_rate": 1e-05, "loss": 0.2611, "step": 1070 },
    { "epoch": 0.003700835429331177, "grad_norm": 1.212570309638977, "learning_rate": 1e-05, "loss": 0.2556, "step": 1080 },
    { "epoch": 0.0037351024240472063, "grad_norm": 1.1467297077178955, "learning_rate": 1e-05, "loss": 0.2485, "step": 1090 },
    { "epoch": 0.0037693694187632357, "grad_norm": 0.9628469347953796, "learning_rate": 1e-05, "loss": 0.2523, "step": 1100 },
    { "epoch": 0.003803636413479265, "grad_norm": 1.1593494415283203, "learning_rate": 1e-05, "loss": 0.2635, "step": 1110 },
    { "epoch": 0.0038379034081952946, "grad_norm": 1.1376386880874634, "learning_rate": 1e-05, "loss": 0.2504, "step": 1120 },
    { "epoch": 0.003872170402911324, "grad_norm": 1.129338026046753, "learning_rate": 1e-05, "loss": 0.2601, "step": 1130 },
    { "epoch": 0.003906437397627353, "grad_norm": 1.0889575481414795, "learning_rate": 1e-05, "loss": 0.2455, "step": 1140 },
    { "epoch": 0.003940704392343382, "grad_norm": 1.1437270641326904, "learning_rate": 1e-05, "loss": 0.253, "step": 1150 },
    { "epoch": 0.003974971387059412, "grad_norm": 1.0283392667770386, "learning_rate": 1e-05, "loss": 0.2507, "step": 1160 },
    { "epoch": 0.004009238381775441, "grad_norm": 1.130747675895691, "learning_rate": 1e-05, "loss": 0.2715, "step": 1170 },
    { "epoch": 0.004043505376491471, "grad_norm": 1.3483778238296509, "learning_rate": 1e-05, "loss": 0.2742, "step": 1180 },
    { "epoch": 0.0040777723712075, "grad_norm": 1.0879924297332764, "learning_rate": 1e-05, "loss": 0.2641, "step": 1190 },
    { "epoch": 0.00411203936592353, "grad_norm": 1.1242927312850952, "learning_rate": 1e-05, "loss": 0.2586, "step": 1200 },
    { "epoch": 0.004146306360639559, "grad_norm": 1.0185858011245728, "learning_rate": 1e-05, "loss": 0.2465, "step": 1210 },
    { "epoch": 0.004180573355355589, "grad_norm": 0.9555259943008423, "learning_rate": 1e-05, "loss": 0.2528, "step": 1220 },
    { "epoch": 0.004214840350071618, "grad_norm": 1.210371971130371, "learning_rate": 1e-05, "loss": 0.2613, "step": 1230 },
    { "epoch": 0.0042491073447876474, "grad_norm": 1.1261368989944458, "learning_rate": 1e-05, "loss": 0.2551, "step": 1240 },
    { "epoch": 0.004283374339503676, "grad_norm": 1.2142603397369385, "learning_rate": 1e-05, "loss": 0.264, "step": 1250 },
    { "epoch": 0.004317641334219706, "grad_norm": 1.057758092880249, "learning_rate": 1e-05, "loss": 0.2587, "step": 1260 },
    { "epoch": 0.004351908328935736, "grad_norm": 1.0871245861053467, "learning_rate": 1e-05, "loss": 0.2549, "step": 1270 },
    { "epoch": 0.004386175323651765, "grad_norm": 1.1214648485183716, "learning_rate": 1e-05, "loss": 0.2582, "step": 1280 },
    { "epoch": 0.004420442318367795, "grad_norm": 1.0265707969665527, "learning_rate": 1e-05, "loss": 0.2123, "step": 1290 },
    { "epoch": 0.004454709313083824, "grad_norm": 1.1180216073989868, "learning_rate": 1e-05, "loss": 0.2245, "step": 1300 },
    { "epoch": 0.004488976307799854, "grad_norm": 1.028238296508789, "learning_rate": 1e-05, "loss": 0.2118, "step": 1310 },
    { "epoch": 0.004523243302515883, "grad_norm": 1.0321682691574097, "learning_rate": 1e-05, "loss": 0.2196, "step": 1320 },
    { "epoch": 0.0045575102972319126, "grad_norm": 1.1180269718170166, "learning_rate": 1e-05, "loss": 0.2403, "step": 1330 },
    { "epoch": 0.0045917772919479415, "grad_norm": 1.079560399055481, "learning_rate": 1e-05, "loss": 0.2309, "step": 1340 },
    { "epoch": 0.004626044286663971, "grad_norm": 1.0062284469604492, "learning_rate": 1e-05, "loss": 0.228, "step": 1350 },
    { "epoch": 0.00466031128138, "grad_norm": 1.1098395586013794, "learning_rate": 1e-05, "loss": 0.2435, "step": 1360 },
    { "epoch": 0.00469457827609603, "grad_norm": 1.0619688034057617, "learning_rate": 1e-05, "loss": 0.2342, "step": 1370 },
    { "epoch": 0.004728845270812059, "grad_norm": 1.1943925619125366, "learning_rate": 1e-05, "loss": 0.2315, "step": 1380 },
    { "epoch": 0.004763112265528089, "grad_norm": 1.0958552360534668, "learning_rate": 1e-05, "loss": 0.2379, "step": 1390 },
    { "epoch": 0.004797379260244118, "grad_norm": 1.0984197854995728, "learning_rate": 1e-05, "loss": 0.2208, "step": 1400 },
    { "epoch": 0.004831646254960148, "grad_norm": 1.0741859674453735, "learning_rate": 1e-05, "loss": 0.2378, "step": 1410 },
    { "epoch": 0.004865913249676177, "grad_norm": 1.1457058191299438, "learning_rate": 1e-05, "loss": 0.2516, "step": 1420 },
    { "epoch": 0.004900180244392207, "grad_norm": 0.9849014282226562, "learning_rate": 1e-05, "loss": 0.2406, "step": 1430 },
    { "epoch": 0.004934447239108236, "grad_norm": 1.1174912452697754, "learning_rate": 1e-05, "loss": 0.2122, "step": 1440 },
    { "epoch": 0.0049687142338242654, "grad_norm": 1.0292854309082031, "learning_rate": 1e-05, "loss": 0.2349, "step": 1450 },
    { "epoch": 0.005002981228540294, "grad_norm": 1.0343785285949707, "learning_rate": 1e-05, "loss": 0.2158, "step": 1460 },
    { "epoch": 0.005037248223256324, "grad_norm": 1.1178008317947388, "learning_rate": 1e-05, "loss": 0.2264, "step": 1470 },
    { "epoch": 0.005071515217972353, "grad_norm": 1.0238450765609741, "learning_rate": 1e-05, "loss": 0.2287, "step": 1480 },
    { "epoch": 0.005105782212688383, "grad_norm": 1.1728886365890503, "learning_rate": 1e-05, "loss": 0.2373, "step": 1490 },
    { "epoch": 0.005140049207404412, "grad_norm": 1.227034091949463, "learning_rate": 1e-05, "loss": 0.222, "step": 1500 },
    { "epoch": 0.005140049207404412, "eval_cer": 13.150467454577527, "eval_loss": 0.25801682472229004, "eval_normalized_cer": 9.452438049560353, "eval_runtime": 227.9378, "eval_samples_per_second": 2.246, "eval_steps_per_second": 0.035, "step": 1500 },
    { "epoch": 0.005174316202120442, "grad_norm": 1.0703920125961304, "learning_rate": 1e-05, "loss": 0.2156, "step": 1510 },
    { "epoch": 0.005208583196836471, "grad_norm": 1.1343841552734375, "learning_rate": 1e-05, "loss": 0.2126, "step": 1520 },
    { "epoch": 0.005242850191552501, "grad_norm": 1.1743741035461426, "learning_rate": 1e-05, "loss": 0.2491, "step": 1530 },
    { "epoch": 0.00527711718626853, "grad_norm": 1.1476744413375854, "learning_rate": 1e-05, "loss": 0.236, "step": 1540 },
    { "epoch": 0.0053113841809845595, "grad_norm": 1.0899590253829956, "learning_rate": 1e-05, "loss": 0.2361, "step": 1550 },
    { "epoch": 0.0053456511757005885, "grad_norm": 1.0281250476837158, "learning_rate": 1e-05, "loss": 0.2226, "step": 1560 },
    { "epoch": 0.005379918170416618, "grad_norm": 0.9932867884635925, "learning_rate": 1e-05, "loss": 0.2301, "step": 1570 },
    { "epoch": 0.005414185165132647, "grad_norm": 1.1992309093475342, "learning_rate": 1e-05, "loss": 0.2179, "step": 1580 },
    { "epoch": 0.005448452159848677, "grad_norm": 1.0017774105072021, "learning_rate": 1e-05, "loss": 0.2244, "step": 1590 },
    { "epoch": 0.005482719154564706, "grad_norm": 1.0827686786651611, "learning_rate": 1e-05, "loss": 0.2313, "step": 1600 },
    { "epoch": 0.005516986149280736, "grad_norm": 1.2260409593582153, "learning_rate": 1e-05, "loss": 0.229, "step": 1610 },
    { "epoch": 0.005551253143996765, "grad_norm": 1.2530804872512817, "learning_rate": 1e-05, "loss": 0.2437, "step": 1620 },
    { "epoch": 0.005585520138712795, "grad_norm": 1.068452000617981, "learning_rate": 1e-05, "loss": 0.2138, "step": 1630 },
    { "epoch": 0.005619787133428824, "grad_norm": 1.3108712434768677, "learning_rate": 1e-05, "loss": 0.2284, "step": 1640 },
    { "epoch": 0.005654054128144854, "grad_norm": 1.0919209718704224, "learning_rate": 1e-05, "loss": 0.213, "step": 1650 },
    { "epoch": 0.005688321122860883, "grad_norm": 1.1530914306640625, "learning_rate": 1e-05, "loss": 0.2292, "step": 1660 },
    { "epoch": 0.005722588117576912, "grad_norm": 1.084028959274292, "learning_rate": 1e-05, "loss": 0.2393, "step": 1670 },
    { "epoch": 0.005756855112292941, "grad_norm": 1.247847557067871, "learning_rate": 1e-05, "loss": 0.2452, "step": 1680 },
    { "epoch": 0.005791122107008971, "grad_norm": 1.03806734085083, "learning_rate": 1e-05, "loss": 0.2317, "step": 1690 },
    { "epoch": 0.005825389101725, "grad_norm": 1.1643092632293701, "learning_rate": 1e-05, "loss": 0.2348, "step": 1700 },
    { "epoch": 0.00585965609644103, "grad_norm": 1.1066207885742188, "learning_rate": 1e-05, "loss": 0.2348, "step": 1710 },
    { "epoch": 0.005893923091157059, "grad_norm": 1.1813760995864868, "learning_rate": 1e-05, "loss": 0.2295, "step": 1720 },
    { "epoch": 0.005928190085873089, "grad_norm": 1.1444518566131592, "learning_rate": 1e-05, "loss": 0.2101, "step": 1730 },
    { "epoch": 0.005962457080589118, "grad_norm": 1.1485129594802856, "learning_rate": 1e-05, "loss": 0.2397, "step": 1740 },
    { "epoch": 0.005996724075305148, "grad_norm": 1.1813607215881348, "learning_rate": 1e-05, "loss": 0.231, "step": 1750 },
    { "epoch": 0.006030991070021177, "grad_norm": 1.4075005054473877, "learning_rate": 1e-05, "loss": 0.2306, "step": 1760 },
    { "epoch": 0.0060652580647372065, "grad_norm": 1.2183804512023926, "learning_rate": 1e-05, "loss": 0.2227, "step": 1770 },
    { "epoch": 0.0060995250594532355, "grad_norm": 1.3654927015304565, "learning_rate": 1e-05, "loss": 0.2341, "step": 1780 },
    { "epoch": 0.006133792054169265, "grad_norm": 1.2806668281555176, "learning_rate": 1e-05, "loss": 0.2226, "step": 1790 },
    { "epoch": 0.006168059048885294, "grad_norm": 1.2949618101119995, "learning_rate": 1e-05, "loss": 0.2698, "step": 1800 },
    { "epoch": 0.006202326043601324, "grad_norm": 1.3080159425735474, "learning_rate": 1e-05, "loss": 0.2691, "step": 1810 },
    { "epoch": 0.006236593038317353, "grad_norm": 1.1831908226013184, "learning_rate": 1e-05, "loss": 0.2644, "step": 1820 },
    { "epoch": 0.006270860033033383, "grad_norm": 1.1216965913772583, "learning_rate": 1e-05, "loss": 0.2582, "step": 1830 },
    { "epoch": 0.006305127027749412, "grad_norm": 1.1943161487579346, "learning_rate": 1e-05, "loss": 0.2769, "step": 1840 },
    { "epoch": 0.006339394022465442, "grad_norm": 1.0856040716171265, "learning_rate": 1e-05, "loss": 0.2526, "step": 1850 },
    { "epoch": 0.006373661017181471, "grad_norm": 1.1100040674209595, "learning_rate": 1e-05, "loss": 0.2576, "step": 1860 },
    { "epoch": 0.006407928011897501, "grad_norm": 1.3369051218032837, "learning_rate": 1e-05, "loss": 0.2684, "step": 1870 },
    { "epoch": 0.0064421950066135296, "grad_norm": 1.158797264099121, "learning_rate": 1e-05, "loss": 0.2474, "step": 1880 },
    { "epoch": 0.006476462001329559, "grad_norm": 1.1821873188018799, "learning_rate": 1e-05, "loss": 0.272, "step": 1890 },
    { "epoch": 0.006510728996045589, "grad_norm": 1.0739686489105225, "learning_rate": 1e-05, "loss": 0.2798, "step": 1900 },
    { "epoch": 0.006544995990761618, "grad_norm": 1.0639653205871582, "learning_rate": 1e-05, "loss": 0.2682, "step": 1910 },
    { "epoch": 0.006579262985477648, "grad_norm": 1.2149512767791748, "learning_rate": 1e-05, "loss": 0.2586, "step": 1920 },
    { "epoch": 0.006613529980193677, "grad_norm": 1.1057014465332031, "learning_rate": 1e-05, "loss": 0.2719, "step": 1930 },
    { "epoch": 0.006647796974909707, "grad_norm": 1.0929185152053833, "learning_rate": 1e-05, "loss": 0.2703, "step": 1940 },
    { "epoch": 0.006682063969625736, "grad_norm": 1.0322917699813843, "learning_rate": 1e-05, "loss": 0.2477, "step": 1950 },
    { "epoch": 0.006716330964341766, "grad_norm": 1.2460272312164307, "learning_rate": 1e-05, "loss": 0.2816, "step": 1960 },
    { "epoch": 0.006750597959057795, "grad_norm": 1.2049859762191772, "learning_rate": 1e-05, "loss": 0.2648, "step": 1970 },
    { "epoch": 0.0067848649537738245, "grad_norm": 1.1182633638381958, "learning_rate": 1e-05, "loss": 0.2549, "step": 1980 },
    { "epoch": 0.0068191319484898535, "grad_norm": 1.1514990329742432, "learning_rate": 1e-05, "loss": 0.2695, "step": 1990 },
    { "epoch": 0.006853398943205883, "grad_norm": 1.0150858163833618, "learning_rate": 1e-05, "loss": 0.2532, "step": 2000 },
    { "epoch": 0.006853398943205883, "eval_cer": 13.565002645969306, "eval_loss": 0.2523655593395233, "eval_normalized_cer": 9.942046362909672, "eval_runtime": 226.5571, "eval_samples_per_second": 2.26, "eval_steps_per_second": 0.035, "step": 2000 },
    { "epoch": 0.006887665937921912, "grad_norm": 1.0476700067520142, "learning_rate": 1e-05, "loss": 0.2555, "step": 2010 },
    { "epoch": 0.006921932932637942, "grad_norm": 1.1178691387176514, "learning_rate": 1e-05, "loss": 0.2489, "step": 2020 },
    { "epoch": 0.006956199927353971, "grad_norm": 1.2596313953399658, "learning_rate": 1e-05, "loss": 0.2884, "step": 2030 },
    { "epoch": 0.006990466922070001, "grad_norm": 1.1929702758789062, "learning_rate": 1e-05, "loss": 0.262, "step": 2040 },
    { "epoch": 0.00702473391678603, "grad_norm": 1.1269497871398926, "learning_rate": 1e-05, "loss": 0.2758, "step": 2050 },
    { "epoch": 0.00705900091150206, "grad_norm": 1.1495511531829834, "learning_rate": 1e-05, "loss": 0.2668, "step": 2060 },
    { "epoch": 0.007093267906218089, "grad_norm": 1.0648061037063599, "learning_rate": 1e-05, "loss": 0.2548, "step": 2070 },
    { "epoch": 0.007127534900934119, "grad_norm": 1.3193435668945312, "learning_rate": 1e-05, "loss": 0.2743, "step": 2080 },
    { "epoch": 0.0071618018956501476, "grad_norm": 1.2877907752990723, "learning_rate": 1e-05, "loss": 0.248, "step": 2090 },
    { "epoch": 0.007196068890366177, "grad_norm": 1.2012474536895752, "learning_rate": 1e-05, "loss": 0.2662, "step": 2100 },
    { "epoch": 0.007230335885082206, "grad_norm": 1.1491566896438599, "learning_rate": 1e-05, "loss": 0.2666, "step": 2110 },
    { "epoch": 0.007264602879798236, "grad_norm": 1.1861019134521484, "learning_rate": 1e-05, "loss": 0.2618, "step": 2120 },
    { "epoch": 0.007298869874514265, "grad_norm": 1.123963713645935, "learning_rate": 1e-05, "loss": 0.2646, "step": 2130 },
    { "epoch": 0.007333136869230295, "grad_norm": 1.2697441577911377, "learning_rate": 1e-05, "loss": 0.2713, "step": 2140 },
    { "epoch": 0.007367403863946324, "grad_norm": 0.9741083383560181, "learning_rate": 1e-05, "loss": 0.2463, "step": 2150 },
    { "epoch": 0.007401670858662354, "grad_norm": 1.0292670726776123, "learning_rate": 1e-05, "loss": 0.2542, "step": 2160 },
    { "epoch": 0.007435937853378383, "grad_norm": 1.0958001613616943, "learning_rate": 1e-05, "loss": 0.2463, "step": 2170 },
    { "epoch": 0.007470204848094413, "grad_norm": 1.166869044303894, "learning_rate": 1e-05, "loss": 0.2454, "step": 2180 },
    { "epoch": 0.007504471842810442, "grad_norm": 1.2552424669265747, "learning_rate": 1e-05, "loss": 0.2498, "step": 2190 },
    { "epoch": 0.0075387388375264715, "grad_norm": 1.1589868068695068, "learning_rate": 1e-05, "loss": 0.2659, "step": 2200 },
    { "epoch": 0.0075730058322425004, "grad_norm": 1.1640287637710571, "learning_rate": 1e-05, "loss": 0.257, "step": 2210 },
    { "epoch": 0.00760727282695853, "grad_norm": 1.0953587293624878, "learning_rate": 1e-05, "loss": 0.2444, "step": 2220 },
    { "epoch": 0.007641539821674559, "grad_norm": 1.2174441814422607, "learning_rate": 1e-05, "loss": 0.2626, "step": 2230 },
    { "epoch": 0.007675806816390589, "grad_norm": 1.1194220781326294, "learning_rate": 1e-05, "loss": 0.241, "step": 2240 },
    { "epoch": 0.007710073811106618, "grad_norm": 1.0677419900894165, "learning_rate": 1e-05, "loss": 0.2718, "step": 2250 },
    { "epoch": 0.007744340805822648, "grad_norm": 1.0956069231033325, "learning_rate": 1e-05, "loss": 0.2493, "step": 2260 },
    { "epoch": 0.007778607800538677, "grad_norm": 1.1772819757461548, "learning_rate": 1e-05, "loss": 0.2614, "step": 2270 },
    { "epoch": 0.007812874795254707, "grad_norm": 1.0341110229492188, "learning_rate": 1e-05, "loss": 0.2488, "step": 2280 },
    { "epoch": 0.007847141789970737, "grad_norm": 1.174186110496521, "learning_rate": 1e-05, "loss": 0.2542, "step": 2290 },
    { "epoch": 0.007881408784686765, "grad_norm": 0.9867792725563049, "learning_rate": 1e-05, "loss": 0.2582, "step": 2300 },
    { "epoch": 0.007915675779402795, "grad_norm": 1.1443661451339722, "learning_rate": 1e-05, "loss": 0.2331, "step": 2310 },
    { "epoch": 0.007949942774118824, "grad_norm": 1.117896318435669, "learning_rate": 1e-05, "loss": 0.2277, "step": 2320 },
    { "epoch": 0.007984209768834854, "grad_norm": 1.13510000705719, "learning_rate": 1e-05, "loss": 0.2137, "step": 2330 },
    { "epoch": 0.008018476763550882, "grad_norm": 0.9749162793159485, "learning_rate": 1e-05, "loss": 0.2161, "step": 2340 },
    { "epoch": 0.008052743758266912, "grad_norm": 1.1519534587860107, "learning_rate": 1e-05, "loss": 0.2254, "step": 2350 },
    { "epoch": 0.008087010752982942, "grad_norm": 1.0861778259277344, "learning_rate": 1e-05, "loss": 0.2153, "step": 2360 },
    { "epoch": 0.008121277747698972, "grad_norm": 1.0184444189071655, "learning_rate": 1e-05, "loss": 0.2066, "step": 2370 },
    { "epoch": 0.008155544742415, "grad_norm": 1.0581239461898804, "learning_rate": 1e-05, "loss": 0.2243, "step": 2380 },
    { "epoch": 0.00818981173713103, "grad_norm": 0.9954540729522705, "learning_rate": 1e-05, "loss": 0.2171, "step": 2390 },
    { "epoch": 0.00822407873184706, "grad_norm": 1.121960163116455, "learning_rate": 1e-05, "loss": 0.2216, "step": 2400 },
    { "epoch": 0.00825834572656309, "grad_norm": 1.097725510597229, "learning_rate": 1e-05, "loss": 0.2142, "step": 2410 },
    { "epoch": 0.008292612721279118, "grad_norm": 1.0566459894180298, "learning_rate": 1e-05, "loss": 0.2272, "step": 2420 },
    { "epoch": 0.008326879715995147, "grad_norm": 1.0077927112579346, "learning_rate": 1e-05, "loss": 0.211, "step": 2430 },
    { "epoch": 0.008361146710711177, "grad_norm": 1.176035761833191, "learning_rate": 1e-05, "loss": 0.2125, "step": 2440 },
    { "epoch": 0.008395413705427207, "grad_norm": 1.0064568519592285, "learning_rate": 1e-05, "loss": 0.2066, "step": 2450 },
    { "epoch": 0.008429680700143235, "grad_norm": 1.1852171421051025, "learning_rate": 1e-05, "loss": 0.2087, "step": 2460 },
    { "epoch": 0.008463947694859265, "grad_norm": 0.9580971002578735, "learning_rate": 1e-05, "loss": 0.2172, "step": 2470 },
    { "epoch": 0.008498214689575295, "grad_norm": 1.1230813264846802, "learning_rate": 1e-05, "loss": 0.2104, "step": 2480 },
    { "epoch": 0.008532481684291325, "grad_norm": 1.1891340017318726, "learning_rate": 1e-05, "loss": 0.229, "step": 2490 },
    { "epoch": 0.008566748679007353, "grad_norm": 1.2579045295715332, "learning_rate": 1e-05, "loss": 0.2109, "step": 2500 },
    { "epoch": 0.008566748679007353, "eval_cer": 13.300405715293703, "eval_loss": 0.26059621572494507, "eval_normalized_cer": 9.502398081534773, "eval_runtime": 226.5522, "eval_samples_per_second": 2.26, "eval_steps_per_second": 0.035, "step": 2500 },
    { "epoch": 0.008601015673723383, "grad_norm": 1.0522507429122925, "learning_rate": 1e-05, "loss": 0.2154, "step": 2510 },
    { "epoch": 0.008635282668439413, "grad_norm": 1.0875492095947266, "learning_rate": 1e-05, "loss": 0.2251, "step": 2520 },
    { "epoch": 0.008669549663155442, "grad_norm": 1.0868346691131592, "learning_rate": 1e-05, "loss": 0.2086, "step": 2530 },
    { "epoch": 0.008703816657871472, "grad_norm": 1.0993175506591797, "learning_rate": 1e-05, "loss": 0.205, "step": 2540 },
    { "epoch": 0.0087380836525875, "grad_norm": 1.0495941638946533, "learning_rate": 1e-05, "loss": 0.2135, "step": 2550 },
    { "epoch": 0.00877235064730353, "grad_norm": 1.0326807498931885, "learning_rate": 1e-05, "loss": 0.2105, "step": 2560 },
    { "epoch": 0.00880661764201956, "grad_norm": 1.0804367065429688, "learning_rate": 1e-05, "loss": 0.2438, "step": 2570 },
    { "epoch": 0.00884088463673559, "grad_norm": 1.0738023519515991, "learning_rate": 1e-05, "loss": 0.2537, "step": 2580 },
    { "epoch": 0.008875151631451618, "grad_norm": 1.1695871353149414, "learning_rate": 1e-05, "loss": 0.2518, "step": 2590 },
    { "epoch": 0.008909418626167648, "grad_norm": 1.155653476715088, "learning_rate": 1e-05, "loss": 0.2592, "step": 2600 },
    { "epoch": 0.008943685620883678, "grad_norm": 1.1516027450561523, "learning_rate": 1e-05, "loss": 0.2387, "step": 2610 },
    { "epoch": 0.008977952615599707, "grad_norm": 1.2618260383605957, "learning_rate": 1e-05, "loss": 0.2638, "step": 2620 },
    { "epoch": 0.009012219610315736, "grad_norm": 1.2422987222671509, "learning_rate": 1e-05, "loss": 0.2459, "step": 2630 },
    { "epoch": 0.009046486605031765, "grad_norm": 1.1460082530975342, "learning_rate": 1e-05, "loss": 0.2509, "step": 2640 },
    { "epoch": 0.009080753599747795, "grad_norm": 1.2502261400222778, "learning_rate": 1e-05, "loss": 0.2595, "step": 2650 },
    { "epoch": 0.009115020594463825, "grad_norm": 1.139840006828308, "learning_rate": 1e-05, "loss": 0.255, "step": 2660 },
    { "epoch": 0.009149287589179853, "grad_norm": 1.3247896432876587, "learning_rate": 1e-05, "loss": 0.2721, "step": 2670 },
    { "epoch": 0.009183554583895883, "grad_norm": 1.1355103254318237, "learning_rate": 1e-05, "loss": 0.2604, "step": 2680 },
    { "epoch": 0.009217821578611913, "grad_norm": 1.106541633605957, "learning_rate": 1e-05, "loss": 0.2374, "step": 2690 },
    { "epoch": 0.009252088573327943, "grad_norm": 1.2375975847244263, "learning_rate": 1e-05, "loss": 0.2719, "step": 2700 },
    { "epoch": 0.00928635556804397, "grad_norm": 1.1048275232315063, "learning_rate": 1e-05, "loss": 0.2791, "step": 2710 },
    { "epoch": 0.00932062256276, "grad_norm": 0.9889766573905945, "learning_rate": 1e-05, "loss": 0.2457, "step": 2720 },
    { "epoch": 0.00935488955747603, "grad_norm": 1.1566202640533447, "learning_rate": 1e-05, "loss": 0.252, "step": 2730 },
    { "epoch": 0.00938915655219206, "grad_norm": 1.1586074829101562, "learning_rate": 1e-05, "loss": 0.2517, "step": 2740 },
    { "epoch": 0.009423423546908088, "grad_norm": 0.990419328212738, "learning_rate": 1e-05, "loss": 0.2572, "step": 2750 },
    { "epoch": 0.009457690541624118, "grad_norm": 1.1101089715957642, "learning_rate": 1e-05, "loss": 0.2525, "step": 2760 },
    { "epoch": 0.009491957536340148, "grad_norm": 1.0488269329071045, "learning_rate": 1e-05, "loss": 0.2452, "step": 2770 },
    { "epoch": 0.009526224531056178, "grad_norm": 1.1127737760543823, "learning_rate": 1e-05, "loss": 0.2578, "step": 2780 },
    { "epoch": 0.009560491525772206, "grad_norm": 1.2353262901306152, "learning_rate": 1e-05, "loss": 0.2412, "step": 2790 },
    { "epoch": 0.009594758520488236, "grad_norm": 1.1262571811676025, "learning_rate": 1e-05, "loss": 0.2438, "step": 2800 },
    { "epoch": 0.009629025515204266, "grad_norm": 1.294323205947876, "learning_rate": 1e-05, "loss": 0.2512, "step": 2810 },
    { "epoch": 0.009663292509920296, "grad_norm": 1.0706703662872314, "learning_rate": 1e-05, "loss": 0.2595, "step": 2820 },
    { "epoch": 0.009697559504636324, "grad_norm": 1.0089077949523926, "learning_rate": 1e-05, "loss": 0.2522, "step": 2830 },
    { "epoch": 0.009731826499352354, "grad_norm": 0.9697763323783875, "learning_rate": 1e-05, "loss": 0.2684, "step": 2840 },
    { "epoch": 0.009766093494068383, "grad_norm": 1.1122509241104126, "learning_rate": 1e-05, "loss": 0.2629, "step": 2850 },
    { "epoch": 0.009800360488784413, "grad_norm": 1.0381057262420654, "learning_rate": 1e-05, "loss": 0.2482, "step": 2860 },
    { "epoch": 0.009834627483500441, "grad_norm": 1.126947045326233, "learning_rate": 1e-05, "loss": 0.2674, "step": 2870 },
    { "epoch": 0.009868894478216471, "grad_norm": 1.0714973211288452, "learning_rate": 1e-05, "loss": 0.2634, "step": 2880 },
    { "epoch": 0.009903161472932501, "grad_norm": 1.0942039489746094, "learning_rate": 1e-05, "loss": 0.2751, "step": 2890 },
    { "epoch": 0.009937428467648531, "grad_norm": 1.1503955125808716, "learning_rate": 1e-05, "loss": 0.272, "step": 2900 },
    { "epoch": 0.009971695462364559, "grad_norm": 1.1912988424301147, "learning_rate": 1e-05, "loss": 0.2645, "step": 2910 },
    { "epoch": 0.010005962457080589, "grad_norm": 1.0941249132156372, "learning_rate": 1e-05, "loss": 0.2531, "step": 2920 },
    { "epoch": 0.010040229451796619, "grad_norm": 1.2545968294143677, "learning_rate": 1e-05, "loss": 0.2562, "step": 2930 },
    { "epoch": 0.010074496446512649, "grad_norm": 1.3605022430419922, "learning_rate": 1e-05, "loss": 0.2601, "step": 2940 },
    { "epoch": 0.010108763441228677, "grad_norm": 1.0911775827407837, "learning_rate": 1e-05, "loss": 0.2605, "step": 2950 },
    { "epoch": 0.010143030435944706, "grad_norm": 1.133867859840393, "learning_rate": 1e-05, "loss": 0.2554, "step": 2960 },
    { "epoch": 0.010177297430660736, "grad_norm": 1.2511764764785767, "learning_rate": 1e-05, "loss": 0.2658, "step": 2970 },
    { "epoch": 0.010211564425376766, "grad_norm": 1.1705303192138672, "learning_rate": 1e-05, "loss": 0.2737, "step": 2980 },
    { "epoch": 0.010245831420092794, "grad_norm": 1.132071614265442, "learning_rate": 1e-05, "loss": 0.2665, "step": 2990 },
    { "epoch": 0.010280098414808824, "grad_norm": 1.2301791906356812, "learning_rate": 1e-05, "loss": 0.2645, "step": 3000 },
    { "epoch": 0.010280098414808824, "eval_cer": 12.938789910037043, "eval_loss": 0.2511608302593231, "eval_normalized_cer": 9.152677857713828, "eval_runtime": 227.4553, "eval_samples_per_second": 2.251, "eval_steps_per_second": 0.035, "step": 3000 },
    { "epoch": 0.010314365409524854, "grad_norm": 1.1527032852172852, "learning_rate": 1e-05, "loss": 0.2508, "step": 3010 },
    { "epoch": 0.010348632404240884, "grad_norm": 1.1162952184677124, "learning_rate": 1e-05, "loss": 0.2728, "step": 3020 },
    { "epoch": 0.010382899398956912, "grad_norm": 1.062084436416626, "learning_rate": 1e-05, "loss": 0.2496, "step": 3030 },
    { "epoch": 0.010417166393672942, "grad_norm": 1.1536457538604736, "learning_rate": 1e-05, "loss": 0.2633, "step": 3040 },
    { "epoch": 0.010451433388388972, "grad_norm": 1.2096189260482788, "learning_rate": 1e-05, "loss": 0.2498, "step": 3050 },
    { "epoch": 0.010485700383105001, "grad_norm": 0.9950299263000488, "learning_rate": 1e-05, "loss": 0.246, "step": 3060 },
    { "epoch": 0.01051996737782103, "grad_norm": 1.0628243684768677, "learning_rate": 1e-05, "loss": 0.2544, "step": 3070 },
    { "epoch": 0.01055423437253706, "grad_norm": 1.042555570602417, "learning_rate": 1e-05, "loss": 0.2401, "step": 3080 },
    { "epoch": 0.01058850136725309, "grad_norm": 1.22646164894104, "learning_rate": 1e-05, "loss": 0.2503, "step": 3090 },
    { "epoch": 0.010622768361969119, "grad_norm": 1.0862691402435303, "learning_rate": 1e-05, "loss": 0.2508, "step": 3100 },
    { "epoch": 0.010657035356685147, "grad_norm": 1.148868203163147, "learning_rate": 1e-05, "loss": 0.2526, "step": 3110 },
    { "epoch": 0.010691302351401177, "grad_norm": 1.1677169799804688, "learning_rate": 1e-05, "loss": 0.2481, "step": 3120 },
    { "epoch": 0.010725569346117207, "grad_norm": 0.990696132183075, "learning_rate": 1e-05, "loss": 0.2421, "step": 3130 },
    { "epoch": 0.010759836340833237, "grad_norm": 1.2869263887405396, "learning_rate": 1e-05, "loss": 0.2463, "step": 3140 },
    { "epoch": 0.010794103335549265, "grad_norm": 1.0741721391677856, "learning_rate": 1e-05, "loss": 0.2617, "step": 3150 },
    { "epoch": 0.010828370330265295, "grad_norm": 1.103102445602417, "learning_rate": 1e-05, "loss": 0.2442, "step": 3160 },
    { "epoch": 0.010862637324981324, "grad_norm": 1.2562378644943237, "learning_rate": 1e-05, "loss": 0.2589, "step": 3170 },
    { "epoch": 0.010896904319697354, "grad_norm": 1.2153191566467285, "learning_rate": 1e-05, "loss": 0.2417, "step": 3180 },
    { "epoch": 0.010931171314413384, "grad_norm": 1.0507330894470215, "learning_rate": 1e-05, "loss": 0.2607, "step": 3190 },
    { "epoch": 0.010965438309129412, "grad_norm": 1.1882787942886353, "learning_rate": 1e-05, "loss": 0.2469, "step": 3200 },
    { "epoch": 0.010999705303845442, "grad_norm": 1.1394702196121216, "learning_rate": 1e-05, "loss": 0.2574, "step": 3210 },
    { "epoch": 0.011033972298561472, "grad_norm": 1.2482614517211914, "learning_rate": 1e-05, "loss": 0.2456, "step": 3220 },
    { "epoch": 0.011068239293277502, "grad_norm": 1.0362995862960815, "learning_rate": 1e-05, "loss": 0.2589, "step": 3230 },
    { "epoch": 0.01110250628799353, "grad_norm": 1.1730456352233887, "learning_rate": 1e-05, "loss": 0.2497, "step": 3240 },
    { "epoch": 0.01113677328270956, "grad_norm": 1.1563142538070679, "learning_rate": 1e-05, "loss": 0.2439, "step": 3250 },
    { "epoch": 0.01117104027742559, "grad_norm": 1.1030769348144531, "learning_rate": 1e-05, "loss": 0.2671, "step": 3260 },
    { "epoch": 0.01120530727214162, "grad_norm": 1.1719223260879517, "learning_rate": 1e-05, "loss": 0.2501, "step": 3270 },
    { "epoch": 0.011239574266857648, "grad_norm": 1.1840440034866333, "learning_rate": 1e-05, "loss": 0.2643, "step": 3280 },
    { "epoch": 0.011273841261573677, "grad_norm": 1.1928170919418335, "learning_rate": 1e-05, "loss": 0.2629, "step": 3290 },
    { "epoch": 0.011308108256289707, "grad_norm": 1.0311812162399292, "learning_rate": 1e-05, "loss": 0.2552, "step": 3300 },
    { "epoch": 0.011342375251005737, "grad_norm": 1.1625889539718628, "learning_rate": 1e-05, "loss": 0.2561, "step": 3310 },
    { "epoch": 0.011376642245721765, "grad_norm": 1.0287625789642334, "learning_rate": 1e-05, "loss": 0.2341, "step": 3320 },
    { "epoch": 0.011410909240437795, "grad_norm": 1.1310815811157227, "learning_rate": 1e-05, "loss": 0.2554, "step": 3330 },
    { "epoch": 0.011445176235153825, "grad_norm": 1.1266168355941772, "learning_rate": 1e-05, "loss": 0.234, "step": 3340 },
    { "epoch": 0.011479443229869855, "grad_norm": 1.1979014873504639, "learning_rate": 1e-05, "loss": 0.2559, "step": 3350 },
    { "epoch": 0.011513710224585883, "grad_norm": 1.0378515720367432, "learning_rate": 1e-05, "loss": 0.2502, "step": 3360 },
    { "epoch": 0.011547977219301913, "grad_norm": 1.1832512617111206, "learning_rate": 1e-05, "loss": 0.236, "step": 3370 },
    { "epoch": 0.011582244214017942, "grad_norm": 0.9605569839477539, "learning_rate": 1e-05, "loss": 0.2349, "step": 3380 },
    { "epoch": 0.011616511208733972, "grad_norm": 1.0463056564331055, "learning_rate": 1e-05, "loss": 0.2328, "step": 3390 },
    { "epoch": 0.01165077820345, "grad_norm": 1.1021932363510132, "learning_rate": 1e-05, "loss": 0.2383, "step": 3400 },
    { "epoch": 0.01168504519816603, "grad_norm": 1.040493130683899, "learning_rate": 1e-05, "loss": 0.2374, "step": 3410 },
    { "epoch": 0.01171931219288206, "grad_norm": 1.1483063697814941, "learning_rate": 1e-05, "loss": 0.2398, "step": 3420 },
    { "epoch": 0.01175357918759809, "grad_norm": 1.0316531658172607, "learning_rate": 1e-05, "loss": 0.2329, "step": 3430 },
    { "epoch": 0.011787846182314118, "grad_norm": 1.1677886247634888, "learning_rate": 1e-05, "loss": 0.2493, "step": 3440 },
    { "epoch": 0.011822113177030148, "grad_norm": 1.2078930139541626, "learning_rate": 1e-05, "loss": 0.2337, "step": 3450 },
    { "epoch": 0.011856380171746178, "grad_norm": 1.178202509880066, "learning_rate": 1e-05, "loss": 0.239, "step": 3460 },
    { "epoch": 0.011890647166462208, "grad_norm": 1.0453248023986816, "learning_rate": 1e-05, "loss": 0.2233, "step": 3470 },
    { "epoch": 0.011924914161178236, "grad_norm": 1.0171067714691162, "learning_rate": 1e-05, "loss": 0.2338, "step": 3480 },
    { "epoch": 0.011959181155894266, "grad_norm": 1.051792860031128, "learning_rate": 1e-05, "loss": 0.2394, "step": 3490 },
    { "epoch": 0.011993448150610295, "grad_norm": 1.1237847805023193, "learning_rate": 1e-05, "loss": 0.2428, "step": 3500 },
    { "epoch": 0.011993448150610295, "eval_cer": 13.071088375374845, "eval_loss": 0.25454944372177124, "eval_normalized_cer": 9.542366107114308, "eval_runtime": 228.9468, "eval_samples_per_second": 2.236, "eval_steps_per_second": 0.035, "step": 3500 },
    { "epoch": 0.012027715145326325, "grad_norm": 1.1366350650787354, "learning_rate": 1e-05, "loss": 0.2353, "step": 3510 },
    { "epoch": 0.012061982140042353, "grad_norm": 1.136927604675293, "learning_rate": 1e-05, "loss": 0.2358, "step": 3520 },
    { "epoch": 0.012096249134758383, "grad_norm": 1.1875656843185425, "learning_rate": 1e-05, "loss": 0.2305, "step": 3530 },
    { "epoch": 0.012130516129474413, "grad_norm": 1.2016057968139648, "learning_rate": 1e-05, "loss": 0.2435, "step": 3540 },
    { "epoch": 0.012164783124190443, "grad_norm": 1.209622859954834, "learning_rate": 1e-05, "loss": 0.2361, "step": 3550 },
    { "epoch": 0.012199050118906471, "grad_norm": 1.0696970224380493, "learning_rate": 1e-05, "loss": 0.2385, "step": 3560 },
    { "epoch": 0.0122333171136225, "grad_norm": 1.2674167156219482, "learning_rate": 1e-05, "loss": 0.243, "step": 3570 },
    { "epoch": 0.01226758410833853, "grad_norm": 1.2928141355514526, "learning_rate": 1e-05, "loss": 0.2491, "step": 3580 },
    { "epoch": 0.01230185110305456, "grad_norm": 1.0642272233963013, "learning_rate": 1e-05, "loss": 0.2356, "step": 3590 },
    { "epoch": 0.012336118097770589, "grad_norm": 1.0935972929000854, "learning_rate": 1e-05, "loss": 0.2389, "step": 3600 },
    { "epoch": 0.012370385092486618, "grad_norm": 1.180668830871582, "learning_rate": 1e-05, "loss": 0.2409, "step": 3610 },
    { "epoch": 0.012404652087202648, "grad_norm": 1.2312487363815308, "learning_rate": 1e-05, "loss": 0.2478, "step": 3620 },
    { "epoch": 0.012438919081918678, "grad_norm": 0.947522759437561, "learning_rate": 1e-05, "loss": 0.2281, "step": 3630 },
    { "epoch": 0.012473186076634706, "grad_norm": 1.0618727207183838, "learning_rate": 1e-05, "loss": 0.2423, "step": 3640 },
    { "epoch": 0.012507453071350736, "grad_norm": 1.0766098499298096, "learning_rate": 1e-05, "loss": 0.2364, "step": 3650 },
    { "epoch": 0.012541720066066766, "grad_norm": 1.1174747943878174, "learning_rate": 1e-05, "loss": 0.238, "step": 3660 },
    { "epoch": 0.012575987060782796, "grad_norm": 1.1940118074417114, "learning_rate": 1e-05, "loss": 0.2212, "step": 3670 },
    { "epoch": 0.012610254055498824, "grad_norm": 1.1407246589660645, "learning_rate": 1e-05, "loss": 0.2423, "step": 3680 },
    { "epoch": 0.012644521050214854, "grad_norm": 1.2646050453186035, "learning_rate": 1e-05, "loss": 0.2252, "step": 3690 },
    { "epoch": 0.012678788044930884, "grad_norm": 1.130337119102478, "learning_rate": 1e-05, "loss": 0.2131, "step": 3700 },
    { "epoch": 0.012713055039646913, "grad_norm": 1.1432557106018066, "learning_rate": 1e-05, "loss": 0.2386, "step": 3710 },
    { "epoch": 0.012747322034362941, "grad_norm": 1.1370545625686646, "learning_rate": 1e-05, "loss": 0.2347, "step": 3720 },
    { "epoch": 0.012781589029078971, "grad_norm": 1.3126403093338013, "learning_rate": 1e-05, "loss": 0.2159, "step": 3730 },
    { "epoch": 0.012815856023795001, "grad_norm": 1.2375295162200928, "learning_rate": 1e-05, "loss": 0.2275, "step": 3740 },
    { "epoch": 0.012850123018511031, "grad_norm": 1.0877372026443481, "learning_rate": 1e-05, "loss": 0.2201, "step": 3750 },
    { "epoch": 0.012884390013227059, "grad_norm": 1.1122978925704956, "learning_rate": 1e-05, "loss": 0.229, "step": 3760 },
    { "epoch": 0.012918657007943089, "grad_norm": 1.0270159244537354, "learning_rate": 1e-05, "loss": 0.2313, "step": 3770 },
    { "epoch": 0.012952924002659119, "grad_norm": 1.1370947360992432, "learning_rate": 1e-05, "loss": 0.229, "step": 3780 },
    { "epoch": 0.012987190997375149, "grad_norm": 1.2888813018798828, "learning_rate": 1e-05, "loss": 0.2384, "step": 3790 },
    { "epoch": 0.013021457992091178, "grad_norm": 1.2443634271621704, "learning_rate": 1e-05, "loss": 0.2218, "step": 3800 },
    { "epoch": 0.013055724986807207, "grad_norm": 1.1919447183609009, "learning_rate": 1e-05, "loss": 0.2277, "step": 3810 },
    { "epoch": 0.013089991981523236, "grad_norm": 1.140600562095642, "learning_rate": 1e-05, "loss": 0.2317, "step": 3820 },
    { "epoch": 0.013124258976239266, "grad_norm": 1.074697494506836, "learning_rate": 1e-05, "loss": 0.2273, "step": 3830 },
    { "epoch": 0.013158525970955296, "grad_norm": 1.1003391742706299, "learning_rate": 1e-05, "loss": 0.2217, "step": 3840 },
    { "epoch": 0.013192792965671324, "grad_norm": 1.1427338123321533, "learning_rate": 1e-05, "loss": 0.2377, "step": 3850 },
    { "epoch": 0.013227059960387354, "grad_norm": 1.0806514024734497, "learning_rate": 1e-05, "loss": 0.2332, "step": 3860 },
    { "epoch": 0.013261326955103384, "grad_norm": 1.1547067165374756, "learning_rate": 1e-05, "loss": 0.2306, "step": 3870 },
    { "epoch": 0.013295593949819414, "grad_norm": 1.2483099699020386, "learning_rate": 1e-05, "loss": 0.2166, "step": 3880 },
    { "epoch": 0.013329860944535442, "grad_norm": 1.096939206123352, "learning_rate": 1e-05,
|
"loss": 0.2253, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 0.013364127939251472, |
|
"grad_norm": 1.1876115798950195, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2377, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.013398394933967502, |
|
"grad_norm": 1.1380902528762817, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2256, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 0.013432661928683531, |
|
"grad_norm": 1.0738089084625244, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2307, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 0.01346692892339956, |
|
"grad_norm": 1.0351170301437378, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2296, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 0.01350119591811559, |
|
"grad_norm": 1.2752678394317627, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2462, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 0.01353546291283162, |
|
"grad_norm": 1.2618532180786133, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2364, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.013569729907547649, |
|
"grad_norm": 1.1907076835632324, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2397, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 0.013603996902263677, |
|
"grad_norm": 0.9435076117515564, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2391, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 0.013638263896979707, |
|
"grad_norm": 1.0608407258987427, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2241, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 0.013672530891695737, |
|
"grad_norm": 1.0729584693908691, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2237, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 0.013706797886411767, |
|
"grad_norm": 1.2006182670593262, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2386, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.013706797886411767, |
|
"eval_cer": 12.594813900158758, |
|
"eval_loss": 0.25156331062316895, |
|
"eval_normalized_cer": 8.912869704236611, |
|
"eval_runtime": 228.7977, |
|
"eval_samples_per_second": 2.238, |
|
"eval_steps_per_second": 0.035, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.013741064881127795, |
|
"grad_norm": 1.2020457983016968, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2318, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 0.013775331875843825, |
|
"grad_norm": 1.0251790285110474, |
|
"learning_rate": 1e-05, |
|
"loss": 0.248, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 0.013809598870559854, |
|
"grad_norm": 1.160437822341919, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2385, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 0.013843865865275884, |
|
"grad_norm": 1.025770664215088, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2293, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 0.013878132859991912, |
|
"grad_norm": 1.111954689025879, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2377, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 0.013912399854707942, |
|
"grad_norm": 1.0644809007644653, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2195, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 0.013946666849423972, |
|
"grad_norm": 1.2926712036132812, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2508, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 0.013980933844140002, |
|
"grad_norm": 1.2169601917266846, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2401, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 0.01401520083885603, |
|
"grad_norm": 1.1396681070327759, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2305, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 0.01404946783357206, |
|
"grad_norm": 1.2242721319198608, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2301, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.01408373482828809, |
|
"grad_norm": 1.195324420928955, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2368, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 0.01411800182300412, |
|
"grad_norm": 1.2345412969589233, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2301, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 0.014152268817720148, |
|
"grad_norm": 1.1502156257629395, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2327, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 0.014186535812436177, |
|
"grad_norm": 1.2128121852874756, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2458, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 0.014220802807152207, |
|
"grad_norm": 1.2618858814239502, |
|
"learning_rate": 1e-05, |
|
"loss": 0.231, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 0.014255069801868237, |
|
"grad_norm": 1.0879299640655518, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2302, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 0.014289336796584265, |
|
"grad_norm": 0.9794358015060425, |
|
"learning_rate": 1e-05, |
|
"loss": 0.239, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 0.014323603791300295, |
|
"grad_norm": 1.1454006433486938, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2328, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 0.014357870786016325, |
|
"grad_norm": 1.223686933517456, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2211, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 0.014392137780732355, |
|
"grad_norm": 1.1423155069351196, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2391, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.014426404775448383, |
|
"grad_norm": 1.1027394533157349, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2279, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 0.014460671770164413, |
|
"grad_norm": 1.1777397394180298, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2293, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 0.014494938764880443, |
|
"grad_norm": 1.01688551902771, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2275, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 0.014529205759596472, |
|
"grad_norm": 1.1520488262176514, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2301, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 0.0145634727543125, |
|
"grad_norm": 1.2820484638214111, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2205, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 0.01459773974902853, |
|
"grad_norm": 1.169291377067566, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2389, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 0.01463200674374456, |
|
"grad_norm": 1.1135886907577515, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2384, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 0.01466627373846059, |
|
"grad_norm": 1.0846205949783325, |
|
"learning_rate": 1e-05, |
|
"loss": 0.223, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 0.014700540733176618, |
|
"grad_norm": 0.981488049030304, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2092, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 0.014734807727892648, |
|
"grad_norm": 1.0437407493591309, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2293, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.014769074722608678, |
|
"grad_norm": 1.005792260169983, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2286, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 0.014803341717324708, |
|
"grad_norm": 1.1903142929077148, |
|
"learning_rate": 1e-05, |
|
"loss": 0.231, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 0.014837608712040736, |
|
"grad_norm": 1.1308993101119995, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2458, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 0.014871875706756766, |
|
"grad_norm": 1.0948210954666138, |
|
"learning_rate": 1e-05, |
|
"loss": 0.213, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 0.014906142701472795, |
|
"grad_norm": 1.2674663066864014, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2432, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 0.014940409696188825, |
|
"grad_norm": 1.4228485822677612, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2491, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 0.014974676690904853, |
|
"grad_norm": 1.1533160209655762, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2485, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 0.015008943685620883, |
|
"grad_norm": 1.1454424858093262, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2635, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 0.015043210680336913, |
|
"grad_norm": 1.2944281101226807, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2651, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 0.015077477675052943, |
|
"grad_norm": 1.2148584127426147, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2694, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.015111744669768971, |
|
"grad_norm": 1.091282844543457, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2672, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 0.015146011664485001, |
|
"grad_norm": 1.2254445552825928, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2583, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 0.01518027865920103, |
|
"grad_norm": 1.367516279220581, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2586, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 0.01521454565391706, |
|
"grad_norm": 1.1858383417129517, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2764, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 0.01524881264863309, |
|
"grad_norm": 1.1331857442855835, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2577, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 0.015283079643349119, |
|
"grad_norm": 1.2343239784240723, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2661, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 0.015317346638065148, |
|
"grad_norm": 1.0893656015396118, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2538, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 0.015351613632781178, |
|
"grad_norm": 1.1467857360839844, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2496, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 0.015385880627497208, |
|
"grad_norm": 1.2753335237503052, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2797, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 0.015420147622213236, |
|
"grad_norm": 1.1355762481689453, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2672, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.015420147622213236, |
|
"eval_cer": 13.159287352266713, |
|
"eval_loss": 0.24996142089366913, |
|
"eval_normalized_cer": 9.59232613908873, |
|
"eval_runtime": 228.0477, |
|
"eval_samples_per_second": 2.245, |
|
"eval_steps_per_second": 0.035, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.015454414616929266, |
|
"grad_norm": 1.2256762981414795, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2662, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 0.015488681611645296, |
|
"grad_norm": 1.0631389617919922, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2596, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 0.015522948606361326, |
|
"grad_norm": 1.0759390592575073, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2553, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 0.015557215601077354, |
|
"grad_norm": 1.1867231130599976, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2498, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 0.015591482595793384, |
|
"grad_norm": 1.1203633546829224, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2732, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 0.015625749590509413, |
|
"grad_norm": 1.1223920583724976, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2535, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 0.015660016585225443, |
|
"grad_norm": 1.066497564315796, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2456, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 0.015694283579941473, |
|
"grad_norm": 1.2520133256912231, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2558, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 0.015728550574657503, |
|
"grad_norm": 1.3602423667907715, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2698, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 0.01576281756937353, |
|
"grad_norm": 1.1748729944229126, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2621, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.01579708456408956, |
|
"grad_norm": 0.9431802034378052, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2433, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 0.01583135155880559, |
|
"grad_norm": 1.0146753787994385, |
|
"learning_rate": 1e-05, |
|
"loss": 0.239, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 0.01586561855352162, |
|
"grad_norm": 1.1340891122817993, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2437, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 0.01589988554823765, |
|
"grad_norm": 1.1456454992294312, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2307, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 0.01593415254295368, |
|
"grad_norm": 1.1026827096939087, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2295, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 0.01596841953766971, |
|
"grad_norm": 1.2215088605880737, |
|
"learning_rate": 1e-05, |
|
"loss": 0.245, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 0.01600268653238574, |
|
"grad_norm": 1.1760615110397339, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2461, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 0.016036953527101765, |
|
"grad_norm": 1.1690876483917236, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2282, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 0.016071220521817794, |
|
"grad_norm": 1.182026743888855, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2351, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 0.016105487516533824, |
|
"grad_norm": 1.0182474851608276, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2284, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.016139754511249854, |
|
"grad_norm": 1.2531431913375854, |
|
"learning_rate": 1e-05, |
|
"loss": 0.244, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 0.016174021505965884, |
|
"grad_norm": 0.9633692502975464, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2297, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 0.016208288500681914, |
|
"grad_norm": 1.1144667863845825, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2475, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 0.016242555495397944, |
|
"grad_norm": 1.0768555402755737, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2216, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 0.016276822490113974, |
|
"grad_norm": 1.2052035331726074, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2278, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 0.01631108948483, |
|
"grad_norm": 1.0291496515274048, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2226, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 0.01634535647954603, |
|
"grad_norm": 1.2100346088409424, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2278, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 0.01637962347426206, |
|
"grad_norm": 1.214861273765564, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2313, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 0.01641389046897809, |
|
"grad_norm": 1.137210726737976, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2235, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 0.01644815746369412, |
|
"grad_norm": 1.046673059463501, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2231, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.01648242445841015, |
|
"grad_norm": 1.08164644241333, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2235, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 0.01651669145312618, |
|
"grad_norm": 1.1432491540908813, |
|
"learning_rate": 1e-05, |
|
"loss": 0.246, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 0.01655095844784221, |
|
"grad_norm": 1.1684173345565796, |
|
"learning_rate": 1e-05, |
|
"loss": 0.218, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 0.016585225442558235, |
|
"grad_norm": 1.0895615816116333, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2109, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 0.016619492437274265, |
|
"grad_norm": 1.1505770683288574, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2283, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 0.016653759431990295, |
|
"grad_norm": 1.3385730981826782, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2344, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 0.016688026426706325, |
|
"grad_norm": 1.109035611152649, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2558, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 0.016722293421422355, |
|
"grad_norm": 1.1834880113601685, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2247, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 0.016756560416138384, |
|
"grad_norm": 1.2369152307510376, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2449, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 0.016790827410854414, |
|
"grad_norm": 1.131173014640808, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2458, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.016825094405570444, |
|
"grad_norm": 1.1100351810455322, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2523, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 0.01685936140028647, |
|
"grad_norm": 1.1857340335845947, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2523, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 0.0168936283950025, |
|
"grad_norm": 1.1568819284439087, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2549, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 0.01692789538971853, |
|
"grad_norm": 1.104872465133667, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2449, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 0.01696216238443456, |
|
"grad_norm": 1.0907660722732544, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2496, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 0.01699642937915059, |
|
"grad_norm": 1.1100903749465942, |
|
"learning_rate": 1e-05, |
|
"loss": 0.239, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 0.01703069637386662, |
|
"grad_norm": 1.141200065612793, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2459, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 0.01706496336858265, |
|
"grad_norm": 1.2853361368179321, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2452, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 0.01709923036329868, |
|
"grad_norm": 1.1542645692825317, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2635, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 0.017133497358014706, |
|
"grad_norm": 1.2022640705108643, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2371, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.017133497358014706, |
|
"eval_cer": 12.92115011465867, |
|
"eval_loss": 0.2521001100540161, |
|
"eval_normalized_cer": 9.30255795363709, |
|
"eval_runtime": 227.4868, |
|
"eval_samples_per_second": 2.251, |
|
"eval_steps_per_second": 0.035, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.017167764352730736, |
|
"grad_norm": 1.0765001773834229, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2455, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 0.017202031347446765, |
|
"grad_norm": 1.0711493492126465, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2422, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 0.017236298342162795, |
|
"grad_norm": 1.0719484090805054, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2531, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 0.017270565336878825, |
|
"grad_norm": 1.1884721517562866, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2508, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 0.017304832331594855, |
|
"grad_norm": 1.068827509880066, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2474, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 0.017339099326310885, |
|
"grad_norm": 1.1308655738830566, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2627, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 0.017373366321026915, |
|
"grad_norm": 1.1527314186096191, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2535, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 0.017407633315742944, |
|
"grad_norm": 1.1800657510757446, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2587, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 0.01744190031045897, |
|
"grad_norm": 1.095189094543457, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2424, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 0.017476167305175, |
|
"grad_norm": 1.109617829322815, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2543, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.01751043429989103, |
|
"grad_norm": 1.2110544443130493, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2687, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 0.01754470129460706, |
|
"grad_norm": 1.0466723442077637, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2424, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 0.01757896828932309, |
|
"grad_norm": 1.2060648202896118, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2337, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 0.01761323528403912, |
|
"grad_norm": 1.203142762184143, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2556, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 0.01764750227875515, |
|
"grad_norm": 1.0751283168792725, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2235, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 0.01768176927347118, |
|
"grad_norm": 1.1377781629562378, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2448, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 0.017716036268187206, |
|
"grad_norm": 1.147454023361206, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2172, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 0.017750303262903236, |
|
"grad_norm": 1.129897952079773, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2418, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 0.017784570257619266, |
|
"grad_norm": 1.1261131763458252, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2328, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 0.017818837252335296, |
|
"grad_norm": 1.0794824361801147, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2546, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.017853104247051325, |
|
"grad_norm": 1.1870142221450806, |
|
"learning_rate": 1e-05, |
|
"loss": 0.249, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 0.017887371241767355, |
|
"grad_norm": 1.0414400100708008, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2285, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 0.017921638236483385, |
|
"grad_norm": 1.173405647277832, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2529, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 0.017955905231199415, |
|
"grad_norm": 1.039650797843933, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2321, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 0.01799017222591544, |
|
"grad_norm": 1.0359266996383667, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2433, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 0.01802443922063147, |
|
"grad_norm": 1.0630840063095093, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2117, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 0.0180587062153475, |
|
"grad_norm": 1.0937180519104004, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2454, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 0.01809297321006353, |
|
"grad_norm": 1.1015993356704712, |
|
"learning_rate": 1e-05, |
|
"loss": 0.238, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 0.01812724020477956, |
|
"grad_norm": 1.060584545135498, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2475, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 0.01816150719949559, |
|
"grad_norm": 1.1389795541763306, |
|
"learning_rate": 1e-05, |
|
"loss": 0.233, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.01819577419421162, |
|
"grad_norm": 1.0018917322158813, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2453, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 0.01823004118892765, |
|
"grad_norm": 1.0546092987060547, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2333, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 0.018264308183643677, |
|
"grad_norm": 1.1121848821640015, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2317, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 0.018298575178359706, |
|
"grad_norm": 1.1613191366195679, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2549, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 0.018332842173075736, |
|
"grad_norm": 1.1250524520874023, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2471, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 0.018367109167791766, |
|
"grad_norm": 1.0905226469039917, |
|
"learning_rate": 1e-05, |
|
"loss": 0.229, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 0.018401376162507796, |
|
"grad_norm": 0.9885173439979553, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2542, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 0.018435643157223826, |
|
"grad_norm": 1.288758635520935, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2472, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 0.018469910151939856, |
|
"grad_norm": 1.2433462142944336, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2427, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 0.018504177146655885, |
|
"grad_norm": 1.2367336750030518, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2511, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.018538444141371912, |
|
"grad_norm": 1.1871395111083984, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2276, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 0.01857271113608794, |
|
"grad_norm": 0.9569379091262817, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2475, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 0.01860697813080397, |
|
"grad_norm": 1.1487014293670654, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2295, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 0.01864124512552, |
|
"grad_norm": 1.0800844430923462, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2247, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 0.01867551212023603, |
|
"grad_norm": 1.1834380626678467, |
|
"learning_rate": 1e-05, |
|
"loss": 0.226, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 0.01870977911495206, |
|
"grad_norm": 1.0035191774368286, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2414, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 0.01874404610966809, |
|
"grad_norm": 1.0685466527938843, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2449, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 0.01877831310438412, |
|
"grad_norm": 1.1921565532684326, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2419, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 0.018812580099100147, |
|
"grad_norm": 1.1201281547546387, |
|
"learning_rate": 1e-05, |
|
"loss": 0.255, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 0.018846847093816177, |
|
"grad_norm": 1.1162866353988647, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2426, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.018846847093816177, |
|
"eval_cer": 13.238666431469396, |
|
"eval_loss": 0.25262224674224854, |
|
"eval_normalized_cer": 9.562350119904076, |
|
"eval_runtime": 229.0802, |
|
"eval_samples_per_second": 2.235, |
|
"eval_steps_per_second": 0.035, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.018881114088532207, |
|
"grad_norm": 1.0215845108032227, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2368, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 0.018915381083248237, |
|
"grad_norm": 1.0062447786331177, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2308, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 0.018949648077964266, |
|
"grad_norm": 1.223649024963379, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2409, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 0.018983915072680296, |
|
"grad_norm": 1.2076172828674316, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2236, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 0.019018182067396326, |
|
"grad_norm": 1.154416561126709, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2419, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 0.019052449062112356, |
|
"grad_norm": 1.284858226776123, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2321, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 0.019086716056828382, |
|
"grad_norm": 1.0406948328018188, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2485, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 0.019120983051544412, |
|
"grad_norm": 1.1980571746826172, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2274, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 0.019155250046260442, |
|
"grad_norm": 1.073560357093811, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2498, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 0.019189517040976472, |
|
"grad_norm": 1.0982617139816284, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2391, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.019223784035692502, |
|
"grad_norm": 1.015085220336914, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2269, |
|
"step": 5610 |
|
}, |
|
{ |
|
"epoch": 0.01925805103040853, |
|
"grad_norm": 1.238585352897644, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2428, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 0.01929231802512456, |
|
"grad_norm": 1.3326079845428467, |
|
"learning_rate": 1e-05, |
|
"loss": 0.25, |
|
"step": 5630 |
|
}, |
|
{ |
|
"epoch": 0.01932658501984059, |
|
"grad_norm": 1.1263608932495117, |
|
"learning_rate": 1e-05, |
|
"loss": 0.234, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 0.019360852014556618, |
|
"grad_norm": 1.083595633506775, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2504, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 0.019395119009272647, |
|
"grad_norm": 1.0787022113800049, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2248, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 0.019429386003988677, |
|
"grad_norm": 1.312565803527832, |
|
"learning_rate": 1e-05, |
|
"loss": 0.263, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 0.019463652998704707, |
|
"grad_norm": 1.0305407047271729, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2358, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 0.019497919993420737, |
|
"grad_norm": 1.0905306339263916, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2358, |
|
"step": 5690 |
|
}, |
|
{ |
|
"epoch": 0.019532186988136767, |
|
"grad_norm": 1.1105730533599854, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2371, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.019566453982852797, |
|
"grad_norm": 1.1664555072784424, |
|
"learning_rate": 1e-05, |
|
"loss": 0.244, |
|
"step": 5710 |
|
}, |
|
{ |
|
"epoch": 0.019600720977568827, |
|
"grad_norm": 1.0702719688415527, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2305, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 0.019634987972284856, |
|
"grad_norm": 1.0736626386642456, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2406, |
|
"step": 5730 |
|
}, |
|
{ |
|
"epoch": 0.019669254967000883, |
|
"grad_norm": 1.0510461330413818, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2335, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 0.019703521961716913, |
|
"grad_norm": 1.0435370206832886, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2211, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 0.019737788956432942, |
|
"grad_norm": 1.2461049556732178, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2188, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 0.019772055951148972, |
|
"grad_norm": 1.0351046323776245, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2269, |
|
"step": 5770 |
|
}, |
|
{ |
|
"epoch": 0.019806322945865002, |
|
"grad_norm": 1.124671459197998, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2284, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 0.019840589940581032, |
|
"grad_norm": 1.145488977432251, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2415, |
|
"step": 5790 |
|
}, |
|
{ |
|
"epoch": 0.019874856935297062, |
|
"grad_norm": 1.1410046815872192, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2296, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.01990912393001309, |
|
"grad_norm": 1.2782517671585083, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2367, |
|
"step": 5810 |
|
}, |
|
{ |
|
"epoch": 0.019943390924729118, |
|
"grad_norm": 1.204562783241272, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2289, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 0.019977657919445148, |
|
"grad_norm": 1.1141811609268188, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2223, |
|
"step": 5830 |
|
}, |
|
{ |
|
"epoch": 0.020011924914161178, |
|
"grad_norm": 1.1790316104888916, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2308, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 0.020046191908877208, |
|
"grad_norm": 1.0944266319274902, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2366, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 0.020080458903593237, |
|
"grad_norm": 1.0892263650894165, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2384, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 0.020114725898309267, |
|
"grad_norm": 1.1419873237609863, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2414, |
|
"step": 5870 |
|
}, |
|
{ |
|
"epoch": 0.020148992893025297, |
|
"grad_norm": 1.2230783700942993, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2394, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 0.020183259887741327, |
|
"grad_norm": 1.1309173107147217, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2561, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 0.020217526882457353, |
|
"grad_norm": 1.2405802011489868, |
|
"learning_rate": 1e-05, |
|
"loss": 0.259, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.020251793877173383, |
|
"grad_norm": 1.2853388786315918, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2668, |
|
"step": 5910 |
|
}, |
|
{ |
|
"epoch": 0.020286060871889413, |
|
"grad_norm": 1.299046277999878, |
|
"learning_rate": 1e-05, |
|
"loss": 0.251, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 0.020320327866605443, |
|
"grad_norm": 1.142052173614502, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2655, |
|
"step": 5930 |
|
}, |
|
{ |
|
"epoch": 0.020354594861321473, |
|
"grad_norm": 1.3770766258239746, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2508, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 0.020388861856037502, |
|
"grad_norm": 1.1458237171173096, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2742, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 0.020423128850753532, |
|
"grad_norm": 1.3130786418914795, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2514, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 0.020457395845469562, |
|
"grad_norm": 1.2816088199615479, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2593, |
|
"step": 5970 |
|
}, |
|
{ |
|
"epoch": 0.02049166284018559, |
|
"grad_norm": 1.0405460596084595, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2608, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 0.02052592983490162, |
|
"grad_norm": 1.2035329341888428, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2558, |
|
"step": 5990 |
|
}, |
|
{ |
|
"epoch": 0.020560196829617648, |
|
"grad_norm": 1.0495450496673584, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2468, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.020560196829617648, |
|
"eval_cer": 13.079908273064033, |
|
"eval_loss": 0.2540421485900879, |
|
"eval_normalized_cer": 9.292565947242206, |
|
"eval_runtime": 227.4153, |
|
"eval_samples_per_second": 2.251, |
|
"eval_steps_per_second": 0.035, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.020594463824333678, |
|
"grad_norm": 1.1614056825637817, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2527, |
|
"step": 6010 |
|
}, |
|
{ |
|
"epoch": 0.020628730819049708, |
|
"grad_norm": 1.1835705041885376, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2592, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 0.020662997813765738, |
|
"grad_norm": 1.1335136890411377, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2727, |
|
"step": 6030 |
|
}, |
|
{ |
|
"epoch": 0.020697264808481768, |
|
"grad_norm": 1.052079439163208, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2514, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 0.020731531803197797, |
|
"grad_norm": 1.096330165863037, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2684, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 0.020765798797913824, |
|
"grad_norm": 1.2359880208969116, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2638, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 0.020800065792629854, |
|
"grad_norm": 1.2259430885314941, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2488, |
|
"step": 6070 |
|
}, |
|
{ |
|
"epoch": 0.020834332787345883, |
|
"grad_norm": 1.0531619787216187, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2584, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 0.020868599782061913, |
|
"grad_norm": 1.1754058599472046, |
|
"learning_rate": 1e-05, |
|
"loss": 0.254, |
|
"step": 6090 |
|
}, |
|
{ |
|
"epoch": 0.020902866776777943, |
|
"grad_norm": 1.0922538042068481, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2522, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.020937133771493973, |
|
"grad_norm": 1.1970179080963135, |
|
"learning_rate": 1e-05, |
|
"loss": 0.267, |
|
"step": 6110 |
|
}, |
|
{ |
|
"epoch": 0.020971400766210003, |
|
"grad_norm": 1.2625236511230469, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2379, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 0.021005667760926033, |
|
"grad_norm": 1.152846336364746, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2429, |
|
"step": 6130 |
|
}, |
|
{ |
|
"epoch": 0.02103993475564206, |
|
"grad_norm": 1.1184160709381104, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2566, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 0.02107420175035809, |
|
"grad_norm": 1.1153484582901, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2583, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 0.02110846874507412, |
|
"grad_norm": 1.2822504043579102, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2535, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 0.02114273573979015, |
|
"grad_norm": 1.1332992315292358, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2799, |
|
"step": 6170 |
|
}, |
|
{ |
|
"epoch": 0.02117700273450618, |
|
"grad_norm": 1.0284112691879272, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2458, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 0.02121126972922221, |
|
"grad_norm": 1.1097975969314575, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2513, |
|
"step": 6190 |
|
}, |
|
{ |
|
"epoch": 0.021245536723938238, |
|
"grad_norm": 1.168990969657898, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2843, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.021279803718654268, |
|
"grad_norm": 0.9956926107406616, |
|
"learning_rate": 1e-05, |
|
"loss": 0.247, |
|
"step": 6210 |
|
}, |
|
{ |
|
"epoch": 0.021314070713370294, |
|
"grad_norm": 1.2191492319107056, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2608, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 0.021348337708086324, |
|
"grad_norm": 1.0872688293457031, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2463, |
|
"step": 6230 |
|
}, |
|
{ |
|
"epoch": 0.021382604702802354, |
|
"grad_norm": 1.0746614933013916, |
|
"learning_rate": 1e-05, |
|
"loss": 0.244, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 0.021416871697518384, |
|
"grad_norm": 1.1560328006744385, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2639, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 0.021451138692234414, |
|
"grad_norm": 1.1529641151428223, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2585, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 0.021485405686950444, |
|
"grad_norm": 1.0708386898040771, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2669, |
|
"step": 6270 |
|
}, |
|
{ |
|
"epoch": 0.021519672681666473, |
|
"grad_norm": 1.208079218864441, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2436, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 0.021553939676382503, |
|
"grad_norm": 1.1871508359909058, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2655, |
|
"step": 6290 |
|
}, |
|
{ |
|
"epoch": 0.02158820667109853, |
|
"grad_norm": 1.0997953414916992, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2578, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.02162247366581456, |
|
"grad_norm": 1.2404417991638184, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2726, |
|
"step": 6310 |
|
}, |
|
{ |
|
"epoch": 0.02165674066053059, |
|
"grad_norm": 1.1724058389663696, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2611, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 0.02169100765524662, |
|
"grad_norm": 1.124932885169983, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2582, |
|
"step": 6330 |
|
}, |
|
{ |
|
"epoch": 0.02172527464996265, |
|
"grad_norm": 1.129584550857544, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2651, |
|
"step": 6340 |
|
}, |
|
{ |
|
"epoch": 0.02175954164467868, |
|
"grad_norm": 1.1869479417800903, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2451, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 0.02179380863939471, |
|
"grad_norm": 1.1753504276275635, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2509, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 0.02182807563411074, |
|
"grad_norm": 1.1704761981964111, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2614, |
|
"step": 6370 |
|
}, |
|
{ |
|
"epoch": 0.02186234262882677, |
|
"grad_norm": 1.347970724105835, |
|
"learning_rate": 1e-05, |
|
"loss": 0.253, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 0.021896609623542795, |
|
"grad_norm": 1.0677597522735596, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2539, |
|
"step": 6390 |
|
}, |
|
{ |
|
"epoch": 0.021930876618258825, |
|
"grad_norm": 1.1567541360855103, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2621, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.021965143612974854, |
|
"grad_norm": 1.1231553554534912, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2453, |
|
"step": 6410 |
|
}, |
|
{ |
|
"epoch": 0.021999410607690884, |
|
"grad_norm": 1.0485198497772217, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2503, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 0.022033677602406914, |
|
"grad_norm": 1.12228262424469, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2488, |
|
"step": 6430 |
|
}, |
|
{ |
|
"epoch": 0.022067944597122944, |
|
"grad_norm": 1.2610136270523071, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2445, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 0.022102211591838974, |
|
"grad_norm": 0.9546436071395874, |
|
"learning_rate": 1e-05, |
|
"loss": 0.226, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 0.022136478586555004, |
|
"grad_norm": 1.3363466262817383, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2489, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 0.02217074558127103, |
|
"grad_norm": 1.1454704999923706, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2434, |
|
"step": 6470 |
|
}, |
|
{ |
|
"epoch": 0.02220501257598706, |
|
"grad_norm": 1.1578549146652222, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2549, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 0.02223927957070309, |
|
"grad_norm": 1.096081018447876, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2472, |
|
"step": 6490 |
|
}, |
|
{ |
|
"epoch": 0.02227354656541912, |
|
"grad_norm": 1.2388731241226196, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2457, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.02227354656541912, |
|
"eval_cer": 12.929970012347859, |
|
"eval_loss": 0.24839338660240173, |
|
"eval_normalized_cer": 9.242605915267786, |
|
"eval_runtime": 227.6401, |
|
"eval_samples_per_second": 2.249, |
|
"eval_steps_per_second": 0.035, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.02230781356013515, |
|
"grad_norm": 1.0306715965270996, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2393, |
|
"step": 6510 |
|
}, |
|
{ |
|
"epoch": 0.02234208055485118, |
|
"grad_norm": 1.1339504718780518, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2563, |
|
"step": 6520 |
|
}, |
|
{ |
|
"epoch": 0.02237634754956721, |
|
"grad_norm": 0.912266731262207, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2465, |
|
"step": 6530 |
|
}, |
|
{ |
|
"epoch": 0.02241061454428324, |
|
"grad_norm": 1.1917020082473755, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2395, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 0.022444881538999265, |
|
"grad_norm": 1.248515248298645, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2479, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 0.022479148533715295, |
|
"grad_norm": 1.180799961090088, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2616, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 0.022513415528431325, |
|
"grad_norm": 1.0700205564498901, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2401, |
|
"step": 6570 |
|
}, |
|
{ |
|
"epoch": 0.022547682523147355, |
|
"grad_norm": 1.1814614534378052, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2471, |
|
"step": 6580 |
|
}, |
|
{ |
|
"epoch": 0.022581949517863385, |
|
"grad_norm": 1.3973134756088257, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2383, |
|
"step": 6590 |
|
}, |
|
{ |
|
"epoch": 0.022616216512579414, |
|
"grad_norm": 1.244265079498291, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2548, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.022650483507295444, |
|
"grad_norm": 1.1685833930969238, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2499, |
|
"step": 6610 |
|
}, |
|
{ |
|
"epoch": 0.022684750502011474, |
|
"grad_norm": 1.1566667556762695, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2443, |
|
"step": 6620 |
|
}, |
|
{ |
|
"epoch": 0.0227190174967275, |
|
"grad_norm": 1.0241929292678833, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2412, |
|
"step": 6630 |
|
}, |
|
{ |
|
"epoch": 0.02275328449144353, |
|
"grad_norm": 1.0359474420547485, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2374, |
|
"step": 6640 |
|
}, |
|
{ |
|
"epoch": 0.02278755148615956, |
|
"grad_norm": 1.040810227394104, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2254, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 0.02282181848087559, |
|
"grad_norm": 1.0343252420425415, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2366, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 0.02285608547559162, |
|
"grad_norm": 1.052739143371582, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2273, |
|
"step": 6670 |
|
}, |
|
{ |
|
"epoch": 0.02289035247030765, |
|
"grad_norm": 1.0414966344833374, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2082, |
|
"step": 6680 |
|
}, |
|
{ |
|
"epoch": 0.02292461946502368, |
|
"grad_norm": 1.2340532541275024, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2241, |
|
"step": 6690 |
|
}, |
|
{ |
|
"epoch": 0.02295888645973971, |
|
"grad_norm": 0.9693310260772705, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2322, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 0.022993153454455736, |
|
"grad_norm": 1.103025197982788, |
|
"learning_rate": 1e-05, |
|
"loss": 0.236, |
|
"step": 6710 |
|
}, |
|
{ |
|
"epoch": 0.023027420449171766, |
|
"grad_norm": 1.119689702987671, |
|
"learning_rate": 1e-05, |
|
"loss": 0.214, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 0.023061687443887795, |
|
"grad_norm": 0.93172287940979, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2094, |
|
"step": 6730 |
|
}, |
|
{ |
|
"epoch": 0.023095954438603825, |
|
"grad_norm": 1.0207446813583374, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2238, |
|
"step": 6740 |
|
}, |
|
{ |
|
"epoch": 0.023130221433319855, |
|
"grad_norm": 1.200201392173767, |
|
"learning_rate": 1e-05, |
|
"loss": 0.218, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 0.023164488428035885, |
|
"grad_norm": 1.1485291719436646, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2314, |
|
"step": 6760 |
|
}, |
|
{ |
|
"epoch": 0.023198755422751915, |
|
"grad_norm": 1.2236285209655762, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2326, |
|
"step": 6770 |
|
}, |
|
{ |
|
"epoch": 0.023233022417467945, |
|
"grad_norm": 1.1756523847579956, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2122, |
|
"step": 6780 |
|
}, |
|
{ |
|
"epoch": 0.02326728941218397, |
|
"grad_norm": 1.0356839895248413, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2078, |
|
"step": 6790 |
|
}, |
|
{ |
|
"epoch": 0.0233015564069, |
|
"grad_norm": 1.1896883249282837, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2072, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.02333582340161603, |
|
"grad_norm": 1.1080976724624634, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2127, |
|
"step": 6810 |
|
}, |
|
{ |
|
"epoch": 0.02337009039633206, |
|
"grad_norm": 1.128263235092163, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2282, |
|
"step": 6820 |
|
}, |
|
{ |
|
"epoch": 0.02340435739104809, |
|
"grad_norm": 1.0398188829421997, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2095, |
|
"step": 6830 |
|
}, |
|
{ |
|
"epoch": 0.02343862438576412, |
|
"grad_norm": 1.1791975498199463, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2216, |
|
"step": 6840 |
|
}, |
|
{ |
|
"epoch": 0.02347289138048015, |
|
"grad_norm": 1.1444710493087769, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2447, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 0.02350715837519618, |
|
"grad_norm": 1.136607050895691, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2093, |
|
"step": 6860 |
|
}, |
|
{ |
|
"epoch": 0.023541425369912206, |
|
"grad_norm": 1.0915231704711914, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2128, |
|
"step": 6870 |
|
}, |
|
{ |
|
"epoch": 0.023575692364628236, |
|
"grad_norm": 1.0416276454925537, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2092, |
|
"step": 6880 |
|
}, |
|
{ |
|
"epoch": 0.023609959359344266, |
|
"grad_norm": 1.3693732023239136, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2137, |
|
"step": 6890 |
|
}, |
|
{ |
|
"epoch": 0.023644226354060296, |
|
"grad_norm": 1.1747677326202393, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2215, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 0.023678493348776326, |
|
"grad_norm": 1.1593588590621948, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2234, |
|
"step": 6910 |
|
}, |
|
{ |
|
"epoch": 0.023712760343492355, |
|
"grad_norm": 1.2322016954421997, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2437, |
|
"step": 6920 |
|
}, |
|
{ |
|
"epoch": 0.023747027338208385, |
|
"grad_norm": 1.167648196220398, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2461, |
|
"step": 6930 |
|
}, |
|
{ |
|
"epoch": 0.023781294332924415, |
|
"grad_norm": 1.0984666347503662, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2584, |
|
"step": 6940 |
|
}, |
|
{ |
|
"epoch": 0.023815561327640445, |
|
"grad_norm": 1.1234291791915894, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2532, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 0.02384982832235647, |
|
"grad_norm": 1.2158063650131226, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2567, |
|
"step": 6960 |
|
}, |
|
{ |
|
"epoch": 0.0238840953170725, |
|
"grad_norm": 1.0958101749420166, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2387, |
|
"step": 6970 |
|
}, |
|
{ |
|
"epoch": 0.02391836231178853, |
|
"grad_norm": 1.1536844968795776, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2712, |
|
"step": 6980 |
|
}, |
|
{ |
|
"epoch": 0.02395262930650456, |
|
"grad_norm": 1.2437007427215576, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2563, |
|
"step": 6990 |
|
}, |
|
{ |
|
"epoch": 0.02398689630122059, |
|
"grad_norm": 1.0884592533111572, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2379, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.02398689630122059, |
|
"eval_cer": 13.079908273064033, |
|
"eval_loss": 0.2514401376247406, |
|
"eval_normalized_cer": 9.622302158273381, |
|
"eval_runtime": 227.6705, |
|
"eval_samples_per_second": 2.249, |
|
"eval_steps_per_second": 0.035, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.02402116329593662, |
|
"grad_norm": 1.2332980632781982, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2543, |
|
"step": 7010 |
|
}, |
|
{ |
|
"epoch": 0.02405543029065265, |
|
"grad_norm": 1.1041260957717896, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2663, |
|
"step": 7020 |
|
}, |
|
{ |
|
"epoch": 0.02408969728536868, |
|
"grad_norm": 1.1479183435440063, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2528, |
|
"step": 7030 |
|
}, |
|
{ |
|
"epoch": 0.024123964280084707, |
|
"grad_norm": 1.103766918182373, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2336, |
|
"step": 7040 |
|
}, |
|
{ |
|
"epoch": 0.024158231274800736, |
|
"grad_norm": 1.238996148109436, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2436, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 0.024192498269516766, |
|
"grad_norm": 1.2652095556259155, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2464, |
|
"step": 7060 |
|
}, |
|
{ |
|
"epoch": 0.024226765264232796, |
|
"grad_norm": 1.180665373802185, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2541, |
|
"step": 7070 |
|
}, |
|
{ |
|
"epoch": 0.024261032258948826, |
|
"grad_norm": 1.1601506471633911, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2508, |
|
"step": 7080 |
|
}, |
|
{ |
|
"epoch": 0.024295299253664856, |
|
"grad_norm": 1.257034420967102, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2446, |
|
"step": 7090 |
|
}, |
|
{ |
|
"epoch": 0.024329566248380886, |
|
"grad_norm": 1.0813285112380981, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2546, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 0.024363833243096916, |
|
"grad_norm": 1.1124157905578613, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2379, |
|
"step": 7110 |
|
}, |
|
{ |
|
"epoch": 0.024398100237812942, |
|
"grad_norm": 1.0615211725234985, |
|
"learning_rate": 1e-05, |
|
"loss": 0.253, |
|
"step": 7120 |
|
}, |
|
{ |
|
"epoch": 0.024432367232528972, |
|
"grad_norm": 1.185677409172058, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2383, |
|
"step": 7130 |
|
}, |
|
{ |
|
"epoch": 0.024466634227245, |
|
"grad_norm": 1.1810061931610107, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2603, |
|
"step": 7140 |
|
}, |
|
{ |
|
"epoch": 0.02450090122196103, |
|
"grad_norm": 1.155860424041748, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2434, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 0.02453516821667706, |
|
"grad_norm": 1.113008737564087, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2529, |
|
"step": 7160 |
|
}, |
|
{ |
|
"epoch": 0.02456943521139309, |
|
"grad_norm": 1.1276872158050537, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2265, |
|
"step": 7170 |
|
}, |
|
{ |
|
"epoch": 0.02460370220610912, |
|
"grad_norm": 1.149792194366455, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2349, |
|
"step": 7180 |
|
}, |
|
{ |
|
"epoch": 0.02463796920082515, |
|
"grad_norm": 1.1619532108306885, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2336, |
|
"step": 7190 |
|
}, |
|
{ |
|
"epoch": 0.024672236195541177, |
|
"grad_norm": 1.0760303735733032, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2315, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.024706503190257207, |
|
"grad_norm": 1.2807782888412476, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2382, |
|
"step": 7210 |
|
}, |
|
{ |
|
"epoch": 0.024740770184973237, |
|
"grad_norm": 1.0910037755966187, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2333, |
|
"step": 7220 |
|
}, |
|
{ |
|
"epoch": 0.024775037179689267, |
|
"grad_norm": 1.2938390970230103, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2147, |
|
"step": 7230 |
|
}, |
|
{ |
|
"epoch": 0.024809304174405297, |
|
"grad_norm": 1.185542106628418, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2232, |
|
"step": 7240 |
|
}, |
|
{ |
|
"epoch": 0.024843571169121326, |
|
"grad_norm": 1.0598995685577393, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2278, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 0.024877838163837356, |
|
"grad_norm": 1.1860477924346924, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2179, |
|
"step": 7260 |
|
}, |
|
{ |
|
"epoch": 0.024912105158553386, |
|
"grad_norm": 1.1935844421386719, |
|
"learning_rate": 1e-05, |
|
"loss": 0.238, |
|
"step": 7270 |
|
}, |
|
{ |
|
"epoch": 0.024946372153269412, |
|
"grad_norm": 1.0449039936065674, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2307, |
|
"step": 7280 |
|
}, |
|
{ |
|
"epoch": 0.024980639147985442, |
|
"grad_norm": 1.0651369094848633, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2379, |
|
"step": 7290 |
|
}, |
|
{ |
|
"epoch": 0.025014906142701472, |
|
"grad_norm": 1.0416852235794067, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2208, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 0.025049173137417502, |
|
"grad_norm": 1.0064860582351685, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2227, |
|
"step": 7310 |
|
}, |
|
{ |
|
"epoch": 0.025083440132133532, |
|
"grad_norm": 1.0357342958450317, |
|
"learning_rate": 1e-05, |
|
"loss": 0.22, |
|
"step": 7320 |
|
}, |
|
{ |
|
"epoch": 0.02511770712684956, |
|
"grad_norm": 1.019918441772461, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2396, |
|
"step": 7330 |
|
}, |
|
{ |
|
"epoch": 0.02515197412156559, |
|
"grad_norm": 1.0327798128128052, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2118, |
|
"step": 7340 |
|
}, |
|
{ |
|
"epoch": 0.02518624111628162, |
|
"grad_norm": 0.9973874092102051, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2275, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 0.025220508110997648, |
|
"grad_norm": 1.093544840812683, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2214, |
|
"step": 7360 |
|
}, |
|
{ |
|
"epoch": 0.025254775105713678, |
|
"grad_norm": 1.118829369544983, |
|
"learning_rate": 1e-05, |
|
"loss": 0.237, |
|
"step": 7370 |
|
}, |
|
{ |
|
"epoch": 0.025289042100429707, |
|
"grad_norm": 1.2009224891662598, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2447, |
|
"step": 7380 |
|
}, |
|
{ |
|
"epoch": 0.025323309095145737, |
|
"grad_norm": 1.1427584886550903, |
|
"learning_rate": 1e-05, |
|
"loss": 0.234, |
|
"step": 7390 |
|
}, |
|
{ |
|
"epoch": 0.025357576089861767, |
|
"grad_norm": 0.9685842394828796, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2231, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.025391843084577797, |
|
"grad_norm": 1.165501356124878, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2139, |
|
"step": 7410 |
|
}, |
|
{ |
|
"epoch": 0.025426110079293827, |
|
"grad_norm": 1.4023411273956299, |
|
"learning_rate": 1e-05, |
|
"loss": 0.236, |
|
"step": 7420 |
|
}, |
|
{ |
|
"epoch": 0.025460377074009857, |
|
"grad_norm": 1.218546748161316, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2433, |
|
"step": 7430 |
|
}, |
|
{ |
|
"epoch": 0.025494644068725883, |
|
"grad_norm": 1.4930671453475952, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2466, |
|
"step": 7440 |
|
}, |
|
{ |
|
"epoch": 0.025528911063441913, |
|
"grad_norm": 1.145317554473877, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2535, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 0.025563178058157943, |
|
"grad_norm": 1.2366299629211426, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2606, |
|
"step": 7460 |
|
}, |
|
{ |
|
"epoch": 0.025597445052873972, |
|
"grad_norm": 1.0542744398117065, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2493, |
|
"step": 7470 |
|
}, |
|
{ |
|
"epoch": 0.025631712047590002, |
|
"grad_norm": 1.2272337675094604, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2537, |
|
"step": 7480 |
|
}, |
|
{ |
|
"epoch": 0.025665979042306032, |
|
"grad_norm": 1.169912576675415, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2581, |
|
"step": 7490 |
|
}, |
|
{ |
|
"epoch": 0.025700246037022062, |
|
"grad_norm": 1.1997913122177124, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2547, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.025700246037022062, |
|
"eval_cer": 12.859410830834362, |
|
"eval_loss": 0.2470153123140335, |
|
"eval_normalized_cer": 9.162669864108713, |
|
"eval_runtime": 227.7782, |
|
"eval_samples_per_second": 2.248, |
|
"eval_steps_per_second": 0.035, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.025734513031738092, |
|
"grad_norm": 1.0920944213867188, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2498, |
|
"step": 7510 |
|
}, |
|
{ |
|
"epoch": 0.025768780026454118, |
|
"grad_norm": 1.349660038948059, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2591, |
|
"step": 7520 |
|
}, |
|
{ |
|
"epoch": 0.025803047021170148, |
|
"grad_norm": 1.0097490549087524, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2514, |
|
"step": 7530 |
|
}, |
|
{ |
|
"epoch": 0.025837314015886178, |
|
"grad_norm": 1.118241548538208, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2603, |
|
"step": 7540 |
|
}, |
|
{ |
|
"epoch": 0.025871581010602208, |
|
"grad_norm": 1.078802466392517, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2532, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 0.025905848005318238, |
|
"grad_norm": 1.0794482231140137, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2521, |
|
"step": 7560 |
|
}, |
|
{ |
|
"epoch": 0.025940115000034267, |
|
"grad_norm": 1.130106806755066, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2574, |
|
"step": 7570 |
|
}, |
|
{ |
|
"epoch": 0.025974381994750297, |
|
"grad_norm": 1.112724781036377, |
|
"learning_rate": 1e-05, |
|
"loss": 0.253, |
|
"step": 7580 |
|
}, |
|
{ |
|
"epoch": 0.026008648989466327, |
|
"grad_norm": 1.2646088600158691, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2548, |
|
"step": 7590 |
|
}, |
|
{ |
|
"epoch": 0.026042915984182357, |
|
"grad_norm": 1.1961979866027832, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2548, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.026077182978898383, |
|
"grad_norm": 1.2568695545196533, |
|
"learning_rate": 1e-05, |
|
"loss": 0.245, |
|
"step": 7610 |
|
}, |
|
{ |
|
"epoch": 0.026111449973614413, |
|
"grad_norm": 1.0233054161071777, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2429, |
|
"step": 7620 |
|
}, |
|
{ |
|
"epoch": 0.026145716968330443, |
|
"grad_norm": 1.4355731010437012, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2623, |
|
"step": 7630 |
|
}, |
|
{ |
|
"epoch": 0.026179983963046473, |
|
"grad_norm": 0.9781149625778198, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2436, |
|
"step": 7640 |
|
}, |
|
{ |
|
"epoch": 0.026214250957762503, |
|
"grad_norm": 1.085255742073059, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2475, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 0.026248517952478533, |
|
"grad_norm": 1.0647081136703491, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2596, |
|
"step": 7660 |
|
}, |
|
{ |
|
"epoch": 0.026282784947194562, |
|
"grad_norm": 1.3411939144134521, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2444, |
|
"step": 7670 |
|
}, |
|
{ |
|
"epoch": 0.026317051941910592, |
|
"grad_norm": 1.0778676271438599, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2499, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 0.02635131893662662, |
|
"grad_norm": 1.1606541872024536, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2537, |
|
"step": 7690 |
|
}, |
|
{ |
|
"epoch": 0.02638558593134265, |
|
"grad_norm": 1.0706511735916138, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2324, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 0.02641985292605868, |
|
"grad_norm": 1.2074836492538452, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2487, |
|
"step": 7710 |
|
}, |
|
{ |
|
"epoch": 0.026454119920774708, |
|
"grad_norm": 1.0147804021835327, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2202, |
|
"step": 7720 |
|
}, |
|
{ |
|
"epoch": 0.026488386915490738, |
|
"grad_norm": 1.1806961297988892, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2464, |
|
"step": 7730 |
|
}, |
|
{ |
|
"epoch": 0.026522653910206768, |
|
"grad_norm": 1.1552751064300537, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2244, |
|
"step": 7740 |
|
}, |
|
{ |
|
"epoch": 0.026556920904922798, |
|
"grad_norm": 1.115871548652649, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2389, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 0.026591187899638827, |
|
"grad_norm": 1.0924640893936157, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2237, |
|
"step": 7760 |
|
}, |
|
{ |
|
"epoch": 0.026625454894354854, |
|
"grad_norm": 1.021644115447998, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2257, |
|
"step": 7770 |
|
}, |
|
{ |
|
"epoch": 0.026659721889070884, |
|
"grad_norm": 1.1757131814956665, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2278, |
|
"step": 7780 |
|
}, |
|
{ |
|
"epoch": 0.026693988883786914, |
|
"grad_norm": 1.1914074420928955, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2266, |
|
"step": 7790 |
|
}, |
|
{ |
|
"epoch": 0.026728255878502943, |
|
"grad_norm": 1.0416505336761475, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2273, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.026762522873218973, |
|
"grad_norm": 1.0241059064865112, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2342, |
|
"step": 7810 |
|
}, |
|
{ |
|
"epoch": 0.026796789867935003, |
|
"grad_norm": 1.133334994316101, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2303, |
|
"step": 7820 |
|
}, |
|
{ |
|
"epoch": 0.026831056862651033, |
|
"grad_norm": 1.1711792945861816, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2333, |
|
"step": 7830 |
|
}, |
|
{ |
|
"epoch": 0.026865323857367063, |
|
"grad_norm": 1.1120338439941406, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2474, |
|
"step": 7840 |
|
}, |
|
{ |
|
"epoch": 0.02689959085208309, |
|
"grad_norm": 1.1995311975479126, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2472, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 0.02693385784679912, |
|
"grad_norm": 1.1725718975067139, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2361, |
|
"step": 7860 |
|
}, |
|
{ |
|
"epoch": 0.02696812484151515, |
|
"grad_norm": 0.9564438462257385, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2266, |
|
"step": 7870 |
|
}, |
|
{ |
|
"epoch": 0.02700239183623118, |
|
"grad_norm": 1.140692114830017, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2319, |
|
"step": 7880 |
|
}, |
|
{ |
|
"epoch": 0.02703665883094721, |
|
"grad_norm": 1.0812654495239258, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2434, |
|
"step": 7890 |
|
}, |
|
{ |
|
"epoch": 0.02707092582566324, |
|
"grad_norm": 1.179500937461853, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2191, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 0.027105192820379268, |
|
"grad_norm": 1.1073647737503052, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2315, |
|
"step": 7910 |
|
}, |
|
{ |
|
"epoch": 0.027139459815095298, |
|
"grad_norm": 1.093070387840271, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2256, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 0.027173726809811324, |
|
"grad_norm": 1.2253212928771973, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2413, |
|
"step": 7930 |
|
}, |
|
{ |
|
"epoch": 0.027207993804527354, |
|
"grad_norm": 1.1531736850738525, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2514, |
|
"step": 7940 |
|
}, |
|
{ |
|
"epoch": 0.027242260799243384, |
|
"grad_norm": 1.0366076231002808, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2475, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 0.027276527793959414, |
|
"grad_norm": 1.1657369136810303, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2475, |
|
"step": 7960 |
|
}, |
|
{ |
|
"epoch": 0.027310794788675444, |
|
"grad_norm": 1.3050105571746826, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2704, |
|
"step": 7970 |
|
}, |
|
{ |
|
"epoch": 0.027345061783391474, |
|
"grad_norm": 1.1378298997879028, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2481, |
|
"step": 7980 |
|
}, |
|
{ |
|
"epoch": 0.027379328778107503, |
|
"grad_norm": 1.1434043645858765, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2671, |
|
"step": 7990 |
|
}, |
|
{ |
|
"epoch": 0.027413595772823533, |
|
"grad_norm": 1.0899518728256226, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2573, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.027413595772823533, |
|
"eval_cer": 12.903510319280295, |
|
"eval_loss": 0.2475583553314209, |
|
"eval_normalized_cer": 9.362509992006395, |
|
"eval_runtime": 228.4278, |
|
"eval_samples_per_second": 2.241, |
|
"eval_steps_per_second": 0.035, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.02744786276753956, |
|
"grad_norm": 0.970212459564209, |
|
"learning_rate": 1e-05, |
|
"loss": 0.238, |
|
"step": 8010 |
|
}, |
|
{ |
|
"epoch": 0.02748212976225559, |
|
"grad_norm": 1.0460257530212402, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2606, |
|
"step": 8020 |
|
}, |
|
{ |
|
"epoch": 0.02751639675697162, |
|
"grad_norm": 1.116742491722107, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2571, |
|
"step": 8030 |
|
}, |
|
{ |
|
"epoch": 0.02755066375168765, |
|
"grad_norm": 1.2562140226364136, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2561, |
|
"step": 8040 |
|
}, |
|
{ |
|
"epoch": 0.02758493074640368, |
|
"grad_norm": 1.2499713897705078, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2683, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 0.02761919774111971, |
|
"grad_norm": 1.151715874671936, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2463, |
|
"step": 8060 |
|
}, |
|
{ |
|
"epoch": 0.02765346473583574, |
|
"grad_norm": 1.2527892589569092, |
|
"learning_rate": 1e-05, |
|
"loss": 0.261, |
|
"step": 8070 |
|
}, |
|
{ |
|
"epoch": 0.02768773173055177, |
|
"grad_norm": 1.1776025295257568, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2616, |
|
"step": 8080 |
|
}, |
|
{ |
|
"epoch": 0.027721998725267795, |
|
"grad_norm": 1.1632285118103027, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2508, |
|
"step": 8090 |
|
}, |
|
{ |
|
"epoch": 0.027756265719983825, |
|
"grad_norm": 1.3266422748565674, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2667, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 0.027790532714699855, |
|
"grad_norm": 1.240424633026123, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2582, |
|
"step": 8110 |
|
}, |
|
{ |
|
"epoch": 0.027824799709415884, |
|
"grad_norm": 1.1874525547027588, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2505, |
|
"step": 8120 |
|
}, |
|
{ |
|
"epoch": 0.027859066704131914, |
|
"grad_norm": 1.0850279331207275, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2556, |
|
"step": 8130 |
|
}, |
|
{ |
|
"epoch": 0.027893333698847944, |
|
"grad_norm": 1.203342318534851, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2526, |
|
"step": 8140 |
|
}, |
|
{ |
|
"epoch": 0.027927600693563974, |
|
"grad_norm": 0.9685319066047668, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2614, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 0.027961867688280004, |
|
"grad_norm": 1.020749807357788, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2763, |
|
"step": 8160 |
|
}, |
|
{ |
|
"epoch": 0.02799613468299603, |
|
"grad_norm": 1.1530399322509766, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2544, |
|
"step": 8170 |
|
}, |
|
{ |
|
"epoch": 0.02803040167771206, |
|
"grad_norm": 1.0800687074661255, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2628, |
|
"step": 8180 |
|
}, |
|
{ |
|
"epoch": 0.02806466867242809, |
|
"grad_norm": 1.1825618743896484, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2524, |
|
"step": 8190 |
|
}, |
|
{ |
|
"epoch": 0.02809893566714412, |
|
"grad_norm": 1.176870346069336, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2401, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.02813320266186015, |
|
"grad_norm": 1.19720458984375, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2801, |
|
"step": 8210 |
|
}, |
|
{ |
|
"epoch": 0.02816746965657618, |
|
"grad_norm": 1.0634618997573853, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2607, |
|
"step": 8220 |
|
}, |
|
{ |
|
"epoch": 0.02820173665129221, |
|
"grad_norm": 1.1780894994735718, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2558, |
|
"step": 8230 |
|
}, |
|
{ |
|
"epoch": 0.02823600364600824, |
|
"grad_norm": 1.18949294090271, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2432, |
|
"step": 8240 |
|
}, |
|
{ |
|
"epoch": 0.02827027064072427, |
|
"grad_norm": 1.3350197076797485, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2644, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 0.028304537635440295, |
|
"grad_norm": 1.1507694721221924, |
|
"learning_rate": 1e-05, |
|
"loss": 0.254, |
|
"step": 8260 |
|
}, |
|
{ |
|
"epoch": 0.028338804630156325, |
|
"grad_norm": 1.0806615352630615, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2479, |
|
"step": 8270 |
|
}, |
|
{ |
|
"epoch": 0.028373071624872355, |
|
"grad_norm": 1.1201471090316772, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2553, |
|
"step": 8280 |
|
}, |
|
{ |
|
"epoch": 0.028407338619588385, |
|
"grad_norm": 1.0681666135787964, |
|
"learning_rate": 1e-05, |
|
"loss": 0.258, |
|
"step": 8290 |
|
}, |
|
{ |
|
"epoch": 0.028441605614304415, |
|
"grad_norm": 1.0958445072174072, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2502, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 0.028475872609020444, |
|
"grad_norm": 1.165635585784912, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2642, |
|
"step": 8310 |
|
}, |
|
{ |
|
"epoch": 0.028510139603736474, |
|
"grad_norm": 0.9674690961837769, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2385, |
|
"step": 8320 |
|
}, |
|
{ |
|
"epoch": 0.028544406598452504, |
|
"grad_norm": 1.239996314048767, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2706, |
|
"step": 8330 |
|
}, |
|
{ |
|
"epoch": 0.02857867359316853, |
|
"grad_norm": 1.0063962936401367, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2448, |
|
"step": 8340 |
|
}, |
|
{ |
|
"epoch": 0.02861294058788456, |
|
"grad_norm": 1.0466179847717285, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2452, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 0.02864720758260059, |
|
"grad_norm": Infinity, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2595, |
|
"step": 8360 |
|
}, |
|
{ |
|
"epoch": 0.02868147457731662, |
|
"grad_norm": 1.1461595296859741, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2515, |
|
"step": 8370 |
|
}, |
|
{ |
|
"epoch": 0.02871574157203265, |
|
"grad_norm": 1.2697845697402954, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2641, |
|
"step": 8380 |
|
}, |
|
{ |
|
"epoch": 0.02875000856674868, |
|
"grad_norm": 1.2665945291519165, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2613, |
|
"step": 8390 |
|
}, |
|
{ |
|
"epoch": 0.02878427556146471, |
|
"grad_norm": 1.1350281238555908, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2524, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.02881854255618074, |
|
"grad_norm": 1.0341808795928955, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2466, |
|
"step": 8410 |
|
}, |
|
{ |
|
"epoch": 0.028852809550896766, |
|
"grad_norm": 1.1108484268188477, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2471, |
|
"step": 8420 |
|
}, |
|
{ |
|
"epoch": 0.028887076545612796, |
|
"grad_norm": 1.059414029121399, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2695, |
|
"step": 8430 |
|
}, |
|
{ |
|
"epoch": 0.028921343540328825, |
|
"grad_norm": 1.0888679027557373, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2683, |
|
"step": 8440 |
|
}, |
|
{ |
|
"epoch": 0.028955610535044855, |
|
"grad_norm": 1.1649068593978882, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2485, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 0.028989877529760885, |
|
"grad_norm": 1.218563199043274, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2456, |
|
"step": 8460 |
|
}, |
|
{ |
|
"epoch": 0.029024144524476915, |
|
"grad_norm": 1.3558833599090576, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2517, |
|
"step": 8470 |
|
}, |
|
{ |
|
"epoch": 0.029058411519192945, |
|
"grad_norm": 1.2579597234725952, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2516, |
|
"step": 8480 |
|
}, |
|
{ |
|
"epoch": 0.029092678513908975, |
|
"grad_norm": 1.185253381729126, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2475, |
|
"step": 8490 |
|
}, |
|
{ |
|
"epoch": 0.029126945508625, |
|
"grad_norm": 1.1937752962112427, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2654, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.029126945508625, |
|
"eval_cer": 12.89469042159111, |
|
"eval_loss": 0.2503049969673157, |
|
"eval_normalized_cer": 8.952837729816148, |
|
"eval_runtime": 229.0216, |
|
"eval_samples_per_second": 2.236, |
|
"eval_steps_per_second": 0.035, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.02916121250334103, |
|
"grad_norm": 1.1005933284759521, |
|
"learning_rate": 1e-05, |
|
"loss": 0.247, |
|
"step": 8510 |
|
}, |
|
{ |
|
"epoch": 0.02919547949805706, |
|
"grad_norm": 1.0437865257263184, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2492, |
|
"step": 8520 |
|
}, |
|
{ |
|
"epoch": 0.02922974649277309, |
|
"grad_norm": 1.0478579998016357, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2508, |
|
"step": 8530 |
|
}, |
|
{ |
|
"epoch": 0.02926401348748912, |
|
"grad_norm": 1.0615030527114868, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2305, |
|
"step": 8540 |
|
}, |
|
{ |
|
"epoch": 0.02929828048220515, |
|
"grad_norm": 1.105209469795227, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2572, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 0.02933254747692118, |
|
"grad_norm": 1.139857530593872, |
|
"learning_rate": 1e-05, |
|
"loss": 0.237, |
|
"step": 8560 |
|
}, |
|
{ |
|
"epoch": 0.02936681447163721, |
|
"grad_norm": 1.0326822996139526, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2587, |
|
"step": 8570 |
|
}, |
|
{ |
|
"epoch": 0.029401081466353236, |
|
"grad_norm": 1.4446253776550293, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2488, |
|
"step": 8580 |
|
}, |
|
{ |
|
"epoch": 0.029435348461069266, |
|
"grad_norm": 1.070698857307434, |
|
"learning_rate": 1e-05, |
|
"loss": 0.236, |
|
"step": 8590 |
|
}, |
|
{ |
|
"epoch": 0.029469615455785296, |
|
"grad_norm": 1.119545817375183, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2406, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 0.029503882450501326, |
|
"grad_norm": 1.1146609783172607, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2429, |
|
"step": 8610 |
|
}, |
|
{ |
|
"epoch": 0.029538149445217356, |
|
"grad_norm": 1.107639193534851, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2412, |
|
"step": 8620 |
|
}, |
|
{ |
|
"epoch": 0.029572416439933386, |
|
"grad_norm": 1.0722100734710693, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2444, |
|
"step": 8630 |
|
}, |
|
{ |
|
"epoch": 0.029606683434649415, |
|
"grad_norm": 1.3313097953796387, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2482, |
|
"step": 8640 |
|
}, |
|
{ |
|
"epoch": 0.029640950429365445, |
|
"grad_norm": 1.13486647605896, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2397, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 0.02967521742408147, |
|
"grad_norm": 1.1610273122787476, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2594, |
|
"step": 8660 |
|
}, |
|
{ |
|
"epoch": 0.0297094844187975, |
|
"grad_norm": 1.3555855751037598, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2521, |
|
"step": 8670 |
|
}, |
|
{ |
|
"epoch": 0.02974375141351353, |
|
"grad_norm": 1.2834869623184204, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2638, |
|
"step": 8680 |
|
}, |
|
{ |
|
"epoch": 0.02977801840822956, |
|
"grad_norm": 1.1000789403915405, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2371, |
|
"step": 8690 |
|
}, |
|
{ |
|
"epoch": 0.02981228540294559, |
|
"grad_norm": 0.9901896119117737, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2535, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 0.02984655239766162, |
|
"grad_norm": 1.0514518022537231, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2443, |
|
"step": 8710 |
|
}, |
|
{ |
|
"epoch": 0.02988081939237765, |
|
"grad_norm": 1.1127166748046875, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2259, |
|
"step": 8720 |
|
}, |
|
{ |
|
"epoch": 0.02991508638709368, |
|
"grad_norm": 1.0674943923950195, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2309, |
|
"step": 8730 |
|
}, |
|
{ |
|
"epoch": 0.029949353381809707, |
|
"grad_norm": 1.0397884845733643, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2411, |
|
"step": 8740 |
|
}, |
|
{ |
|
"epoch": 0.029983620376525737, |
|
"grad_norm": 1.2052630186080933, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2294, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 0.030017887371241767, |
|
"grad_norm": 1.1350561380386353, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2264, |
|
"step": 8760 |
|
}, |
|
{ |
|
"epoch": 0.030052154365957796, |
|
"grad_norm": 1.1187571287155151, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2325, |
|
"step": 8770 |
|
}, |
|
{ |
|
"epoch": 0.030086421360673826, |
|
"grad_norm": 1.0860145092010498, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2328, |
|
"step": 8780 |
|
}, |
|
{ |
|
"epoch": 0.030120688355389856, |
|
"grad_norm": 1.1102906465530396, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2271, |
|
"step": 8790 |
|
}, |
|
{ |
|
"epoch": 0.030154955350105886, |
|
"grad_norm": 1.0239520072937012, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2489, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 0.030189222344821916, |
|
"grad_norm": 1.0980205535888672, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2314, |
|
"step": 8810 |
|
}, |
|
{ |
|
"epoch": 0.030223489339537942, |
|
"grad_norm": 1.1887843608856201, |
|
"learning_rate": 1e-05, |
|
"loss": 0.24, |
|
"step": 8820 |
|
}, |
|
{ |
|
"epoch": 0.030257756334253972, |
|
"grad_norm": 1.2101106643676758, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2353, |
|
"step": 8830 |
|
}, |
|
{ |
|
"epoch": 0.030292023328970002, |
|
"grad_norm": 1.1793345212936401, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2482, |
|
"step": 8840 |
|
}, |
|
{ |
|
"epoch": 0.03032629032368603, |
|
"grad_norm": 1.1983866691589355, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2441, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 0.03036055731840206, |
|
"grad_norm": 0.9888906478881836, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2271, |
|
"step": 8860 |
|
}, |
|
{ |
|
"epoch": 0.03039482431311809, |
|
"grad_norm": 1.217824101448059, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2298, |
|
"step": 8870 |
|
}, |
|
{ |
|
"epoch": 0.03042909130783412, |
|
"grad_norm": 0.9851268529891968, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2318, |
|
"step": 8880 |
|
}, |
|
{ |
|
"epoch": 0.03046335830255015, |
|
"grad_norm": 1.0329748392105103, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2654, |
|
"step": 8890 |
|
}, |
|
{ |
|
"epoch": 0.03049762529726618, |
|
"grad_norm": 1.067325234413147, |
|
"learning_rate": 1e-05, |
|
"loss": 0.23, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 0.030531892291982207, |
|
"grad_norm": 1.2636964321136475, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2303, |
|
"step": 8910 |
|
}, |
|
{ |
|
"epoch": 0.030566159286698237, |
|
"grad_norm": 1.1565788984298706, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2494, |
|
"step": 8920 |
|
}, |
|
{ |
|
"epoch": 0.030600426281414267, |
|
"grad_norm": 1.2197197675704956, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2338, |
|
"step": 8930 |
|
}, |
|
{ |
|
"epoch": 0.030634693276130297, |
|
"grad_norm": 1.1062088012695312, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2157, |
|
"step": 8940 |
|
}, |
|
{ |
|
"epoch": 0.030668960270846327, |
|
"grad_norm": 1.107677936553955, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2387, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 0.030703227265562356, |
|
"grad_norm": 1.1791733503341675, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2337, |
|
"step": 8960 |
|
}, |
|
{ |
|
"epoch": 0.030737494260278386, |
|
"grad_norm": 1.1337239742279053, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2127, |
|
"step": 8970 |
|
}, |
|
{ |
|
"epoch": 0.030771761254994416, |
|
"grad_norm": 1.0418322086334229, |
|
"learning_rate": 1e-05, |
|
"loss": 0.222, |
|
"step": 8980 |
|
}, |
|
{ |
|
"epoch": 0.030806028249710442, |
|
"grad_norm": 1.1591708660125732, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2388, |
|
"step": 8990 |
|
}, |
|
{ |
|
"epoch": 0.030840295244426472, |
|
"grad_norm": 1.0103886127471924, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2264, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.030840295244426472, |
|
"eval_cer": 13.273946022226143, |
|
"eval_loss": 0.2514854073524475, |
|
"eval_normalized_cer": 9.492406075139888, |
|
"eval_runtime": 227.5807, |
|
"eval_samples_per_second": 2.25, |
|
"eval_steps_per_second": 0.035, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.030874562239142502, |
|
"grad_norm": 1.15034019947052, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2215, |
|
"step": 9010 |
|
}, |
|
{ |
|
"epoch": 0.030908829233858532, |
|
"grad_norm": 1.183698296546936, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2235, |
|
"step": 9020 |
|
}, |
|
{ |
|
"epoch": 0.030943096228574562, |
|
"grad_norm": 1.1930736303329468, |
|
"learning_rate": 1e-05, |
|
"loss": 0.25, |
|
"step": 9030 |
|
}, |
|
{ |
|
"epoch": 0.03097736322329059, |
|
"grad_norm": 1.0650999546051025, |
|
"learning_rate": 1e-05, |
|
"loss": 0.224, |
|
"step": 9040 |
|
}, |
|
{ |
|
"epoch": 0.03101163021800662, |
|
"grad_norm": 1.0613574981689453, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2379, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 0.03104589721272265, |
|
"grad_norm": 1.3004292249679565, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2286, |
|
"step": 9060 |
|
}, |
|
{ |
|
"epoch": 0.031080164207438678, |
|
"grad_norm": 1.144573450088501, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2375, |
|
"step": 9070 |
|
}, |
|
{ |
|
"epoch": 0.031114431202154708, |
|
"grad_norm": 1.3552353382110596, |
|
"learning_rate": 1e-05, |
|
"loss": 0.216, |
|
"step": 9080 |
|
}, |
|
{ |
|
"epoch": 0.031148698196870737, |
|
"grad_norm": 1.139901041984558, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2377, |
|
"step": 9090 |
|
}, |
|
{ |
|
"epoch": 0.031182965191586767, |
|
"grad_norm": 1.179685354232788, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2389, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 0.031217232186302797, |
|
"grad_norm": 1.1494848728179932, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2275, |
|
"step": 9110 |
|
}, |
|
{ |
|
"epoch": 0.03125149918101883, |
|
"grad_norm": 1.0726871490478516, |
|
"learning_rate": 1e-05, |
|
"loss": 0.211, |
|
"step": 9120 |
|
}, |
|
{ |
|
"epoch": 0.03128576617573486, |
|
"grad_norm": 1.254655361175537, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2332, |
|
"step": 9130 |
|
}, |
|
{ |
|
"epoch": 0.03132003317045089, |
|
"grad_norm": 1.1774569749832153, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2441, |
|
"step": 9140 |
|
}, |
|
{ |
|
"epoch": 0.031354300165166916, |
|
"grad_norm": 1.0810356140136719, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2354, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 0.031388567159882946, |
|
"grad_norm": 1.0976495742797852, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2347, |
|
"step": 9160 |
|
}, |
|
{ |
|
"epoch": 0.031422834154598976, |
|
"grad_norm": 1.2417839765548706, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2261, |
|
"step": 9170 |
|
}, |
|
{ |
|
"epoch": 0.031457101149315006, |
|
"grad_norm": 1.0022953748703003, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2277, |
|
"step": 9180 |
|
}, |
|
{ |
|
"epoch": 0.03149136814403103, |
|
"grad_norm": 1.1461567878723145, |
|
"learning_rate": 1e-05, |
|
"loss": 0.218, |
|
"step": 9190 |
|
}, |
|
{ |
|
"epoch": 0.03152563513874706, |
|
"grad_norm": 1.1877334117889404, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2195, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 0.03155990213346309, |
|
"grad_norm": 1.1513786315917969, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2354, |
|
"step": 9210 |
|
}, |
|
{ |
|
"epoch": 0.03159416912817912, |
|
"grad_norm": 1.057938814163208, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2428, |
|
"step": 9220 |
|
}, |
|
{ |
|
"epoch": 0.03162843612289515, |
|
"grad_norm": 1.0752719640731812, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2499, |
|
"step": 9230 |
|
}, |
|
{ |
|
"epoch": 0.03166270311761118, |
|
"grad_norm": 1.1784312725067139, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2556, |
|
"step": 9240 |
|
}, |
|
{ |
|
"epoch": 0.03169697011232721, |
|
"grad_norm": 1.0176231861114502, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2552, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 0.03173123710704324, |
|
"grad_norm": 1.0849392414093018, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2483, |
|
"step": 9260 |
|
}, |
|
{ |
|
"epoch": 0.03176550410175927, |
|
"grad_norm": 1.0042351484298706, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2472, |
|
"step": 9270 |
|
}, |
|
{ |
|
"epoch": 0.0317997710964753, |
|
"grad_norm": 1.0480408668518066, |
|
"learning_rate": 1e-05, |
|
"loss": 0.238, |
|
"step": 9280 |
|
}, |
|
{ |
|
"epoch": 0.03183403809119133, |
|
"grad_norm": 1.0602933168411255, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2657, |
|
"step": 9290 |
|
}, |
|
{ |
|
"epoch": 0.03186830508590736, |
|
"grad_norm": 1.1704037189483643, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2634, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 0.03190257208062339, |
|
"grad_norm": 1.2454304695129395, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2648, |
|
"step": 9310 |
|
}, |
|
{ |
|
"epoch": 0.03193683907533942, |
|
"grad_norm": 1.0540211200714111, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2462, |
|
"step": 9320 |
|
}, |
|
{ |
|
"epoch": 0.03197110607005545, |
|
"grad_norm": 1.1440715789794922, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2609, |
|
"step": 9330 |
|
}, |
|
{ |
|
"epoch": 0.03200537306477148, |
|
"grad_norm": 1.0083932876586914, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2422, |
|
"step": 9340 |
|
}, |
|
{ |
|
"epoch": 0.0320396400594875, |
|
"grad_norm": 1.0180490016937256, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2402, |
|
"step": 9350 |
|
}, |
|
{ |
|
"epoch": 0.03207390705420353, |
|
"grad_norm": 1.1158274412155151, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2378, |
|
"step": 9360 |
|
}, |
|
{ |
|
"epoch": 0.03210817404891956, |
|
"grad_norm": 1.2014826536178589, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2433, |
|
"step": 9370 |
|
}, |
|
{ |
|
"epoch": 0.03214244104363559, |
|
"grad_norm": 1.1604617834091187, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2396, |
|
"step": 9380 |
|
}, |
|
{ |
|
"epoch": 0.03217670803835162, |
|
"grad_norm": 1.1088517904281616, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2364, |
|
"step": 9390 |
|
}, |
|
{ |
|
"epoch": 0.03221097503306765, |
|
"grad_norm": 1.1905596256256104, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2335, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 0.03224524202778368, |
|
"grad_norm": 1.2237385511398315, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2522, |
|
"step": 9410 |
|
}, |
|
{ |
|
"epoch": 0.03227950902249971, |
|
"grad_norm": 1.0582191944122314, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2467, |
|
"step": 9420 |
|
}, |
|
{ |
|
"epoch": 0.03231377601721574, |
|
"grad_norm": 1.2065699100494385, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2359, |
|
"step": 9430 |
|
}, |
|
{ |
|
"epoch": 0.03234804301193177, |
|
"grad_norm": 1.1399251222610474, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2507, |
|
"step": 9440 |
|
}, |
|
{ |
|
"epoch": 0.0323823100066478, |
|
"grad_norm": 1.070038914680481, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2732, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 0.03241657700136383, |
|
"grad_norm": 1.0940920114517212, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2609, |
|
"step": 9460 |
|
}, |
|
{ |
|
"epoch": 0.03245084399607986, |
|
"grad_norm": 1.2042659521102905, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2511, |
|
"step": 9470 |
|
}, |
|
{ |
|
"epoch": 0.03248511099079589, |
|
"grad_norm": 1.0865731239318848, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2693, |
|
"step": 9480 |
|
}, |
|
{ |
|
"epoch": 0.03251937798551192, |
|
"grad_norm": 1.0884779691696167, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2303, |
|
"step": 9490 |
|
}, |
|
{ |
|
"epoch": 0.03255364498022795, |
|
"grad_norm": 1.0557243824005127, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2611, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.03255364498022795, |
|
"eval_cer": 13.300405715293703, |
|
"eval_loss": 0.2491595298051834, |
|
"eval_normalized_cer": 9.722222222222223, |
|
"eval_runtime": 227.5398, |
|
"eval_samples_per_second": 2.25, |
|
"eval_steps_per_second": 0.035, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.03258791197494398, |
|
"grad_norm": 1.1241854429244995, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2531, |
|
"step": 9510 |
|
}, |
|
{ |
|
"epoch": 0.03262217896966, |
|
"grad_norm": 1.091977834701538, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2437, |
|
"step": 9520 |
|
}, |
|
{ |
|
"epoch": 0.03265644596437603, |
|
"grad_norm": 1.0969996452331543, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2595, |
|
"step": 9530 |
|
}, |
|
{ |
|
"epoch": 0.03269071295909206, |
|
"grad_norm": 1.1597386598587036, |
|
"learning_rate": 1e-05, |
|
"loss": 0.246, |
|
"step": 9540 |
|
}, |
|
{ |
|
"epoch": 0.03272497995380809, |
|
"grad_norm": 1.0741667747497559, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2435, |
|
"step": 9550 |
|
}, |
|
{ |
|
"epoch": 0.03275924694852412, |
|
"grad_norm": 1.1600459814071655, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2476, |
|
"step": 9560 |
|
}, |
|
{ |
|
"epoch": 0.03279351394324015, |
|
"grad_norm": 1.0636577606201172, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2323, |
|
"step": 9570 |
|
}, |
|
{ |
|
"epoch": 0.03282778093795618, |
|
"grad_norm": 1.0010998249053955, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2436, |
|
"step": 9580 |
|
}, |
|
{ |
|
"epoch": 0.03286204793267221, |
|
"grad_norm": 1.1286782026290894, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2585, |
|
"step": 9590 |
|
}, |
|
{ |
|
"epoch": 0.03289631492738824, |
|
"grad_norm": 1.1575970649719238, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2529, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 0.03293058192210427, |
|
"grad_norm": 1.2235829830169678, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2617, |
|
"step": 9610 |
|
}, |
|
{ |
|
"epoch": 0.0329648489168203, |
|
"grad_norm": 1.203371286392212, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2299, |
|
"step": 9620 |
|
}, |
|
{ |
|
"epoch": 0.03299911591153633, |
|
"grad_norm": 1.2438814640045166, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2544, |
|
"step": 9630 |
|
}, |
|
{ |
|
"epoch": 0.03303338290625236, |
|
"grad_norm": 1.2102582454681396, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2415, |
|
"step": 9640 |
|
}, |
|
{ |
|
"epoch": 0.03306764990096839, |
|
"grad_norm": 0.9984006285667419, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2633, |
|
"step": 9650 |
|
}, |
|
{ |
|
"epoch": 0.03310191689568442, |
|
"grad_norm": 1.0693376064300537, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2394, |
|
"step": 9660 |
|
}, |
|
{ |
|
"epoch": 0.03313618389040045, |
|
"grad_norm": 1.2770649194717407, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2577, |
|
"step": 9670 |
|
}, |
|
{ |
|
"epoch": 0.03317045088511647, |
|
"grad_norm": 1.3751499652862549, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2595, |
|
"step": 9680 |
|
}, |
|
{ |
|
"epoch": 0.0332047178798325, |
|
"grad_norm": 1.247056484222412, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2526, |
|
"step": 9690 |
|
}, |
|
{ |
|
"epoch": 0.03323898487454853, |
|
"grad_norm": 1.2248870134353638, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2609, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 0.03327325186926456, |
|
"grad_norm": 1.0467997789382935, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2353, |
|
"step": 9710 |
|
}, |
|
{ |
|
"epoch": 0.03330751886398059, |
|
"grad_norm": 1.2705328464508057, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2483, |
|
"step": 9720 |
|
}, |
|
{ |
|
"epoch": 0.03334178585869662, |
|
"grad_norm": 1.0360983610153198, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2497, |
|
"step": 9730 |
|
}, |
|
{ |
|
"epoch": 0.03337605285341265, |
|
"grad_norm": 1.1409697532653809, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2506, |
|
"step": 9740 |
|
}, |
|
{ |
|
"epoch": 0.03341031984812868, |
|
"grad_norm": 1.0656344890594482, |
|
"learning_rate": 1e-05, |
|
"loss": 0.252, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 0.03344458684284471, |
|
"grad_norm": 1.1420108079910278, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2383, |
|
"step": 9760 |
|
}, |
|
{ |
|
"epoch": 0.03347885383756074, |
|
"grad_norm": 1.0792863368988037, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2461, |
|
"step": 9770 |
|
}, |
|
{ |
|
"epoch": 0.03351312083227677, |
|
"grad_norm": 1.1297261714935303, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2501, |
|
"step": 9780 |
|
}, |
|
{ |
|
"epoch": 0.0335473878269928, |
|
"grad_norm": 1.2627495527267456, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2452, |
|
"step": 9790 |
|
}, |
|
{ |
|
"epoch": 0.03358165482170883, |
|
"grad_norm": 0.982812762260437, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2443, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 0.03361592181642486, |
|
"grad_norm": 1.048766016960144, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2531, |
|
"step": 9810 |
|
}, |
|
{ |
|
"epoch": 0.03365018881114089, |
|
"grad_norm": 1.141780972480774, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2271, |
|
"step": 9820 |
|
}, |
|
{ |
|
"epoch": 0.03368445580585692, |
|
"grad_norm": 1.101762056350708, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2338, |
|
"step": 9830 |
|
}, |
|
{ |
|
"epoch": 0.03371872280057294, |
|
"grad_norm": 1.0150196552276611, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2404, |
|
"step": 9840 |
|
}, |
|
{ |
|
"epoch": 0.03375298979528897, |
|
"grad_norm": 1.1550086736679077, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2479, |
|
"step": 9850 |
|
}, |
|
{ |
|
"epoch": 0.033787256790005, |
|
"grad_norm": 1.1246519088745117, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2525, |
|
"step": 9860 |
|
}, |
|
{ |
|
"epoch": 0.03382152378472103, |
|
"grad_norm": 0.9673643708229065, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2453, |
|
"step": 9870 |
|
}, |
|
{ |
|
"epoch": 0.03385579077943706, |
|
"grad_norm": 1.019649863243103, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2407, |
|
"step": 9880 |
|
}, |
|
{ |
|
"epoch": 0.03389005777415309, |
|
"grad_norm": 1.1477577686309814, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2374, |
|
"step": 9890 |
|
}, |
|
{ |
|
"epoch": 0.03392432476886912, |
|
"grad_norm": 0.9760174751281738, |
|
"learning_rate": 1e-05, |
|
"loss": 0.248, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 0.03395859176358515, |
|
"grad_norm": 1.1654585599899292, |
|
"learning_rate": 1e-05, |
|
"loss": 0.253, |
|
"step": 9910 |
|
}, |
|
{ |
|
"epoch": 0.03399285875830118, |
|
"grad_norm": 1.0818895101547241, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2397, |
|
"step": 9920 |
|
}, |
|
{ |
|
"epoch": 0.03402712575301721, |
|
"grad_norm": 1.1635690927505493, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2539, |
|
"step": 9930 |
|
}, |
|
{ |
|
"epoch": 0.03406139274773324, |
|
"grad_norm": 1.0819408893585205, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2352, |
|
"step": 9940 |
|
}, |
|
{ |
|
"epoch": 0.03409565974244927, |
|
"grad_norm": 1.0151749849319458, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2378, |
|
"step": 9950 |
|
}, |
|
{ |
|
"epoch": 0.0341299267371653, |
|
"grad_norm": 1.092203140258789, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2406, |
|
"step": 9960 |
|
}, |
|
{ |
|
"epoch": 0.03416419373188133, |
|
"grad_norm": 1.1004047393798828, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2375, |
|
"step": 9970 |
|
}, |
|
{ |
|
"epoch": 0.03419846072659736, |
|
"grad_norm": 1.2596560716629028, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2601, |
|
"step": 9980 |
|
}, |
|
{ |
|
"epoch": 0.03423272772131339, |
|
"grad_norm": 1.0936402082443237, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2451, |
|
"step": 9990 |
|
}, |
|
{ |
|
"epoch": 0.03426699471602941, |
|
"grad_norm": 1.1660488843917847, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2523, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.03426699471602941, |
|
"eval_cer": 12.656553183983066, |
|
"eval_loss": 0.24734708666801453, |
|
"eval_normalized_cer": 8.912869704236611, |
|
"eval_runtime": 226.9924, |
|
"eval_samples_per_second": 2.256, |
|
"eval_steps_per_second": 0.035, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.03430126171074544, |
|
"grad_norm": 1.0807442665100098, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2583, |
|
"step": 10010 |
|
}, |
|
{ |
|
"epoch": 0.03433552870546147, |
|
"grad_norm": 1.17780339717865, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2685, |
|
"step": 10020 |
|
}, |
|
{ |
|
"epoch": 0.0343697957001775, |
|
"grad_norm": 0.98011314868927, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2445, |
|
"step": 10030 |
|
}, |
|
{ |
|
"epoch": 0.03440406269489353, |
|
"grad_norm": 1.3025845289230347, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2526, |
|
"step": 10040 |
|
}, |
|
{ |
|
"epoch": 0.03443832968960956, |
|
"grad_norm": 1.4656189680099487, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2618, |
|
"step": 10050 |
|
}, |
|
{ |
|
"epoch": 0.03447259668432559, |
|
"grad_norm": 1.2372117042541504, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2545, |
|
"step": 10060 |
|
}, |
|
{ |
|
"epoch": 0.03450686367904162, |
|
"grad_norm": 1.1028844118118286, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2511, |
|
"step": 10070 |
|
}, |
|
{ |
|
"epoch": 0.03454113067375765, |
|
"grad_norm": 1.2402809858322144, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2507, |
|
"step": 10080 |
|
}, |
|
{ |
|
"epoch": 0.03457539766847368, |
|
"grad_norm": 1.1127125024795532, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2634, |
|
"step": 10090 |
|
}, |
|
{ |
|
"epoch": 0.03460966466318971, |
|
"grad_norm": 1.1486737728118896, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2495, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 0.03464393165790574, |
|
"grad_norm": 1.0792872905731201, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2347, |
|
"step": 10110 |
|
}, |
|
{ |
|
"epoch": 0.03467819865262177, |
|
"grad_norm": 1.0346540212631226, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2468, |
|
"step": 10120 |
|
}, |
|
{ |
|
"epoch": 0.0347124656473378, |
|
"grad_norm": 1.165614128112793, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2419, |
|
"step": 10130 |
|
}, |
|
{ |
|
"epoch": 0.03474673264205383, |
|
"grad_norm": 1.1619865894317627, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2428, |
|
"step": 10140 |
|
}, |
|
{ |
|
"epoch": 0.03478099963676986, |
|
"grad_norm": 1.2419089078903198, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2652, |
|
"step": 10150 |
|
}, |
|
{ |
|
"epoch": 0.03481526663148589, |
|
"grad_norm": 1.0978246927261353, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2396, |
|
"step": 10160 |
|
}, |
|
{ |
|
"epoch": 0.03484953362620191, |
|
"grad_norm": 0.9916633367538452, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2603, |
|
"step": 10170 |
|
}, |
|
{ |
|
"epoch": 0.03488380062091794, |
|
"grad_norm": 1.026318907737732, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2564, |
|
"step": 10180 |
|
}, |
|
{ |
|
"epoch": 0.03491806761563397, |
|
"grad_norm": 1.1151725053787231, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2587, |
|
"step": 10190 |
|
}, |
|
{ |
|
"epoch": 0.03495233461035, |
|
"grad_norm": 1.1420174837112427, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2695, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 0.03498660160506603, |
|
"grad_norm": 1.0311987400054932, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2574, |
|
"step": 10210 |
|
}, |
|
{ |
|
"epoch": 0.03502086859978206, |
|
"grad_norm": 0.9759404063224792, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2398, |
|
"step": 10220 |
|
}, |
|
{ |
|
"epoch": 0.03505513559449809, |
|
"grad_norm": 1.5086033344268799, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2589, |
|
"step": 10230 |
|
}, |
|
{ |
|
"epoch": 0.03508940258921412, |
|
"grad_norm": 1.167893409729004, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2536, |
|
"step": 10240 |
|
}, |
|
{ |
|
"epoch": 0.03512366958393015, |
|
"grad_norm": 1.0426411628723145, |
|
"learning_rate": 1e-05, |
|
"loss": 0.237, |
|
"step": 10250 |
|
}, |
|
{ |
|
"epoch": 0.03515793657864618, |
|
"grad_norm": 1.4766713380813599, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2363, |
|
"step": 10260 |
|
}, |
|
{ |
|
"epoch": 0.03519220357336221, |
|
"grad_norm": 1.019641399383545, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2281, |
|
"step": 10270 |
|
}, |
|
{ |
|
"epoch": 0.03522647056807824, |
|
"grad_norm": 1.1627217531204224, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2524, |
|
"step": 10280 |
|
}, |
|
{ |
|
"epoch": 0.03526073756279427, |
|
"grad_norm": 1.165414571762085, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2437, |
|
"step": 10290 |
|
}, |
|
{ |
|
"epoch": 0.0352950045575103, |
|
"grad_norm": 1.1816645860671997, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2471, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 0.03532927155222633, |
|
"grad_norm": 1.0116764307022095, |
|
"learning_rate": 1e-05, |
|
"loss": 0.226, |
|
"step": 10310 |
|
}, |
|
{ |
|
"epoch": 0.03536353854694236, |
|
"grad_norm": 1.2257869243621826, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2441, |
|
"step": 10320 |
|
}, |
|
{ |
|
"epoch": 0.03539780554165838, |
|
"grad_norm": 1.1971989870071411, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2268, |
|
"step": 10330 |
|
}, |
|
{ |
|
"epoch": 0.03543207253637441, |
|
"grad_norm": 1.057354211807251, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2142, |
|
"step": 10340 |
|
}, |
|
{ |
|
"epoch": 0.03546633953109044, |
|
"grad_norm": 1.2593644857406616, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2149, |
|
"step": 10350 |
|
}, |
|
{ |
|
"epoch": 0.03550060652580647, |
|
"grad_norm": 1.0903648138046265, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2494, |
|
"step": 10360 |
|
}, |
|
{ |
|
"epoch": 0.0355348735205225, |
|
"grad_norm": 1.2079240083694458, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2273, |
|
"step": 10370 |
|
}, |
|
{ |
|
"epoch": 0.03556914051523853, |
|
"grad_norm": 1.206696629524231, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2142, |
|
"step": 10380 |
|
}, |
|
{ |
|
"epoch": 0.03560340750995456, |
|
"grad_norm": 1.2176302671432495, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2277, |
|
"step": 10390 |
|
}, |
|
{ |
|
"epoch": 0.03563767450467059, |
|
"grad_norm": 0.9780252575874329, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2089, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 0.03567194149938662, |
|
"grad_norm": 1.1160544157028198, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2268, |
|
"step": 10410 |
|
}, |
|
{ |
|
"epoch": 0.03570620849410265, |
|
"grad_norm": 1.1231842041015625, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2385, |
|
"step": 10420 |
|
}, |
|
{ |
|
"epoch": 0.03574047548881868, |
|
"grad_norm": 1.142675518989563, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2386, |
|
"step": 10430 |
|
}, |
|
{ |
|
"epoch": 0.03577474248353471, |
|
"grad_norm": 1.1563806533813477, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2217, |
|
"step": 10440 |
|
}, |
|
{ |
|
"epoch": 0.03580900947825074, |
|
"grad_norm": 1.0655251741409302, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2207, |
|
"step": 10450 |
|
}, |
|
{ |
|
"epoch": 0.03584327647296677, |
|
"grad_norm": 1.1469309329986572, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2211, |
|
"step": 10460 |
|
}, |
|
{ |
|
"epoch": 0.0358775434676828, |
|
"grad_norm": 1.182896375656128, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2316, |
|
"step": 10470 |
|
}, |
|
{ |
|
"epoch": 0.03591181046239883, |
|
"grad_norm": 1.018953800201416, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2308, |
|
"step": 10480 |
|
}, |
|
{ |
|
"epoch": 0.03594607745711485, |
|
"grad_norm": 1.1578072309494019, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2272, |
|
"step": 10490 |
|
}, |
|
{ |
|
"epoch": 0.03598034445183088, |
|
"grad_norm": 1.1483505964279175, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2469, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.03598034445183088, |
|
"eval_cer": 13.026988886928912, |
|
"eval_loss": 0.24956555664539337, |
|
"eval_normalized_cer": 9.442446043165468, |
|
"eval_runtime": 227.6026, |
|
"eval_samples_per_second": 2.25, |
|
"eval_steps_per_second": 0.035, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.03601461144654691, |
|
"grad_norm": 1.1269420385360718, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2482, |
|
"step": 10510 |
|
}, |
|
{ |
|
"epoch": 0.03604887844126294, |
|
"grad_norm": 1.0399614572525024, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2473, |
|
"step": 10520 |
|
}, |
|
{ |
|
"epoch": 0.03608314543597897, |
|
"grad_norm": 1.11722731590271, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2566, |
|
"step": 10530 |
|
}, |
|
{ |
|
"epoch": 0.036117412430695, |
|
"grad_norm": 1.0773251056671143, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2479, |
|
"step": 10540 |
|
}, |
|
{ |
|
"epoch": 0.03615167942541103, |
|
"grad_norm": 1.0123059749603271, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2527, |
|
"step": 10550 |
|
}, |
|
{ |
|
"epoch": 0.03618594642012706, |
|
"grad_norm": 1.14670991897583, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2474, |
|
"step": 10560 |
|
}, |
|
{ |
|
"epoch": 0.03622021341484309, |
|
"grad_norm": 1.021543025970459, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2314, |
|
"step": 10570 |
|
}, |
|
{ |
|
"epoch": 0.03625448040955912, |
|
"grad_norm": 1.1485329866409302, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2603, |
|
"step": 10580 |
|
}, |
|
{ |
|
"epoch": 0.03628874740427515, |
|
"grad_norm": 1.239241600036621, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2479, |
|
"step": 10590 |
|
}, |
|
{ |
|
"epoch": 0.03632301439899118, |
|
"grad_norm": 1.2692322731018066, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2411, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 0.03635728139370721, |
|
"grad_norm": 1.1440175771713257, |
|
"learning_rate": 1e-05, |
|
"loss": 0.256, |
|
"step": 10610 |
|
}, |
|
{ |
|
"epoch": 0.03639154838842324, |
|
"grad_norm": 1.0153664350509644, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2468, |
|
"step": 10620 |
|
}, |
|
{ |
|
"epoch": 0.03642581538313927, |
|
"grad_norm": 1.0046017169952393, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2463, |
|
"step": 10630 |
|
}, |
|
{ |
|
"epoch": 0.0364600823778553, |
|
"grad_norm": 1.023366928100586, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2499, |
|
"step": 10640 |
|
}, |
|
{ |
|
"epoch": 0.03649434937257132, |
|
"grad_norm": 1.1663336753845215, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2476, |
|
"step": 10650 |
|
}, |
|
{ |
|
"epoch": 0.03652861636728735, |
|
"grad_norm": 1.2302120923995972, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2469, |
|
"step": 10660 |
|
}, |
|
{ |
|
"epoch": 0.03656288336200338, |
|
"grad_norm": 1.0369274616241455, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2468, |
|
"step": 10670 |
|
}, |
|
{ |
|
"epoch": 0.03659715035671941, |
|
"grad_norm": 1.3913209438323975, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2499, |
|
"step": 10680 |
|
}, |
|
{ |
|
"epoch": 0.03663141735143544, |
|
"grad_norm": 1.0193636417388916, |
|
"learning_rate": 1e-05, |
|
"loss": 0.25, |
|
"step": 10690 |
|
}, |
|
{ |
|
"epoch": 0.03666568434615147, |
|
"grad_norm": 1.2572286128997803, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2441, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 0.0366999513408675, |
|
"grad_norm": 1.1679438352584839, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2462, |
|
"step": 10710 |
|
}, |
|
{ |
|
"epoch": 0.03673421833558353, |
|
"grad_norm": 1.1959030628204346, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2641, |
|
"step": 10720 |
|
}, |
|
{ |
|
"epoch": 0.03676848533029956, |
|
"grad_norm": 1.1327241659164429, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2538, |
|
"step": 10730 |
|
}, |
|
{ |
|
"epoch": 0.03680275232501559, |
|
"grad_norm": 1.0999104976654053, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2492, |
|
"step": 10740 |
|
}, |
|
{ |
|
"epoch": 0.03683701931973162, |
|
"grad_norm": 1.1578527688980103, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2506, |
|
"step": 10750 |
|
}, |
|
{ |
|
"epoch": 0.03687128631444765, |
|
"grad_norm": 1.243034839630127, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2404, |
|
"step": 10760 |
|
}, |
|
{ |
|
"epoch": 0.03690555330916368, |
|
"grad_norm": 1.1157968044281006, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2597, |
|
"step": 10770 |
|
}, |
|
{ |
|
"epoch": 0.03693982030387971, |
|
"grad_norm": 1.0121145248413086, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2457, |
|
"step": 10780 |
|
}, |
|
{ |
|
"epoch": 0.03697408729859574, |
|
"grad_norm": 1.1467009782791138, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2714, |
|
"step": 10790 |
|
}, |
|
{ |
|
"epoch": 0.03700835429331177, |
|
"grad_norm": 1.1445188522338867, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2652, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 0.0370426212880278, |
|
"grad_norm": 0.9861304759979248, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2408, |
|
"step": 10810 |
|
}, |
|
{ |
|
"epoch": 0.037076888282743824, |
|
"grad_norm": 1.1524399518966675, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2645, |
|
"step": 10820 |
|
}, |
|
{ |
|
"epoch": 0.037111155277459854, |
|
"grad_norm": 1.4263722896575928, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2364, |
|
"step": 10830 |
|
}, |
|
{ |
|
"epoch": 0.03714542227217588, |
|
"grad_norm": 1.057739019393921, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2526, |
|
"step": 10840 |
|
}, |
|
{ |
|
"epoch": 0.03717968926689191, |
|
"grad_norm": 1.232234239578247, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2605, |
|
"step": 10850 |
|
}, |
|
{ |
|
"epoch": 0.03721395626160794, |
|
"grad_norm": 1.2277271747589111, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2646, |
|
"step": 10860 |
|
}, |
|
{ |
|
"epoch": 0.03724822325632397, |
|
"grad_norm": 1.1447609663009644, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2508, |
|
"step": 10870 |
|
}, |
|
{ |
|
"epoch": 0.03728249025104, |
|
"grad_norm": 1.5821764469146729, |
|
"learning_rate": 1e-05, |
|
"loss": 0.259, |
|
"step": 10880 |
|
}, |
|
{ |
|
"epoch": 0.03731675724575603, |
|
"grad_norm": 1.0948309898376465, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2502, |
|
"step": 10890 |
|
}, |
|
{ |
|
"epoch": 0.03735102424047206, |
|
"grad_norm": 1.0639638900756836, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2796, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 0.03738529123518809, |
|
"grad_norm": 1.0613322257995605, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2606, |
|
"step": 10910 |
|
}, |
|
{ |
|
"epoch": 0.03741955822990412, |
|
"grad_norm": 1.1825395822525024, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2382, |
|
"step": 10920 |
|
}, |
|
{ |
|
"epoch": 0.03745382522462015, |
|
"grad_norm": 1.1198760271072388, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2601, |
|
"step": 10930 |
|
}, |
|
{ |
|
"epoch": 0.03748809221933618, |
|
"grad_norm": 0.9922842383384705, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2567, |
|
"step": 10940 |
|
}, |
|
{ |
|
"epoch": 0.03752235921405221, |
|
"grad_norm": 1.0075231790542603, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2373, |
|
"step": 10950 |
|
}, |
|
{ |
|
"epoch": 0.03755662620876824, |
|
"grad_norm": 1.2739824056625366, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2571, |
|
"step": 10960 |
|
}, |
|
{ |
|
"epoch": 0.03759089320348427, |
|
"grad_norm": 1.049249529838562, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2505, |
|
"step": 10970 |
|
}, |
|
{ |
|
"epoch": 0.037625160198200294, |
|
"grad_norm": 1.0130621194839478, |
|
"learning_rate": 1e-05, |
|
"loss": 0.246, |
|
"step": 10980 |
|
}, |
|
{ |
|
"epoch": 0.037659427192916324, |
|
"grad_norm": 1.143740177154541, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2584, |
|
"step": 10990 |
|
}, |
|
{ |
|
"epoch": 0.037693694187632354, |
|
"grad_norm": 1.1482734680175781, |
|
"learning_rate": 1e-05, |
|
"loss": 0.245, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.037693694187632354, |
|
"eval_cer": 13.150467454577527, |
|
"eval_loss": 0.24854739010334015, |
|
"eval_normalized_cer": 9.492406075139888, |
|
"eval_runtime": 227.9, |
|
"eval_samples_per_second": 2.247, |
|
"eval_steps_per_second": 0.035, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.037727961182348384, |
|
"grad_norm": 1.175059199333191, |
|
"learning_rate": 1e-05, |
|
"loss": 0.235, |
|
"step": 11010 |
|
}, |
|
{ |
|
"epoch": 0.037762228177064414, |
|
"grad_norm": 1.06391441822052, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2548, |
|
"step": 11020 |
|
}, |
|
{ |
|
"epoch": 0.03779649517178044, |
|
"grad_norm": 1.1072652339935303, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2563, |
|
"step": 11030 |
|
}, |
|
{ |
|
"epoch": 0.03783076216649647, |
|
"grad_norm": 1.0364381074905396, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2353, |
|
"step": 11040 |
|
}, |
|
{ |
|
"epoch": 0.0378650291612125, |
|
"grad_norm": 1.120888113975525, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2564, |
|
"step": 11050 |
|
}, |
|
{ |
|
"epoch": 0.03789929615592853, |
|
"grad_norm": 1.1769522428512573, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2485, |
|
"step": 11060 |
|
}, |
|
{ |
|
"epoch": 0.03793356315064456, |
|
"grad_norm": 1.103209137916565, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2579, |
|
"step": 11070 |
|
}, |
|
{ |
|
"epoch": 0.03796783014536059, |
|
"grad_norm": 1.191256046295166, |
|
"learning_rate": 1e-05, |
|
"loss": 0.258, |
|
"step": 11080 |
|
}, |
|
{ |
|
"epoch": 0.03800209714007662, |
|
"grad_norm": 1.103756070137024, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2542, |
|
"step": 11090 |
|
}, |
|
{ |
|
"epoch": 0.03803636413479265, |
|
"grad_norm": 1.2097468376159668, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2581, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 0.03807063112950868, |
|
"grad_norm": 1.0787367820739746, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2428, |
|
"step": 11110 |
|
}, |
|
{ |
|
"epoch": 0.03810489812422471, |
|
"grad_norm": 1.0501831769943237, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2585, |
|
"step": 11120 |
|
}, |
|
{ |
|
"epoch": 0.03813916511894074, |
|
"grad_norm": 1.1210603713989258, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2435, |
|
"step": 11130 |
|
}, |
|
{ |
|
"epoch": 0.038173432113656765, |
|
"grad_norm": 1.0846295356750488, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2525, |
|
"step": 11140 |
|
}, |
|
{ |
|
"epoch": 0.038207699108372795, |
|
"grad_norm": 1.176488995552063, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2499, |
|
"step": 11150 |
|
}, |
|
{ |
|
"epoch": 0.038241966103088824, |
|
"grad_norm": 1.034157395362854, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2701, |
|
"step": 11160 |
|
}, |
|
{ |
|
"epoch": 0.038276233097804854, |
|
"grad_norm": 1.0763425827026367, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2327, |
|
"step": 11170 |
|
}, |
|
{ |
|
"epoch": 0.038310500092520884, |
|
"grad_norm": 1.010324478149414, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2506, |
|
"step": 11180 |
|
}, |
|
{ |
|
"epoch": 0.038344767087236914, |
|
"grad_norm": 1.0999796390533447, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2482, |
|
"step": 11190 |
|
}, |
|
{ |
|
"epoch": 0.038379034081952944, |
|
"grad_norm": 1.2314294576644897, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2413, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 0.038413301076668974, |
|
"grad_norm": 1.109063982963562, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2433, |
|
"step": 11210 |
|
}, |
|
{ |
|
"epoch": 0.038447568071385003, |
|
"grad_norm": 1.0760470628738403, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2572, |
|
"step": 11220 |
|
}, |
|
{ |
|
"epoch": 0.03848183506610103, |
|
"grad_norm": 1.2139952182769775, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2542, |
|
"step": 11230 |
|
}, |
|
{ |
|
"epoch": 0.03851610206081706, |
|
"grad_norm": 1.0825960636138916, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2446, |
|
"step": 11240 |
|
}, |
|
{ |
|
"epoch": 0.03855036905553309, |
|
"grad_norm": 1.1650110483169556, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2486, |
|
"step": 11250 |
|
}, |
|
{ |
|
"epoch": 0.03858463605024912, |
|
"grad_norm": 1.074236512184143, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2623, |
|
"step": 11260 |
|
}, |
|
{ |
|
"epoch": 0.03861890304496515, |
|
"grad_norm": 1.0651731491088867, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2409, |
|
"step": 11270 |
|
}, |
|
{ |
|
"epoch": 0.03865317003968118, |
|
"grad_norm": 1.0689282417297363, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2329, |
|
"step": 11280 |
|
}, |
|
{ |
|
"epoch": 0.03868743703439721, |
|
"grad_norm": 1.1548572778701782, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2572, |
|
"step": 11290 |
|
}, |
|
{ |
|
"epoch": 0.038721704029113235, |
|
"grad_norm": 1.1205992698669434, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2424, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 0.038755971023829265, |
|
"grad_norm": 1.0806999206542969, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2336, |
|
"step": 11310 |
|
}, |
|
{ |
|
"epoch": 0.038790238018545295, |
|
"grad_norm": 1.0330880880355835, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2371, |
|
"step": 11320 |
|
}, |
|
{ |
|
"epoch": 0.038824505013261325, |
|
"grad_norm": 1.2580816745758057, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2297, |
|
"step": 11330 |
|
}, |
|
{ |
|
"epoch": 0.038858772007977355, |
|
"grad_norm": 1.3312656879425049, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2478, |
|
"step": 11340 |
|
}, |
|
{ |
|
"epoch": 0.038893039002693384, |
|
"grad_norm": 1.0043836832046509, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2388, |
|
"step": 11350 |
|
}, |
|
{ |
|
"epoch": 0.038927305997409414, |
|
"grad_norm": 1.0665231943130493, |
|
"learning_rate": 1e-05, |
|
"loss": 0.234, |
|
"step": 11360 |
|
}, |
|
{ |
|
"epoch": 0.038961572992125444, |
|
"grad_norm": 1.1114041805267334, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2407, |
|
"step": 11370 |
|
}, |
|
{ |
|
"epoch": 0.038995839986841474, |
|
"grad_norm": 1.043134093284607, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2352, |
|
"step": 11380 |
|
}, |
|
{ |
|
"epoch": 0.039030106981557504, |
|
"grad_norm": 1.1435351371765137, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2347, |
|
"step": 11390 |
|
}, |
|
{ |
|
"epoch": 0.039064373976273534, |
|
"grad_norm": 1.2625036239624023, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2515, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 0.039098640970989564, |
|
"grad_norm": 1.101953148841858, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2354, |
|
"step": 11410 |
|
}, |
|
{ |
|
"epoch": 0.03913290796570559, |
|
"grad_norm": 1.0906771421432495, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2145, |
|
"step": 11420 |
|
}, |
|
{ |
|
"epoch": 0.03916717496042162, |
|
"grad_norm": 1.2161511182785034, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2366, |
|
"step": 11430 |
|
}, |
|
{ |
|
"epoch": 0.03920144195513765, |
|
"grad_norm": 1.0494539737701416, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2445, |
|
"step": 11440 |
|
}, |
|
{ |
|
"epoch": 0.03923570894985368, |
|
"grad_norm": 1.098476529121399, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2392, |
|
"step": 11450 |
|
}, |
|
{ |
|
"epoch": 0.03926997594456971, |
|
"grad_norm": 1.1904308795928955, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2452, |
|
"step": 11460 |
|
}, |
|
{ |
|
"epoch": 0.039304242939285736, |
|
"grad_norm": 1.1967114210128784, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2489, |
|
"step": 11470 |
|
}, |
|
{ |
|
"epoch": 0.039338509934001765, |
|
"grad_norm": 1.2335082292556763, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2428, |
|
"step": 11480 |
|
}, |
|
{ |
|
"epoch": 0.039372776928717795, |
|
"grad_norm": 1.0949335098266602, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2433, |
|
"step": 11490 |
|
}, |
|
{ |
|
"epoch": 0.039407043923433825, |
|
"grad_norm": 0.9655303955078125, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2298, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.039407043923433825, |
|
"eval_cer": 13.238666431469396, |
|
"eval_loss": 0.24733339250087738, |
|
"eval_normalized_cer": 9.532374100719425, |
|
"eval_runtime": 228.8524, |
|
"eval_samples_per_second": 2.237, |
|
"eval_steps_per_second": 0.035, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.039441310918149855, |
|
"grad_norm": 1.303244709968567, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2312, |
|
"step": 11510 |
|
}, |
|
{ |
|
"epoch": 0.039475577912865885, |
|
"grad_norm": 1.2272698879241943, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2371, |
|
"step": 11520 |
|
}, |
|
{ |
|
"epoch": 0.039509844907581915, |
|
"grad_norm": 1.2555683851242065, |
|
"learning_rate": 1e-05, |
|
"loss": 0.248, |
|
"step": 11530 |
|
}, |
|
{ |
|
"epoch": 0.039544111902297945, |
|
"grad_norm": 1.2972025871276855, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2637, |
|
"step": 11540 |
|
}, |
|
{ |
|
"epoch": 0.039578378897013974, |
|
"grad_norm": 0.9733885526657104, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2397, |
|
"step": 11550 |
|
}, |
|
{ |
|
"epoch": 0.039612645891730004, |
|
"grad_norm": 1.1637110710144043, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2468, |
|
"step": 11560 |
|
}, |
|
{ |
|
"epoch": 0.039646912886446034, |
|
"grad_norm": 1.2355756759643555, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2407, |
|
"step": 11570 |
|
}, |
|
{ |
|
"epoch": 0.039681179881162064, |
|
"grad_norm": 1.1079312562942505, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2511, |
|
"step": 11580 |
|
}, |
|
{ |
|
"epoch": 0.039715446875878094, |
|
"grad_norm": 1.13614821434021, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2442, |
|
"step": 11590 |
|
}, |
|
{ |
|
"epoch": 0.039749713870594124, |
|
"grad_norm": 1.2050237655639648, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2505, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 0.03978398086531015, |
|
"grad_norm": 1.020393967628479, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2305, |
|
"step": 11610 |
|
}, |
|
{ |
|
"epoch": 0.03981824786002618, |
|
"grad_norm": 1.088463544845581, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2542, |
|
"step": 11620 |
|
}, |
|
{ |
|
"epoch": 0.039852514854742206, |
|
"grad_norm": 1.121472716331482, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2489, |
|
"step": 11630 |
|
}, |
|
{ |
|
"epoch": 0.039886781849458236, |
|
"grad_norm": 1.109485149383545, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2488, |
|
"step": 11640 |
|
}, |
|
{ |
|
"epoch": 0.039921048844174266, |
|
"grad_norm": 1.0824321508407593, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2395, |
|
"step": 11650 |
|
}, |
|
{ |
|
"epoch": 0.039955315838890296, |
|
"grad_norm": 1.1438390016555786, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2318, |
|
"step": 11660 |
|
}, |
|
{ |
|
"epoch": 0.039989582833606326, |
|
"grad_norm": 1.005821943283081, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2482, |
|
"step": 11670 |
|
}, |
|
{ |
|
"epoch": 0.040023849828322355, |
|
"grad_norm": 1.187921166419983, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2479, |
|
"step": 11680 |
|
}, |
|
{ |
|
"epoch": 0.040058116823038385, |
|
"grad_norm": 1.0112143754959106, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2555, |
|
"step": 11690 |
|
}, |
|
{ |
|
"epoch": 0.040092383817754415, |
|
"grad_norm": 1.1568365097045898, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2483, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 0.040126650812470445, |
|
"grad_norm": 1.1201119422912598, |
|
"learning_rate": 1e-05, |
|
"loss": 0.256, |
|
"step": 11710 |
|
}, |
|
{ |
|
"epoch": 0.040160917807186475, |
|
"grad_norm": 1.1281081438064575, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2462, |
|
"step": 11720 |
|
}, |
|
{ |
|
"epoch": 0.040195184801902505, |
|
"grad_norm": 0.9730721116065979, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2534, |
|
"step": 11730 |
|
}, |
|
{ |
|
"epoch": 0.040229451796618534, |
|
"grad_norm": 1.1069108247756958, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2497, |
|
"step": 11740 |
|
}, |
|
{ |
|
"epoch": 0.040263718791334564, |
|
"grad_norm": 1.1095935106277466, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2448, |
|
"step": 11750 |
|
}, |
|
{ |
|
"epoch": 0.040297985786050594, |
|
"grad_norm": 0.9690611958503723, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2448, |
|
"step": 11760 |
|
}, |
|
{ |
|
"epoch": 0.040332252780766624, |
|
"grad_norm": 1.0263612270355225, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2555, |
|
"step": 11770 |
|
}, |
|
{ |
|
"epoch": 0.040366519775482654, |
|
"grad_norm": 1.0731168985366821, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2321, |
|
"step": 11780 |
|
}, |
|
{ |
|
"epoch": 0.04040078677019868, |
|
"grad_norm": 1.1446433067321777, |
|
"learning_rate": 1e-05, |
|
"loss": 0.239, |
|
"step": 11790 |
|
}, |
|
{ |
|
"epoch": 0.04043505376491471, |
|
"grad_norm": 1.0776352882385254, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2255, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 0.040469320759630736, |
|
"grad_norm": 0.9721156358718872, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2234, |
|
"step": 11810 |
|
}, |
|
{ |
|
"epoch": 0.040503587754346766, |
|
"grad_norm": 0.9534703493118286, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2163, |
|
"step": 11820 |
|
}, |
|
{ |
|
"epoch": 0.040537854749062796, |
|
"grad_norm": 1.0248794555664062, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2406, |
|
"step": 11830 |
|
}, |
|
{ |
|
"epoch": 0.040572121743778826, |
|
"grad_norm": 1.1740145683288574, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2394, |
|
"step": 11840 |
|
}, |
|
{ |
|
"epoch": 0.040606388738494856, |
|
"grad_norm": 1.1622172594070435, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2387, |
|
"step": 11850 |
|
}, |
|
{ |
|
"epoch": 0.040640655733210886, |
|
"grad_norm": 1.0684759616851807, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2196, |
|
"step": 11860 |
|
}, |
|
{ |
|
"epoch": 0.040674922727926915, |
|
"grad_norm": 1.024851679801941, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2178, |
|
"step": 11870 |
|
}, |
|
{ |
|
"epoch": 0.040709189722642945, |
|
"grad_norm": 1.2293421030044556, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2372, |
|
"step": 11880 |
|
}, |
|
{ |
|
"epoch": 0.040743456717358975, |
|
"grad_norm": 1.2226061820983887, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2466, |
|
"step": 11890 |
|
}, |
|
{ |
|
"epoch": 0.040777723712075005, |
|
"grad_norm": 1.0775419473648071, |
|
"learning_rate": 1e-05, |
|
"loss": 0.221, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 0.040811990706791035, |
|
"grad_norm": 1.0354384183883667, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2338, |
|
"step": 11910 |
|
}, |
|
{ |
|
"epoch": 0.040846257701507065, |
|
"grad_norm": 0.9725399613380432, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2312, |
|
"step": 11920 |
|
}, |
|
{ |
|
"epoch": 0.040880524696223094, |
|
"grad_norm": 0.9638645648956299, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2238, |
|
"step": 11930 |
|
}, |
|
{ |
|
"epoch": 0.040914791690939124, |
|
"grad_norm": 1.1646082401275635, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2339, |
|
"step": 11940 |
|
}, |
|
{ |
|
"epoch": 0.04094905868565515, |
|
"grad_norm": 1.049614429473877, |
|
"learning_rate": 1e-05, |
|
"loss": 0.229, |
|
"step": 11950 |
|
}, |
|
{ |
|
"epoch": 0.04098332568037118, |
|
"grad_norm": 1.1187442541122437, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2437, |
|
"step": 11960 |
|
}, |
|
{ |
|
"epoch": 0.04101759267508721, |
|
"grad_norm": 1.1472731828689575, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2416, |
|
"step": 11970 |
|
}, |
|
{ |
|
"epoch": 0.04105185966980324, |
|
"grad_norm": 1.0694329738616943, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2308, |
|
"step": 11980 |
|
}, |
|
{ |
|
"epoch": 0.04108612666451927, |
|
"grad_norm": 0.9863060116767883, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2258, |
|
"step": 11990 |
|
}, |
|
{ |
|
"epoch": 0.041120393659235296, |
|
"grad_norm": 1.1150392293930054, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2342, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.041120393659235296, |
|
"eval_cer": 12.797671547010056, |
|
"eval_loss": 0.2504700720310211, |
|
"eval_normalized_cer": 9.152677857713828, |
|
"eval_runtime": 227.2028, |
|
"eval_samples_per_second": 2.253, |
|
"eval_steps_per_second": 0.035, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.041154660653951326, |
|
"grad_norm": 1.0455725193023682, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2263, |
|
"step": 12010 |
|
}, |
|
{ |
|
"epoch": 0.041188927648667356, |
|
"grad_norm": 1.2993946075439453, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2273, |
|
"step": 12020 |
|
}, |
|
{ |
|
"epoch": 0.041223194643383386, |
|
"grad_norm": 1.159058690071106, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2259, |
|
"step": 12030 |
|
}, |
|
{ |
|
"epoch": 0.041257461638099416, |
|
"grad_norm": 1.1908732652664185, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2489, |
|
"step": 12040 |
|
}, |
|
{ |
|
"epoch": 0.041291728632815446, |
|
"grad_norm": 1.3122719526290894, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2544, |
|
"step": 12050 |
|
}, |
|
{ |
|
"epoch": 0.041325995627531475, |
|
"grad_norm": 1.1521992683410645, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2496, |
|
"step": 12060 |
|
}, |
|
{ |
|
"epoch": 0.041360262622247505, |
|
"grad_norm": 1.1007260084152222, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2453, |
|
"step": 12070 |
|
}, |
|
{ |
|
"epoch": 0.041394529616963535, |
|
"grad_norm": 1.1719632148742676, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2419, |
|
"step": 12080 |
|
}, |
|
{ |
|
"epoch": 0.041428796611679565, |
|
"grad_norm": 1.088536024093628, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2408, |
|
"step": 12090 |
|
}, |
|
{ |
|
"epoch": 0.041463063606395595, |
|
"grad_norm": 1.1596314907073975, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2463, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 0.041497330601111625, |
|
"grad_norm": 1.1144077777862549, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2411, |
|
"step": 12110 |
|
}, |
|
{ |
|
"epoch": 0.04153159759582765, |
|
"grad_norm": 1.2945681810379028, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2381, |
|
"step": 12120 |
|
}, |
|
{ |
|
"epoch": 0.04156586459054368, |
|
"grad_norm": 1.0997275114059448, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2704, |
|
"step": 12130 |
|
}, |
|
{ |
|
"epoch": 0.04160013158525971, |
|
"grad_norm": 1.0945874452590942, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2249, |
|
"step": 12140 |
|
}, |
|
{ |
|
"epoch": 0.04163439857997574, |
|
"grad_norm": 1.2051665782928467, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2442, |
|
"step": 12150 |
|
}, |
|
{ |
|
"epoch": 0.04166866557469177, |
|
"grad_norm": 0.9178060293197632, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2519, |
|
"step": 12160 |
|
}, |
|
{ |
|
"epoch": 0.0417029325694078, |
|
"grad_norm": 1.1225532293319702, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2425, |
|
"step": 12170 |
|
}, |
|
{ |
|
"epoch": 0.04173719956412383, |
|
"grad_norm": 1.0947092771530151, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2305, |
|
"step": 12180 |
|
}, |
|
{ |
|
"epoch": 0.041771466558839856, |
|
"grad_norm": 1.0374338626861572, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2445, |
|
"step": 12190 |
|
}, |
|
{ |
|
"epoch": 0.041805733553555886, |
|
"grad_norm": 1.1471805572509766, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2447, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 0.041840000548271916, |
|
"grad_norm": 1.1241774559020996, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2235, |
|
"step": 12210 |
|
}, |
|
{ |
|
"epoch": 0.041874267542987946, |
|
"grad_norm": 1.243691086769104, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2409, |
|
"step": 12220 |
|
}, |
|
{ |
|
"epoch": 0.041908534537703976, |
|
"grad_norm": 1.1077616214752197, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2533, |
|
"step": 12230 |
|
}, |
|
{ |
|
"epoch": 0.041942801532420006, |
|
"grad_norm": 1.0907562971115112, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2443, |
|
"step": 12240 |
|
}, |
|
{ |
|
"epoch": 0.041977068527136036, |
|
"grad_norm": 1.3562718629837036, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2523, |
|
"step": 12250 |
|
}, |
|
{ |
|
"epoch": 0.042011335521852065, |
|
"grad_norm": 1.0229142904281616, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2328, |
|
"step": 12260 |
|
}, |
|
{ |
|
"epoch": 0.042045602516568095, |
|
"grad_norm": 1.0843278169631958, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2328, |
|
"step": 12270 |
|
}, |
|
{ |
|
"epoch": 0.04207986951128412, |
|
"grad_norm": 1.2029650211334229, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2458, |
|
"step": 12280 |
|
}, |
|
{ |
|
"epoch": 0.04211413650600015, |
|
"grad_norm": 0.9493764638900757, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2232, |
|
"step": 12290 |
|
}, |
|
{ |
|
"epoch": 0.04214840350071618, |
|
"grad_norm": 1.2031728029251099, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2473, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 0.04218267049543221, |
|
"grad_norm": 1.1091227531433105, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2429, |
|
"step": 12310 |
|
}, |
|
{ |
|
"epoch": 0.04221693749014824, |
|
"grad_norm": 1.1729086637496948, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2594, |
|
"step": 12320 |
|
}, |
|
{ |
|
"epoch": 0.04225120448486427, |
|
"grad_norm": 1.0592730045318604, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2506, |
|
"step": 12330 |
|
}, |
|
{ |
|
"epoch": 0.0422854714795803, |
|
"grad_norm": 1.2366282939910889, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2457, |
|
"step": 12340 |
|
}, |
|
{ |
|
"epoch": 0.04231973847429633, |
|
"grad_norm": 1.12427818775177, |
|
"learning_rate": 1e-05, |
|
"loss": 0.246, |
|
"step": 12350 |
|
}, |
|
{ |
|
"epoch": 0.04235400546901236, |
|
"grad_norm": 1.1663504838943481, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2491, |
|
"step": 12360 |
|
}, |
|
{ |
|
"epoch": 0.04238827246372839, |
|
"grad_norm": 1.2383378744125366, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2492, |
|
"step": 12370 |
|
}, |
|
{ |
|
"epoch": 0.04242253945844442, |
|
"grad_norm": 1.184813380241394, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2498, |
|
"step": 12380 |
|
}, |
|
{ |
|
"epoch": 0.042456806453160446, |
|
"grad_norm": 1.035650610923767, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2317, |
|
"step": 12390 |
|
}, |
|
{ |
|
"epoch": 0.042491073447876476, |
|
"grad_norm": 1.0495967864990234, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2467, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 0.042525340442592506, |
|
"grad_norm": 1.0791754722595215, |
|
"learning_rate": 1e-05, |
|
"loss": 0.245, |
|
"step": 12410 |
|
}, |
|
{ |
|
"epoch": 0.042559607437308536, |
|
"grad_norm": 1.1513383388519287, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2599, |
|
"step": 12420 |
|
}, |
|
{ |
|
"epoch": 0.042593874432024566, |
|
"grad_norm": 1.1093658208847046, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2584, |
|
"step": 12430 |
|
}, |
|
{ |
|
"epoch": 0.04262814142674059, |
|
"grad_norm": 1.0504255294799805, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2355, |
|
"step": 12440 |
|
}, |
|
{ |
|
"epoch": 0.04266240842145662, |
|
"grad_norm": 1.121837854385376, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2309, |
|
"step": 12450 |
|
}, |
|
{ |
|
"epoch": 0.04269667541617265, |
|
"grad_norm": 1.2266592979431152, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2384, |
|
"step": 12460 |
|
}, |
|
{ |
|
"epoch": 0.04273094241088868, |
|
"grad_norm": 1.0912328958511353, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2392, |
|
"step": 12470 |
|
}, |
|
{ |
|
"epoch": 0.04276520940560471, |
|
"grad_norm": 1.1504424810409546, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2716, |
|
"step": 12480 |
|
}, |
|
{ |
|
"epoch": 0.04279947640032074, |
|
"grad_norm": 1.011088490486145, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2501, |
|
"step": 12490 |
|
}, |
|
{ |
|
"epoch": 0.04283374339503677, |
|
"grad_norm": 1.2423217296600342, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2562, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.04283374339503677, |
|
"eval_cer": 12.92115011465867, |
|
"eval_loss": 0.24911069869995117, |
|
"eval_normalized_cer": 9.452438049560353, |
|
"eval_runtime": 228.1651, |
|
"eval_samples_per_second": 2.244, |
|
"eval_steps_per_second": 0.035, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.0428680103897528, |
|
"grad_norm": 1.4834926128387451, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2354, |
|
"step": 12510 |
|
}, |
|
{ |
|
"epoch": 0.04290227738446883, |
|
"grad_norm": 1.26629638671875, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2569, |
|
"step": 12520 |
|
}, |
|
{ |
|
"epoch": 0.04293654437918486, |
|
"grad_norm": 1.204516053199768, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2502, |
|
"step": 12530 |
|
}, |
|
{ |
|
"epoch": 0.04297081137390089, |
|
"grad_norm": 1.0527433156967163, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2503, |
|
"step": 12540 |
|
}, |
|
{ |
|
"epoch": 0.04300507836861692, |
|
"grad_norm": 1.0310479402542114, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2289, |
|
"step": 12550 |
|
}, |
|
{ |
|
"epoch": 0.04303934536333295, |
|
"grad_norm": 1.2252111434936523, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2485, |
|
"step": 12560 |
|
}, |
|
{ |
|
"epoch": 0.04307361235804898, |
|
"grad_norm": 1.0729095935821533, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2302, |
|
"step": 12570 |
|
}, |
|
{ |
|
"epoch": 0.043107879352765006, |
|
"grad_norm": 1.000106930732727, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2192, |
|
"step": 12580 |
|
}, |
|
{ |
|
"epoch": 0.043142146347481036, |
|
"grad_norm": 1.0674782991409302, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2334, |
|
"step": 12590 |
|
}, |
|
{ |
|
"epoch": 0.04317641334219706, |
|
"grad_norm": 1.1148403882980347, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2402, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 0.04321068033691309, |
|
"grad_norm": 1.0144375562667847, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2492, |
|
"step": 12610 |
|
}, |
|
{ |
|
"epoch": 0.04324494733162912, |
|
"grad_norm": 1.1123058795928955, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2334, |
|
"step": 12620 |
|
}, |
|
{ |
|
"epoch": 0.04327921432634515, |
|
"grad_norm": 1.1008777618408203, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2319, |
|
"step": 12630 |
|
}, |
|
{ |
|
"epoch": 0.04331348132106118, |
|
"grad_norm": 1.1487098932266235, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2451, |
|
"step": 12640 |
|
}, |
|
{ |
|
"epoch": 0.04334774831577721, |
|
"grad_norm": 1.1339664459228516, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2267, |
|
"step": 12650 |
|
}, |
|
{ |
|
"epoch": 0.04338201531049324, |
|
"grad_norm": 1.198195219039917, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2472, |
|
"step": 12660 |
|
}, |
|
{ |
|
"epoch": 0.04341628230520927, |
|
"grad_norm": 0.9989431500434875, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2381, |
|
"step": 12670 |
|
}, |
|
{ |
|
"epoch": 0.0434505492999253, |
|
"grad_norm": 1.4252516031265259, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2513, |
|
"step": 12680 |
|
}, |
|
{ |
|
"epoch": 0.04348481629464133, |
|
"grad_norm": 1.1313762664794922, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2591, |
|
"step": 12690 |
|
}, |
|
{ |
|
"epoch": 0.04351908328935736, |
|
"grad_norm": 1.0512256622314453, |
|
"learning_rate": 1e-05, |
|
"loss": 0.238, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 0.04355335028407339, |
|
"grad_norm": 1.158078670501709, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2446, |
|
"step": 12710 |
|
}, |
|
{ |
|
"epoch": 0.04358761727878942, |
|
"grad_norm": 1.0620396137237549, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2403, |
|
"step": 12720 |
|
}, |
|
{ |
|
"epoch": 0.04362188427350545, |
|
"grad_norm": 1.0640372037887573, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2378, |
|
"step": 12730 |
|
}, |
|
{ |
|
"epoch": 0.04365615126822148, |
|
"grad_norm": 1.113105058670044, |
|
"learning_rate": 1e-05, |
|
"loss": 0.236, |
|
"step": 12740 |
|
}, |
|
{ |
|
"epoch": 0.04369041826293751, |
|
"grad_norm": 1.0416456460952759, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2424, |
|
"step": 12750 |
|
}, |
|
{ |
|
"epoch": 0.04372468525765354, |
|
"grad_norm": 1.2312722206115723, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2397, |
|
"step": 12760 |
|
}, |
|
{ |
|
"epoch": 0.04375895225236956, |
|
"grad_norm": 1.228950023651123, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2462, |
|
"step": 12770 |
|
}, |
|
{ |
|
"epoch": 0.04379321924708559, |
|
"grad_norm": 1.0736054182052612, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2299, |
|
"step": 12780 |
|
}, |
|
{ |
|
"epoch": 0.04382748624180162, |
|
"grad_norm": 1.4141355752944946, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2377, |
|
"step": 12790 |
|
}, |
|
{ |
|
"epoch": 0.04386175323651765, |
|
"grad_norm": 1.2224112749099731, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2334, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 0.04389602023123368, |
|
"grad_norm": 1.3090282678604126, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2548, |
|
"step": 12810 |
|
}, |
|
{ |
|
"epoch": 0.04393028722594971, |
|
"grad_norm": 1.0864715576171875, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2711, |
|
"step": 12820 |
|
}, |
|
{ |
|
"epoch": 0.04396455422066574, |
|
"grad_norm": 1.0953795909881592, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2467, |
|
"step": 12830 |
|
}, |
|
{ |
|
"epoch": 0.04399882121538177, |
|
"grad_norm": 0.9681864976882935, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2217, |
|
"step": 12840 |
|
}, |
|
{ |
|
"epoch": 0.0440330882100978, |
|
"grad_norm": 0.9268914461135864, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2492, |
|
"step": 12850 |
|
}, |
|
{ |
|
"epoch": 0.04406735520481383, |
|
"grad_norm": 1.0900733470916748, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2421, |
|
"step": 12860 |
|
}, |
|
{ |
|
"epoch": 0.04410162219952986, |
|
"grad_norm": 1.1551947593688965, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2549, |
|
"step": 12870 |
|
}, |
|
{ |
|
"epoch": 0.04413588919424589, |
|
"grad_norm": 1.0035364627838135, |
|
"learning_rate": 1e-05, |
|
"loss": 0.248, |
|
"step": 12880 |
|
}, |
|
{ |
|
"epoch": 0.04417015618896192, |
|
"grad_norm": 1.2478151321411133, |
|
"learning_rate": 1e-05, |
|
"loss": 0.253, |
|
"step": 12890 |
|
}, |
|
{ |
|
"epoch": 0.04420442318367795, |
|
"grad_norm": 1.1512874364852905, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2247, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 0.04423869017839398, |
|
"grad_norm": 1.2012622356414795, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2635, |
|
"step": 12910 |
|
}, |
|
{ |
|
"epoch": 0.04427295717311001, |
|
"grad_norm": 1.1266357898712158, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2464, |
|
"step": 12920 |
|
}, |
|
{ |
|
"epoch": 0.04430722416782603, |
|
"grad_norm": 1.11850905418396, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2499, |
|
"step": 12930 |
|
}, |
|
{ |
|
"epoch": 0.04434149116254206, |
|
"grad_norm": 1.1375716924667358, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2474, |
|
"step": 12940 |
|
}, |
|
{ |
|
"epoch": 0.04437575815725809, |
|
"grad_norm": 1.3423253297805786, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2525, |
|
"step": 12950 |
|
}, |
|
{ |
|
"epoch": 0.04441002515197412, |
|
"grad_norm": 1.0608446598052979, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2445, |
|
"step": 12960 |
|
}, |
|
{ |
|
"epoch": 0.04444429214669015, |
|
"grad_norm": 1.059899091720581, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2444, |
|
"step": 12970 |
|
}, |
|
{ |
|
"epoch": 0.04447855914140618, |
|
"grad_norm": 1.117346167564392, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2529, |
|
"step": 12980 |
|
}, |
|
{ |
|
"epoch": 0.04451282613612221, |
|
"grad_norm": 1.2896045446395874, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2473, |
|
"step": 12990 |
|
}, |
|
{ |
|
"epoch": 0.04454709313083824, |
|
"grad_norm": 0.9721153974533081, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2461, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.04454709313083824, |
|
"eval_cer": 12.84177103545599, |
|
"eval_loss": 0.2521709203720093, |
|
"eval_normalized_cer": 9.152677857713828, |
|
"eval_runtime": 226.778, |
|
"eval_samples_per_second": 2.258, |
|
"eval_steps_per_second": 0.035, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.04458136012555427, |
|
"grad_norm": 1.232352375984192, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2387, |
|
"step": 13010 |
|
}, |
|
{ |
|
"epoch": 0.0446156271202703, |
|
"grad_norm": 1.2386256456375122, |
|
"learning_rate": 1e-05, |
|
"loss": 0.255, |
|
"step": 13020 |
|
}, |
|
{ |
|
"epoch": 0.04464989411498633, |
|
"grad_norm": 1.2183597087860107, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2635, |
|
"step": 13030 |
|
}, |
|
{ |
|
"epoch": 0.04468416110970236, |
|
"grad_norm": 1.166823387145996, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2563, |
|
"step": 13040 |
|
}, |
|
{ |
|
"epoch": 0.04471842810441839, |
|
"grad_norm": 1.1574853658676147, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2557, |
|
"step": 13050 |
|
}, |
|
{ |
|
"epoch": 0.04475269509913442, |
|
"grad_norm": 1.1207836866378784, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2578, |
|
"step": 13060 |
|
}, |
|
{ |
|
"epoch": 0.04478696209385045, |
|
"grad_norm": 1.2590343952178955, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2551, |
|
"step": 13070 |
|
}, |
|
{ |
|
"epoch": 0.04482122908856648, |
|
"grad_norm": 1.0984435081481934, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2816, |
|
"step": 13080 |
|
}, |
|
{ |
|
"epoch": 0.0448554960832825, |
|
"grad_norm": 1.1435647010803223, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2481, |
|
"step": 13090 |
|
}, |
|
{ |
|
"epoch": 0.04488976307799853, |
|
"grad_norm": 1.1446672677993774, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2561, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 0.04492403007271456, |
|
"grad_norm": 1.0957670211791992, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2458, |
|
"step": 13110 |
|
}, |
|
{ |
|
"epoch": 0.04495829706743059, |
|
"grad_norm": 1.1321167945861816, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2621, |
|
"step": 13120 |
|
}, |
|
{ |
|
"epoch": 0.04499256406214662, |
|
"grad_norm": 1.140914797782898, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2376, |
|
"step": 13130 |
|
}, |
|
{ |
|
"epoch": 0.04502683105686265, |
|
"grad_norm": 1.1879481077194214, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2457, |
|
"step": 13140 |
|
}, |
|
{ |
|
"epoch": 0.04506109805157868, |
|
"grad_norm": 1.240084171295166, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2486, |
|
"step": 13150 |
|
}, |
|
{ |
|
"epoch": 0.04509536504629471, |
|
"grad_norm": 1.1524683237075806, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2533, |
|
"step": 13160 |
|
}, |
|
{ |
|
"epoch": 0.04512963204101074, |
|
"grad_norm": 1.1614208221435547, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2495, |
|
"step": 13170 |
|
}, |
|
{ |
|
"epoch": 0.04516389903572677, |
|
"grad_norm": 1.1307048797607422, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2549, |
|
"step": 13180 |
|
}, |
|
{ |
|
"epoch": 0.0451981660304428, |
|
"grad_norm": 1.0327478647232056, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2451, |
|
"step": 13190 |
|
}, |
|
{ |
|
"epoch": 0.04523243302515883, |
|
"grad_norm": 1.2401607036590576, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2492, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 0.04526670001987486, |
|
"grad_norm": 1.095413088798523, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2487, |
|
"step": 13210 |
|
}, |
|
{ |
|
"epoch": 0.04530096701459089, |
|
"grad_norm": 1.2537821531295776, |
|
"learning_rate": 1e-05, |
|
"loss": 0.246, |
|
"step": 13220 |
|
}, |
|
{ |
|
"epoch": 0.04533523400930692, |
|
"grad_norm": 1.1861079931259155, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2487, |
|
"step": 13230 |
|
}, |
|
{ |
|
"epoch": 0.04536950100402295, |
|
"grad_norm": 1.1059224605560303, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2465, |
|
"step": 13240 |
|
}, |
|
{ |
|
"epoch": 0.04540376799873898, |
|
"grad_norm": 1.159122109413147, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2472, |
|
"step": 13250 |
|
}, |
|
{ |
|
"epoch": 0.045438034993455, |
|
"grad_norm": 1.0307060480117798, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2583, |
|
"step": 13260 |
|
}, |
|
{ |
|
"epoch": 0.04547230198817103, |
|
"grad_norm": 1.0377501249313354, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2547, |
|
"step": 13270 |
|
}, |
|
{ |
|
"epoch": 0.04550656898288706, |
|
"grad_norm": 1.124543309211731, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2396, |
|
"step": 13280 |
|
}, |
|
{ |
|
"epoch": 0.04554083597760309, |
|
"grad_norm": 1.0829116106033325, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2417, |
|
"step": 13290 |
|
}, |
|
{ |
|
"epoch": 0.04557510297231912, |
|
"grad_norm": 1.057477355003357, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2527, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 0.04560936996703515, |
|
"grad_norm": 1.062674641609192, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2423, |
|
"step": 13310 |
|
}, |
|
{ |
|
"epoch": 0.04564363696175118, |
|
"grad_norm": 1.131895661354065, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2517, |
|
"step": 13320 |
|
}, |
|
{ |
|
"epoch": 0.04567790395646721, |
|
"grad_norm": 1.016940951347351, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2419, |
|
"step": 13330 |
|
}, |
|
{ |
|
"epoch": 0.04571217095118324, |
|
"grad_norm": 1.273378849029541, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2465, |
|
"step": 13340 |
|
}, |
|
{ |
|
"epoch": 0.04574643794589927, |
|
"grad_norm": 1.0757806301116943, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2447, |
|
"step": 13350 |
|
}, |
|
{ |
|
"epoch": 0.0457807049406153, |
|
"grad_norm": 1.3264166116714478, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2545, |
|
"step": 13360 |
|
}, |
|
{ |
|
"epoch": 0.04581497193533133, |
|
"grad_norm": 1.1011106967926025, |
|
"learning_rate": 1e-05, |
|
"loss": 0.269, |
|
"step": 13370 |
|
}, |
|
{ |
|
"epoch": 0.04584923893004736, |
|
"grad_norm": 1.0483593940734863, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2443, |
|
"step": 13380 |
|
}, |
|
{ |
|
"epoch": 0.04588350592476339, |
|
"grad_norm": 1.2940049171447754, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2459, |
|
"step": 13390 |
|
}, |
|
{ |
|
"epoch": 0.04591777291947942, |
|
"grad_norm": 1.1630951166152954, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2702, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 0.04595203991419545, |
|
"grad_norm": 1.0715082883834839, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2402, |
|
"step": 13410 |
|
}, |
|
{ |
|
"epoch": 0.04598630690891147, |
|
"grad_norm": 1.0946441888809204, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2452, |
|
"step": 13420 |
|
}, |
|
{ |
|
"epoch": 0.0460205739036275, |
|
"grad_norm": 1.0796674489974976, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2449, |
|
"step": 13430 |
|
}, |
|
{ |
|
"epoch": 0.04605484089834353, |
|
"grad_norm": 1.0534013509750366, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2385, |
|
"step": 13440 |
|
}, |
|
{ |
|
"epoch": 0.04608910789305956, |
|
"grad_norm": 1.0427377223968506, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2557, |
|
"step": 13450 |
|
}, |
|
{ |
|
"epoch": 0.04612337488777559, |
|
"grad_norm": 1.1708178520202637, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2452, |
|
"step": 13460 |
|
}, |
|
{ |
|
"epoch": 0.04615764188249162, |
|
"grad_norm": 1.0531684160232544, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2594, |
|
"step": 13470 |
|
}, |
|
{ |
|
"epoch": 0.04619190887720765, |
|
"grad_norm": 1.1972299814224243, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2566, |
|
"step": 13480 |
|
}, |
|
{ |
|
"epoch": 0.04622617587192368, |
|
"grad_norm": 1.0194915533065796, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2504, |
|
"step": 13490 |
|
}, |
|
{ |
|
"epoch": 0.04626044286663971, |
|
"grad_norm": 1.2437708377838135, |
|
"learning_rate": 1e-05, |
|
"loss": 0.251, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.04626044286663971, |
|
"eval_cer": 12.744752160874933, |
|
"eval_loss": 0.24938170611858368, |
|
"eval_normalized_cer": 8.952837729816148, |
|
"eval_runtime": 229.544, |
|
"eval_samples_per_second": 2.231, |
|
"eval_steps_per_second": 0.035, |
|
"step": 13500 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 291826, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 9223372036854775807, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.73724691447808e+21, |
|
"train_batch_size": 128, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|