|
{ |
|
"best_metric": 8.912869704236611, |
|
"best_model_checkpoint": "kotoba_v2_enc_logs_epoch2_2/checkpoint-4000", |
|
"epoch": 0.025700246037022062, |
|
"eval_steps": 500, |
|
"global_step": 7500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 3.4266994716029415e-05, |
|
"grad_norm": 1.0561553239822388, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2361, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 6.853398943205883e-05, |
|
"grad_norm": 1.1626238822937012, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2265, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.00010280098414808825, |
|
"grad_norm": 0.9845689535140991, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2279, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.00013706797886411766, |
|
"grad_norm": 1.142356276512146, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2382, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.00017133497358014707, |
|
"grad_norm": 1.0053240060806274, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2473, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0002056019682961765, |
|
"grad_norm": 1.1098105907440186, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2438, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.0002398689630122059, |
|
"grad_norm": 1.191983699798584, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2293, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.0002741359577282353, |
|
"grad_norm": 1.1295104026794434, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2362, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.0003084029524442647, |
|
"grad_norm": 1.037972092628479, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2455, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.00034266994716029413, |
|
"grad_norm": 1.1975648403167725, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2459, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.00037693694187632354, |
|
"grad_norm": 1.0676342248916626, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2271, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.000411203936592353, |
|
"grad_norm": 1.0749495029449463, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2417, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.0004454709313083824, |
|
"grad_norm": 1.094260811805725, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2354, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.0004797379260244118, |
|
"grad_norm": 1.0395853519439697, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2381, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.0005140049207404412, |
|
"grad_norm": 1.2008885145187378, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2354, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.0005482719154564706, |
|
"grad_norm": 1.0647832155227661, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2321, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.0005825389101725, |
|
"grad_norm": 1.327071189880371, |
|
"learning_rate": 1e-05, |
|
"loss": 0.238, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.0006168059048885295, |
|
"grad_norm": 1.1184055805206299, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2242, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.0006510728996045589, |
|
"grad_norm": 1.2512784004211426, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2437, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.0006853398943205883, |
|
"grad_norm": 1.0614465475082397, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2382, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.0007196068890366177, |
|
"grad_norm": 1.0607149600982666, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2381, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.0007538738837526471, |
|
"grad_norm": 1.0422028303146362, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2294, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.0007881408784686765, |
|
"grad_norm": 1.0162984132766724, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2275, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.000822407873184706, |
|
"grad_norm": 1.1085543632507324, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2161, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.0008566748679007354, |
|
"grad_norm": 1.1854636669158936, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2382, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.0008909418626167648, |
|
"grad_norm": 1.40137779712677, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2579, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.0009252088573327942, |
|
"grad_norm": 1.0814112424850464, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2612, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.0009594758520488236, |
|
"grad_norm": 1.083736538887024, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2711, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.000993742846764853, |
|
"grad_norm": 1.0861411094665527, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2642, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.0010280098414808825, |
|
"grad_norm": 1.1141265630722046, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2585, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.0010622768361969119, |
|
"grad_norm": 1.326241374015808, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2858, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.0010965438309129413, |
|
"grad_norm": 1.393750786781311, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2635, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.0011308108256289707, |
|
"grad_norm": 1.0851459503173828, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2565, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.001165077820345, |
|
"grad_norm": 1.2323757410049438, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2465, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.0011993448150610295, |
|
"grad_norm": 1.376953125, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2671, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.001233611809777059, |
|
"grad_norm": 1.084592580795288, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2643, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.0012678788044930883, |
|
"grad_norm": 1.2907005548477173, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2584, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.0013021457992091177, |
|
"grad_norm": 1.0698130130767822, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2526, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.0013364127939251471, |
|
"grad_norm": 1.1399807929992676, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2759, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.0013706797886411765, |
|
"grad_norm": 1.1480791568756104, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2499, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.001404946783357206, |
|
"grad_norm": 1.3095237016677856, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2536, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.0014392137780732353, |
|
"grad_norm": 1.068246841430664, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2604, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.0014734807727892648, |
|
"grad_norm": 1.2310419082641602, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2632, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.0015077477675052942, |
|
"grad_norm": 1.161867380142212, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2584, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.0015420147622213236, |
|
"grad_norm": 1.1461217403411865, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2592, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.001576281756937353, |
|
"grad_norm": 1.3006030321121216, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2607, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.0016105487516533824, |
|
"grad_norm": 1.1223125457763672, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2433, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.001644815746369412, |
|
"grad_norm": 1.2909380197525024, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2693, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.0016790827410854414, |
|
"grad_norm": 1.2270597219467163, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2661, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.0017133497358014708, |
|
"grad_norm": 1.1439770460128784, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2517, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.0017133497358014708, |
|
"eval_cer": 13.0358087846181, |
|
"eval_loss": 0.25224336981773376, |
|
"eval_normalized_cer": 9.4224620303757, |
|
"eval_runtime": 227.2174, |
|
"eval_samples_per_second": 2.253, |
|
"eval_steps_per_second": 0.035, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.0017476167305175002, |
|
"grad_norm": 1.1377454996109009, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2579, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.0017818837252335296, |
|
"grad_norm": 1.2096498012542725, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2727, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.001816150719949559, |
|
"grad_norm": 1.187213659286499, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2562, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.0018504177146655885, |
|
"grad_norm": 0.969393253326416, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2378, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.0018846847093816179, |
|
"grad_norm": 0.9745528697967529, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2774, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.0019189517040976473, |
|
"grad_norm": 1.0725352764129639, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2541, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.0019532186988136767, |
|
"grad_norm": 1.217871904373169, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2395, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.001987485693529706, |
|
"grad_norm": 1.3582627773284912, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2594, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.0020217526882457355, |
|
"grad_norm": 1.2415379285812378, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2582, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.002056019682961765, |
|
"grad_norm": 0.9810131192207336, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2284, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.0020902866776777943, |
|
"grad_norm": 0.9806564450263977, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2688, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.0021245536723938237, |
|
"grad_norm": 1.2755467891693115, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2591, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.002158820667109853, |
|
"grad_norm": 0.9300326704978943, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2444, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.0021930876618258825, |
|
"grad_norm": 1.1276524066925049, |
|
"learning_rate": 1e-05, |
|
"loss": 0.236, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.002227354656541912, |
|
"grad_norm": 1.1786876916885376, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2443, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.0022616216512579414, |
|
"grad_norm": 1.1702712774276733, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2627, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.0022958886459739708, |
|
"grad_norm": 1.2837899923324585, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2378, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.00233015564069, |
|
"grad_norm": 1.0623608827590942, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2491, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.0023644226354060296, |
|
"grad_norm": 1.1288243532180786, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2773, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.002398689630122059, |
|
"grad_norm": 1.0192692279815674, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2492, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.0024329566248380884, |
|
"grad_norm": 1.2274680137634277, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2345, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.002467223619554118, |
|
"grad_norm": 1.240645170211792, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2624, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.002501490614270147, |
|
"grad_norm": 1.0681366920471191, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2553, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.0025357576089861766, |
|
"grad_norm": 1.0161867141723633, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2547, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.002570024603702206, |
|
"grad_norm": 1.2384017705917358, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2449, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.0026042915984182354, |
|
"grad_norm": 1.1739261150360107, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2523, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.002638558593134265, |
|
"grad_norm": 1.0396535396575928, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2535, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.0026728255878502943, |
|
"grad_norm": 1.14767324924469, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2594, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.0027070925825663237, |
|
"grad_norm": 1.1783303022384644, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2546, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.002741359577282353, |
|
"grad_norm": 1.1065645217895508, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2547, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.0027756265719983825, |
|
"grad_norm": 1.256645917892456, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2548, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.002809893566714412, |
|
"grad_norm": 1.058158278465271, |
|
"learning_rate": 1e-05, |
|
"loss": 0.257, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.0028441605614304413, |
|
"grad_norm": 1.0647656917572021, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2479, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.0028784275561464707, |
|
"grad_norm": 1.1984691619873047, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2503, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.0029126945508625, |
|
"grad_norm": 1.1380070447921753, |
|
"learning_rate": 1e-05, |
|
"loss": 0.245, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.0029469615455785295, |
|
"grad_norm": 1.2131065130233765, |
|
"learning_rate": 1e-05, |
|
"loss": 0.242, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.002981228540294559, |
|
"grad_norm": 1.1822234392166138, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2613, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.0030154955350105883, |
|
"grad_norm": 1.0591018199920654, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2654, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.0030497625297266177, |
|
"grad_norm": 1.2318428754806519, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2525, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.003084029524442647, |
|
"grad_norm": 1.0146839618682861, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2609, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.0031182965191586766, |
|
"grad_norm": 1.1508561372756958, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2541, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.003152563513874706, |
|
"grad_norm": 1.1494849920272827, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2461, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.0031868305085907354, |
|
"grad_norm": 1.2423807382583618, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2573, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.0032210975033067648, |
|
"grad_norm": 1.2714438438415527, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2545, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.0032553644980227946, |
|
"grad_norm": 1.2088007926940918, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2773, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.003289631492738824, |
|
"grad_norm": 1.0737963914871216, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2495, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.0033238984874548534, |
|
"grad_norm": 1.0942472219467163, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2401, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.003358165482170883, |
|
"grad_norm": 1.1282986402511597, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2638, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.0033924324768869123, |
|
"grad_norm": 1.0762425661087036, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2619, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.0034266994716029417, |
|
"grad_norm": 1.09200119972229, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2464, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.0034266994716029417, |
|
"eval_cer": 13.80313988357735, |
|
"eval_loss": 0.25397512316703796, |
|
"eval_normalized_cer": 9.952038369304557, |
|
"eval_runtime": 227.5088, |
|
"eval_samples_per_second": 2.25, |
|
"eval_steps_per_second": 0.035, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.003460966466318971, |
|
"grad_norm": 0.9681844711303711, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2567, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.0034952334610350005, |
|
"grad_norm": 1.0064711570739746, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2514, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.00352950045575103, |
|
"grad_norm": 1.190294623374939, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2654, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.0035637674504670593, |
|
"grad_norm": 1.332492709159851, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2725, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.0035980344451830887, |
|
"grad_norm": 1.1110397577285767, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2504, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.003632301439899118, |
|
"grad_norm": 1.2327215671539307, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2733, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.0036665684346151475, |
|
"grad_norm": 1.1694815158843994, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2611, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.003700835429331177, |
|
"grad_norm": 1.212570309638977, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2556, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.0037351024240472063, |
|
"grad_norm": 1.1467297077178955, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2485, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.0037693694187632357, |
|
"grad_norm": 0.9628469347953796, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2523, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.003803636413479265, |
|
"grad_norm": 1.1593494415283203, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2635, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.0038379034081952946, |
|
"grad_norm": 1.1376386880874634, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2504, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.003872170402911324, |
|
"grad_norm": 1.129338026046753, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2601, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.003906437397627353, |
|
"grad_norm": 1.0889575481414795, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2455, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.003940704392343382, |
|
"grad_norm": 1.1437270641326904, |
|
"learning_rate": 1e-05, |
|
"loss": 0.253, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.003974971387059412, |
|
"grad_norm": 1.0283392667770386, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2507, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.004009238381775441, |
|
"grad_norm": 1.130747675895691, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2715, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.004043505376491471, |
|
"grad_norm": 1.3483778238296509, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2742, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.0040777723712075, |
|
"grad_norm": 1.0879924297332764, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2641, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.00411203936592353, |
|
"grad_norm": 1.1242927312850952, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2586, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.004146306360639559, |
|
"grad_norm": 1.0185858011245728, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2465, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.004180573355355589, |
|
"grad_norm": 0.9555259943008423, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2528, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.004214840350071618, |
|
"grad_norm": 1.210371971130371, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2613, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.0042491073447876474, |
|
"grad_norm": 1.1261368989944458, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2551, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.004283374339503676, |
|
"grad_norm": 1.2142603397369385, |
|
"learning_rate": 1e-05, |
|
"loss": 0.264, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.004317641334219706, |
|
"grad_norm": 1.057758092880249, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2587, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.004351908328935736, |
|
"grad_norm": 1.0871245861053467, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2549, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.004386175323651765, |
|
"grad_norm": 1.1214648485183716, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2582, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.004420442318367795, |
|
"grad_norm": 1.0265707969665527, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2123, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.004454709313083824, |
|
"grad_norm": 1.1180216073989868, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2245, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.004488976307799854, |
|
"grad_norm": 1.028238296508789, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2118, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.004523243302515883, |
|
"grad_norm": 1.0321682691574097, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2196, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.0045575102972319126, |
|
"grad_norm": 1.1180269718170166, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2403, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.0045917772919479415, |
|
"grad_norm": 1.079560399055481, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2309, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.004626044286663971, |
|
"grad_norm": 1.0062284469604492, |
|
"learning_rate": 1e-05, |
|
"loss": 0.228, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.00466031128138, |
|
"grad_norm": 1.1098395586013794, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2435, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.00469457827609603, |
|
"grad_norm": 1.0619688034057617, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2342, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.004728845270812059, |
|
"grad_norm": 1.1943925619125366, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2315, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.004763112265528089, |
|
"grad_norm": 1.0958552360534668, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2379, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.004797379260244118, |
|
"grad_norm": 1.0984197854995728, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2208, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.004831646254960148, |
|
"grad_norm": 1.0741859674453735, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2378, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.004865913249676177, |
|
"grad_norm": 1.1457058191299438, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2516, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.004900180244392207, |
|
"grad_norm": 0.9849014282226562, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2406, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.004934447239108236, |
|
"grad_norm": 1.1174912452697754, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2122, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.0049687142338242654, |
|
"grad_norm": 1.0292854309082031, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2349, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.005002981228540294, |
|
"grad_norm": 1.0343785285949707, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2158, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.005037248223256324, |
|
"grad_norm": 1.1178008317947388, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2264, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.005071515217972353, |
|
"grad_norm": 1.0238450765609741, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2287, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.005105782212688383, |
|
"grad_norm": 1.1728886365890503, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2373, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.005140049207404412, |
|
"grad_norm": 1.227034091949463, |
|
"learning_rate": 1e-05, |
|
"loss": 0.222, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.005140049207404412, |
|
"eval_cer": 13.150467454577527, |
|
"eval_loss": 0.25801682472229004, |
|
"eval_normalized_cer": 9.452438049560353, |
|
"eval_runtime": 227.9378, |
|
"eval_samples_per_second": 2.246, |
|
"eval_steps_per_second": 0.035, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.005174316202120442, |
|
"grad_norm": 1.0703920125961304, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2156, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.005208583196836471, |
|
"grad_norm": 1.1343841552734375, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2126, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.005242850191552501, |
|
"grad_norm": 1.1743741035461426, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2491, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.00527711718626853, |
|
"grad_norm": 1.1476744413375854, |
|
"learning_rate": 1e-05, |
|
"loss": 0.236, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.0053113841809845595, |
|
"grad_norm": 1.0899590253829956, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2361, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.0053456511757005885, |
|
"grad_norm": 1.0281250476837158, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2226, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.005379918170416618, |
|
"grad_norm": 0.9932867884635925, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2301, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.005414185165132647, |
|
"grad_norm": 1.1992309093475342, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2179, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.005448452159848677, |
|
"grad_norm": 1.0017774105072021, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2244, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.005482719154564706, |
|
"grad_norm": 1.0827686786651611, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2313, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.005516986149280736, |
|
"grad_norm": 1.2260409593582153, |
|
"learning_rate": 1e-05, |
|
"loss": 0.229, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.005551253143996765, |
|
"grad_norm": 1.2530804872512817, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2437, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.005585520138712795, |
|
"grad_norm": 1.068452000617981, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2138, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.005619787133428824, |
|
"grad_norm": 1.3108712434768677, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2284, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.005654054128144854, |
|
"grad_norm": 1.0919209718704224, |
|
"learning_rate": 1e-05, |
|
"loss": 0.213, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.005688321122860883, |
|
"grad_norm": 1.1530914306640625, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2292, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.005722588117576912, |
|
"grad_norm": 1.084028959274292, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2393, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.005756855112292941, |
|
"grad_norm": 1.247847557067871, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2452, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.005791122107008971, |
|
"grad_norm": 1.03806734085083, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2317, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.005825389101725, |
|
"grad_norm": 1.1643092632293701, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2348, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.00585965609644103, |
|
"grad_norm": 1.1066207885742188, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2348, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.005893923091157059, |
|
"grad_norm": 1.1813760995864868, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2295, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.005928190085873089, |
|
"grad_norm": 1.1444518566131592, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2101, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.005962457080589118, |
|
"grad_norm": 1.1485129594802856, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2397, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.005996724075305148, |
|
"grad_norm": 1.1813607215881348, |
|
"learning_rate": 1e-05, |
|
"loss": 0.231, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.006030991070021177, |
|
"grad_norm": 1.4075005054473877, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2306, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.0060652580647372065, |
|
"grad_norm": 1.2183804512023926, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2227, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.0060995250594532355, |
|
"grad_norm": 1.3654927015304565, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2341, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.006133792054169265, |
|
"grad_norm": 1.2806668281555176, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2226, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.006168059048885294, |
|
"grad_norm": 1.2949618101119995, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2698, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.006202326043601324, |
|
"grad_norm": 1.3080159425735474, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2691, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.006236593038317353, |
|
"grad_norm": 1.1831908226013184, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2644, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.006270860033033383, |
|
"grad_norm": 1.1216965913772583, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2582, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.006305127027749412, |
|
"grad_norm": 1.1943161487579346, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2769, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.006339394022465442, |
|
"grad_norm": 1.0856040716171265, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2526, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.006373661017181471, |
|
"grad_norm": 1.1100040674209595, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2576, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.006407928011897501, |
|
"grad_norm": 1.3369051218032837, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2684, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.0064421950066135296, |
|
"grad_norm": 1.158797264099121, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2474, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.006476462001329559, |
|
"grad_norm": 1.1821873188018799, |
|
"learning_rate": 1e-05, |
|
"loss": 0.272, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.006510728996045589, |
|
"grad_norm": 1.0739686489105225, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2798, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.006544995990761618, |
|
"grad_norm": 1.0639653205871582, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2682, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.006579262985477648, |
|
"grad_norm": 1.2149512767791748, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2586, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.006613529980193677, |
|
"grad_norm": 1.1057014465332031, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2719, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.006647796974909707, |
|
"grad_norm": 1.0929185152053833, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2703, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.006682063969625736, |
|
"grad_norm": 1.0322917699813843, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2477, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.006716330964341766, |
|
"grad_norm": 1.2460272312164307, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2816, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.006750597959057795, |
|
"grad_norm": 1.2049859762191772, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2648, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.0067848649537738245, |
|
"grad_norm": 1.1182633638381958, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2549, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.0068191319484898535, |
|
"grad_norm": 1.1514990329742432, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2695, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.006853398943205883, |
|
"grad_norm": 1.0150858163833618, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2532, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.006853398943205883, |
|
"eval_cer": 13.565002645969306, |
|
"eval_loss": 0.2523655593395233, |
|
"eval_normalized_cer": 9.942046362909672, |
|
"eval_runtime": 226.5571, |
|
"eval_samples_per_second": 2.26, |
|
"eval_steps_per_second": 0.035, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.006887665937921912, |
|
"grad_norm": 1.0476700067520142, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2555, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.006921932932637942, |
|
"grad_norm": 1.1178691387176514, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2489, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.006956199927353971, |
|
"grad_norm": 1.2596313953399658, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2884, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.006990466922070001, |
|
"grad_norm": 1.1929702758789062, |
|
"learning_rate": 1e-05, |
|
"loss": 0.262, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.00702473391678603, |
|
"grad_norm": 1.1269497871398926, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2758, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.00705900091150206, |
|
"grad_norm": 1.1495511531829834, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2668, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.007093267906218089, |
|
"grad_norm": 1.0648061037063599, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2548, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.007127534900934119, |
|
"grad_norm": 1.3193435668945312, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2743, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.0071618018956501476, |
|
"grad_norm": 1.2877907752990723, |
|
"learning_rate": 1e-05, |
|
"loss": 0.248, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.007196068890366177, |
|
"grad_norm": 1.2012474536895752, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2662, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.007230335885082206, |
|
"grad_norm": 1.1491566896438599, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2666, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.007264602879798236, |
|
"grad_norm": 1.1861019134521484, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2618, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.007298869874514265, |
|
"grad_norm": 1.123963713645935, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2646, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.007333136869230295, |
|
"grad_norm": 1.2697441577911377, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2713, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.007367403863946324, |
|
"grad_norm": 0.9741083383560181, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2463, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.007401670858662354, |
|
"grad_norm": 1.0292670726776123, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2542, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.007435937853378383, |
|
"grad_norm": 1.0958001613616943, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2463, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.007470204848094413, |
|
"grad_norm": 1.166869044303894, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2454, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.007504471842810442, |
|
"grad_norm": 1.2552424669265747, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2498, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.0075387388375264715, |
|
"grad_norm": 1.1589868068695068, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2659, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.0075730058322425004, |
|
"grad_norm": 1.1640287637710571, |
|
"learning_rate": 1e-05, |
|
"loss": 0.257, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.00760727282695853, |
|
"grad_norm": 1.0953587293624878, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2444, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.007641539821674559, |
|
"grad_norm": 1.2174441814422607, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2626, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.007675806816390589, |
|
"grad_norm": 1.1194220781326294, |
|
"learning_rate": 1e-05, |
|
"loss": 0.241, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.007710073811106618, |
|
"grad_norm": 1.0677419900894165, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2718, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.007744340805822648, |
|
"grad_norm": 1.0956069231033325, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2493, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.007778607800538677, |
|
"grad_norm": 1.1772819757461548, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2614, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.007812874795254707, |
|
"grad_norm": 1.0341110229492188, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2488, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.007847141789970737, |
|
"grad_norm": 1.174186110496521, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2542, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.007881408784686765, |
|
"grad_norm": 0.9867792725563049, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2582, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.007915675779402795, |
|
"grad_norm": 1.1443661451339722, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2331, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.007949942774118824, |
|
"grad_norm": 1.117896318435669, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2277, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.007984209768834854, |
|
"grad_norm": 1.13510000705719, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2137, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.008018476763550882, |
|
"grad_norm": 0.9749162793159485, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2161, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.008052743758266912, |
|
"grad_norm": 1.1519534587860107, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2254, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.008087010752982942, |
|
"grad_norm": 1.0861778259277344, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2153, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.008121277747698972, |
|
"grad_norm": 1.0184444189071655, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2066, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.008155544742415, |
|
"grad_norm": 1.0581239461898804, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2243, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.00818981173713103, |
|
"grad_norm": 0.9954540729522705, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2171, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.00822407873184706, |
|
"grad_norm": 1.121960163116455, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2216, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.00825834572656309, |
|
"grad_norm": 1.097725510597229, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2142, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.008292612721279118, |
|
"grad_norm": 1.0566459894180298, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2272, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.008326879715995147, |
|
"grad_norm": 1.0077927112579346, |
|
"learning_rate": 1e-05, |
|
"loss": 0.211, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.008361146710711177, |
|
"grad_norm": 1.176035761833191, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2125, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.008395413705427207, |
|
"grad_norm": 1.0064568519592285, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2066, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.008429680700143235, |
|
"grad_norm": 1.1852171421051025, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2087, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.008463947694859265, |
|
"grad_norm": 0.9580971002578735, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2172, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.008498214689575295, |
|
"grad_norm": 1.1230813264846802, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2104, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.008532481684291325, |
|
"grad_norm": 1.1891340017318726, |
|
"learning_rate": 1e-05, |
|
"loss": 0.229, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.008566748679007353, |
|
"grad_norm": 1.2579045295715332, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2109, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.008566748679007353, |
|
"eval_cer": 13.300405715293703, |
|
"eval_loss": 0.26059621572494507, |
|
"eval_normalized_cer": 9.502398081534773, |
|
"eval_runtime": 226.5522, |
|
"eval_samples_per_second": 2.26, |
|
"eval_steps_per_second": 0.035, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.008601015673723383, |
|
"grad_norm": 1.0522507429122925, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2154, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.008635282668439413, |
|
"grad_norm": 1.0875492095947266, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2251, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.008669549663155442, |
|
"grad_norm": 1.0868346691131592, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2086, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.008703816657871472, |
|
"grad_norm": 1.0993175506591797, |
|
"learning_rate": 1e-05, |
|
"loss": 0.205, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.0087380836525875, |
|
"grad_norm": 1.0495941638946533, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2135, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.00877235064730353, |
|
"grad_norm": 1.0326807498931885, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2105, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.00880661764201956, |
|
"grad_norm": 1.0804367065429688, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2438, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.00884088463673559, |
|
"grad_norm": 1.0738023519515991, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2537, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.008875151631451618, |
|
"grad_norm": 1.1695871353149414, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2518, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.008909418626167648, |
|
"grad_norm": 1.155653476715088, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2592, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.008943685620883678, |
|
"grad_norm": 1.1516027450561523, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2387, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.008977952615599707, |
|
"grad_norm": 1.2618260383605957, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2638, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.009012219610315736, |
|
"grad_norm": 1.2422987222671509, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2459, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.009046486605031765, |
|
"grad_norm": 1.1460082530975342, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2509, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.009080753599747795, |
|
"grad_norm": 1.2502261400222778, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2595, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.009115020594463825, |
|
"grad_norm": 1.139840006828308, |
|
"learning_rate": 1e-05, |
|
"loss": 0.255, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.009149287589179853, |
|
"grad_norm": 1.3247896432876587, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2721, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.009183554583895883, |
|
"grad_norm": 1.1355103254318237, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2604, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.009217821578611913, |
|
"grad_norm": 1.106541633605957, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2374, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.009252088573327943, |
|
"grad_norm": 1.2375975847244263, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2719, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.00928635556804397, |
|
"grad_norm": 1.1048275232315063, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2791, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.00932062256276, |
|
"grad_norm": 0.9889766573905945, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2457, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.00935488955747603, |
|
"grad_norm": 1.1566202640533447, |
|
"learning_rate": 1e-05, |
|
"loss": 0.252, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.00938915655219206, |
|
"grad_norm": 1.1586074829101562, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2517, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.009423423546908088, |
|
"grad_norm": 0.990419328212738, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2572, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.009457690541624118, |
|
"grad_norm": 1.1101089715957642, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2525, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.009491957536340148, |
|
"grad_norm": 1.0488269329071045, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2452, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.009526224531056178, |
|
"grad_norm": 1.1127737760543823, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2578, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.009560491525772206, |
|
"grad_norm": 1.2353262901306152, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2412, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.009594758520488236, |
|
"grad_norm": 1.1262571811676025, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2438, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.009629025515204266, |
|
"grad_norm": 1.294323205947876, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2512, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.009663292509920296, |
|
"grad_norm": 1.0706703662872314, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2595, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.009697559504636324, |
|
"grad_norm": 1.0089077949523926, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2522, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.009731826499352354, |
|
"grad_norm": 0.9697763323783875, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2684, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.009766093494068383, |
|
"grad_norm": 1.1122509241104126, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2629, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.009800360488784413, |
|
"grad_norm": 1.0381057262420654, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2482, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.009834627483500441, |
|
"grad_norm": 1.126947045326233, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2674, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.009868894478216471, |
|
"grad_norm": 1.0714973211288452, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2634, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.009903161472932501, |
|
"grad_norm": 1.0942039489746094, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2751, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.009937428467648531, |
|
"grad_norm": 1.1503955125808716, |
|
"learning_rate": 1e-05, |
|
"loss": 0.272, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.009971695462364559, |
|
"grad_norm": 1.1912988424301147, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2645, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.010005962457080589, |
|
"grad_norm": 1.0941249132156372, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2531, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.010040229451796619, |
|
"grad_norm": 1.2545968294143677, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2562, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.010074496446512649, |
|
"grad_norm": 1.3605022430419922, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2601, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.010108763441228677, |
|
"grad_norm": 1.0911775827407837, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2605, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.010143030435944706, |
|
"grad_norm": 1.133867859840393, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2554, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.010177297430660736, |
|
"grad_norm": 1.2511764764785767, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2658, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.010211564425376766, |
|
"grad_norm": 1.1705303192138672, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2737, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.010245831420092794, |
|
"grad_norm": 1.132071614265442, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2665, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.010280098414808824, |
|
"grad_norm": 1.2301791906356812, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2645, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.010280098414808824, |
|
"eval_cer": 12.938789910037043, |
|
"eval_loss": 0.2511608302593231, |
|
"eval_normalized_cer": 9.152677857713828, |
|
"eval_runtime": 227.4553, |
|
"eval_samples_per_second": 2.251, |
|
"eval_steps_per_second": 0.035, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.010314365409524854, |
|
"grad_norm": 1.1527032852172852, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2508, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.010348632404240884, |
|
"grad_norm": 1.1162952184677124, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2728, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.010382899398956912, |
|
"grad_norm": 1.062084436416626, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2496, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.010417166393672942, |
|
"grad_norm": 1.1536457538604736, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2633, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.010451433388388972, |
|
"grad_norm": 1.2096189260482788, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2498, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.010485700383105001, |
|
"grad_norm": 0.9950299263000488, |
|
"learning_rate": 1e-05, |
|
"loss": 0.246, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.01051996737782103, |
|
"grad_norm": 1.0628243684768677, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2544, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.01055423437253706, |
|
"grad_norm": 1.042555570602417, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2401, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.01058850136725309, |
|
"grad_norm": 1.22646164894104, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2503, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.010622768361969119, |
|
"grad_norm": 1.0862691402435303, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2508, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.010657035356685147, |
|
"grad_norm": 1.148868203163147, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2526, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.010691302351401177, |
|
"grad_norm": 1.1677169799804688, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2481, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.010725569346117207, |
|
"grad_norm": 0.990696132183075, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2421, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.010759836340833237, |
|
"grad_norm": 1.2869263887405396, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2463, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.010794103335549265, |
|
"grad_norm": 1.0741721391677856, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2617, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.010828370330265295, |
|
"grad_norm": 1.103102445602417, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2442, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.010862637324981324, |
|
"grad_norm": 1.2562378644943237, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2589, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.010896904319697354, |
|
"grad_norm": 1.2153191566467285, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2417, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.010931171314413384, |
|
"grad_norm": 1.0507330894470215, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2607, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.010965438309129412, |
|
"grad_norm": 1.1882787942886353, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2469, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.010999705303845442, |
|
"grad_norm": 1.1394702196121216, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2574, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.011033972298561472, |
|
"grad_norm": 1.2482614517211914, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2456, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.011068239293277502, |
|
"grad_norm": 1.0362995862960815, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2589, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.01110250628799353, |
|
"grad_norm": 1.1730456352233887, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2497, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.01113677328270956, |
|
"grad_norm": 1.1563142538070679, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2439, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.01117104027742559, |
|
"grad_norm": 1.1030769348144531, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2671, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.01120530727214162, |
|
"grad_norm": 1.1719223260879517, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2501, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.011239574266857648, |
|
"grad_norm": 1.1840440034866333, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2643, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.011273841261573677, |
|
"grad_norm": 1.1928170919418335, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2629, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.011308108256289707, |
|
"grad_norm": 1.0311812162399292, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2552, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.011342375251005737, |
|
"grad_norm": 1.1625889539718628, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2561, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.011376642245721765, |
|
"grad_norm": 1.0287625789642334, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2341, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.011410909240437795, |
|
"grad_norm": 1.1310815811157227, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2554, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.011445176235153825, |
|
"grad_norm": 1.1266168355941772, |
|
"learning_rate": 1e-05, |
|
"loss": 0.234, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.011479443229869855, |
|
"grad_norm": 1.1979014873504639, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2559, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.011513710224585883, |
|
"grad_norm": 1.0378515720367432, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2502, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.011547977219301913, |
|
"grad_norm": 1.1832512617111206, |
|
"learning_rate": 1e-05, |
|
"loss": 0.236, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 0.011582244214017942, |
|
"grad_norm": 0.9605569839477539, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2349, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.011616511208733972, |
|
"grad_norm": 1.0463056564331055, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2328, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 0.01165077820345, |
|
"grad_norm": 1.1021932363510132, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2383, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.01168504519816603, |
|
"grad_norm": 1.040493130683899, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2374, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.01171931219288206, |
|
"grad_norm": 1.1483063697814941, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2398, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.01175357918759809, |
|
"grad_norm": 1.0316531658172607, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2329, |
|
"step": 3430 |
|
},
{
"epoch": 0.011787846182314118,
"grad_norm": 1.1677886247634888,
"learning_rate": 1e-05,
"loss": 0.2493,
"step": 3440
},
{
"epoch": 0.011822113177030148,
"grad_norm": 1.2078930139541626,
"learning_rate": 1e-05,
"loss": 0.2337,
"step": 3450
},
{
"epoch": 0.011856380171746178,
"grad_norm": 1.178202509880066,
"learning_rate": 1e-05,
"loss": 0.239,
"step": 3460
},
{
"epoch": 0.011890647166462208,
"grad_norm": 1.0453248023986816,
"learning_rate": 1e-05,
"loss": 0.2233,
"step": 3470
},
{
"epoch": 0.011924914161178236,
"grad_norm": 1.0171067714691162,
"learning_rate": 1e-05,
"loss": 0.2338,
"step": 3480
},
{
"epoch": 0.011959181155894266,
"grad_norm": 1.051792860031128,
"learning_rate": 1e-05,
"loss": 0.2394,
"step": 3490
},
{
"epoch": 0.011993448150610295,
"grad_norm": 1.1237847805023193,
"learning_rate": 1e-05,
"loss": 0.2428,
"step": 3500
},
{
"epoch": 0.011993448150610295,
"eval_cer": 13.071088375374845,
"eval_loss": 0.25454944372177124,
"eval_normalized_cer": 9.542366107114308,
"eval_runtime": 228.9468,
"eval_samples_per_second": 2.236,
"eval_steps_per_second": 0.035,
"step": 3500
},
{
"epoch": 0.012027715145326325,
"grad_norm": 1.1366350650787354,
"learning_rate": 1e-05,
"loss": 0.2353,
"step": 3510
},
{
"epoch": 0.012061982140042353,
"grad_norm": 1.136927604675293,
"learning_rate": 1e-05,
"loss": 0.2358,
"step": 3520
},
{
"epoch": 0.012096249134758383,
"grad_norm": 1.1875656843185425,
"learning_rate": 1e-05,
"loss": 0.2305,
"step": 3530
},
{
"epoch": 0.012130516129474413,
"grad_norm": 1.2016057968139648,
"learning_rate": 1e-05,
"loss": 0.2435,
"step": 3540
},
{
"epoch": 0.012164783124190443,
"grad_norm": 1.209622859954834,
"learning_rate": 1e-05,
"loss": 0.2361,
"step": 3550
},
{
"epoch": 0.012199050118906471,
"grad_norm": 1.0696970224380493,
"learning_rate": 1e-05,
"loss": 0.2385,
"step": 3560
},
{
"epoch": 0.0122333171136225,
"grad_norm": 1.2674167156219482,
"learning_rate": 1e-05,
"loss": 0.243,
"step": 3570
},
{
"epoch": 0.01226758410833853,
"grad_norm": 1.2928141355514526,
"learning_rate": 1e-05,
"loss": 0.2491,
"step": 3580
},
{
"epoch": 0.01230185110305456,
"grad_norm": 1.0642272233963013,
"learning_rate": 1e-05,
"loss": 0.2356,
"step": 3590
},
{
"epoch": 0.012336118097770589,
"grad_norm": 1.0935972929000854,
"learning_rate": 1e-05,
"loss": 0.2389,
"step": 3600
},
{
"epoch": 0.012370385092486618,
"grad_norm": 1.180668830871582,
"learning_rate": 1e-05,
"loss": 0.2409,
"step": 3610
},
{
"epoch": 0.012404652087202648,
"grad_norm": 1.2312487363815308,
"learning_rate": 1e-05,
"loss": 0.2478,
"step": 3620
},
{
"epoch": 0.012438919081918678,
"grad_norm": 0.947522759437561,
"learning_rate": 1e-05,
"loss": 0.2281,
"step": 3630
},
{
"epoch": 0.012473186076634706,
"grad_norm": 1.0618727207183838,
"learning_rate": 1e-05,
"loss": 0.2423,
"step": 3640
},
{
"epoch": 0.012507453071350736,
"grad_norm": 1.0766098499298096,
"learning_rate": 1e-05,
"loss": 0.2364,
"step": 3650
},
{
"epoch": 0.012541720066066766,
"grad_norm": 1.1174747943878174,
"learning_rate": 1e-05,
"loss": 0.238,
"step": 3660
},
{
"epoch": 0.012575987060782796,
"grad_norm": 1.1940118074417114,
"learning_rate": 1e-05,
"loss": 0.2212,
"step": 3670
},
{
"epoch": 0.012610254055498824,
"grad_norm": 1.1407246589660645,
"learning_rate": 1e-05,
"loss": 0.2423,
"step": 3680
},
{
"epoch": 0.012644521050214854,
"grad_norm": 1.2646050453186035,
"learning_rate": 1e-05,
"loss": 0.2252,
"step": 3690
},
{
"epoch": 0.012678788044930884,
"grad_norm": 1.130337119102478,
"learning_rate": 1e-05,
"loss": 0.2131,
"step": 3700
},
{
"epoch": 0.012713055039646913,
"grad_norm": 1.1432557106018066,
"learning_rate": 1e-05,
"loss": 0.2386,
"step": 3710
},
{
"epoch": 0.012747322034362941,
"grad_norm": 1.1370545625686646,
"learning_rate": 1e-05,
"loss": 0.2347,
"step": 3720
},
{
"epoch": 0.012781589029078971,
"grad_norm": 1.3126403093338013,
"learning_rate": 1e-05,
"loss": 0.2159,
"step": 3730
},
{
"epoch": 0.012815856023795001,
"grad_norm": 1.2375295162200928,
"learning_rate": 1e-05,
"loss": 0.2275,
"step": 3740
},
{
"epoch": 0.012850123018511031,
"grad_norm": 1.0877372026443481,
"learning_rate": 1e-05,
"loss": 0.2201,
"step": 3750
},
{
"epoch": 0.012884390013227059,
"grad_norm": 1.1122978925704956,
"learning_rate": 1e-05,
"loss": 0.229,
"step": 3760
},
{
"epoch": 0.012918657007943089,
"grad_norm": 1.0270159244537354,
"learning_rate": 1e-05,
"loss": 0.2313,
"step": 3770
},
{
"epoch": 0.012952924002659119,
"grad_norm": 1.1370947360992432,
"learning_rate": 1e-05,
"loss": 0.229,
"step": 3780
},
{
"epoch": 0.012987190997375149,
"grad_norm": 1.2888813018798828,
"learning_rate": 1e-05,
"loss": 0.2384,
"step": 3790
},
{
"epoch": 0.013021457992091178,
"grad_norm": 1.2443634271621704,
"learning_rate": 1e-05,
"loss": 0.2218,
"step": 3800
},
{
"epoch": 0.013055724986807207,
"grad_norm": 1.1919447183609009,
"learning_rate": 1e-05,
"loss": 0.2277,
"step": 3810
},
{
"epoch": 0.013089991981523236,
"grad_norm": 1.140600562095642,
"learning_rate": 1e-05,
"loss": 0.2317,
"step": 3820
},
{
"epoch": 0.013124258976239266,
"grad_norm": 1.074697494506836,
"learning_rate": 1e-05,
"loss": 0.2273,
"step": 3830
},
{
"epoch": 0.013158525970955296,
"grad_norm": 1.1003391742706299,
"learning_rate": 1e-05,
"loss": 0.2217,
"step": 3840
},
{
"epoch": 0.013192792965671324,
"grad_norm": 1.1427338123321533,
"learning_rate": 1e-05,
"loss": 0.2377,
"step": 3850
},
{
"epoch": 0.013227059960387354,
"grad_norm": 1.0806514024734497,
"learning_rate": 1e-05,
"loss": 0.2332,
"step": 3860
},
{
"epoch": 0.013261326955103384,
"grad_norm": 1.1547067165374756,
"learning_rate": 1e-05,
"loss": 0.2306,
"step": 3870
},
{
"epoch": 0.013295593949819414,
"grad_norm": 1.2483099699020386,
"learning_rate": 1e-05,
"loss": 0.2166,
"step": 3880
},
{
"epoch": 0.013329860944535442,
"grad_norm": 1.096939206123352,
"learning_rate": 1e-05,
"loss": 0.2253,
"step": 3890
},
{
"epoch": 0.013364127939251472,
"grad_norm": 1.1876115798950195,
"learning_rate": 1e-05,
"loss": 0.2377,
"step": 3900
},
{
"epoch": 0.013398394933967502,
"grad_norm": 1.1380902528762817,
"learning_rate": 1e-05,
"loss": 0.2256,
"step": 3910
},
{
"epoch": 0.013432661928683531,
"grad_norm": 1.0738089084625244,
"learning_rate": 1e-05,
"loss": 0.2307,
"step": 3920
},
{
"epoch": 0.01346692892339956,
"grad_norm": 1.0351170301437378,
"learning_rate": 1e-05,
"loss": 0.2296,
"step": 3930
},
{
"epoch": 0.01350119591811559,
"grad_norm": 1.2752678394317627,
"learning_rate": 1e-05,
"loss": 0.2462,
"step": 3940
},
{
"epoch": 0.01353546291283162,
"grad_norm": 1.2618532180786133,
"learning_rate": 1e-05,
"loss": 0.2364,
"step": 3950
},
{
"epoch": 0.013569729907547649,
"grad_norm": 1.1907076835632324,
"learning_rate": 1e-05,
"loss": 0.2397,
"step": 3960
},
{
"epoch": 0.013603996902263677,
"grad_norm": 0.9435076117515564,
"learning_rate": 1e-05,
"loss": 0.2391,
"step": 3970
},
{
"epoch": 0.013638263896979707,
"grad_norm": 1.0608407258987427,
"learning_rate": 1e-05,
"loss": 0.2241,
"step": 3980
},
{
"epoch": 0.013672530891695737,
"grad_norm": 1.0729584693908691,
"learning_rate": 1e-05,
"loss": 0.2237,
"step": 3990
},
{
"epoch": 0.013706797886411767,
"grad_norm": 1.2006182670593262,
"learning_rate": 1e-05,
"loss": 0.2386,
"step": 4000
},
{
"epoch": 0.013706797886411767,
"eval_cer": 12.594813900158758,
"eval_loss": 0.25156331062316895,
"eval_normalized_cer": 8.912869704236611,
"eval_runtime": 228.7977,
"eval_samples_per_second": 2.238,
"eval_steps_per_second": 0.035,
"step": 4000
},
{
"epoch": 0.013741064881127795,
"grad_norm": 1.2020457983016968,
"learning_rate": 1e-05,
"loss": 0.2318,
"step": 4010
},
{
"epoch": 0.013775331875843825,
"grad_norm": 1.0251790285110474,
"learning_rate": 1e-05,
"loss": 0.248,
"step": 4020
},
{
"epoch": 0.013809598870559854,
"grad_norm": 1.160437822341919,
"learning_rate": 1e-05,
"loss": 0.2385,
"step": 4030
},
{
"epoch": 0.013843865865275884,
"grad_norm": 1.025770664215088,
"learning_rate": 1e-05,
"loss": 0.2293,
"step": 4040
},
{
"epoch": 0.013878132859991912,
"grad_norm": 1.111954689025879,
"learning_rate": 1e-05,
"loss": 0.2377,
"step": 4050
},
{
"epoch": 0.013912399854707942,
"grad_norm": 1.0644809007644653,
"learning_rate": 1e-05,
"loss": 0.2195,
"step": 4060
},
{
"epoch": 0.013946666849423972,
"grad_norm": 1.2926712036132812,
"learning_rate": 1e-05,
"loss": 0.2508,
"step": 4070
},
{
"epoch": 0.013980933844140002,
"grad_norm": 1.2169601917266846,
"learning_rate": 1e-05,
"loss": 0.2401,
"step": 4080
},
{
"epoch": 0.01401520083885603,
"grad_norm": 1.1396681070327759,
"learning_rate": 1e-05,
"loss": 0.2305,
"step": 4090
},
{
"epoch": 0.01404946783357206,
"grad_norm": 1.2242721319198608,
"learning_rate": 1e-05,
"loss": 0.2301,
"step": 4100
},
{
"epoch": 0.01408373482828809,
"grad_norm": 1.195324420928955,
"learning_rate": 1e-05,
"loss": 0.2368,
"step": 4110
},
{
"epoch": 0.01411800182300412,
"grad_norm": 1.2345412969589233,
"learning_rate": 1e-05,
"loss": 0.2301,
"step": 4120
},
{
"epoch": 0.014152268817720148,
"grad_norm": 1.1502156257629395,
"learning_rate": 1e-05,
"loss": 0.2327,
"step": 4130
},
{
"epoch": 0.014186535812436177,
"grad_norm": 1.2128121852874756,
"learning_rate": 1e-05,
"loss": 0.2458,
"step": 4140
},
{
"epoch": 0.014220802807152207,
"grad_norm": 1.2618858814239502,
"learning_rate": 1e-05,
"loss": 0.231,
"step": 4150
},
{
"epoch": 0.014255069801868237,
"grad_norm": 1.0879299640655518,
"learning_rate": 1e-05,
"loss": 0.2302,
"step": 4160
},
{
"epoch": 0.014289336796584265,
"grad_norm": 0.9794358015060425,
"learning_rate": 1e-05,
"loss": 0.239,
"step": 4170
},
{
"epoch": 0.014323603791300295,
"grad_norm": 1.1454006433486938,
"learning_rate": 1e-05,
"loss": 0.2328,
"step": 4180
},
{
"epoch": 0.014357870786016325,
"grad_norm": 1.223686933517456,
"learning_rate": 1e-05,
"loss": 0.2211,
"step": 4190
},
{
"epoch": 0.014392137780732355,
"grad_norm": 1.1423155069351196,
"learning_rate": 1e-05,
"loss": 0.2391,
"step": 4200
},
{
"epoch": 0.014426404775448383,
"grad_norm": 1.1027394533157349,
"learning_rate": 1e-05,
"loss": 0.2279,
"step": 4210
},
{
"epoch": 0.014460671770164413,
"grad_norm": 1.1777397394180298,
"learning_rate": 1e-05,
"loss": 0.2293,
"step": 4220
},
{
"epoch": 0.014494938764880443,
"grad_norm": 1.01688551902771,
"learning_rate": 1e-05,
"loss": 0.2275,
"step": 4230
},
{
"epoch": 0.014529205759596472,
"grad_norm": 1.1520488262176514,
"learning_rate": 1e-05,
"loss": 0.2301,
"step": 4240
},
{
"epoch": 0.0145634727543125,
"grad_norm": 1.2820484638214111,
"learning_rate": 1e-05,
"loss": 0.2205,
"step": 4250
},
{
"epoch": 0.01459773974902853,
"grad_norm": 1.169291377067566,
"learning_rate": 1e-05,
"loss": 0.2389,
"step": 4260
},
{
"epoch": 0.01463200674374456,
"grad_norm": 1.1135886907577515,
"learning_rate": 1e-05,
"loss": 0.2384,
"step": 4270
},
{
"epoch": 0.01466627373846059,
"grad_norm": 1.0846205949783325,
"learning_rate": 1e-05,
"loss": 0.223,
"step": 4280
},
{
"epoch": 0.014700540733176618,
"grad_norm": 0.981488049030304,
"learning_rate": 1e-05,
"loss": 0.2092,
"step": 4290
},
{
"epoch": 0.014734807727892648,
"grad_norm": 1.0437407493591309,
"learning_rate": 1e-05,
"loss": 0.2293,
"step": 4300
},
{
"epoch": 0.014769074722608678,
"grad_norm": 1.005792260169983,
"learning_rate": 1e-05,
"loss": 0.2286,
"step": 4310
},
{
"epoch": 0.014803341717324708,
"grad_norm": 1.1903142929077148,
"learning_rate": 1e-05,
"loss": 0.231,
"step": 4320
},
{
"epoch": 0.014837608712040736,
"grad_norm": 1.1308993101119995,
"learning_rate": 1e-05,
"loss": 0.2458,
"step": 4330
},
{
"epoch": 0.014871875706756766,
"grad_norm": 1.0948210954666138,
"learning_rate": 1e-05,
"loss": 0.213,
"step": 4340
},
{
"epoch": 0.014906142701472795,
"grad_norm": 1.2674663066864014,
"learning_rate": 1e-05,
"loss": 0.2432,
"step": 4350
},
{
"epoch": 0.014940409696188825,
"grad_norm": 1.4228485822677612,
"learning_rate": 1e-05,
"loss": 0.2491,
"step": 4360
},
{
"epoch": 0.014974676690904853,
"grad_norm": 1.1533160209655762,
"learning_rate": 1e-05,
"loss": 0.2485,
"step": 4370
},
{
"epoch": 0.015008943685620883,
"grad_norm": 1.1454424858093262,
"learning_rate": 1e-05,
"loss": 0.2635,
"step": 4380
},
{
"epoch": 0.015043210680336913,
"grad_norm": 1.2944281101226807,
"learning_rate": 1e-05,
"loss": 0.2651,
"step": 4390
},
{
"epoch": 0.015077477675052943,
"grad_norm": 1.2148584127426147,
"learning_rate": 1e-05,
"loss": 0.2694,
"step": 4400
},
{
"epoch": 0.015111744669768971,
"grad_norm": 1.091282844543457,
"learning_rate": 1e-05,
"loss": 0.2672,
"step": 4410
},
{
"epoch": 0.015146011664485001,
"grad_norm": 1.2254445552825928,
"learning_rate": 1e-05,
"loss": 0.2583,
"step": 4420
},
{
"epoch": 0.01518027865920103,
"grad_norm": 1.367516279220581,
"learning_rate": 1e-05,
"loss": 0.2586,
"step": 4430
},
{
"epoch": 0.01521454565391706,
"grad_norm": 1.1858383417129517,
"learning_rate": 1e-05,
"loss": 0.2764,
"step": 4440
},
{
"epoch": 0.01524881264863309,
"grad_norm": 1.1331857442855835,
"learning_rate": 1e-05,
"loss": 0.2577,
"step": 4450
},
{
"epoch": 0.015283079643349119,
"grad_norm": 1.2343239784240723,
"learning_rate": 1e-05,
"loss": 0.2661,
"step": 4460
},
{
"epoch": 0.015317346638065148,
"grad_norm": 1.0893656015396118,
"learning_rate": 1e-05,
"loss": 0.2538,
"step": 4470
},
{
"epoch": 0.015351613632781178,
"grad_norm": 1.1467857360839844,
"learning_rate": 1e-05,
"loss": 0.2496,
"step": 4480
},
{
"epoch": 0.015385880627497208,
"grad_norm": 1.2753335237503052,
"learning_rate": 1e-05,
"loss": 0.2797,
"step": 4490
},
{
"epoch": 0.015420147622213236,
"grad_norm": 1.1355762481689453,
"learning_rate": 1e-05,
"loss": 0.2672,
"step": 4500
},
{
"epoch": 0.015420147622213236,
"eval_cer": 13.159287352266713,
"eval_loss": 0.24996142089366913,
"eval_normalized_cer": 9.59232613908873,
"eval_runtime": 228.0477,
"eval_samples_per_second": 2.245,
"eval_steps_per_second": 0.035,
"step": 4500
},
{
"epoch": 0.015454414616929266,
"grad_norm": 1.2256762981414795,
"learning_rate": 1e-05,
"loss": 0.2662,
"step": 4510
},
{
"epoch": 0.015488681611645296,
"grad_norm": 1.0631389617919922,
"learning_rate": 1e-05,
"loss": 0.2596,
"step": 4520
},
{
"epoch": 0.015522948606361326,
"grad_norm": 1.0759390592575073,
"learning_rate": 1e-05,
"loss": 0.2553,
"step": 4530
},
{
"epoch": 0.015557215601077354,
"grad_norm": 1.1867231130599976,
"learning_rate": 1e-05,
"loss": 0.2498,
"step": 4540
},
{
"epoch": 0.015591482595793384,
"grad_norm": 1.1203633546829224,
"learning_rate": 1e-05,
"loss": 0.2732,
"step": 4550
},
{
"epoch": 0.015625749590509413,
"grad_norm": 1.1223920583724976,
"learning_rate": 1e-05,
"loss": 0.2535,
"step": 4560
},
{
"epoch": 0.015660016585225443,
"grad_norm": 1.066497564315796,
"learning_rate": 1e-05,
"loss": 0.2456,
"step": 4570
},
{
"epoch": 0.015694283579941473,
"grad_norm": 1.2520133256912231,
"learning_rate": 1e-05,
"loss": 0.2558,
"step": 4580
},
{
"epoch": 0.015728550574657503,
"grad_norm": 1.3602423667907715,
"learning_rate": 1e-05,
"loss": 0.2698,
"step": 4590
},
{
"epoch": 0.01576281756937353,
"grad_norm": 1.1748729944229126,
"learning_rate": 1e-05,
"loss": 0.2621,
"step": 4600
},
{
"epoch": 0.01579708456408956,
"grad_norm": 0.9431802034378052,
"learning_rate": 1e-05,
"loss": 0.2433,
"step": 4610
},
{
"epoch": 0.01583135155880559,
"grad_norm": 1.0146753787994385,
"learning_rate": 1e-05,
"loss": 0.239,
"step": 4620
},
{
"epoch": 0.01586561855352162,
"grad_norm": 1.1340891122817993,
"learning_rate": 1e-05,
"loss": 0.2437,
"step": 4630
},
{
"epoch": 0.01589988554823765,
"grad_norm": 1.1456454992294312,
"learning_rate": 1e-05,
"loss": 0.2307,
"step": 4640
},
{
"epoch": 0.01593415254295368,
"grad_norm": 1.1026827096939087,
"learning_rate": 1e-05,
"loss": 0.2295,
"step": 4650
},
{
"epoch": 0.01596841953766971,
"grad_norm": 1.2215088605880737,
"learning_rate": 1e-05,
"loss": 0.245,
"step": 4660
},
{
"epoch": 0.01600268653238574,
"grad_norm": 1.1760615110397339,
"learning_rate": 1e-05,
"loss": 0.2461,
"step": 4670
},
{
"epoch": 0.016036953527101765,
"grad_norm": 1.1690876483917236,
"learning_rate": 1e-05,
"loss": 0.2282,
"step": 4680
},
{
"epoch": 0.016071220521817794,
"grad_norm": 1.182026743888855,
"learning_rate": 1e-05,
"loss": 0.2351,
"step": 4690
},
{
"epoch": 0.016105487516533824,
"grad_norm": 1.0182474851608276,
"learning_rate": 1e-05,
"loss": 0.2284,
"step": 4700
},
{
"epoch": 0.016139754511249854,
"grad_norm": 1.2531431913375854,
"learning_rate": 1e-05,
"loss": 0.244,
"step": 4710
},
{
"epoch": 0.016174021505965884,
"grad_norm": 0.9633692502975464,
"learning_rate": 1e-05,
"loss": 0.2297,
"step": 4720
},
{
"epoch": 0.016208288500681914,
"grad_norm": 1.1144667863845825,
"learning_rate": 1e-05,
"loss": 0.2475,
"step": 4730
},
{
"epoch": 0.016242555495397944,
"grad_norm": 1.0768555402755737,
"learning_rate": 1e-05,
"loss": 0.2216,
"step": 4740
},
{
"epoch": 0.016276822490113974,
"grad_norm": 1.2052035331726074,
"learning_rate": 1e-05,
"loss": 0.2278,
"step": 4750
},
{
"epoch": 0.01631108948483,
"grad_norm": 1.0291496515274048,
"learning_rate": 1e-05,
"loss": 0.2226,
"step": 4760
},
{
"epoch": 0.01634535647954603,
"grad_norm": 1.2100346088409424,
"learning_rate": 1e-05,
"loss": 0.2278,
"step": 4770
},
{
"epoch": 0.01637962347426206,
"grad_norm": 1.214861273765564,
"learning_rate": 1e-05,
"loss": 0.2313,
"step": 4780
},
{
"epoch": 0.01641389046897809,
"grad_norm": 1.137210726737976,
"learning_rate": 1e-05,
"loss": 0.2235,
"step": 4790
},
{
"epoch": 0.01644815746369412,
"grad_norm": 1.046673059463501,
"learning_rate": 1e-05,
"loss": 0.2231,
"step": 4800
},
{
"epoch": 0.01648242445841015,
"grad_norm": 1.08164644241333,
"learning_rate": 1e-05,
"loss": 0.2235,
"step": 4810
},
{
"epoch": 0.01651669145312618,
"grad_norm": 1.1432491540908813,
"learning_rate": 1e-05,
"loss": 0.246,
"step": 4820
},
{
"epoch": 0.01655095844784221,
"grad_norm": 1.1684173345565796,
"learning_rate": 1e-05,
"loss": 0.218,
"step": 4830
},
{
"epoch": 0.016585225442558235,
"grad_norm": 1.0895615816116333,
"learning_rate": 1e-05,
"loss": 0.2109,
"step": 4840
},
{
"epoch": 0.016619492437274265,
"grad_norm": 1.1505770683288574,
"learning_rate": 1e-05,
"loss": 0.2283,
"step": 4850
},
{
"epoch": 0.016653759431990295,
"grad_norm": 1.3385730981826782,
"learning_rate": 1e-05,
"loss": 0.2344,
"step": 4860
},
{
"epoch": 0.016688026426706325,
"grad_norm": 1.109035611152649,
"learning_rate": 1e-05,
"loss": 0.2558,
"step": 4870
},
{
"epoch": 0.016722293421422355,
"grad_norm": 1.1834880113601685,
"learning_rate": 1e-05,
"loss": 0.2247,
"step": 4880
},
{
"epoch": 0.016756560416138384,
"grad_norm": 1.2369152307510376,
"learning_rate": 1e-05,
"loss": 0.2449,
"step": 4890
},
{
"epoch": 0.016790827410854414,
"grad_norm": 1.131173014640808,
"learning_rate": 1e-05,
"loss": 0.2458,
"step": 4900
},
{
"epoch": 0.016825094405570444,
"grad_norm": 1.1100351810455322,
"learning_rate": 1e-05,
"loss": 0.2523,
"step": 4910
},
{
"epoch": 0.01685936140028647,
"grad_norm": 1.1857340335845947,
"learning_rate": 1e-05,
"loss": 0.2523,
"step": 4920
},
{
"epoch": 0.0168936283950025,
"grad_norm": 1.1568819284439087,
"learning_rate": 1e-05,
"loss": 0.2549,
"step": 4930
},
{
"epoch": 0.01692789538971853,
"grad_norm": 1.104872465133667,
"learning_rate": 1e-05,
"loss": 0.2449,
"step": 4940
},
{
"epoch": 0.01696216238443456,
"grad_norm": 1.0907660722732544,
"learning_rate": 1e-05,
"loss": 0.2496,
"step": 4950
},
{
"epoch": 0.01699642937915059,
"grad_norm": 1.1100903749465942,
"learning_rate": 1e-05,
"loss": 0.239,
"step": 4960
},
{
"epoch": 0.01703069637386662,
"grad_norm": 1.141200065612793,
"learning_rate": 1e-05,
"loss": 0.2459,
"step": 4970
},
{
"epoch": 0.01706496336858265,
"grad_norm": 1.2853361368179321,
"learning_rate": 1e-05,
"loss": 0.2452,
"step": 4980
},
{
"epoch": 0.01709923036329868,
"grad_norm": 1.1542645692825317,
"learning_rate": 1e-05,
"loss": 0.2635,
"step": 4990
},
{
"epoch": 0.017133497358014706,
"grad_norm": 1.2022640705108643,
"learning_rate": 1e-05,
"loss": 0.2371,
"step": 5000
},
{
"epoch": 0.017133497358014706,
"eval_cer": 12.92115011465867,
"eval_loss": 0.2521001100540161,
"eval_normalized_cer": 9.30255795363709,
"eval_runtime": 227.4868,
"eval_samples_per_second": 2.251,
"eval_steps_per_second": 0.035,
"step": 5000
},
{
"epoch": 0.017167764352730736,
"grad_norm": 1.0765001773834229,
"learning_rate": 1e-05,
"loss": 0.2455,
"step": 5010
},
{
"epoch": 0.017202031347446765,
"grad_norm": 1.0711493492126465,
"learning_rate": 1e-05,
"loss": 0.2422,
"step": 5020
},
{
"epoch": 0.017236298342162795,
"grad_norm": 1.0719484090805054,
"learning_rate": 1e-05,
"loss": 0.2531,
"step": 5030
},
{
"epoch": 0.017270565336878825,
"grad_norm": 1.1884721517562866,
"learning_rate": 1e-05,
"loss": 0.2508,
"step": 5040
},
{
"epoch": 0.017304832331594855,
"grad_norm": 1.068827509880066,
"learning_rate": 1e-05,
"loss": 0.2474,
"step": 5050
},
{
"epoch": 0.017339099326310885,
"grad_norm": 1.1308655738830566,
"learning_rate": 1e-05,
"loss": 0.2627,
"step": 5060
},
{
"epoch": 0.017373366321026915,
"grad_norm": 1.1527314186096191,
"learning_rate": 1e-05,
"loss": 0.2535,
"step": 5070
},
{
"epoch": 0.017407633315742944,
"grad_norm": 1.1800657510757446,
"learning_rate": 1e-05,
"loss": 0.2587,
"step": 5080
},
{
"epoch": 0.01744190031045897,
"grad_norm": 1.095189094543457,
"learning_rate": 1e-05,
"loss": 0.2424,
"step": 5090
},
{
"epoch": 0.017476167305175,
"grad_norm": 1.109617829322815,
"learning_rate": 1e-05,
"loss": 0.2543,
"step": 5100
},
{
"epoch": 0.01751043429989103,
"grad_norm": 1.2110544443130493,
"learning_rate": 1e-05,
"loss": 0.2687,
"step": 5110
},
{
"epoch": 0.01754470129460706,
"grad_norm": 1.0466723442077637,
"learning_rate": 1e-05,
"loss": 0.2424,
"step": 5120
},
{
"epoch": 0.01757896828932309,
"grad_norm": 1.2060648202896118,
"learning_rate": 1e-05,
"loss": 0.2337,
"step": 5130
},
{
"epoch": 0.01761323528403912,
"grad_norm": 1.203142762184143,
"learning_rate": 1e-05,
"loss": 0.2556,
"step": 5140
},
{
"epoch": 0.01764750227875515,
"grad_norm": 1.0751283168792725,
"learning_rate": 1e-05,
"loss": 0.2235,
"step": 5150
},
{
"epoch": 0.01768176927347118,
"grad_norm": 1.1377781629562378,
"learning_rate": 1e-05,
"loss": 0.2448,
"step": 5160
},
{
"epoch": 0.017716036268187206,
"grad_norm": 1.147454023361206,
"learning_rate": 1e-05,
"loss": 0.2172,
"step": 5170
},
{
"epoch": 0.017750303262903236,
"grad_norm": 1.129897952079773,
"learning_rate": 1e-05,
"loss": 0.2418,
"step": 5180
},
{
"epoch": 0.017784570257619266,
"grad_norm": 1.1261131763458252,
"learning_rate": 1e-05,
"loss": 0.2328,
"step": 5190
},
{
"epoch": 0.017818837252335296,
"grad_norm": 1.0794824361801147,
"learning_rate": 1e-05,
"loss": 0.2546,
"step": 5200
},
{
"epoch": 0.017853104247051325,
"grad_norm": 1.1870142221450806,
"learning_rate": 1e-05,
"loss": 0.249,
"step": 5210
},
{
"epoch": 0.017887371241767355,
"grad_norm": 1.0414400100708008,
"learning_rate": 1e-05,
"loss": 0.2285,
"step": 5220
},
{
"epoch": 0.017921638236483385,
"grad_norm": 1.173405647277832,
"learning_rate": 1e-05,
"loss": 0.2529,
"step": 5230
},
{
"epoch": 0.017955905231199415,
"grad_norm": 1.039650797843933,
"learning_rate": 1e-05,
"loss": 0.2321,
"step": 5240
},
{
"epoch": 0.01799017222591544,
"grad_norm": 1.0359266996383667,
"learning_rate": 1e-05,
"loss": 0.2433,
"step": 5250
},
{
"epoch": 0.01802443922063147,
"grad_norm": 1.0630840063095093,
"learning_rate": 1e-05,
"loss": 0.2117,
"step": 5260
},
{
"epoch": 0.0180587062153475,
"grad_norm": 1.0937180519104004,
"learning_rate": 1e-05,
"loss": 0.2454,
"step": 5270
},
{
"epoch": 0.01809297321006353,
"grad_norm": 1.1015993356704712,
"learning_rate": 1e-05,
"loss": 0.238,
"step": 5280
},
{
"epoch": 0.01812724020477956,
"grad_norm": 1.060584545135498,
"learning_rate": 1e-05,
"loss": 0.2475,
"step": 5290
},
{
"epoch": 0.01816150719949559,
"grad_norm": 1.1389795541763306,
"learning_rate": 1e-05,
"loss": 0.233,
"step": 5300
},
{
"epoch": 0.01819577419421162,
"grad_norm": 1.0018917322158813,
"learning_rate": 1e-05,
"loss": 0.2453,
"step": 5310
},
{
"epoch": 0.01823004118892765,
"grad_norm": 1.0546092987060547,
"learning_rate": 1e-05,
"loss": 0.2333,
"step": 5320
},
{
"epoch": 0.018264308183643677,
"grad_norm": 1.1121848821640015,
"learning_rate": 1e-05,
"loss": 0.2317,
"step": 5330
},
{
"epoch": 0.018298575178359706,
"grad_norm": 1.1613191366195679,
"learning_rate": 1e-05,
"loss": 0.2549,
"step": 5340
},
{
"epoch": 0.018332842173075736,
"grad_norm": 1.1250524520874023,
"learning_rate": 1e-05,
"loss": 0.2471,
"step": 5350
},
{
"epoch": 0.018367109167791766,
"grad_norm": 1.0905226469039917,
"learning_rate": 1e-05,
"loss": 0.229,
"step": 5360
},
{
"epoch": 0.018401376162507796,
"grad_norm": 0.9885173439979553,
"learning_rate": 1e-05,
"loss": 0.2542,
"step": 5370
},
{
"epoch": 0.018435643157223826,
"grad_norm": 1.288758635520935,
"learning_rate": 1e-05,
"loss": 0.2472,
"step": 5380
},
{
"epoch": 0.018469910151939856,
"grad_norm": 1.2433462142944336,
"learning_rate": 1e-05,
"loss": 0.2427,
"step": 5390
},
{
"epoch": 0.018504177146655885,
"grad_norm": 1.2367336750030518,
"learning_rate": 1e-05,
"loss": 0.2511,
"step": 5400
},
{
"epoch": 0.018538444141371912,
"grad_norm": 1.1871395111083984,
"learning_rate": 1e-05,
"loss": 0.2276,
"step": 5410
},
{
"epoch": 0.01857271113608794,
"grad_norm": 0.9569379091262817,
"learning_rate": 1e-05,
"loss": 0.2475,
"step": 5420
},
{
"epoch": 0.01860697813080397,
"grad_norm": 1.1487014293670654,
"learning_rate": 1e-05,
"loss": 0.2295,
"step": 5430
},
{
"epoch": 0.01864124512552,
"grad_norm": 1.0800844430923462,
"learning_rate": 1e-05,
"loss": 0.2247,
"step": 5440
},
{
"epoch": 0.01867551212023603,
"grad_norm": 1.1834380626678467,
"learning_rate": 1e-05,
"loss": 0.226,
"step": 5450
},
{
"epoch": 0.01870977911495206,
"grad_norm": 1.0035191774368286,
"learning_rate": 1e-05,
"loss": 0.2414,
"step": 5460
},
{
"epoch": 0.01874404610966809,
"grad_norm": 1.0685466527938843,
"learning_rate": 1e-05,
"loss": 0.2449,
"step": 5470
},
{
"epoch": 0.01877831310438412,
"grad_norm": 1.1921565532684326,
"learning_rate": 1e-05,
"loss": 0.2419,
"step": 5480
},
{
"epoch": 0.018812580099100147,
"grad_norm": 1.1201281547546387,
"learning_rate": 1e-05,
"loss": 0.255,
"step": 5490
},
{
"epoch": 0.018846847093816177,
"grad_norm": 1.1162866353988647,
"learning_rate": 1e-05,
"loss": 0.2426,
"step": 5500
},
{
"epoch": 0.018846847093816177,
"eval_cer": 13.238666431469396,
"eval_loss": 0.25262224674224854,
"eval_normalized_cer": 9.562350119904076,
"eval_runtime": 229.0802,
"eval_samples_per_second": 2.235,
"eval_steps_per_second": 0.035,
"step": 5500
},
{
"epoch": 0.018881114088532207,
"grad_norm": 1.0215845108032227,
"learning_rate": 1e-05,
"loss": 0.2368,
"step": 5510
},
{
"epoch": 0.018915381083248237,
"grad_norm": 1.0062447786331177,
"learning_rate": 1e-05,
"loss": 0.2308,
"step": 5520
},
{
"epoch": 0.018949648077964266,
"grad_norm": 1.223649024963379,
"learning_rate": 1e-05,
"loss": 0.2409,
"step": 5530
},
{
"epoch": 0.018983915072680296,
"grad_norm": 1.2076172828674316,
"learning_rate": 1e-05,
"loss": 0.2236,
"step": 5540
},
{
"epoch": 0.019018182067396326,
"grad_norm": 1.154416561126709,
"learning_rate": 1e-05,
"loss": 0.2419,
"step": 5550
},
{
"epoch": 0.019052449062112356,
"grad_norm": 1.284858226776123,
"learning_rate": 1e-05,
"loss": 0.2321,
"step": 5560
},
{
"epoch": 0.019086716056828382,
"grad_norm": 1.0406948328018188,
"learning_rate": 1e-05,
"loss": 0.2485,
"step": 5570
},
{
"epoch": 0.019120983051544412,
"grad_norm": 1.1980571746826172,
"learning_rate": 1e-05,
"loss": 0.2274,
"step": 5580
},
{
"epoch": 0.019155250046260442,
"grad_norm": 1.073560357093811,
"learning_rate": 1e-05,
"loss": 0.2498,
"step": 5590
},
{
"epoch": 0.019189517040976472,
"grad_norm": 1.0982617139816284,
"learning_rate": 1e-05,
"loss": 0.2391,
"step": 5600
},
{
"epoch": 0.019223784035692502,
"grad_norm": 1.015085220336914,
"learning_rate": 1e-05,
"loss": 0.2269,
"step": 5610
},
{
"epoch": 0.01925805103040853,
"grad_norm": 1.238585352897644,
"learning_rate": 1e-05,
"loss": 0.2428,
"step": 5620
},
{
"epoch": 0.01929231802512456,
"grad_norm": 1.3326079845428467,
"learning_rate": 1e-05,
"loss": 0.25,
"step": 5630
},
{
"epoch": 0.01932658501984059,
"grad_norm": 1.1263608932495117,
"learning_rate": 1e-05,
"loss": 0.234,
"step": 5640
},
{
"epoch": 0.019360852014556618,
"grad_norm": 1.083595633506775,
"learning_rate": 1e-05,
"loss": 0.2504,
"step": 5650
},
{
"epoch": 0.019395119009272647,
"grad_norm": 1.0787022113800049,
"learning_rate": 1e-05,
"loss": 0.2248,
"step": 5660
},
{
"epoch": 0.019429386003988677,
"grad_norm": 1.312565803527832,
"learning_rate": 1e-05,
"loss": 0.263,
"step": 5670
},
{
"epoch": 0.019463652998704707,
"grad_norm": 1.0305407047271729,
"learning_rate": 1e-05,
"loss": 0.2358,
"step": 5680
},
{
"epoch": 0.019497919993420737,
"grad_norm": 1.0905306339263916,
"learning_rate": 1e-05,
"loss": 0.2358,
"step": 5690
},
{
"epoch": 0.019532186988136767,
"grad_norm": 1.1105730533599854,
"learning_rate": 1e-05,
"loss": 0.2371,
"step": 5700
},
{
"epoch": 0.019566453982852797,
"grad_norm": 1.1664555072784424,
"learning_rate": 1e-05,
"loss": 0.244,
"step": 5710
},
{
"epoch": 0.019600720977568827,
"grad_norm": 1.0702719688415527,
"learning_rate": 1e-05,
"loss": 0.2305,
"step": 5720
},
{
"epoch": 0.019634987972284856,
"grad_norm": 1.0736626386642456,
"learning_rate": 1e-05,
"loss": 0.2406,
"step": 5730
},
{
"epoch": 0.019669254967000883,
"grad_norm": 1.0510461330413818,
"learning_rate": 1e-05,
"loss": 0.2335,
"step": 5740
},
{
"epoch": 0.019703521961716913,
"grad_norm": 1.0435370206832886,
"learning_rate": 1e-05,
"loss": 0.2211,
"step": 5750
},
{
"epoch": 0.019737788956432942,
"grad_norm": 1.2461049556732178,
"learning_rate": 1e-05,
"loss": 0.2188,
"step": 5760
},
{
"epoch": 0.019772055951148972,
"grad_norm": 1.0351046323776245,
"learning_rate": 1e-05,
"loss": 0.2269,
"step": 5770
},
{
"epoch": 0.019806322945865002,
"grad_norm": 1.124671459197998,
"learning_rate": 1e-05,
"loss": 0.2284,
"step": 5780
},
{
"epoch": 0.019840589940581032,
"grad_norm": 1.145488977432251,
"learning_rate": 1e-05,
"loss": 0.2415,
"step": 5790
},
{
"epoch": 0.019874856935297062,
"grad_norm": 1.1410046815872192,
"learning_rate": 1e-05,
"loss": 0.2296,
"step": 5800
},
{
"epoch": 0.01990912393001309,
"grad_norm": 1.2782517671585083,
"learning_rate": 1e-05,
"loss": 0.2367,
"step": 5810
},
{
"epoch": 0.019943390924729118,
"grad_norm": 1.204562783241272,
"learning_rate": 1e-05,
"loss": 0.2289,
"step": 5820
},
{
"epoch": 0.019977657919445148,
"grad_norm": 1.1141811609268188,
"learning_rate": 1e-05,
"loss": 0.2223,
"step": 5830
},
{
"epoch": 0.020011924914161178,
"grad_norm": 1.1790316104888916,
"learning_rate": 1e-05,
"loss": 0.2308,
"step": 5840
},
{
"epoch": 0.020046191908877208,
"grad_norm": 1.0944266319274902,
"learning_rate": 1e-05,
"loss": 0.2366,
"step": 5850
},
{
"epoch": 0.020080458903593237,
"grad_norm": 1.0892263650894165,
"learning_rate": 1e-05,
"loss": 0.2384,
"step": 5860
},
{
"epoch": 0.020114725898309267,
"grad_norm": 1.1419873237609863,
"learning_rate": 1e-05,
"loss": 0.2414,
"step": 5870
},
{
"epoch": 0.020148992893025297,
"grad_norm": 1.2230783700942993,
"learning_rate": 1e-05,
"loss": 0.2394,
"step": 5880
},
{
"epoch": 0.020183259887741327,
"grad_norm": 1.1309173107147217,
"learning_rate": 1e-05,
"loss": 0.2561,
"step": 5890
},
{
"epoch": 0.020217526882457353,
"grad_norm": 1.2405802011489868,
"learning_rate": 1e-05,
"loss": 0.259,
"step": 5900
},
{
"epoch": 0.020251793877173383,
"grad_norm": 1.2853388786315918,
"learning_rate": 1e-05,
"loss": 0.2668,
"step": 5910
},
{
"epoch": 0.020286060871889413,
"grad_norm": 1.299046277999878,
"learning_rate": 1e-05,
"loss": 0.251,
"step": 5920
},
{
"epoch": 0.020320327866605443,
"grad_norm": 1.142052173614502,
"learning_rate": 1e-05,
"loss": 0.2655,
"step": 5930
},
{
"epoch": 0.020354594861321473,
"grad_norm": 1.3770766258239746,
"learning_rate": 1e-05,
"loss": 0.2508,
"step": 5940
},
{
"epoch": 0.020388861856037502,
"grad_norm": 1.1458237171173096,
"learning_rate": 1e-05,
"loss": 0.2742,
"step": 5950
},
{
"epoch": 0.020423128850753532,
"grad_norm": 1.3130786418914795,
"learning_rate": 1e-05,
"loss": 0.2514,
"step": 5960
},
{
"epoch": 0.020457395845469562,
"grad_norm": 1.2816088199615479,
"learning_rate": 1e-05,
"loss": 0.2593,
"step": 5970
},
{
"epoch": 0.02049166284018559,
"grad_norm": 1.0405460596084595,
"learning_rate": 1e-05,
"loss": 0.2608,
"step": 5980
},
{
"epoch": 0.02052592983490162,
"grad_norm": 1.2035329341888428,
"learning_rate": 1e-05,
"loss": 0.2558,
"step": 5990
},
{
"epoch": 0.020560196829617648,
"grad_norm": 1.0495450496673584,
"learning_rate": 1e-05,
"loss": 0.2468,
"step": 6000
},
{
"epoch": 0.020560196829617648,
"eval_cer": 13.079908273064033,
"eval_loss": 0.2540421485900879,
"eval_normalized_cer": 9.292565947242206,
"eval_runtime": 227.4153,
"eval_samples_per_second": 2.251,
"eval_steps_per_second": 0.035,
"step": 6000
},
{
"epoch": 0.020594463824333678,
"grad_norm": 1.1614056825637817,
"learning_rate": 1e-05,
"loss": 0.2527,
"step": 6010
},
{
"epoch": 0.020628730819049708,
"grad_norm": 1.1835705041885376,
"learning_rate": 1e-05,
"loss": 0.2592,
"step": 6020
},
{
"epoch": 0.020662997813765738,
"grad_norm": 1.1335136890411377,
"learning_rate": 1e-05,
"loss": 0.2727,
"step": 6030
},
{
"epoch": 0.020697264808481768,
"grad_norm": 1.052079439163208,
"learning_rate": 1e-05,
"loss": 0.2514,
"step": 6040
},
{
"epoch": 0.020731531803197797,
"grad_norm": 1.096330165863037,
"learning_rate": 1e-05,
"loss": 0.2684,
"step": 6050
},
{
"epoch": 0.020765798797913824,
"grad_norm": 1.2359880208969116,
"learning_rate": 1e-05,
"loss": 0.2638,
"step": 6060
},
{
"epoch": 0.020800065792629854,
"grad_norm": 1.2259430885314941,
"learning_rate": 1e-05,
"loss": 0.2488,
"step": 6070
},
{
"epoch": 0.020834332787345883,
"grad_norm": 1.0531619787216187,
"learning_rate": 1e-05,
"loss": 0.2584,
"step": 6080
},
{
"epoch": 0.020868599782061913,
"grad_norm": 1.1754058599472046,
"learning_rate": 1e-05,
"loss": 0.254,
"step": 6090
},
{
"epoch": 0.020902866776777943,
"grad_norm": 1.0922538042068481,
"learning_rate": 1e-05,
"loss": 0.2522,
"step": 6100
},
{
"epoch": 0.020937133771493973,
"grad_norm": 1.1970179080963135,
"learning_rate": 1e-05,
"loss": 0.267,
"step": 6110
},
{
"epoch": 0.020971400766210003,
"grad_norm": 1.2625236511230469,
"learning_rate": 1e-05,
"loss": 0.2379,
"step": 6120
},
{
"epoch": 0.021005667760926033,
"grad_norm": 1.152846336364746,
"learning_rate": 1e-05,
"loss": 0.2429,
"step": 6130
},
{
"epoch": 0.02103993475564206,
"grad_norm": 1.1184160709381104,
"learning_rate": 1e-05,
"loss": 0.2566,
"step": 6140
},
{
"epoch": 0.02107420175035809,
"grad_norm": 1.1153484582901,
"learning_rate": 1e-05,
"loss": 0.2583,
"step": 6150
},
{
"epoch": 0.02110846874507412,
"grad_norm": 1.2822504043579102,
"learning_rate": 1e-05,
"loss": 0.2535,
"step": 6160
},
{
"epoch": 0.02114273573979015,
"grad_norm": 1.1332992315292358,
"learning_rate": 1e-05,
"loss": 0.2799,
"step": 6170
},
{
"epoch": 0.02117700273450618,
"grad_norm": 1.0284112691879272,
"learning_rate": 1e-05,
"loss": 0.2458,
"step": 6180
},
{
"epoch": 0.02121126972922221,
"grad_norm": 1.1097975969314575,
"learning_rate": 1e-05,
"loss": 0.2513,
"step": 6190
},
{
"epoch": 0.021245536723938238,
"grad_norm": 1.168990969657898,
"learning_rate": 1e-05,
"loss": 0.2843,
"step": 6200
},
{
"epoch": 0.021279803718654268,
"grad_norm": 0.9956926107406616,
"learning_rate": 1e-05,
"loss": 0.247,
"step": 6210
},
{
"epoch": 0.021314070713370294,
"grad_norm": 1.2191492319107056,
"learning_rate": 1e-05,
"loss": 0.2608,
"step": 6220
},
{
"epoch": 0.021348337708086324,
"grad_norm": 1.0872688293457031,
"learning_rate": 1e-05,
"loss": 0.2463,
"step": 6230
},
{
"epoch": 0.021382604702802354,
"grad_norm": 1.0746614933013916,
"learning_rate": 1e-05,
"loss": 0.244,
"step": 6240
},
{
"epoch": 0.021416871697518384,
"grad_norm": 1.1560328006744385,
"learning_rate": 1e-05,
"loss": 0.2639,
"step": 6250
},
{
"epoch": 0.021451138692234414,
"grad_norm": 1.1529641151428223,
"learning_rate": 1e-05,
"loss": 0.2585,
"step": 6260
},
{
"epoch": 0.021485405686950444,
"grad_norm": 1.0708386898040771,
"learning_rate": 1e-05,
"loss": 0.2669,
"step": 6270
},
{
"epoch": 0.021519672681666473,
"grad_norm": 1.208079218864441,
"learning_rate": 1e-05,
"loss": 0.2436,
"step": 6280
},
{
"epoch": 0.021553939676382503,
"grad_norm": 1.1871508359909058,
"learning_rate": 1e-05,
"loss": 0.2655,
"step": 6290
},
{
"epoch": 0.02158820667109853,
"grad_norm": 1.0997953414916992,
"learning_rate": 1e-05,
"loss": 0.2578,
"step": 6300
},
{
"epoch": 0.02162247366581456,
"grad_norm": 1.2404417991638184,
"learning_rate": 1e-05,
"loss": 0.2726,
"step": 6310
},
{
"epoch": 0.02165674066053059,
"grad_norm": 1.1724058389663696,
"learning_rate": 1e-05,
"loss": 0.2611,
"step": 6320
},
{
"epoch": 0.02169100765524662,
"grad_norm": 1.124932885169983,
"learning_rate": 1e-05,
"loss": 0.2582,
"step": 6330
},
{
"epoch": 0.02172527464996265,
"grad_norm": 1.129584550857544,
"learning_rate": 1e-05,
"loss": 0.2651,
"step": 6340
},
{
"epoch": 0.02175954164467868,
"grad_norm": 1.1869479417800903,
"learning_rate": 1e-05,
"loss": 0.2451,
"step": 6350
},
{
"epoch": 0.02179380863939471,
"grad_norm": 1.1753504276275635,
"learning_rate": 1e-05,
"loss": 0.2509,
"step": 6360
},
{
"epoch": 0.02182807563411074,
"grad_norm": 1.1704761981964111,
"learning_rate": 1e-05,
"loss": 0.2614,
"step": 6370
},
{
"epoch": 0.02186234262882677,
"grad_norm": 1.347970724105835,
"learning_rate": 1e-05,
"loss": 0.253,
"step": 6380
},
{
"epoch": 0.021896609623542795,
"grad_norm": 1.0677597522735596,
"learning_rate": 1e-05,
"loss": 0.2539,
"step": 6390
},
{
"epoch": 0.021930876618258825,
"grad_norm": 1.1567541360855103,
"learning_rate": 1e-05,
"loss": 0.2621,
"step": 6400
},
{
"epoch": 0.021965143612974854,
"grad_norm": 1.1231553554534912,
"learning_rate": 1e-05,
"loss": 0.2453,
"step": 6410
},
{
"epoch": 0.021999410607690884,
"grad_norm": 1.0485198497772217,
"learning_rate": 1e-05,
"loss": 0.2503,
"step": 6420
},
{
"epoch": 0.022033677602406914,
"grad_norm": 1.12228262424469,
"learning_rate": 1e-05,
"loss": 0.2488,
"step": 6430
},
{
"epoch": 0.022067944597122944,
"grad_norm": 1.2610136270523071,
"learning_rate": 1e-05,
"loss": 0.2445,
"step": 6440
},
{
"epoch": 0.022102211591838974,
"grad_norm": 0.9546436071395874,
"learning_rate": 1e-05,
"loss": 0.226,
"step": 6450
},
{
"epoch": 0.022136478586555004,
"grad_norm": 1.3363466262817383,
"learning_rate": 1e-05,
"loss": 0.2489,
"step": 6460
},
{
"epoch": 0.02217074558127103,
"grad_norm": 1.1454704999923706,
"learning_rate": 1e-05,
"loss": 0.2434,
"step": 6470
},
{
"epoch": 0.02220501257598706,
"grad_norm": 1.1578549146652222,
"learning_rate": 1e-05,
"loss": 0.2549,
"step": 6480
},
{
"epoch": 0.02223927957070309,
"grad_norm": 1.096081018447876,
"learning_rate": 1e-05,
"loss": 0.2472,
"step": 6490
},
{
"epoch": 0.02227354656541912,
"grad_norm": 1.2388731241226196,
"learning_rate": 1e-05,
"loss": 0.2457,
"step": 6500
},
{
"epoch": 0.02227354656541912,
"eval_cer": 12.929970012347859,
"eval_loss": 0.24839338660240173,
"eval_normalized_cer": 9.242605915267786,
"eval_runtime": 227.6401,
"eval_samples_per_second": 2.249,
"eval_steps_per_second": 0.035,
"step": 6500
},
{
"epoch": 0.02230781356013515,
"grad_norm": 1.0306715965270996,
"learning_rate": 1e-05,
"loss": 0.2393,
"step": 6510
},
{
"epoch": 0.02234208055485118,
"grad_norm": 1.1339504718780518,
"learning_rate": 1e-05,
"loss": 0.2563,
"step": 6520
},
{
"epoch": 0.02237634754956721,
"grad_norm": 0.912266731262207,
"learning_rate": 1e-05,
"loss": 0.2465,
"step": 6530
},
{
"epoch": 0.02241061454428324,
"grad_norm": 1.1917020082473755,
"learning_rate": 1e-05,
"loss": 0.2395,
"step": 6540
},
{
"epoch": 0.022444881538999265,
"grad_norm": 1.248515248298645,
"learning_rate": 1e-05,
"loss": 0.2479,
"step": 6550
},
{
"epoch": 0.022479148533715295,
"grad_norm": 1.180799961090088,
"learning_rate": 1e-05,
"loss": 0.2616,
"step": 6560
},
{
"epoch": 0.022513415528431325,
"grad_norm": 1.0700205564498901,
"learning_rate": 1e-05,
"loss": 0.2401,
"step": 6570
},
{
"epoch": 0.022547682523147355,
"grad_norm": 1.1814614534378052,
"learning_rate": 1e-05,
"loss": 0.2471,
"step": 6580
},
{
"epoch": 0.022581949517863385,
"grad_norm": 1.3973134756088257,
"learning_rate": 1e-05,
"loss": 0.2383,
"step": 6590
},
{
"epoch": 0.022616216512579414,
"grad_norm": 1.244265079498291,
"learning_rate": 1e-05,
"loss": 0.2548,
"step": 6600
},
{
"epoch": 0.022650483507295444,
"grad_norm": 1.1685833930969238,
"learning_rate": 1e-05,
"loss": 0.2499,
"step": 6610
},
{
"epoch": 0.022684750502011474,
"grad_norm": 1.1566667556762695,
"learning_rate": 1e-05,
"loss": 0.2443,
"step": 6620
},
{
"epoch": 0.0227190174967275,
"grad_norm": 1.0241929292678833,
"learning_rate": 1e-05,
"loss": 0.2412,
"step": 6630
},
{
"epoch": 0.02275328449144353,
"grad_norm": 1.0359474420547485,
"learning_rate": 1e-05,
"loss": 0.2374,
"step": 6640
},
{
"epoch": 0.02278755148615956,
"grad_norm": 1.040810227394104,
"learning_rate": 1e-05,
"loss": 0.2254,
"step": 6650
},
{
"epoch": 0.02282181848087559,
"grad_norm": 1.0343252420425415,
"learning_rate": 1e-05,
"loss": 0.2366,
"step": 6660
},
{
"epoch": 0.02285608547559162,
"grad_norm": 1.052739143371582,
"learning_rate": 1e-05,
"loss": 0.2273,
"step": 6670
},
{
"epoch": 0.02289035247030765,
"grad_norm": 1.0414966344833374,
"learning_rate": 1e-05,
"loss": 0.2082,
"step": 6680
},
{
"epoch": 0.02292461946502368,
"grad_norm": 1.2340532541275024,
"learning_rate": 1e-05,
"loss": 0.2241,
"step": 6690
},
{
"epoch": 0.02295888645973971,
"grad_norm": 0.9693310260772705,
"learning_rate": 1e-05,
"loss": 0.2322,
"step": 6700
},
{
"epoch": 0.022993153454455736,
"grad_norm": 1.103025197982788,
"learning_rate": 1e-05,
"loss": 0.236,
"step": 6710
},
{
"epoch": 0.023027420449171766,
"grad_norm": 1.119689702987671,
"learning_rate": 1e-05,
"loss": 0.214,
"step": 6720
},
{
"epoch": 0.023061687443887795,
"grad_norm": 0.93172287940979,
"learning_rate": 1e-05,
"loss": 0.2094,
"step": 6730
},
{
"epoch": 0.023095954438603825,
"grad_norm": 1.0207446813583374,
"learning_rate": 1e-05,
"loss": 0.2238,
"step": 6740
},
{
"epoch": 0.023130221433319855,
"grad_norm": 1.200201392173767,
"learning_rate": 1e-05,
"loss": 0.218,
"step": 6750
},
{
"epoch": 0.023164488428035885,
"grad_norm": 1.1485291719436646,
"learning_rate": 1e-05,
"loss": 0.2314,
"step": 6760
},
{
"epoch": 0.023198755422751915,
"grad_norm": 1.2236285209655762,
"learning_rate": 1e-05,
"loss": 0.2326,
"step": 6770
},
{
"epoch": 0.023233022417467945,
"grad_norm": 1.1756523847579956,
"learning_rate": 1e-05,
"loss": 0.2122,
"step": 6780
},
{
"epoch": 0.02326728941218397,
"grad_norm": 1.0356839895248413,
"learning_rate": 1e-05,
"loss": 0.2078,
"step": 6790
},
{
"epoch": 0.0233015564069,
"grad_norm": 1.1896883249282837,
"learning_rate": 1e-05,
"loss": 0.2072,
"step": 6800
},
{
"epoch": 0.02333582340161603,
"grad_norm": 1.1080976724624634,
"learning_rate": 1e-05,
"loss": 0.2127,
"step": 6810
},
{
"epoch": 0.02337009039633206,
"grad_norm": 1.128263235092163,
"learning_rate": 1e-05,
"loss": 0.2282,
"step": 6820
},
{
"epoch": 0.02340435739104809,
"grad_norm": 1.0398188829421997,
"learning_rate": 1e-05,
"loss": 0.2095,
"step": 6830
},
{
"epoch": 0.02343862438576412,
"grad_norm": 1.1791975498199463,
"learning_rate": 1e-05,
"loss": 0.2216,
"step": 6840
},
{
"epoch": 0.02347289138048015,
"grad_norm": 1.1444710493087769,
"learning_rate": 1e-05,
"loss": 0.2447,
"step": 6850
},
{
"epoch": 0.02350715837519618,
"grad_norm": 1.136607050895691,
"learning_rate": 1e-05,
"loss": 0.2093,
"step": 6860
},
{
"epoch": 0.023541425369912206,
"grad_norm": 1.0915231704711914,
"learning_rate": 1e-05,
"loss": 0.2128,
"step": 6870
},
{
"epoch": 0.023575692364628236,
"grad_norm": 1.0416276454925537,
"learning_rate": 1e-05,
"loss": 0.2092,
"step": 6880
},
{
"epoch": 0.023609959359344266,
"grad_norm": 1.3693732023239136,
"learning_rate": 1e-05,
"loss": 0.2137,
"step": 6890
},
{
"epoch": 0.023644226354060296,
"grad_norm": 1.1747677326202393,
"learning_rate": 1e-05,
"loss": 0.2215,
"step": 6900
},
{
"epoch": 0.023678493348776326,
"grad_norm": 1.1593588590621948,
"learning_rate": 1e-05,
"loss": 0.2234,
"step": 6910
},
{
"epoch": 0.023712760343492355,
"grad_norm": 1.2322016954421997,
"learning_rate": 1e-05,
"loss": 0.2437,
"step": 6920
},
{
"epoch": 0.023747027338208385,
"grad_norm": 1.167648196220398,
"learning_rate": 1e-05,
"loss": 0.2461,
"step": 6930
},
{
"epoch": 0.023781294332924415,
"grad_norm": 1.0984666347503662,
"learning_rate": 1e-05,
"loss": 0.2584,
"step": 6940
},
{
"epoch": 0.023815561327640445,
"grad_norm": 1.1234291791915894,
"learning_rate": 1e-05,
"loss": 0.2532,
"step": 6950
},
{
"epoch": 0.02384982832235647,
"grad_norm": 1.2158063650131226,
"learning_rate": 1e-05,
"loss": 0.2567,
"step": 6960
},
{
"epoch": 0.0238840953170725,
"grad_norm": 1.0958101749420166,
"learning_rate": 1e-05,
"loss": 0.2387,
"step": 6970
},
{
"epoch": 0.02391836231178853,
"grad_norm": 1.1536844968795776,
"learning_rate": 1e-05,
"loss": 0.2712,
"step": 6980
},
{
"epoch": 0.02395262930650456,
"grad_norm": 1.2437007427215576,
"learning_rate": 1e-05,
"loss": 0.2563,
"step": 6990
},
{
"epoch": 0.02398689630122059,
"grad_norm": 1.0884592533111572,
"learning_rate": 1e-05,
"loss": 0.2379,
"step": 7000
},
{
"epoch": 0.02398689630122059,
"eval_cer": 13.079908273064033,
"eval_loss": 0.2514401376247406,
"eval_normalized_cer": 9.622302158273381,
"eval_runtime": 227.6705,
"eval_samples_per_second": 2.249,
"eval_steps_per_second": 0.035,
"step": 7000
},
{
"epoch": 0.02402116329593662,
"grad_norm": 1.2332980632781982,
"learning_rate": 1e-05,
"loss": 0.2543,
"step": 7010
},
{
"epoch": 0.02405543029065265,
"grad_norm": 1.1041260957717896,
"learning_rate": 1e-05,
"loss": 0.2663,
"step": 7020
},
{
"epoch": 0.02408969728536868,
"grad_norm": 1.1479183435440063,
"learning_rate": 1e-05,
"loss": 0.2528,
"step": 7030
},
{
"epoch": 0.024123964280084707,
"grad_norm": 1.103766918182373,
"learning_rate": 1e-05,
"loss": 0.2336,
"step": 7040
},
{
"epoch": 0.024158231274800736,
"grad_norm": 1.238996148109436,
"learning_rate": 1e-05,
"loss": 0.2436,
"step": 7050
},
{
"epoch": 0.024192498269516766,
"grad_norm": 1.2652095556259155,
"learning_rate": 1e-05,
"loss": 0.2464,
"step": 7060
},
{
"epoch": 0.024226765264232796,
"grad_norm": 1.180665373802185,
"learning_rate": 1e-05,
"loss": 0.2541,
"step": 7070
},
{
"epoch": 0.024261032258948826,
"grad_norm": 1.1601506471633911,
"learning_rate": 1e-05,
"loss": 0.2508,
"step": 7080
},
{
"epoch": 0.024295299253664856,
"grad_norm": 1.257034420967102,
"learning_rate": 1e-05,
"loss": 0.2446,
"step": 7090
},
{
"epoch": 0.024329566248380886,
"grad_norm": 1.0813285112380981,
"learning_rate": 1e-05,
"loss": 0.2546,
"step": 7100
},
{
"epoch": 0.024363833243096916,
"grad_norm": 1.1124157905578613,
"learning_rate": 1e-05,
"loss": 0.2379,
"step": 7110
},
{
"epoch": 0.024398100237812942,
"grad_norm": 1.0615211725234985,
"learning_rate": 1e-05,
"loss": 0.253,
"step": 7120
},
{
"epoch": 0.024432367232528972,
"grad_norm": 1.185677409172058,
"learning_rate": 1e-05,
"loss": 0.2383,
"step": 7130
},
{
"epoch": 0.024466634227245,
"grad_norm": 1.1810061931610107,
"learning_rate": 1e-05,
"loss": 0.2603,
"step": 7140
},
{
"epoch": 0.02450090122196103,
"grad_norm": 1.155860424041748,
"learning_rate": 1e-05,
"loss": 0.2434,
"step": 7150
},
{
"epoch": 0.02453516821667706,
"grad_norm": 1.113008737564087,
"learning_rate": 1e-05,
"loss": 0.2529,
"step": 7160
},
{
"epoch": 0.02456943521139309,
"grad_norm": 1.1276872158050537,
"learning_rate": 1e-05,
"loss": 0.2265,
"step": 7170
},
{
"epoch": 0.02460370220610912,
"grad_norm": 1.149792194366455,
"learning_rate": 1e-05,
"loss": 0.2349,
"step": 7180
},
{
"epoch": 0.02463796920082515,
"grad_norm": 1.1619532108306885,
"learning_rate": 1e-05,
"loss": 0.2336,
"step": 7190
},
{
"epoch": 0.024672236195541177,
"grad_norm": 1.0760303735733032,
"learning_rate": 1e-05,
"loss": 0.2315,
"step": 7200
},
{
"epoch": 0.024706503190257207,
"grad_norm": 1.2807782888412476,
"learning_rate": 1e-05,
"loss": 0.2382,
"step": 7210
},
{
"epoch": 0.024740770184973237,
"grad_norm": 1.0910037755966187,
"learning_rate": 1e-05,
"loss": 0.2333,
"step": 7220
},
{
"epoch": 0.024775037179689267,
"grad_norm": 1.2938390970230103,
"learning_rate": 1e-05,
"loss": 0.2147,
"step": 7230
},
{
"epoch": 0.024809304174405297,
"grad_norm": 1.185542106628418,
"learning_rate": 1e-05,
"loss": 0.2232,
"step": 7240
},
{
"epoch": 0.024843571169121326,
"grad_norm": 1.0598995685577393,
"learning_rate": 1e-05,
"loss": 0.2278,
"step": 7250
},
{
"epoch": 0.024877838163837356,
"grad_norm": 1.1860477924346924,
"learning_rate": 1e-05,
"loss": 0.2179,
"step": 7260
},
{
"epoch": 0.024912105158553386,
"grad_norm": 1.1935844421386719,
"learning_rate": 1e-05,
"loss": 0.238,
"step": 7270
},
{
"epoch": 0.024946372153269412,
"grad_norm": 1.0449039936065674,
"learning_rate": 1e-05,
"loss": 0.2307,
"step": 7280
},
{
"epoch": 0.024980639147985442,
"grad_norm": 1.0651369094848633,
"learning_rate": 1e-05,
"loss": 0.2379,
"step": 7290
},
{
"epoch": 0.025014906142701472,
"grad_norm": 1.0416852235794067,
"learning_rate": 1e-05,
"loss": 0.2208,
"step": 7300
},
{
"epoch": 0.025049173137417502,
"grad_norm": 1.0064860582351685,
"learning_rate": 1e-05,
"loss": 0.2227,
"step": 7310
},
{
"epoch": 0.025083440132133532,
"grad_norm": 1.0357342958450317,
"learning_rate": 1e-05,
"loss": 0.22,
"step": 7320
},
{
"epoch": 0.02511770712684956,
"grad_norm": 1.019918441772461,
"learning_rate": 1e-05,
"loss": 0.2396,
"step": 7330
},
{
"epoch": 0.02515197412156559,
"grad_norm": 1.0327798128128052,
"learning_rate": 1e-05,
"loss": 0.2118,
"step": 7340
},
{
"epoch": 0.02518624111628162,
"grad_norm": 0.9973874092102051,
"learning_rate": 1e-05,
"loss": 0.2275,
"step": 7350
},
{
"epoch": 0.025220508110997648,
"grad_norm": 1.093544840812683,
"learning_rate": 1e-05,
"loss": 0.2214,
"step": 7360
},
{
"epoch": 0.025254775105713678,
"grad_norm": 1.118829369544983,
"learning_rate": 1e-05,
"loss": 0.237,
"step": 7370
},
{
"epoch": 0.025289042100429707,
"grad_norm": 1.2009224891662598,
"learning_rate": 1e-05,
"loss": 0.2447,
"step": 7380
},
{
"epoch": 0.025323309095145737,
"grad_norm": 1.1427584886550903,
"learning_rate": 1e-05,
"loss": 0.234,
"step": 7390
},
{
"epoch": 0.025357576089861767,
"grad_norm": 0.9685842394828796,
"learning_rate": 1e-05,
"loss": 0.2231,
"step": 7400
},
{
"epoch": 0.025391843084577797,
"grad_norm": 1.165501356124878,
"learning_rate": 1e-05,
"loss": 0.2139,
"step": 7410
},
{
"epoch": 0.025426110079293827,
"grad_norm": 1.4023411273956299,
"learning_rate": 1e-05,
"loss": 0.236,
"step": 7420
},
{
"epoch": 0.025460377074009857,
"grad_norm": 1.218546748161316,
"learning_rate": 1e-05,
"loss": 0.2433,
"step": 7430
},
{
"epoch": 0.025494644068725883,
"grad_norm": 1.4930671453475952,
"learning_rate": 1e-05,
"loss": 0.2466,
"step": 7440
},
{
"epoch": 0.025528911063441913,
"grad_norm": 1.145317554473877,
"learning_rate": 1e-05,
"loss": 0.2535,
"step": 7450
},
{
"epoch": 0.025563178058157943,
"grad_norm": 1.2366299629211426,
"learning_rate": 1e-05,
"loss": 0.2606,
"step": 7460
},
{
"epoch": 0.025597445052873972,
"grad_norm": 1.0542744398117065,
"learning_rate": 1e-05,
"loss": 0.2493,
"step": 7470
},
{
"epoch": 0.025631712047590002,
"grad_norm": 1.2272337675094604,
"learning_rate": 1e-05,
"loss": 0.2537,
"step": 7480
},
{
"epoch": 0.025665979042306032,
"grad_norm": 1.169912576675415,
"learning_rate": 1e-05,
"loss": 0.2581,
"step": 7490
},
{
"epoch": 0.025700246037022062,
"grad_norm": 1.1997913122177124,
"learning_rate": 1e-05,
"loss": 0.2547,
"step": 7500
},
{
"epoch": 0.025700246037022062,
"eval_cer": 12.859410830834362,
"eval_loss": 0.2470153123140335,
"eval_normalized_cer": 9.162669864108713,
"eval_runtime": 227.7782,
"eval_samples_per_second": 2.248,
"eval_steps_per_second": 0.035,
"step": 7500
}
],
"logging_steps": 10,
"max_steps": 291826,
"num_input_tokens_seen": 0,
"num_train_epochs": 9223372036854775807,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.5206927302656e+21,
"train_batch_size": 128,
"trial_name": null,
"trial_params": null
}