{
  "best_metric": 8.912869704236611,
  "best_model_checkpoint": "kotoba_v2_enc_logs_epoch2_2/checkpoint-4000",
  "epoch": 0.017133497358014706,
  "eval_steps": 500,
  "global_step": 5000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 3.4266994716029415e-05,
      "grad_norm": 1.0561553239822388,
      "learning_rate": 1e-05,
      "loss": 0.2361,
      "step": 10
    },
    {
      "epoch": 6.853398943205883e-05,
      "grad_norm": 1.1626238822937012,
      "learning_rate": 1e-05,
      "loss": 0.2265,
      "step": 20
    },
    {
      "epoch": 0.00010280098414808825,
      "grad_norm": 0.9845689535140991,
      "learning_rate": 1e-05,
      "loss": 0.2279,
      "step": 30
    },
    {
      "epoch": 0.00013706797886411766,
      "grad_norm": 1.142356276512146,
      "learning_rate": 1e-05,
      "loss": 0.2382,
      "step": 40
    },
    {
      "epoch": 0.00017133497358014707,
      "grad_norm": 1.0053240060806274,
      "learning_rate": 1e-05,
      "loss": 0.2473,
      "step": 50
    },
    {
      "epoch": 0.0002056019682961765,
      "grad_norm": 1.1098105907440186,
      "learning_rate": 1e-05,
      "loss": 0.2438,
      "step": 60
    },
    {
      "epoch": 0.0002398689630122059,
      "grad_norm": 1.191983699798584,
      "learning_rate": 1e-05,
      "loss": 0.2293,
      "step": 70
    },
    {
      "epoch": 0.0002741359577282353,
      "grad_norm": 1.1295104026794434,
      "learning_rate": 1e-05,
      "loss": 0.2362,
      "step": 80
    },
    {
      "epoch": 0.0003084029524442647,
      "grad_norm": 1.037972092628479,
      "learning_rate": 1e-05,
      "loss": 0.2455,
      "step": 90
    },
    {
      "epoch": 0.00034266994716029413,
      "grad_norm": 1.1975648403167725,
      "learning_rate": 1e-05,
      "loss": 0.2459,
      "step": 100
    },
    {
      "epoch": 0.00037693694187632354,
      "grad_norm": 1.0676342248916626,
      "learning_rate": 1e-05,
      "loss": 0.2271,
      "step": 110
    },
    {
      "epoch": 0.000411203936592353,
      "grad_norm": 1.0749495029449463,
      "learning_rate": 1e-05,
      "loss": 0.2417,
      "step": 120
    },
    {
      "epoch": 0.0004454709313083824,
      "grad_norm": 1.094260811805725,
      "learning_rate": 1e-05,
      "loss": 0.2354,
      "step": 130
    },
    {
      "epoch": 0.0004797379260244118,
      "grad_norm": 1.0395853519439697,
      "learning_rate": 1e-05,
      "loss": 0.2381,
      "step": 140
    },
    {
      "epoch": 0.0005140049207404412,
      "grad_norm": 1.2008885145187378,
      "learning_rate": 1e-05,
      "loss": 0.2354,
      "step": 150
    },
    {
      "epoch": 0.0005482719154564706,
      "grad_norm": 1.0647832155227661,
      "learning_rate": 1e-05,
      "loss": 0.2321,
      "step": 160
    },
    {
      "epoch": 0.0005825389101725,
      "grad_norm": 1.327071189880371,
      "learning_rate": 1e-05,
      "loss": 0.238,
      "step": 170
    },
    {
      "epoch": 0.0006168059048885295,
      "grad_norm": 1.1184055805206299,
      "learning_rate": 1e-05,
      "loss": 0.2242,
      "step": 180
    },
    {
      "epoch": 0.0006510728996045589,
      "grad_norm": 1.2512784004211426,
      "learning_rate": 1e-05,
      "loss": 0.2437,
      "step": 190
    },
    {
      "epoch": 0.0006853398943205883,
      "grad_norm": 1.0614465475082397,
      "learning_rate": 1e-05,
      "loss": 0.2382,
      "step": 200
    },
    {
      "epoch": 0.0007196068890366177,
      "grad_norm": 1.0607149600982666,
      "learning_rate": 1e-05,
      "loss": 0.2381,
      "step": 210
    },
    {
      "epoch": 0.0007538738837526471,
      "grad_norm": 1.0422028303146362,
      "learning_rate": 1e-05,
      "loss": 0.2294,
      "step": 220
    },
    {
      "epoch": 0.0007881408784686765,
      "grad_norm": 1.0162984132766724,
      "learning_rate": 1e-05,
      "loss": 0.2275,
      "step": 230
    },
    {
      "epoch": 0.000822407873184706,
      "grad_norm": 1.1085543632507324,
      "learning_rate": 1e-05,
      "loss": 0.2161,
      "step": 240
    },
    {
      "epoch": 0.0008566748679007354,
      "grad_norm": 1.1854636669158936,
      "learning_rate": 1e-05,
      "loss": 0.2382,
      "step": 250
    },
    {
      "epoch": 0.0008909418626167648,
      "grad_norm": 1.40137779712677,
      "learning_rate": 1e-05,
      "loss": 0.2579,
      "step": 260
    },
    {
      "epoch": 0.0009252088573327942,
      "grad_norm": 1.0814112424850464,
      "learning_rate": 1e-05,
      "loss": 0.2612,
      "step": 270
    },
    {
      "epoch": 0.0009594758520488236,
      "grad_norm": 1.083736538887024,
      "learning_rate": 1e-05,
      "loss": 0.2711,
      "step": 280
    },
    {
      "epoch": 0.000993742846764853,
      "grad_norm": 1.0861411094665527,
      "learning_rate": 1e-05,
      "loss": 0.2642,
      "step": 290
    },
    {
      "epoch": 0.0010280098414808825,
      "grad_norm": 1.1141265630722046,
      "learning_rate": 1e-05,
      "loss": 0.2585,
      "step": 300
    },
    {
      "epoch": 0.0010622768361969119,
      "grad_norm": 1.326241374015808,
      "learning_rate": 1e-05,
      "loss": 0.2858,
      "step": 310
    },
    {
      "epoch": 0.0010965438309129413,
      "grad_norm": 1.393750786781311,
      "learning_rate": 1e-05,
      "loss": 0.2635,
      "step": 320
    },
    {
      "epoch": 0.0011308108256289707,
      "grad_norm": 1.0851459503173828,
      "learning_rate": 1e-05,
      "loss": 0.2565,
      "step": 330
    },
    {
      "epoch": 0.001165077820345,
      "grad_norm": 1.2323757410049438,
      "learning_rate": 1e-05,
      "loss": 0.2465,
      "step": 340
    },
    {
      "epoch": 0.0011993448150610295,
      "grad_norm": 1.376953125,
      "learning_rate": 1e-05,
      "loss": 0.2671,
      "step": 350
    },
    {
      "epoch": 0.001233611809777059,
      "grad_norm": 1.084592580795288,
      "learning_rate": 1e-05,
      "loss": 0.2643,
      "step": 360
    },
    {
      "epoch": 0.0012678788044930883,
      "grad_norm": 1.2907005548477173,
      "learning_rate": 1e-05,
      "loss": 0.2584,
      "step": 370
    },
    {
      "epoch": 0.0013021457992091177,
      "grad_norm": 1.0698130130767822,
      "learning_rate": 1e-05,
      "loss": 0.2526,
      "step": 380
    },
    {
      "epoch": 0.0013364127939251471,
      "grad_norm": 1.1399807929992676,
      "learning_rate": 1e-05,
      "loss": 0.2759,
      "step": 390
    },
    {
      "epoch": 0.0013706797886411765,
      "grad_norm": 1.1480791568756104,
      "learning_rate": 1e-05,
      "loss": 0.2499,
      "step": 400
    },
    {
      "epoch": 0.001404946783357206,
      "grad_norm": 1.3095237016677856,
      "learning_rate": 1e-05,
      "loss": 0.2536,
      "step": 410
    },
    {
      "epoch": 0.0014392137780732353,
      "grad_norm": 1.068246841430664,
      "learning_rate": 1e-05,
      "loss": 0.2604,
      "step": 420
    },
    {
      "epoch": 0.0014734807727892648,
      "grad_norm": 1.2310419082641602,
      "learning_rate": 1e-05,
      "loss": 0.2632,
      "step": 430
    },
    {
      "epoch": 0.0015077477675052942,
      "grad_norm": 1.161867380142212,
      "learning_rate": 1e-05,
      "loss": 0.2584,
      "step": 440
    },
    {
      "epoch": 0.0015420147622213236,
      "grad_norm": 1.1461217403411865,
      "learning_rate": 1e-05,
      "loss": 0.2592,
      "step": 450
    },
    {
      "epoch": 0.001576281756937353,
      "grad_norm": 1.3006030321121216,
      "learning_rate": 1e-05,
      "loss": 0.2607,
      "step": 460
    },
    {
      "epoch": 0.0016105487516533824,
      "grad_norm": 1.1223125457763672,
      "learning_rate": 1e-05,
      "loss": 0.2433,
      "step": 470
    },
    {
      "epoch": 0.001644815746369412,
      "grad_norm": 1.2909380197525024,
      "learning_rate": 1e-05,
      "loss": 0.2693,
      "step": 480
    },
    {
      "epoch": 0.0016790827410854414,
      "grad_norm": 1.2270597219467163,
      "learning_rate": 1e-05,
      "loss": 0.2661,
      "step": 490
    },
    {
      "epoch": 0.0017133497358014708,
      "grad_norm": 1.1439770460128784,
      "learning_rate": 1e-05,
      "loss": 0.2517,
      "step": 500
    },
    {
      "epoch": 0.0017133497358014708,
      "eval_cer": 13.0358087846181,
      "eval_loss": 0.25224336981773376,
      "eval_normalized_cer": 9.4224620303757,
      "eval_runtime": 227.2174,
      "eval_samples_per_second": 2.253,
      "eval_steps_per_second": 0.035,
      "step": 500
    },
    {
      "epoch": 0.0017476167305175002,
      "grad_norm": 1.1377454996109009,
      "learning_rate": 1e-05,
      "loss": 0.2579,
      "step": 510
    },
    {
      "epoch": 0.0017818837252335296,
      "grad_norm": 1.2096498012542725,
      "learning_rate": 1e-05,
      "loss": 0.2727,
      "step": 520
    },
    {
      "epoch": 0.001816150719949559,
      "grad_norm": 1.187213659286499,
      "learning_rate": 1e-05,
      "loss": 0.2562,
      "step": 530
    },
    {
      "epoch": 0.0018504177146655885,
      "grad_norm": 0.969393253326416,
      "learning_rate": 1e-05,
      "loss": 0.2378,
      "step": 540
    },
    {
      "epoch": 0.0018846847093816179,
      "grad_norm": 0.9745528697967529,
      "learning_rate": 1e-05,
      "loss": 0.2774,
      "step": 550
    },
    {
      "epoch": 0.0019189517040976473,
      "grad_norm": 1.0725352764129639,
      "learning_rate": 1e-05,
      "loss": 0.2541,
      "step": 560
    },
    {
      "epoch": 0.0019532186988136767,
      "grad_norm": 1.217871904373169,
      "learning_rate": 1e-05,
      "loss": 0.2395,
      "step": 570
    },
    {
      "epoch": 0.001987485693529706,
      "grad_norm": 1.3582627773284912,
      "learning_rate": 1e-05,
      "loss": 0.2594,
      "step": 580
    },
    {
      "epoch": 0.0020217526882457355,
      "grad_norm": 1.2415379285812378,
      "learning_rate": 1e-05,
      "loss": 0.2582,
      "step": 590
    },
    {
      "epoch": 0.002056019682961765,
      "grad_norm": 0.9810131192207336,
      "learning_rate": 1e-05,
      "loss": 0.2284,
      "step": 600
    },
    {
      "epoch": 0.0020902866776777943,
      "grad_norm": 0.9806564450263977,
      "learning_rate": 1e-05,
      "loss": 0.2688,
      "step": 610
    },
    {
      "epoch": 0.0021245536723938237,
      "grad_norm": 1.2755467891693115,
      "learning_rate": 1e-05,
      "loss": 0.2591,
      "step": 620
    },
    {
      "epoch": 0.002158820667109853,
      "grad_norm": 0.9300326704978943,
      "learning_rate": 1e-05,
      "loss": 0.2444,
      "step": 630
    },
    {
      "epoch": 0.0021930876618258825,
      "grad_norm": 1.1276524066925049,
      "learning_rate": 1e-05,
      "loss": 0.236,
      "step": 640
    },
    {
      "epoch": 0.002227354656541912,
      "grad_norm": 1.1786876916885376,
      "learning_rate": 1e-05,
      "loss": 0.2443,
      "step": 650
    },
    {
      "epoch": 0.0022616216512579414,
      "grad_norm": 1.1702712774276733,
      "learning_rate": 1e-05,
      "loss": 0.2627,
      "step": 660
    },
    {
      "epoch": 0.0022958886459739708,
      "grad_norm": 1.2837899923324585,
      "learning_rate": 1e-05,
      "loss": 0.2378,
      "step": 670
    },
    {
      "epoch": 0.00233015564069,
      "grad_norm": 1.0623608827590942,
      "learning_rate": 1e-05,
      "loss": 0.2491,
      "step": 680
    },
    {
      "epoch": 0.0023644226354060296,
      "grad_norm": 1.1288243532180786,
      "learning_rate": 1e-05,
      "loss": 0.2773,
      "step": 690
    },
    {
      "epoch": 0.002398689630122059,
      "grad_norm": 1.0192692279815674,
      "learning_rate": 1e-05,
      "loss": 0.2492,
      "step": 700
    },
    {
      "epoch": 0.0024329566248380884,
      "grad_norm": 1.2274680137634277,
      "learning_rate": 1e-05,
      "loss": 0.2345,
      "step": 710
    },
    {
      "epoch": 0.002467223619554118,
      "grad_norm": 1.240645170211792,
      "learning_rate": 1e-05,
      "loss": 0.2624,
      "step": 720
    },
    {
      "epoch": 0.002501490614270147,
      "grad_norm": 1.0681366920471191,
      "learning_rate": 1e-05,
      "loss": 0.2553,
      "step": 730
    },
    {
      "epoch": 0.0025357576089861766,
      "grad_norm": 1.0161867141723633,
      "learning_rate": 1e-05,
      "loss": 0.2547,
      "step": 740
    },
    {
      "epoch": 0.002570024603702206,
      "grad_norm": 1.2384017705917358,
      "learning_rate": 1e-05,
      "loss": 0.2449,
      "step": 750
    },
    {
      "epoch": 0.0026042915984182354,
      "grad_norm": 1.1739261150360107,
      "learning_rate": 1e-05,
      "loss": 0.2523,
      "step": 760
    },
    {
      "epoch": 0.002638558593134265,
      "grad_norm": 1.0396535396575928,
      "learning_rate": 1e-05,
      "loss": 0.2535,
      "step": 770
    },
    {
      "epoch": 0.0026728255878502943,
      "grad_norm": 1.14767324924469,
      "learning_rate": 1e-05,
      "loss": 0.2594,
      "step": 780
    },
    {
      "epoch": 0.0027070925825663237,
      "grad_norm": 1.1783303022384644,
      "learning_rate": 1e-05,
      "loss": 0.2546,
      "step": 790
    },
    {
      "epoch": 0.002741359577282353,
      "grad_norm": 1.1065645217895508,
      "learning_rate": 1e-05,
      "loss": 0.2547,
      "step": 800
    },
    {
      "epoch": 0.0027756265719983825,
      "grad_norm": 1.256645917892456,
      "learning_rate": 1e-05,
      "loss": 0.2548,
      "step": 810
    },
    {
      "epoch": 0.002809893566714412,
      "grad_norm": 1.058158278465271,
      "learning_rate": 1e-05,
      "loss": 0.257,
      "step": 820
    },
    {
      "epoch": 0.0028441605614304413,
      "grad_norm": 1.0647656917572021,
      "learning_rate": 1e-05,
      "loss": 0.2479,
      "step": 830
    },
    {
      "epoch": 0.0028784275561464707,
      "grad_norm": 1.1984691619873047,
      "learning_rate": 1e-05,
      "loss": 0.2503,
      "step": 840
    },
    {
      "epoch": 0.0029126945508625,
      "grad_norm": 1.1380070447921753,
      "learning_rate": 1e-05,
      "loss": 0.245,
      "step": 850
    },
    {
      "epoch": 0.0029469615455785295,
      "grad_norm": 1.2131065130233765,
      "learning_rate": 1e-05,
      "loss": 0.242,
      "step": 860
    },
    {
      "epoch": 0.002981228540294559,
      "grad_norm": 1.1822234392166138,
      "learning_rate": 1e-05,
      "loss": 0.2613,
      "step": 870
    },
    {
      "epoch": 0.0030154955350105883,
      "grad_norm": 1.0591018199920654,
      "learning_rate": 1e-05,
      "loss": 0.2654,
      "step": 880
    },
    {
      "epoch": 0.0030497625297266177,
      "grad_norm": 1.2318428754806519,
      "learning_rate": 1e-05,
      "loss": 0.2525,
      "step": 890
    },
    {
      "epoch": 0.003084029524442647,
      "grad_norm": 1.0146839618682861,
      "learning_rate": 1e-05,
      "loss": 0.2609,
      "step": 900
    },
    {
      "epoch": 0.0031182965191586766,
      "grad_norm": 1.1508561372756958,
      "learning_rate": 1e-05,
      "loss": 0.2541,
      "step": 910
    },
    {
      "epoch": 0.003152563513874706,
      "grad_norm": 1.1494849920272827,
      "learning_rate": 1e-05,
      "loss": 0.2461,
      "step": 920
    },
    {
      "epoch": 0.0031868305085907354,
      "grad_norm": 1.2423807382583618,
      "learning_rate": 1e-05,
      "loss": 0.2573,
      "step": 930
    },
    {
      "epoch": 0.0032210975033067648,
      "grad_norm": 1.2714438438415527,
      "learning_rate": 1e-05,
      "loss": 0.2545,
      "step": 940
    },
    {
      "epoch": 0.0032553644980227946,
      "grad_norm": 1.2088007926940918,
      "learning_rate": 1e-05,
      "loss": 0.2773,
      "step": 950
    },
    {
      "epoch": 0.003289631492738824,
      "grad_norm": 1.0737963914871216,
      "learning_rate": 1e-05,
      "loss": 0.2495,
      "step": 960
    },
    {
      "epoch": 0.0033238984874548534,
      "grad_norm": 1.0942472219467163,
      "learning_rate": 1e-05,
      "loss": 0.2401,
      "step": 970
    },
    {
      "epoch": 0.003358165482170883,
      "grad_norm": 1.1282986402511597,
      "learning_rate": 1e-05,
      "loss": 0.2638,
      "step": 980
    },
    {
      "epoch": 0.0033924324768869123,
      "grad_norm": 1.0762425661087036,
      "learning_rate": 1e-05,
      "loss": 0.2619,
      "step": 990
    },
    {
      "epoch": 0.0034266994716029417,
      "grad_norm": 1.09200119972229,
      "learning_rate": 1e-05,
      "loss": 0.2464,
      "step": 1000
    },
    {
      "epoch": 0.0034266994716029417,
      "eval_cer": 13.80313988357735,
      "eval_loss": 0.25397512316703796,
      "eval_normalized_cer": 9.952038369304557,
      "eval_runtime": 227.5088,
      "eval_samples_per_second": 2.25,
      "eval_steps_per_second": 0.035,
      "step": 1000
    },
    {
      "epoch": 0.003460966466318971,
      "grad_norm": 0.9681844711303711,
      "learning_rate": 1e-05,
      "loss": 0.2567,
      "step": 1010
    },
    {
      "epoch": 0.0034952334610350005,
      "grad_norm": 1.0064711570739746,
      "learning_rate": 1e-05,
      "loss": 0.2514,
      "step": 1020
    },
    {
      "epoch": 0.00352950045575103,
      "grad_norm": 1.190294623374939,
      "learning_rate": 1e-05,
      "loss": 0.2654,
      "step": 1030
    },
    {
      "epoch": 0.0035637674504670593,
      "grad_norm": 1.332492709159851,
      "learning_rate": 1e-05,
      "loss": 0.2725,
      "step": 1040
    },
    {
      "epoch": 0.0035980344451830887,
      "grad_norm": 1.1110397577285767,
      "learning_rate": 1e-05,
      "loss": 0.2504,
      "step": 1050
    },
    {
      "epoch": 0.003632301439899118,
      "grad_norm": 1.2327215671539307,
      "learning_rate": 1e-05,
      "loss": 0.2733,
      "step": 1060
    },
    {
      "epoch": 0.0036665684346151475,
      "grad_norm": 1.1694815158843994,
      "learning_rate": 1e-05,
      "loss": 0.2611,
      "step": 1070
    },
    {
      "epoch": 0.003700835429331177,
      "grad_norm": 1.212570309638977,
      "learning_rate": 1e-05,
      "loss": 0.2556,
      "step": 1080
    },
    {
      "epoch": 0.0037351024240472063,
      "grad_norm": 1.1467297077178955,
      "learning_rate": 1e-05,
      "loss": 0.2485,
      "step": 1090
    },
    {
      "epoch": 0.0037693694187632357,
      "grad_norm": 0.9628469347953796,
      "learning_rate": 1e-05,
      "loss": 0.2523,
      "step": 1100
    },
    {
      "epoch": 0.003803636413479265,
      "grad_norm": 1.1593494415283203,
      "learning_rate": 1e-05,
      "loss": 0.2635,
      "step": 1110
    },
    {
      "epoch": 0.0038379034081952946,
      "grad_norm": 1.1376386880874634,
      "learning_rate": 1e-05,
      "loss": 0.2504,
      "step": 1120
    },
    {
      "epoch": 0.003872170402911324,
      "grad_norm": 1.129338026046753,
      "learning_rate": 1e-05,
      "loss": 0.2601,
      "step": 1130
    },
    {
      "epoch": 0.003906437397627353,
      "grad_norm": 1.0889575481414795,
      "learning_rate": 1e-05,
      "loss": 0.2455,
      "step": 1140
    },
    {
      "epoch": 0.003940704392343382,
      "grad_norm": 1.1437270641326904,
      "learning_rate": 1e-05,
      "loss": 0.253,
      "step": 1150
    },
    {
      "epoch": 0.003974971387059412,
      "grad_norm": 1.0283392667770386,
      "learning_rate": 1e-05,
      "loss": 0.2507,
      "step": 1160
    },
    {
      "epoch": 0.004009238381775441,
      "grad_norm": 1.130747675895691,
      "learning_rate": 1e-05,
      "loss": 0.2715,
      "step": 1170
    },
    {
      "epoch": 0.004043505376491471,
      "grad_norm": 1.3483778238296509,
      "learning_rate": 1e-05,
      "loss": 0.2742,
      "step": 1180
    },
    {
      "epoch": 0.0040777723712075,
      "grad_norm": 1.0879924297332764,
      "learning_rate": 1e-05,
      "loss": 0.2641,
      "step": 1190
    },
    {
      "epoch": 0.00411203936592353,
      "grad_norm": 1.1242927312850952,
      "learning_rate": 1e-05,
      "loss": 0.2586,
      "step": 1200
    },
    {
      "epoch": 0.004146306360639559,
      "grad_norm": 1.0185858011245728,
      "learning_rate": 1e-05,
      "loss": 0.2465,
      "step": 1210
    },
    {
      "epoch": 0.004180573355355589,
      "grad_norm": 0.9555259943008423,
      "learning_rate": 1e-05,
      "loss": 0.2528,
      "step": 1220
    },
    {
      "epoch": 0.004214840350071618,
      "grad_norm": 1.210371971130371,
      "learning_rate": 1e-05,
      "loss": 0.2613,
      "step": 1230
    },
    {
      "epoch": 0.0042491073447876474,
      "grad_norm": 1.1261368989944458,
      "learning_rate": 1e-05,
      "loss": 0.2551,
      "step": 1240
    },
    {
      "epoch": 0.004283374339503676,
      "grad_norm": 1.2142603397369385,
      "learning_rate": 1e-05,
      "loss": 0.264,
      "step": 1250
    },
    {
      "epoch": 0.004317641334219706,
      "grad_norm": 1.057758092880249,
      "learning_rate": 1e-05,
      "loss": 0.2587,
      "step": 1260
    },
    {
      "epoch": 0.004351908328935736,
      "grad_norm": 1.0871245861053467,
      "learning_rate": 1e-05,
      "loss": 0.2549,
      "step": 1270
    },
    {
      "epoch": 0.004386175323651765,
      "grad_norm": 1.1214648485183716,
      "learning_rate": 1e-05,
      "loss": 0.2582,
      "step": 1280
    },
    {
      "epoch": 0.004420442318367795,
      "grad_norm": 1.0265707969665527,
      "learning_rate": 1e-05,
      "loss": 0.2123,
      "step": 1290
    },
    {
      "epoch": 0.004454709313083824,
      "grad_norm": 1.1180216073989868,
      "learning_rate": 1e-05,
      "loss": 0.2245,
      "step": 1300
    },
    {
      "epoch": 0.004488976307799854,
      "grad_norm": 1.028238296508789,
      "learning_rate": 1e-05,
      "loss": 0.2118,
      "step": 1310
    },
    {
      "epoch": 0.004523243302515883,
      "grad_norm": 1.0321682691574097,
      "learning_rate": 1e-05,
      "loss": 0.2196,
      "step": 1320
    },
    {
      "epoch": 0.0045575102972319126,
      "grad_norm": 1.1180269718170166,
      "learning_rate": 1e-05,
      "loss": 0.2403,
      "step": 1330
    },
    {
      "epoch": 0.0045917772919479415,
      "grad_norm": 1.079560399055481,
      "learning_rate": 1e-05,
      "loss": 0.2309,
      "step": 1340
    },
    {
      "epoch": 0.004626044286663971,
      "grad_norm": 1.0062284469604492,
      "learning_rate": 1e-05,
      "loss": 0.228,
      "step": 1350
    },
    {
      "epoch": 0.00466031128138,
      "grad_norm": 1.1098395586013794,
      "learning_rate": 1e-05,
      "loss": 0.2435,
      "step": 1360
    },
    {
      "epoch": 0.00469457827609603,
      "grad_norm": 1.0619688034057617,
      "learning_rate": 1e-05,
      "loss": 0.2342,
      "step": 1370
    },
    {
      "epoch": 0.004728845270812059,
      "grad_norm": 1.1943925619125366,
      "learning_rate": 1e-05,
      "loss": 0.2315,
      "step": 1380
    },
    {
      "epoch": 0.004763112265528089,
      "grad_norm": 1.0958552360534668,
      "learning_rate": 1e-05,
      "loss": 0.2379,
      "step": 1390
    },
    {
      "epoch": 0.004797379260244118,
      "grad_norm": 1.0984197854995728,
      "learning_rate": 1e-05,
      "loss": 0.2208,
      "step": 1400
    },
    {
      "epoch": 0.004831646254960148,
      "grad_norm": 1.0741859674453735,
      "learning_rate": 1e-05,
      "loss": 0.2378,
      "step": 1410
    },
    {
      "epoch": 0.004865913249676177,
      "grad_norm": 1.1457058191299438,
      "learning_rate": 1e-05,
      "loss": 0.2516,
      "step": 1420
    },
    {
      "epoch": 0.004900180244392207,
      "grad_norm": 0.9849014282226562,
      "learning_rate": 1e-05,
      "loss": 0.2406,
      "step": 1430
    },
    {
      "epoch": 0.004934447239108236,
      "grad_norm": 1.1174912452697754,
      "learning_rate": 1e-05,
      "loss": 0.2122,
      "step": 1440
    },
    {
      "epoch": 0.0049687142338242654,
      "grad_norm": 1.0292854309082031,
      "learning_rate": 1e-05,
      "loss": 0.2349,
      "step": 1450
    },
    {
      "epoch": 0.005002981228540294,
      "grad_norm": 1.0343785285949707,
      "learning_rate": 1e-05,
      "loss": 0.2158,
      "step": 1460
    },
    {
      "epoch": 0.005037248223256324,
      "grad_norm": 1.1178008317947388,
      "learning_rate": 1e-05,
      "loss": 0.2264,
      "step": 1470
    },
    {
      "epoch": 0.005071515217972353,
      "grad_norm": 1.0238450765609741,
      "learning_rate": 1e-05,
      "loss": 0.2287,
      "step": 1480
    },
    {
      "epoch": 0.005105782212688383,
      "grad_norm": 1.1728886365890503,
      "learning_rate": 1e-05,
      "loss": 0.2373,
      "step": 1490
    },
    {
      "epoch": 0.005140049207404412,
      "grad_norm": 1.227034091949463,
      "learning_rate": 1e-05,
      "loss": 0.222,
      "step": 1500
    },
    {
      "epoch": 0.005140049207404412,
      "eval_cer": 13.150467454577527,
      "eval_loss": 0.25801682472229004,
      "eval_normalized_cer": 9.452438049560353,
      "eval_runtime": 227.9378,
      "eval_samples_per_second": 2.246,
      "eval_steps_per_second": 0.035,
      "step": 1500
    },
    {
      "epoch": 0.005174316202120442,
      "grad_norm": 1.0703920125961304,
      "learning_rate": 1e-05,
      "loss": 0.2156,
      "step": 1510
    },
    {
      "epoch": 0.005208583196836471,
      "grad_norm": 1.1343841552734375,
      "learning_rate": 1e-05,
      "loss": 0.2126,
      "step": 1520
    },
    {
      "epoch": 0.005242850191552501,
      "grad_norm": 1.1743741035461426,
      "learning_rate": 1e-05,
      "loss": 0.2491,
      "step": 1530
    },
    {
      "epoch": 0.00527711718626853,
      "grad_norm": 1.1476744413375854,
      "learning_rate": 1e-05,
      "loss": 0.236,
      "step": 1540
    },
    {
      "epoch": 0.0053113841809845595,
      "grad_norm": 1.0899590253829956,
      "learning_rate": 1e-05,
      "loss": 0.2361,
      "step": 1550
    },
    {
      "epoch": 0.0053456511757005885,
      "grad_norm": 1.0281250476837158,
      "learning_rate": 1e-05,
      "loss": 0.2226,
      "step": 1560
    },
    {
      "epoch": 0.005379918170416618,
      "grad_norm": 0.9932867884635925,
      "learning_rate": 1e-05,
      "loss": 0.2301,
      "step": 1570
    },
    {
      "epoch": 0.005414185165132647,
      "grad_norm": 1.1992309093475342,
      "learning_rate": 1e-05,
      "loss": 0.2179,
      "step": 1580
    },
    {
      "epoch": 0.005448452159848677,
      "grad_norm": 1.0017774105072021,
      "learning_rate": 1e-05,
      "loss": 0.2244,
      "step": 1590
    },
    {
      "epoch": 0.005482719154564706,
      "grad_norm": 1.0827686786651611,
      "learning_rate": 1e-05,
      "loss": 0.2313,
      "step": 1600
    },
    {
      "epoch": 0.005516986149280736,
      "grad_norm": 1.2260409593582153,
      "learning_rate": 1e-05,
      "loss": 0.229,
      "step": 1610
    },
    {
      "epoch": 0.005551253143996765,
      "grad_norm": 1.2530804872512817,
      "learning_rate": 1e-05,
      "loss": 0.2437,
      "step": 1620
    },
    {
      "epoch": 0.005585520138712795,
      "grad_norm": 1.068452000617981,
      "learning_rate": 1e-05,
      "loss": 0.2138,
      "step": 1630
    },
    {
      "epoch": 0.005619787133428824,
      "grad_norm": 1.3108712434768677,
      "learning_rate": 1e-05,
      "loss": 0.2284,
      "step": 1640
    },
    {
      "epoch": 0.005654054128144854,
      "grad_norm": 1.0919209718704224,
      "learning_rate": 1e-05,
      "loss": 0.213,
      "step": 1650
    },
    {
      "epoch": 0.005688321122860883,
      "grad_norm": 1.1530914306640625,
      "learning_rate": 1e-05,
      "loss": 0.2292,
      "step": 1660
    },
    {
      "epoch": 0.005722588117576912,
      "grad_norm": 1.084028959274292,
      "learning_rate": 1e-05,
      "loss": 0.2393,
      "step": 1670
    },
    {
      "epoch": 0.005756855112292941,
      "grad_norm": 1.247847557067871,
      "learning_rate": 1e-05,
      "loss": 0.2452,
      "step": 1680
    },
    {
      "epoch": 0.005791122107008971,
      "grad_norm": 1.03806734085083,
      "learning_rate": 1e-05,
      "loss": 0.2317,
      "step": 1690
    },
    {
      "epoch": 0.005825389101725,
      "grad_norm": 1.1643092632293701,
      "learning_rate": 1e-05,
      "loss": 0.2348,
      "step": 1700
    },
    {
      "epoch": 0.00585965609644103,
      "grad_norm": 1.1066207885742188,
      "learning_rate": 1e-05,
      "loss": 0.2348,
      "step": 1710
    },
    {
      "epoch": 0.005893923091157059,
      "grad_norm": 1.1813760995864868,
      "learning_rate": 1e-05,
      "loss": 0.2295,
      "step": 1720
    },
    {
      "epoch": 0.005928190085873089,
      "grad_norm": 1.1444518566131592,
      "learning_rate": 1e-05,
      "loss": 0.2101,
      "step": 1730
    },
    {
      "epoch": 0.005962457080589118,
      "grad_norm": 1.1485129594802856,
      "learning_rate": 1e-05,
      "loss": 0.2397,
      "step": 1740
    },
    {
      "epoch": 0.005996724075305148,
      "grad_norm": 1.1813607215881348,
      "learning_rate": 1e-05,
      "loss": 0.231,
      "step": 1750
    },
    {
      "epoch": 0.006030991070021177,
      "grad_norm": 1.4075005054473877,
      "learning_rate": 1e-05,
      "loss": 0.2306,
      "step": 1760
    },
    {
      "epoch": 0.0060652580647372065,
      "grad_norm": 1.2183804512023926,
      "learning_rate": 1e-05,
      "loss": 0.2227,
      "step": 1770
    },
    {
      "epoch": 0.0060995250594532355,
      "grad_norm": 1.3654927015304565,
      "learning_rate": 1e-05,
      "loss": 0.2341,
      "step": 1780
    },
    {
      "epoch": 0.006133792054169265,
      "grad_norm": 1.2806668281555176,
      "learning_rate": 1e-05,
      "loss": 0.2226,
      "step": 1790
    },
    {
      "epoch": 0.006168059048885294,
      "grad_norm": 1.2949618101119995,
      "learning_rate": 1e-05,
      "loss": 0.2698,
      "step": 1800
    },
    {
      "epoch": 0.006202326043601324,
      "grad_norm": 1.3080159425735474,
      "learning_rate": 1e-05,
      "loss": 0.2691,
      "step": 1810
    },
    {
      "epoch": 0.006236593038317353,
      "grad_norm": 1.1831908226013184,
      "learning_rate": 1e-05,
      "loss": 0.2644,
      "step": 1820
    },
    {
      "epoch": 0.006270860033033383,
      "grad_norm": 1.1216965913772583,
      "learning_rate": 1e-05,
      "loss": 0.2582,
      "step": 1830
    },
    {
      "epoch": 0.006305127027749412,
      "grad_norm": 1.1943161487579346,
      "learning_rate": 1e-05,
      "loss": 0.2769,
      "step": 1840
    },
    {
      "epoch": 0.006339394022465442,
      "grad_norm": 1.0856040716171265,
      "learning_rate": 1e-05,
      "loss": 0.2526,
      "step": 1850
    },
    {
      "epoch": 0.006373661017181471,
      "grad_norm": 1.1100040674209595,
      "learning_rate": 1e-05,
      "loss": 0.2576,
      "step": 1860
    },
    {
      "epoch": 0.006407928011897501,
      "grad_norm": 1.3369051218032837,
      "learning_rate": 1e-05,
      "loss": 0.2684,
      "step": 1870
    },
    {
      "epoch": 0.0064421950066135296,
      "grad_norm": 1.158797264099121,
      "learning_rate": 1e-05,
      "loss": 0.2474,
      "step": 1880
    },
    {
      "epoch": 0.006476462001329559,
      "grad_norm": 1.1821873188018799,
      "learning_rate": 1e-05,
      "loss": 0.272,
      "step": 1890
    },
    {
      "epoch": 0.006510728996045589,
      "grad_norm": 1.0739686489105225,
      "learning_rate": 1e-05,
      "loss": 0.2798,
      "step": 1900
    },
    {
      "epoch": 0.006544995990761618,
      "grad_norm": 1.0639653205871582,
      "learning_rate": 1e-05,
      "loss": 0.2682,
      "step": 1910
    },
    {
      "epoch": 0.006579262985477648,
      "grad_norm": 1.2149512767791748,
      "learning_rate": 1e-05,
      "loss": 0.2586,
      "step": 1920
    },
    {
      "epoch": 0.006613529980193677,
      "grad_norm": 1.1057014465332031,
      "learning_rate": 1e-05,
      "loss": 0.2719,
      "step": 1930
    },
    {
      "epoch": 0.006647796974909707,
      "grad_norm": 1.0929185152053833,
      "learning_rate": 1e-05,
      "loss": 0.2703,
      "step": 1940
    },
    {
      "epoch": 0.006682063969625736,
      "grad_norm": 1.0322917699813843,
      "learning_rate": 1e-05,
      "loss": 0.2477,
      "step": 1950
    },
    {
      "epoch": 0.006716330964341766,
      "grad_norm": 1.2460272312164307,
      "learning_rate": 1e-05,
      "loss": 0.2816,
      "step": 1960
    },
    {
      "epoch": 0.006750597959057795,
      "grad_norm": 1.2049859762191772,
      "learning_rate": 1e-05,
      "loss": 0.2648,
      "step": 1970
    },
    {
      "epoch": 0.0067848649537738245,
      "grad_norm": 1.1182633638381958,
      "learning_rate": 1e-05,
      "loss": 0.2549,
      "step": 1980
    },
    {
      "epoch": 0.0068191319484898535,
      "grad_norm": 1.1514990329742432,
      "learning_rate": 1e-05,
      "loss": 0.2695,
      "step": 1990
    },
    {
      "epoch": 0.006853398943205883,
      "grad_norm": 1.0150858163833618,
      "learning_rate": 1e-05,
      "loss": 0.2532,
      "step": 2000
    },
    {
      "epoch": 0.006853398943205883,
      "eval_cer": 13.565002645969306,
      "eval_loss": 0.2523655593395233,
      "eval_normalized_cer": 9.942046362909672,
      "eval_runtime": 226.5571,
      "eval_samples_per_second": 2.26,
      "eval_steps_per_second": 0.035,
      "step": 2000
    },
    {
      "epoch": 0.006887665937921912,
      "grad_norm": 1.0476700067520142,
      "learning_rate": 1e-05,
      "loss": 0.2555,
      "step": 2010
    },
    {
      "epoch": 0.006921932932637942,
      "grad_norm": 1.1178691387176514,
      "learning_rate": 1e-05,
      "loss": 0.2489,
      "step": 2020
    },
    {
      "epoch": 0.006956199927353971,
      "grad_norm": 1.2596313953399658,
      "learning_rate": 1e-05,
      "loss": 0.2884,
      "step": 2030
    },
    {
      "epoch": 0.006990466922070001,
      "grad_norm": 1.1929702758789062,
      "learning_rate": 1e-05,
      "loss": 0.262,
      "step": 2040
    },
    {
      "epoch": 0.00702473391678603,
      "grad_norm": 1.1269497871398926,
      "learning_rate": 1e-05,
      "loss": 0.2758,
      "step": 2050
    },
    {
      "epoch": 0.00705900091150206,
      "grad_norm": 1.1495511531829834,
      "learning_rate": 1e-05,
      "loss": 0.2668,
      "step": 2060
    },
    {
      "epoch": 0.007093267906218089,
      "grad_norm": 1.0648061037063599,
      "learning_rate": 1e-05,
      "loss": 0.2548,
      "step": 2070
    },
    {
      "epoch": 0.007127534900934119,
      "grad_norm": 1.3193435668945312,
      "learning_rate": 1e-05,
      "loss": 0.2743,
      "step": 2080
    },
    {
      "epoch": 0.0071618018956501476,
      "grad_norm": 1.2877907752990723,
      "learning_rate": 1e-05,
      "loss": 0.248,
      "step": 2090
    },
    {
      "epoch": 0.007196068890366177,
      "grad_norm": 1.2012474536895752,
      "learning_rate": 1e-05,
      "loss": 0.2662,
      "step": 2100
    },
    {
      "epoch": 0.007230335885082206,
      "grad_norm": 1.1491566896438599,
      "learning_rate": 1e-05,
      "loss": 0.2666,
      "step": 2110
    },
    {
      "epoch": 0.007264602879798236,
      "grad_norm": 1.1861019134521484,
      "learning_rate": 1e-05,
      "loss": 0.2618,
      "step": 2120
    },
    {
      "epoch": 0.007298869874514265,
      "grad_norm": 1.123963713645935,
      "learning_rate": 1e-05,
      "loss": 0.2646,
      "step": 2130
    },
    {
      "epoch": 0.007333136869230295,
      "grad_norm": 1.2697441577911377,
      "learning_rate": 1e-05,
      "loss": 0.2713,
      "step": 2140
    },
    {
      "epoch": 0.007367403863946324,
      "grad_norm": 0.9741083383560181,
      "learning_rate": 1e-05,
      "loss": 0.2463,
      "step": 2150
    },
    {
      "epoch": 0.007401670858662354,
      "grad_norm": 1.0292670726776123,
      "learning_rate": 1e-05,
      "loss": 0.2542,
      "step": 2160
    },
    {
      "epoch": 0.007435937853378383,
      "grad_norm": 1.0958001613616943,
      "learning_rate": 1e-05,
      "loss": 0.2463,
      "step": 2170
    },
    {
      "epoch": 0.007470204848094413,
      "grad_norm": 1.166869044303894,
      "learning_rate": 1e-05,
      "loss": 0.2454,
      "step": 2180
    },
    {
      "epoch": 0.007504471842810442,
      "grad_norm": 1.2552424669265747,
      "learning_rate": 1e-05,
      "loss": 0.2498,
      "step": 2190
    },
    {
      "epoch": 0.0075387388375264715,
      "grad_norm": 1.1589868068695068,
      "learning_rate": 1e-05,
      "loss": 0.2659,
      "step": 2200
    },
    {
      "epoch": 0.0075730058322425004,
      "grad_norm": 1.1640287637710571,
      "learning_rate": 1e-05,
      "loss": 0.257,
      "step": 2210
    },
    {
      "epoch": 0.00760727282695853,
      "grad_norm": 1.0953587293624878,
      "learning_rate": 1e-05,
      "loss": 0.2444,
      "step": 2220
    },
    {
      "epoch": 0.007641539821674559,
      "grad_norm": 1.2174441814422607,
      "learning_rate": 1e-05,
      "loss": 0.2626,
      "step": 2230
    },
    {
      "epoch": 0.007675806816390589,
      "grad_norm": 1.1194220781326294,
      "learning_rate": 1e-05,
      "loss": 0.241,
      "step": 2240
    },
    {
      "epoch": 0.007710073811106618,
      "grad_norm": 1.0677419900894165,
      "learning_rate": 1e-05,
      "loss": 0.2718,
      "step": 2250
    },
    {
      "epoch": 0.007744340805822648,
      "grad_norm": 1.0956069231033325,
      "learning_rate": 1e-05,
      "loss": 0.2493,
      "step": 2260
    },
    {
      "epoch": 0.007778607800538677,
      "grad_norm": 1.1772819757461548,
      "learning_rate": 1e-05,
      "loss": 0.2614,
      "step": 2270
    },
    {
      "epoch": 0.007812874795254707,
      "grad_norm": 1.0341110229492188,
      "learning_rate": 1e-05,
      "loss": 0.2488,
      "step": 2280
    },
    {
      "epoch": 0.007847141789970737,
      "grad_norm": 1.174186110496521,
      "learning_rate": 1e-05,
      "loss": 0.2542,
      "step": 2290
    },
    {
      "epoch": 0.007881408784686765,
      "grad_norm": 0.9867792725563049,
      "learning_rate": 1e-05,
      "loss": 0.2582,
      "step": 2300
    },
    {
      "epoch": 0.007915675779402795,
      "grad_norm": 1.1443661451339722,
      "learning_rate": 1e-05,
      "loss": 0.2331,
      "step": 2310
    },
    {
      "epoch": 0.007949942774118824,
      "grad_norm": 1.117896318435669,
      "learning_rate": 1e-05,
      "loss": 0.2277,
      "step": 2320
    },
    {
      "epoch": 0.007984209768834854,
      "grad_norm": 1.13510000705719,
      "learning_rate": 1e-05,
      "loss": 0.2137,
      "step": 2330
    },
    {
      "epoch": 0.008018476763550882,
      "grad_norm": 0.9749162793159485,
      "learning_rate": 1e-05,
      "loss": 0.2161,
      "step": 2340
    },
    {
      "epoch": 0.008052743758266912,
      "grad_norm": 1.1519534587860107,
      "learning_rate": 1e-05,
      "loss": 0.2254,
      "step": 2350
    },
    {
      "epoch": 0.008087010752982942,
      "grad_norm": 1.0861778259277344,
      "learning_rate": 1e-05,
      "loss": 0.2153,
      "step": 2360
    },
    {
      "epoch": 0.008121277747698972,
      "grad_norm": 1.0184444189071655,
      "learning_rate": 1e-05,
      "loss": 0.2066,
      "step": 2370
    },
    {
      "epoch": 0.008155544742415,
      "grad_norm": 1.0581239461898804,
      "learning_rate": 1e-05,
      "loss": 0.2243,
      "step": 2380
    },
    {
      "epoch": 0.00818981173713103,
      "grad_norm": 0.9954540729522705,
      "learning_rate": 1e-05,
      "loss": 0.2171,
      "step": 2390
    },
    {
      "epoch": 0.00822407873184706,
      "grad_norm": 1.121960163116455,
      "learning_rate": 1e-05,
      "loss": 0.2216,
      "step": 2400
    },
    {
      "epoch": 0.00825834572656309,
      "grad_norm": 1.097725510597229,
      "learning_rate": 1e-05,
      "loss": 0.2142,
      "step": 2410
    },
    {
      "epoch": 0.008292612721279118,
      "grad_norm": 1.0566459894180298,
      "learning_rate": 1e-05,
      "loss": 0.2272,
      "step": 2420
    },
    {
      "epoch": 0.008326879715995147,
      "grad_norm": 1.0077927112579346,
      "learning_rate": 1e-05,
      "loss": 0.211,
      "step": 2430
    },
    {
      "epoch": 0.008361146710711177,
      "grad_norm": 1.176035761833191,
      "learning_rate": 1e-05,
      "loss": 0.2125,
      "step": 2440
    },
    {
      "epoch": 0.008395413705427207,
      "grad_norm": 1.0064568519592285,
      "learning_rate": 1e-05,
      "loss": 0.2066,
      "step": 2450
    },
    {
      "epoch": 0.008429680700143235,
      "grad_norm": 1.1852171421051025,
      "learning_rate": 1e-05,
      "loss": 0.2087,
      "step": 2460
    },
    {
      "epoch": 0.008463947694859265,
      "grad_norm": 0.9580971002578735,
      "learning_rate": 1e-05,
      "loss": 0.2172,
      "step": 2470
    },
    {
      "epoch": 0.008498214689575295,
      "grad_norm": 1.1230813264846802,
      "learning_rate": 1e-05,
      "loss": 0.2104,
      "step": 2480
    },
    {
      "epoch": 0.008532481684291325,
      "grad_norm": 1.1891340017318726,
      "learning_rate": 1e-05,
      "loss": 0.229,
      "step": 2490
    },
    {
      "epoch": 0.008566748679007353,
      "grad_norm": 1.2579045295715332,
      "learning_rate": 1e-05,
      "loss": 0.2109,
      "step": 2500
    },
    {
      "epoch": 0.008566748679007353,
      "eval_cer": 13.300405715293703,
      "eval_loss": 0.26059621572494507,
      "eval_normalized_cer": 9.502398081534773,
      "eval_runtime": 226.5522,
      "eval_samples_per_second": 2.26,
      "eval_steps_per_second": 0.035,
      "step": 2500
    },
    {
      "epoch": 0.008601015673723383,
      "grad_norm": 1.0522507429122925,
      "learning_rate": 1e-05,
      "loss": 0.2154,
      "step": 2510
    },
    {
      "epoch": 0.008635282668439413,
      "grad_norm": 1.0875492095947266,
      "learning_rate": 1e-05,
      "loss": 0.2251,
      "step": 2520
    },
    {
      "epoch": 0.008669549663155442,
      "grad_norm": 1.0868346691131592,
      "learning_rate": 1e-05,
      "loss": 0.2086,
      "step": 2530
    },
    {
      "epoch": 0.008703816657871472,
      "grad_norm": 1.0993175506591797,
      "learning_rate": 1e-05,
      "loss": 0.205,
      "step": 2540
    },
    {
      "epoch": 0.0087380836525875,
      "grad_norm": 1.0495941638946533,
      "learning_rate": 1e-05,
      "loss": 0.2135,
      "step": 2550
    },
    {
      "epoch": 0.00877235064730353,
      "grad_norm": 1.0326807498931885,
      "learning_rate": 1e-05,
      "loss": 0.2105,
      "step": 2560
    },
    {
      "epoch": 0.00880661764201956,
      "grad_norm": 1.0804367065429688,
      "learning_rate": 1e-05,
      "loss": 0.2438,
      "step": 2570
    },
    {
      "epoch": 0.00884088463673559,
      "grad_norm": 1.0738023519515991,
      "learning_rate": 1e-05,
      "loss": 0.2537,
      "step": 2580
    },
    {
      "epoch": 0.008875151631451618,
      "grad_norm": 1.1695871353149414,
      "learning_rate": 1e-05,
      "loss": 0.2518,
      "step": 2590
    },
    {
      "epoch": 0.008909418626167648,
      "grad_norm": 1.155653476715088,
      "learning_rate": 1e-05,
      "loss": 0.2592,
      "step": 2600
    },
    {
      "epoch": 0.008943685620883678,
      "grad_norm": 1.1516027450561523,
      "learning_rate": 1e-05,
      "loss": 0.2387,
      "step": 2610
    },
    {
      "epoch": 0.008977952615599707,
      "grad_norm": 1.2618260383605957,
      "learning_rate": 1e-05,
      "loss": 0.2638,
      "step": 2620
    },
    {
      "epoch": 0.009012219610315736,
      "grad_norm": 1.2422987222671509,
      "learning_rate": 1e-05,
      "loss": 0.2459,
      "step": 2630
    },
    {
      "epoch": 0.009046486605031765,
      "grad_norm": 1.1460082530975342,
      "learning_rate": 1e-05,
      "loss": 0.2509,
      "step": 2640
    },
    {
      "epoch": 0.009080753599747795,
      "grad_norm": 1.2502261400222778,
      "learning_rate": 1e-05,
      "loss": 0.2595,
      "step": 2650
    },
    {
      "epoch": 0.009115020594463825,
      "grad_norm": 1.139840006828308,
      "learning_rate": 1e-05,
      "loss": 0.255,
      "step": 2660
    },
    {
      "epoch": 0.009149287589179853,
      "grad_norm": 1.3247896432876587,
      "learning_rate": 1e-05,
      "loss": 0.2721,
      "step": 2670
    },
    {
      "epoch": 0.009183554583895883,
      "grad_norm": 1.1355103254318237,
      "learning_rate": 1e-05,
      "loss": 0.2604,
      "step": 2680
    },
    {
      "epoch": 0.009217821578611913,
      "grad_norm": 1.106541633605957,
      "learning_rate": 1e-05,
      "loss": 0.2374,
      "step": 2690
    },
    {
      "epoch": 0.009252088573327943,
      "grad_norm": 1.2375975847244263,
      "learning_rate": 1e-05,
      "loss": 0.2719,
      "step": 2700
    },
    {
      "epoch": 0.00928635556804397,
      "grad_norm": 1.1048275232315063,
      "learning_rate": 1e-05,
      "loss": 0.2791,
      "step": 2710
    },
    {
      "epoch": 0.00932062256276,
      "grad_norm": 0.9889766573905945,
      "learning_rate": 1e-05,
      "loss": 0.2457,
      "step": 2720
    },
    {
      "epoch": 0.00935488955747603,
      "grad_norm": 1.1566202640533447,
      "learning_rate": 1e-05,
      "loss": 0.252,
      "step": 2730
    },
    {
      "epoch": 0.00938915655219206,
      "grad_norm": 1.1586074829101562,
      "learning_rate": 1e-05,
      "loss": 0.2517,
      "step": 2740
    },
    {
      "epoch": 0.009423423546908088,
      "grad_norm": 0.990419328212738,
      "learning_rate": 1e-05,
      "loss": 0.2572,
      "step": 2750
    },
    {
      "epoch": 0.009457690541624118,
      "grad_norm": 1.1101089715957642,
      "learning_rate": 1e-05,
      "loss": 0.2525,
      "step": 2760
    },
    {
      "epoch": 0.009491957536340148,
      "grad_norm": 1.0488269329071045,
      "learning_rate": 1e-05,
      "loss": 0.2452,
      "step": 2770
    },
    {
      "epoch": 0.009526224531056178,
      "grad_norm": 1.1127737760543823,
      "learning_rate": 1e-05,
      "loss": 0.2578,
      "step": 2780
    },
    {
      "epoch": 0.009560491525772206,
      "grad_norm": 1.2353262901306152,
      "learning_rate": 1e-05,
      "loss": 0.2412,
      "step": 2790
    },
    {
      "epoch": 0.009594758520488236,
      "grad_norm": 1.1262571811676025,
      "learning_rate": 1e-05,
      "loss": 0.2438,
      "step": 2800
    },
    {
      "epoch": 0.009629025515204266,
      "grad_norm": 1.294323205947876,
      "learning_rate": 1e-05,
      "loss": 0.2512,
      "step": 2810
    },
    {
      "epoch": 0.009663292509920296,
      "grad_norm": 1.0706703662872314,
      "learning_rate": 1e-05,
      "loss": 0.2595,
      "step": 2820
    },
    {
      "epoch": 0.009697559504636324,
      "grad_norm": 1.0089077949523926,
      "learning_rate": 1e-05,
      "loss": 0.2522,
      "step": 2830
    },
    {
      "epoch": 0.009731826499352354,
      "grad_norm": 0.9697763323783875,
      "learning_rate": 1e-05,
      "loss": 0.2684,
      "step": 2840
    },
    {
      "epoch": 0.009766093494068383,
      "grad_norm": 1.1122509241104126,
      "learning_rate": 1e-05,
      "loss": 0.2629,
      "step": 2850
    },
    {
      "epoch": 0.009800360488784413,
      "grad_norm": 1.0381057262420654,
      "learning_rate": 1e-05,
      "loss": 0.2482,
      "step": 2860
    },
    {
      "epoch": 0.009834627483500441,
      "grad_norm": 1.126947045326233,
      "learning_rate": 1e-05,
      "loss": 0.2674,
      "step": 2870
    },
    {
      "epoch": 0.009868894478216471,
      "grad_norm": 1.0714973211288452,
      "learning_rate": 1e-05,
      "loss": 0.2634,
      "step": 2880
    },
    {
      "epoch": 0.009903161472932501,
      "grad_norm": 1.0942039489746094,
      "learning_rate": 1e-05,
      "loss": 0.2751,
      "step": 2890
    },
    {
      "epoch": 0.009937428467648531,
      "grad_norm": 1.1503955125808716,
      "learning_rate": 1e-05,
      "loss": 0.272,
      "step": 2900
    },
    {
      "epoch": 0.009971695462364559,
      "grad_norm": 1.1912988424301147,
      "learning_rate": 1e-05,
      "loss": 0.2645,
      "step": 2910
    },
    {
      "epoch": 0.010005962457080589,
      "grad_norm": 1.0941249132156372,
      "learning_rate": 1e-05,
      "loss": 0.2531,
      "step": 2920
    },
    {
      "epoch": 0.010040229451796619,
      "grad_norm": 1.2545968294143677,
      "learning_rate": 1e-05,
      "loss": 0.2562,
      "step": 2930
    },
    {
      "epoch": 0.010074496446512649,
      "grad_norm": 1.3605022430419922,
      "learning_rate": 1e-05,
      "loss": 0.2601,
      "step": 2940
    },
    {
      "epoch": 0.010108763441228677,
      "grad_norm": 1.0911775827407837,
      "learning_rate": 1e-05,
      "loss": 0.2605,
      "step": 2950
    },
    {
      "epoch": 0.010143030435944706,
      "grad_norm": 1.133867859840393,
      "learning_rate": 1e-05,
      "loss": 0.2554,
      "step": 2960
    },
    {
      "epoch": 0.010177297430660736,
      "grad_norm": 1.2511764764785767,
      "learning_rate": 1e-05,
      "loss": 0.2658,
      "step": 2970
    },
    {
      "epoch": 0.010211564425376766,
      "grad_norm": 1.1705303192138672,
      "learning_rate": 1e-05,
      "loss": 0.2737,
      "step": 2980
    },
    {
      "epoch": 0.010245831420092794,
      "grad_norm": 1.132071614265442,
      "learning_rate": 1e-05,
      "loss": 0.2665,
      "step": 2990
    },
    {
      "epoch": 0.010280098414808824,
      "grad_norm": 1.2301791906356812,
      "learning_rate": 1e-05,
      "loss": 0.2645,
      "step": 3000
    },
    {
      "epoch": 0.010280098414808824,
      "eval_cer": 12.938789910037043,
      "eval_loss": 0.2511608302593231,
      "eval_normalized_cer": 9.152677857713828,
      "eval_runtime": 227.4553,
      "eval_samples_per_second": 2.251,
      "eval_steps_per_second": 0.035,
      "step": 3000
    },
    {
      "epoch": 0.010314365409524854,
      "grad_norm": 1.1527032852172852,
      "learning_rate": 1e-05,
      "loss": 0.2508,
      "step": 3010
    },
    {
      "epoch": 0.010348632404240884,
      "grad_norm": 1.1162952184677124,
      "learning_rate": 1e-05,
      "loss": 0.2728,
      "step": 3020
    },
    {
      "epoch": 0.010382899398956912,
      "grad_norm": 1.062084436416626,
      "learning_rate": 1e-05,
      "loss": 0.2496,
      "step": 3030
    },
    {
      "epoch": 0.010417166393672942,
      "grad_norm": 1.1536457538604736,
      "learning_rate": 1e-05,
      "loss": 0.2633,
      "step": 3040
    },
    {
      "epoch": 0.010451433388388972,
      "grad_norm": 1.2096189260482788,
      "learning_rate": 1e-05,
      "loss": 0.2498,
      "step": 3050
    },
    {
      "epoch": 0.010485700383105001,
      "grad_norm": 0.9950299263000488,
      "learning_rate": 1e-05,
      "loss": 0.246,
      "step": 3060
    },
    {
      "epoch": 0.01051996737782103,
      "grad_norm": 1.0628243684768677,
      "learning_rate": 1e-05,
      "loss": 0.2544,
      "step": 3070
    },
    {
      "epoch": 0.01055423437253706,
      "grad_norm": 1.042555570602417,
      "learning_rate": 1e-05,
      "loss": 0.2401,
      "step": 3080
    },
    {
      "epoch": 0.01058850136725309,
      "grad_norm": 1.22646164894104,
      "learning_rate": 1e-05,
      "loss": 0.2503,
      "step": 3090
    },
    {
      "epoch": 0.010622768361969119,
      "grad_norm": 1.0862691402435303,
      "learning_rate": 1e-05,
      "loss": 0.2508,
      "step": 3100
    },
    {
      "epoch": 0.010657035356685147,
      "grad_norm": 1.148868203163147,
      "learning_rate": 1e-05,
      "loss": 0.2526,
      "step": 3110
    },
    {
      "epoch": 0.010691302351401177,
      "grad_norm": 1.1677169799804688,
      "learning_rate": 1e-05,
      "loss": 0.2481,
      "step": 3120
    },
    {
      "epoch": 0.010725569346117207,
      "grad_norm": 0.990696132183075,
      "learning_rate": 1e-05,
      "loss": 0.2421,
      "step": 3130
    },
    {
      "epoch": 0.010759836340833237,
      "grad_norm": 1.2869263887405396,
      "learning_rate": 1e-05,
      "loss": 0.2463,
      "step": 3140
    },
    {
      "epoch": 0.010794103335549265,
      "grad_norm": 1.0741721391677856,
      "learning_rate": 1e-05,
      "loss": 0.2617,
      "step": 3150
    },
    {
      "epoch": 0.010828370330265295,
      "grad_norm": 1.103102445602417,
      "learning_rate": 1e-05,
      "loss": 0.2442,
      "step": 3160
    },
    {
      "epoch": 0.010862637324981324,
      "grad_norm": 1.2562378644943237,
      "learning_rate": 1e-05,
      "loss": 0.2589,
      "step": 3170
    },
    {
      "epoch": 0.010896904319697354,
      "grad_norm": 1.2153191566467285,
      "learning_rate": 1e-05,
      "loss": 0.2417,
      "step": 3180
    },
    {
      "epoch": 0.010931171314413384,
      "grad_norm": 1.0507330894470215,
      "learning_rate": 1e-05,
      "loss": 0.2607,
      "step": 3190
    },
    {
      "epoch": 0.010965438309129412,
      "grad_norm": 1.1882787942886353,
      "learning_rate": 1e-05,
      "loss": 0.2469,
      "step": 3200
    },
    {
      "epoch": 0.010999705303845442,
      "grad_norm": 1.1394702196121216,
      "learning_rate": 1e-05,
      "loss": 0.2574,
      "step": 3210
    },
    {
      "epoch": 0.011033972298561472,
      "grad_norm": 1.2482614517211914,
      "learning_rate": 1e-05,
      "loss": 0.2456,
      "step": 3220
    },
    {
      "epoch": 0.011068239293277502,
      "grad_norm": 1.0362995862960815,
      "learning_rate": 1e-05,
      "loss": 0.2589,
      "step": 3230
    },
    {
      "epoch": 0.01110250628799353,
      "grad_norm": 1.1730456352233887,
      "learning_rate": 1e-05,
      "loss": 0.2497,
      "step": 3240
    },
    {
      "epoch": 0.01113677328270956,
      "grad_norm": 1.1563142538070679,
      "learning_rate": 1e-05,
      "loss": 0.2439,
      "step": 3250
    },
    {
      "epoch": 0.01117104027742559,
      "grad_norm": 1.1030769348144531,
      "learning_rate": 1e-05,
      "loss": 0.2671,
      "step": 3260
    },
    {
      "epoch": 0.01120530727214162,
      "grad_norm": 1.1719223260879517,
      "learning_rate": 1e-05,
      "loss": 0.2501,
      "step": 3270
    },
    {
      "epoch": 0.011239574266857648,
      "grad_norm": 1.1840440034866333,
      "learning_rate": 1e-05,
      "loss": 0.2643,
      "step": 3280
    },
    {
      "epoch": 0.011273841261573677,
      "grad_norm": 1.1928170919418335,
      "learning_rate": 1e-05,
      "loss": 0.2629,
      "step": 3290
    },
    {
      "epoch": 0.011308108256289707,
      "grad_norm": 1.0311812162399292,
      "learning_rate": 1e-05,
      "loss": 0.2552,
      "step": 3300
    },
    {
      "epoch": 0.011342375251005737,
      "grad_norm": 1.1625889539718628,
      "learning_rate": 1e-05,
      "loss": 0.2561,
      "step": 3310
    },
    {
      "epoch": 0.011376642245721765,
      "grad_norm": 1.0287625789642334,
      "learning_rate": 1e-05,
      "loss": 0.2341,
      "step": 3320
    },
    {
      "epoch": 0.011410909240437795,
      "grad_norm": 1.1310815811157227,
      "learning_rate": 1e-05,
      "loss": 0.2554,
      "step": 3330
    },
    {
      "epoch": 0.011445176235153825,
      "grad_norm": 1.1266168355941772,
      "learning_rate": 1e-05,
      "loss": 0.234,
      "step": 3340
    },
    {
      "epoch": 0.011479443229869855,
      "grad_norm": 1.1979014873504639,
      "learning_rate": 1e-05,
      "loss": 0.2559,
      "step": 3350
    },
    {
      "epoch": 0.011513710224585883,
      "grad_norm": 1.0378515720367432,
      "learning_rate": 1e-05,
      "loss": 0.2502,
      "step": 3360
    },
    {
      "epoch": 0.011547977219301913,
      "grad_norm": 1.1832512617111206,
      "learning_rate": 1e-05,
      "loss": 0.236,
      "step": 3370
    },
    {
      "epoch": 0.011582244214017942,
      "grad_norm": 0.9605569839477539,
      "learning_rate": 1e-05,
      "loss": 0.2349,
      "step": 3380
    },
    {
      "epoch": 0.011616511208733972,
      "grad_norm": 1.0463056564331055,
      "learning_rate": 1e-05,
      "loss": 0.2328,
      "step": 3390
    },
    {
      "epoch": 0.01165077820345,
      "grad_norm": 1.1021932363510132,
      "learning_rate": 1e-05,
      "loss": 0.2383,
      "step": 3400
    },
    {
      "epoch": 0.01168504519816603,
      "grad_norm": 1.040493130683899,
      "learning_rate": 1e-05,
      "loss": 0.2374,
      "step": 3410
    },
    {
      "epoch": 0.01171931219288206,
      "grad_norm": 1.1483063697814941,
      "learning_rate": 1e-05,
      "loss": 0.2398,
      "step": 3420
    },
    {
      "epoch": 0.01175357918759809,
      "grad_norm": 1.0316531658172607,
      "learning_rate": 1e-05,
      "loss": 0.2329,
      "step": 3430
    },
    {
      "epoch": 0.011787846182314118,
      "grad_norm": 1.1677886247634888,
      "learning_rate": 1e-05,
      "loss": 0.2493,
      "step": 3440
    },
    {
      "epoch": 0.011822113177030148,
      "grad_norm": 1.2078930139541626,
      "learning_rate": 1e-05,
      "loss": 0.2337,
      "step": 3450
    },
    {
      "epoch": 0.011856380171746178,
      "grad_norm": 1.178202509880066,
      "learning_rate": 1e-05,
      "loss": 0.239,
      "step": 3460
    },
    {
      "epoch": 0.011890647166462208,
      "grad_norm": 1.0453248023986816,
      "learning_rate": 1e-05,
      "loss": 0.2233,
      "step": 3470
    },
    {
      "epoch": 0.011924914161178236,
      "grad_norm": 1.0171067714691162,
      "learning_rate": 1e-05,
      "loss": 0.2338,
      "step": 3480
    },
    {
      "epoch": 0.011959181155894266,
      "grad_norm": 1.051792860031128,
      "learning_rate": 1e-05,
      "loss": 0.2394,
      "step": 3490
    },
    {
      "epoch": 0.011993448150610295,
      "grad_norm": 1.1237847805023193,
      "learning_rate": 1e-05,
      "loss": 0.2428,
      "step": 3500
    },
    {
      "epoch": 0.011993448150610295,
      "eval_cer": 13.071088375374845,
      "eval_loss": 0.25454944372177124,
      "eval_normalized_cer": 9.542366107114308,
      "eval_runtime": 228.9468,
      "eval_samples_per_second": 2.236,
      "eval_steps_per_second": 0.035,
      "step": 3500
    },
    {
      "epoch": 0.012027715145326325,
      "grad_norm": 1.1366350650787354,
      "learning_rate": 1e-05,
      "loss": 0.2353,
      "step": 3510
    },
    {
      "epoch": 0.012061982140042353,
      "grad_norm": 1.136927604675293,
      "learning_rate": 1e-05,
      "loss": 0.2358,
      "step": 3520
    },
    {
      "epoch": 0.012096249134758383,
      "grad_norm": 1.1875656843185425,
      "learning_rate": 1e-05,
      "loss": 0.2305,
      "step": 3530
    },
    {
      "epoch": 0.012130516129474413,
      "grad_norm": 1.2016057968139648,
      "learning_rate": 1e-05,
      "loss": 0.2435,
      "step": 3540
    },
    {
      "epoch": 0.012164783124190443,
      "grad_norm": 1.209622859954834,
      "learning_rate": 1e-05,
      "loss": 0.2361,
      "step": 3550
    },
    {
      "epoch": 0.012199050118906471,
      "grad_norm": 1.0696970224380493,
      "learning_rate": 1e-05,
      "loss": 0.2385,
      "step": 3560
    },
    {
      "epoch": 0.0122333171136225,
      "grad_norm": 1.2674167156219482,
      "learning_rate": 1e-05,
      "loss": 0.243,
      "step": 3570
    },
    {
      "epoch": 0.01226758410833853,
      "grad_norm": 1.2928141355514526,
      "learning_rate": 1e-05,
      "loss": 0.2491,
      "step": 3580
    },
    {
      "epoch": 0.01230185110305456,
      "grad_norm": 1.0642272233963013,
      "learning_rate": 1e-05,
      "loss": 0.2356,
      "step": 3590
    },
    {
      "epoch": 0.012336118097770589,
      "grad_norm": 1.0935972929000854,
      "learning_rate": 1e-05,
      "loss": 0.2389,
      "step": 3600
    },
    {
      "epoch": 0.012370385092486618,
      "grad_norm": 1.180668830871582,
      "learning_rate": 1e-05,
      "loss": 0.2409,
      "step": 3610
    },
    {
      "epoch": 0.012404652087202648,
      "grad_norm": 1.2312487363815308,
      "learning_rate": 1e-05,
      "loss": 0.2478,
      "step": 3620
    },
    {
      "epoch": 0.012438919081918678,
      "grad_norm": 0.947522759437561,
      "learning_rate": 1e-05,
      "loss": 0.2281,
      "step": 3630
    },
    {
      "epoch": 0.012473186076634706,
      "grad_norm": 1.0618727207183838,
      "learning_rate": 1e-05,
      "loss": 0.2423,
      "step": 3640
    },
    {
      "epoch": 0.012507453071350736,
      "grad_norm": 1.0766098499298096,
      "learning_rate": 1e-05,
      "loss": 0.2364,
      "step": 3650
    },
    {
      "epoch": 0.012541720066066766,
      "grad_norm": 1.1174747943878174,
      "learning_rate": 1e-05,
      "loss": 0.238,
      "step": 3660
    },
    {
      "epoch": 0.012575987060782796,
      "grad_norm": 1.1940118074417114,
      "learning_rate": 1e-05,
      "loss": 0.2212,
      "step": 3670
    },
    {
      "epoch": 0.012610254055498824,
      "grad_norm": 1.1407246589660645,
      "learning_rate": 1e-05,
      "loss": 0.2423,
      "step": 3680
    },
    {
      "epoch": 0.012644521050214854,
      "grad_norm": 1.2646050453186035,
      "learning_rate": 1e-05,
      "loss": 0.2252,
      "step": 3690
    },
    {
      "epoch": 0.012678788044930884,
      "grad_norm": 1.130337119102478,
      "learning_rate": 1e-05,
      "loss": 0.2131,
      "step": 3700
    },
    {
      "epoch": 0.012713055039646913,
      "grad_norm": 1.1432557106018066,
      "learning_rate": 1e-05,
      "loss": 0.2386,
      "step": 3710
    },
    {
      "epoch": 0.012747322034362941,
      "grad_norm": 1.1370545625686646,
      "learning_rate": 1e-05,
      "loss": 0.2347,
      "step": 3720
    },
    {
      "epoch": 0.012781589029078971,
      "grad_norm": 1.3126403093338013,
      "learning_rate": 1e-05,
      "loss": 0.2159,
      "step": 3730
    },
    {
      "epoch": 0.012815856023795001,
      "grad_norm": 1.2375295162200928,
      "learning_rate": 1e-05,
      "loss": 0.2275,
      "step": 3740
    },
    {
      "epoch": 0.012850123018511031,
      "grad_norm": 1.0877372026443481,
      "learning_rate": 1e-05,
      "loss": 0.2201,
      "step": 3750
    },
    {
      "epoch": 0.012884390013227059,
      "grad_norm": 1.1122978925704956,
      "learning_rate": 1e-05,
      "loss": 0.229,
      "step": 3760
    },
    {
      "epoch": 0.012918657007943089,
      "grad_norm": 1.0270159244537354,
      "learning_rate": 1e-05,
      "loss": 0.2313,
      "step": 3770
    },
    {
      "epoch": 0.012952924002659119,
      "grad_norm": 1.1370947360992432,
      "learning_rate": 1e-05,
      "loss": 0.229,
      "step": 3780
    },
    {
      "epoch": 0.012987190997375149,
      "grad_norm": 1.2888813018798828,
      "learning_rate": 1e-05,
      "loss": 0.2384,
      "step": 3790
    },
    {
      "epoch": 0.013021457992091178,
      "grad_norm": 1.2443634271621704,
      "learning_rate": 1e-05,
      "loss": 0.2218,
      "step": 3800
    },
    {
      "epoch": 0.013055724986807207,
      "grad_norm": 1.1919447183609009,
      "learning_rate": 1e-05,
      "loss": 0.2277,
      "step": 3810
    },
    {
      "epoch": 0.013089991981523236,
      "grad_norm": 1.140600562095642,
      "learning_rate": 1e-05,
      "loss": 0.2317,
      "step": 3820
    },
    {
      "epoch": 0.013124258976239266,
      "grad_norm": 1.074697494506836,
      "learning_rate": 1e-05,
      "loss": 0.2273,
      "step": 3830
    },
    {
      "epoch": 0.013158525970955296,
      "grad_norm": 1.1003391742706299,
      "learning_rate": 1e-05,
      "loss": 0.2217,
      "step": 3840
    },
    {
      "epoch": 0.013192792965671324,
      "grad_norm": 1.1427338123321533,
      "learning_rate": 1e-05,
      "loss": 0.2377,
      "step": 3850
    },
    {
      "epoch": 0.013227059960387354,
      "grad_norm": 1.0806514024734497,
      "learning_rate": 1e-05,
      "loss": 0.2332,
      "step": 3860
    },
    {
      "epoch": 0.013261326955103384,
      "grad_norm": 1.1547067165374756,
      "learning_rate": 1e-05,
      "loss": 0.2306,
      "step": 3870
    },
    {
      "epoch": 0.013295593949819414,
      "grad_norm": 1.2483099699020386,
      "learning_rate": 1e-05,
      "loss": 0.2166,
      "step": 3880
    },
    {
      "epoch": 0.013329860944535442,
      "grad_norm": 1.096939206123352,
      "learning_rate": 1e-05,
|
"loss": 0.2253, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 0.013364127939251472, |
|
"grad_norm": 1.1876115798950195, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2377, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.013398394933967502, |
|
"grad_norm": 1.1380902528762817, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2256, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 0.013432661928683531, |
|
"grad_norm": 1.0738089084625244, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2307, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 0.01346692892339956, |
|
"grad_norm": 1.0351170301437378, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2296, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 0.01350119591811559, |
|
"grad_norm": 1.2752678394317627, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2462, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 0.01353546291283162, |
|
"grad_norm": 1.2618532180786133, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2364, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.013569729907547649, |
|
"grad_norm": 1.1907076835632324, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2397, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 0.013603996902263677, |
|
"grad_norm": 0.9435076117515564, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2391, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 0.013638263896979707, |
|
"grad_norm": 1.0608407258987427, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2241, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 0.013672530891695737, |
|
"grad_norm": 1.0729584693908691, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2237, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 0.013706797886411767, |
|
"grad_norm": 1.2006182670593262, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2386, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.013706797886411767, |
|
"eval_cer": 12.594813900158758, |
|
"eval_loss": 0.25156331062316895, |
|
"eval_normalized_cer": 8.912869704236611, |
|
"eval_runtime": 228.7977, |
|
"eval_samples_per_second": 2.238, |
|
"eval_steps_per_second": 0.035, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.013741064881127795, |
|
"grad_norm": 1.2020457983016968, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2318, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 0.013775331875843825, |
|
"grad_norm": 1.0251790285110474, |
|
"learning_rate": 1e-05, |
|
"loss": 0.248, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 0.013809598870559854, |
|
"grad_norm": 1.160437822341919, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2385, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 0.013843865865275884, |
|
"grad_norm": 1.025770664215088, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2293, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 0.013878132859991912, |
|
"grad_norm": 1.111954689025879, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2377, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 0.013912399854707942, |
|
"grad_norm": 1.0644809007644653, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2195, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 0.013946666849423972, |
|
"grad_norm": 1.2926712036132812, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2508, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 0.013980933844140002, |
|
"grad_norm": 1.2169601917266846, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2401, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 0.01401520083885603, |
|
"grad_norm": 1.1396681070327759, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2305, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 0.01404946783357206, |
|
"grad_norm": 1.2242721319198608, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2301, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.01408373482828809, |
|
"grad_norm": 1.195324420928955, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2368, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 0.01411800182300412, |
|
"grad_norm": 1.2345412969589233, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2301, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 0.014152268817720148, |
|
"grad_norm": 1.1502156257629395, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2327, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 0.014186535812436177, |
|
"grad_norm": 1.2128121852874756, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2458, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 0.014220802807152207, |
|
"grad_norm": 1.2618858814239502, |
|
"learning_rate": 1e-05, |
|
"loss": 0.231, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 0.014255069801868237, |
|
"grad_norm": 1.0879299640655518, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2302, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 0.014289336796584265, |
|
"grad_norm": 0.9794358015060425, |
|
"learning_rate": 1e-05, |
|
"loss": 0.239, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 0.014323603791300295, |
|
"grad_norm": 1.1454006433486938, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2328, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 0.014357870786016325, |
|
"grad_norm": 1.223686933517456, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2211, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 0.014392137780732355, |
|
"grad_norm": 1.1423155069351196, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2391, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.014426404775448383, |
|
"grad_norm": 1.1027394533157349, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2279, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 0.014460671770164413, |
|
"grad_norm": 1.1777397394180298, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2293, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 0.014494938764880443, |
|
"grad_norm": 1.01688551902771, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2275, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 0.014529205759596472, |
|
"grad_norm": 1.1520488262176514, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2301, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 0.0145634727543125, |
|
"grad_norm": 1.2820484638214111, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2205, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 0.01459773974902853, |
|
"grad_norm": 1.169291377067566, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2389, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 0.01463200674374456, |
|
"grad_norm": 1.1135886907577515, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2384, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 0.01466627373846059, |
|
"grad_norm": 1.0846205949783325, |
|
"learning_rate": 1e-05, |
|
"loss": 0.223, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 0.014700540733176618, |
|
"grad_norm": 0.981488049030304, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2092, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 0.014734807727892648, |
|
"grad_norm": 1.0437407493591309, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2293, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.014769074722608678, |
|
"grad_norm": 1.005792260169983, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2286, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 0.014803341717324708, |
|
"grad_norm": 1.1903142929077148, |
|
"learning_rate": 1e-05, |
|
"loss": 0.231, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 0.014837608712040736, |
|
"grad_norm": 1.1308993101119995, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2458, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 0.014871875706756766, |
|
"grad_norm": 1.0948210954666138, |
|
"learning_rate": 1e-05, |
|
"loss": 0.213, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 0.014906142701472795, |
|
"grad_norm": 1.2674663066864014, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2432, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 0.014940409696188825, |
|
"grad_norm": 1.4228485822677612, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2491, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 0.014974676690904853, |
|
"grad_norm": 1.1533160209655762, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2485, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 0.015008943685620883, |
|
"grad_norm": 1.1454424858093262, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2635, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 0.015043210680336913, |
|
"grad_norm": 1.2944281101226807, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2651, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 0.015077477675052943, |
|
"grad_norm": 1.2148584127426147, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2694, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.015111744669768971, |
|
"grad_norm": 1.091282844543457, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2672, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 0.015146011664485001, |
|
"grad_norm": 1.2254445552825928, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2583, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 0.01518027865920103, |
|
"grad_norm": 1.367516279220581, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2586, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 0.01521454565391706, |
|
"grad_norm": 1.1858383417129517, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2764, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 0.01524881264863309, |
|
"grad_norm": 1.1331857442855835, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2577, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 0.015283079643349119, |
|
"grad_norm": 1.2343239784240723, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2661, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 0.015317346638065148, |
|
"grad_norm": 1.0893656015396118, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2538, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 0.015351613632781178, |
|
"grad_norm": 1.1467857360839844, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2496, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 0.015385880627497208, |
|
"grad_norm": 1.2753335237503052, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2797, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 0.015420147622213236, |
|
"grad_norm": 1.1355762481689453, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2672, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.015420147622213236, |
|
"eval_cer": 13.159287352266713, |
|
"eval_loss": 0.24996142089366913, |
|
"eval_normalized_cer": 9.59232613908873, |
|
"eval_runtime": 228.0477, |
|
"eval_samples_per_second": 2.245, |
|
"eval_steps_per_second": 0.035, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.015454414616929266, |
|
"grad_norm": 1.2256762981414795, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2662, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 0.015488681611645296, |
|
"grad_norm": 1.0631389617919922, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2596, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 0.015522948606361326, |
|
"grad_norm": 1.0759390592575073, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2553, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 0.015557215601077354, |
|
"grad_norm": 1.1867231130599976, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2498, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 0.015591482595793384, |
|
"grad_norm": 1.1203633546829224, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2732, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 0.015625749590509413, |
|
"grad_norm": 1.1223920583724976, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2535, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 0.015660016585225443, |
|
"grad_norm": 1.066497564315796, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2456, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 0.015694283579941473, |
|
"grad_norm": 1.2520133256912231, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2558, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 0.015728550574657503, |
|
"grad_norm": 1.3602423667907715, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2698, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 0.01576281756937353, |
|
"grad_norm": 1.1748729944229126, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2621, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.01579708456408956, |
|
"grad_norm": 0.9431802034378052, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2433, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 0.01583135155880559, |
|
"grad_norm": 1.0146753787994385, |
|
"learning_rate": 1e-05, |
|
"loss": 0.239, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 0.01586561855352162, |
|
"grad_norm": 1.1340891122817993, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2437, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 0.01589988554823765, |
|
"grad_norm": 1.1456454992294312, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2307, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 0.01593415254295368, |
|
"grad_norm": 1.1026827096939087, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2295, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 0.01596841953766971, |
|
"grad_norm": 1.2215088605880737, |
|
"learning_rate": 1e-05, |
|
"loss": 0.245, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 0.01600268653238574, |
|
"grad_norm": 1.1760615110397339, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2461, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 0.016036953527101765, |
|
"grad_norm": 1.1690876483917236, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2282, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 0.016071220521817794, |
|
"grad_norm": 1.182026743888855, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2351, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 0.016105487516533824, |
|
"grad_norm": 1.0182474851608276, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2284, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.016139754511249854, |
|
"grad_norm": 1.2531431913375854, |
|
"learning_rate": 1e-05, |
|
"loss": 0.244, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 0.016174021505965884, |
|
"grad_norm": 0.9633692502975464, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2297, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 0.016208288500681914, |
|
"grad_norm": 1.1144667863845825, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2475, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 0.016242555495397944, |
|
"grad_norm": 1.0768555402755737, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2216, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 0.016276822490113974, |
|
"grad_norm": 1.2052035331726074, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2278, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 0.01631108948483, |
|
"grad_norm": 1.0291496515274048, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2226, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 0.01634535647954603, |
|
"grad_norm": 1.2100346088409424, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2278, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 0.01637962347426206, |
|
"grad_norm": 1.214861273765564, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2313, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 0.01641389046897809, |
|
"grad_norm": 1.137210726737976, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2235, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 0.01644815746369412, |
|
"grad_norm": 1.046673059463501, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2231, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.01648242445841015, |
|
"grad_norm": 1.08164644241333, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2235, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 0.01651669145312618, |
|
"grad_norm": 1.1432491540908813, |
|
"learning_rate": 1e-05, |
|
"loss": 0.246, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 0.01655095844784221, |
|
"grad_norm": 1.1684173345565796, |
|
"learning_rate": 1e-05, |
|
"loss": 0.218, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 0.016585225442558235, |
|
"grad_norm": 1.0895615816116333, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2109, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 0.016619492437274265, |
|
"grad_norm": 1.1505770683288574, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2283, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 0.016653759431990295, |
|
"grad_norm": 1.3385730981826782, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2344, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 0.016688026426706325, |
|
"grad_norm": 1.109035611152649, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2558, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 0.016722293421422355, |
|
"grad_norm": 1.1834880113601685, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2247, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 0.016756560416138384, |
|
"grad_norm": 1.2369152307510376, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2449, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 0.016790827410854414, |
|
"grad_norm": 1.131173014640808, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2458, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.016825094405570444, |
|
"grad_norm": 1.1100351810455322, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2523, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 0.01685936140028647, |
|
"grad_norm": 1.1857340335845947, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2523, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 0.0168936283950025, |
|
"grad_norm": 1.1568819284439087, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2549, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 0.01692789538971853, |
|
"grad_norm": 1.104872465133667, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2449, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 0.01696216238443456, |
|
"grad_norm": 1.0907660722732544, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2496, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 0.01699642937915059, |
|
"grad_norm": 1.1100903749465942, |
|
"learning_rate": 1e-05, |
|
"loss": 0.239, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 0.01703069637386662, |
|
"grad_norm": 1.141200065612793, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2459, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 0.01706496336858265, |
|
"grad_norm": 1.2853361368179321, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2452, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 0.01709923036329868, |
|
"grad_norm": 1.1542645692825317, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2635, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 0.017133497358014706, |
|
"grad_norm": 1.2022640705108643, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2371, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.017133497358014706, |
|
"eval_cer": 12.92115011465867, |
|
"eval_loss": 0.2521001100540161, |
|
"eval_normalized_cer": 9.30255795363709, |
|
"eval_runtime": 227.4868, |
|
"eval_samples_per_second": 2.251, |
|
"eval_steps_per_second": 0.035, |
|
"step": 5000 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 291826, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 9223372036854775807, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.0137951535104e+21, |
|
"train_batch_size": 128, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
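The log above interleaves two record shapes: a training record every 10 steps (matching "logging_steps": 10, carrying "loss" and "grad_norm") and an evaluation record every 500 steps (the "eval_*" entries at steps 3500, 4000, 4500, and 5000). A minimal sketch of how one might separate and inspect the two record types with only the standard library; the filename trainer_state.json is the conventional Hugging Face Trainer artifact name and is an assumption here, so adjust the path to your checkpoint directory.

```python
import json

# Load the serialized trainer state (path assumed; see lead-in above).
with open("trainer_state.json", encoding="utf-8") as f:
    state = json.load(f)

# Training records carry the "loss" key; evaluation records carry
# "eval_loss" instead, so exact key membership separates the two.
train_logs = [r for r in state["log_history"] if "loss" in r]
eval_logs = [r for r in state["log_history"] if "eval_loss" in r]

# Print the evaluation trajectory (CER = character error rate).
for r in eval_logs:
    print(
        f"step {r['step']:>5}: eval_loss={r['eval_loss']:.4f}  "
        f"eval_cer={r['eval_cer']:.3f}  "
        f"eval_normalized_cer={r['eval_normalized_cer']:.3f}"
    )
```

Run over this section of the log, the loop would show eval_normalized_cer dipping to about 8.91 at step 4000 and rising again at steps 4500 (9.59) and 5000 (9.30), making step 4000 the low point of this stretch.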