|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 39.96271826333176, |
|
"eval_steps": 100.0, |
|
"global_step": 42360, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.4719207173194903, |
|
"grad_norm": 60.370758056640625, |
|
"learning_rate": 1.188e-06, |
|
"loss": 28.5881, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.9438414346389806, |
|
"grad_norm": 47.990840911865234, |
|
"learning_rate": 2.3880000000000003e-06, |
|
"loss": 11.3302, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_cer": 1.1283783783783783, |
|
"eval_loss": 10.027889251708984, |
|
"eval_runtime": 143.8913, |
|
"eval_samples_per_second": 47.536, |
|
"eval_steps_per_second": 5.942, |
|
"eval_wer": 1.0, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 1.4152902312411515, |
|
"grad_norm": 40.47557067871094, |
|
"learning_rate": 3.588e-06, |
|
"loss": 9.2112, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.8872109485606419, |
|
"grad_norm": 25.79726219177246, |
|
"learning_rate": 4.788e-06, |
|
"loss": 7.0068, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_cer": 1.1283487434803223, |
|
"eval_loss": 5.3823137283325195, |
|
"eval_runtime": 138.9276, |
|
"eval_samples_per_second": 49.234, |
|
"eval_steps_per_second": 6.154, |
|
"eval_wer": 1.0, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 2.3586597451628126, |
|
"grad_norm": 10.715571403503418, |
|
"learning_rate": 5.988e-06, |
|
"loss": 4.9098, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.830580462482303, |
|
"grad_norm": 2.4069929122924805, |
|
"learning_rate": 7.1880000000000005e-06, |
|
"loss": 3.682, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_cer": 1.1283487434803223, |
|
"eval_loss": 3.189443349838257, |
|
"eval_runtime": 133.8287, |
|
"eval_samples_per_second": 51.11, |
|
"eval_steps_per_second": 6.389, |
|
"eval_wer": 1.0, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 3.302029259084474, |
|
"grad_norm": 1.6513175964355469, |
|
"learning_rate": 8.388e-06, |
|
"loss": 3.1332, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 3.773949976403964, |
|
"grad_norm": 1.9935938119888306, |
|
"learning_rate": 9.588e-06, |
|
"loss": 2.725, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_cer": 1.1283783783783783, |
|
"eval_loss": 2.161285400390625, |
|
"eval_runtime": 133.493, |
|
"eval_samples_per_second": 51.239, |
|
"eval_steps_per_second": 6.405, |
|
"eval_wer": 1.0, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 4.245398773006135, |
|
"grad_norm": 2.0250744819641113, |
|
"learning_rate": 1.0787999999999999e-05, |
|
"loss": 2.2125, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 4.717319490325625, |
|
"grad_norm": 6.084537506103516, |
|
"learning_rate": 1.1988000000000001e-05, |
|
"loss": 1.4317, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_cer": 1.1232219061166429, |
|
"eval_loss": 0.7817353010177612, |
|
"eval_runtime": 140.0025, |
|
"eval_samples_per_second": 48.856, |
|
"eval_steps_per_second": 6.107, |
|
"eval_wer": 1.0, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 5.188768286927796, |
|
"grad_norm": 8.009575843811035, |
|
"learning_rate": 1.3188e-05, |
|
"loss": 0.8716, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 5.660689004247287, |
|
"grad_norm": 6.703843116760254, |
|
"learning_rate": 1.4388000000000002e-05, |
|
"loss": 0.5734, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_cer": 1.0337541488857278, |
|
"eval_loss": 0.2728799879550934, |
|
"eval_runtime": 137.2562, |
|
"eval_samples_per_second": 49.834, |
|
"eval_steps_per_second": 6.229, |
|
"eval_wer": 0.9991228070175439, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 6.132137800849457, |
|
"grad_norm": 6.002254009246826, |
|
"learning_rate": 1.5588e-05, |
|
"loss": 0.4288, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 6.604058518168948, |
|
"grad_norm": 8.313450813293457, |
|
"learning_rate": 1.6788e-05, |
|
"loss": 0.3485, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_cer": 1.0258712660028448, |
|
"eval_loss": 0.16624058783054352, |
|
"eval_runtime": 68.7385, |
|
"eval_samples_per_second": 99.508, |
|
"eval_steps_per_second": 12.438, |
|
"eval_wer": 0.9988304093567252, |
|
"step": 7420 |
|
}, |
|
{ |
|
"epoch": 7.0755073147711185, |
|
"grad_norm": 2.5701239109039307, |
|
"learning_rate": 1.7988e-05, |
|
"loss": 0.2995, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 7.547428032090608, |
|
"grad_norm": 5.836677551269531, |
|
"learning_rate": 1.9188e-05, |
|
"loss": 0.2631, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_cer": 1.0223743480322427, |
|
"eval_loss": 0.083324134349823, |
|
"eval_runtime": 70.5926, |
|
"eval_samples_per_second": 96.894, |
|
"eval_steps_per_second": 12.112, |
|
"eval_wer": 0.9988304093567252, |
|
"step": 8480 |
|
}, |
|
{ |
|
"epoch": 8.018876828692779, |
|
"grad_norm": 5.063877105712891, |
|
"learning_rate": 2.0388e-05, |
|
"loss": 0.2388, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 8.49079754601227, |
|
"grad_norm": 3.660646677017212, |
|
"learning_rate": 2.1588e-05, |
|
"loss": 0.218, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 8.96271826333176, |
|
"grad_norm": 6.175236225128174, |
|
"learning_rate": 2.2788000000000003e-05, |
|
"loss": 0.2068, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_cer": 1.0209518729255571, |
|
"eval_loss": 0.06159723177552223, |
|
"eval_runtime": 68.413, |
|
"eval_samples_per_second": 99.981, |
|
"eval_steps_per_second": 12.498, |
|
"eval_wer": 0.9989766081871345, |
|
"step": 9540 |
|
}, |
|
{ |
|
"epoch": 9.43416705993393, |
|
"grad_norm": 0.2773754894733429, |
|
"learning_rate": 2.3988e-05, |
|
"loss": 0.2023, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 9.906087777253422, |
|
"grad_norm": 1.5312055349349976, |
|
"learning_rate": 2.5188e-05, |
|
"loss": 0.1846, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_cer": 1.0198850165955429, |
|
"eval_loss": 0.07861880213022232, |
|
"eval_runtime": 59.5147, |
|
"eval_samples_per_second": 114.929, |
|
"eval_steps_per_second": 14.366, |
|
"eval_wer": 0.9988304093567252, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 10.377536573855592, |
|
"grad_norm": 6.2259297370910645, |
|
"learning_rate": 2.6388000000000002e-05, |
|
"loss": 0.1781, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 10.849457291175083, |
|
"grad_norm": 5.571898460388184, |
|
"learning_rate": 2.7585600000000002e-05, |
|
"loss": 0.1792, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_cer": 1.019825746799431, |
|
"eval_loss": 0.0471838042140007, |
|
"eval_runtime": 65.0557, |
|
"eval_samples_per_second": 105.141, |
|
"eval_steps_per_second": 13.143, |
|
"eval_wer": 0.9989766081871345, |
|
"step": 11660 |
|
}, |
|
{ |
|
"epoch": 11.320906087777253, |
|
"grad_norm": 3.878492832183838, |
|
"learning_rate": 2.87856e-05, |
|
"loss": 0.1662, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 11.792826805096745, |
|
"grad_norm": 6.27423095703125, |
|
"learning_rate": 2.99856e-05, |
|
"loss": 0.1669, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_cer": 1.0206555239449977, |
|
"eval_loss": 0.05153690651059151, |
|
"eval_runtime": 65.9815, |
|
"eval_samples_per_second": 103.665, |
|
"eval_steps_per_second": 12.958, |
|
"eval_wer": 0.9988304093567252, |
|
"step": 12720 |
|
}, |
|
{ |
|
"epoch": 12.264275601698914, |
|
"grad_norm": 3.428331136703491, |
|
"learning_rate": 2.9979744753439937e-05, |
|
"loss": 0.159, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 12.736196319018404, |
|
"grad_norm": 3.60664701461792, |
|
"learning_rate": 2.991821289408868e-05, |
|
"loss": 0.1609, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_cer": 1.0218705547652915, |
|
"eval_loss": 0.05291549116373062, |
|
"eval_runtime": 81.1439, |
|
"eval_samples_per_second": 84.295, |
|
"eval_steps_per_second": 10.537, |
|
"eval_wer": 0.9988304093567252, |
|
"step": 13780 |
|
}, |
|
{ |
|
"epoch": 13.207645115620576, |
|
"grad_norm": 3.0601279735565186, |
|
"learning_rate": 2.981532510892707e-05, |
|
"loss": 0.156, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 13.679565832940066, |
|
"grad_norm": 1.1705658435821533, |
|
"learning_rate": 2.9671447959906427e-05, |
|
"loss": 0.1508, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_cer": 1.0183143669985775, |
|
"eval_loss": 0.043218065053224564, |
|
"eval_runtime": 72.3333, |
|
"eval_samples_per_second": 94.562, |
|
"eval_steps_per_second": 11.82, |
|
"eval_wer": 0.9988304093567252, |
|
"step": 14840 |
|
}, |
|
{ |
|
"epoch": 14.151014629542237, |
|
"grad_norm": 3.3974997997283936, |
|
"learning_rate": 2.9486979510030355e-05, |
|
"loss": 0.1454, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 14.622935346861727, |
|
"grad_norm": 1.8044121265411377, |
|
"learning_rate": 2.9262430125717204e-05, |
|
"loss": 0.1427, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_cer": 1.0147878141299194, |
|
"eval_loss": 0.08599487692117691, |
|
"eval_runtime": 59.8056, |
|
"eval_samples_per_second": 114.37, |
|
"eval_steps_per_second": 14.296, |
|
"eval_wer": 0.9988304093567252, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 15.094384143463898, |
|
"grad_norm": 2.6493678092956543, |
|
"learning_rate": 2.899842106477605e-05, |
|
"loss": 0.1398, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 15.566304860783388, |
|
"grad_norm": 4.24639892578125, |
|
"learning_rate": 2.869568275758094e-05, |
|
"loss": 0.1316, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_cer": 1.0185218112849692, |
|
"eval_loss": 0.0349772572517395, |
|
"eval_runtime": 60.1896, |
|
"eval_samples_per_second": 113.641, |
|
"eval_steps_per_second": 14.205, |
|
"eval_wer": 0.9988304093567252, |
|
"step": 16960 |
|
}, |
|
{ |
|
"epoch": 16.037753657385558, |
|
"grad_norm": 4.061686992645264, |
|
"learning_rate": 2.8355052786198856e-05, |
|
"loss": 0.1306, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 16.50967437470505, |
|
"grad_norm": 2.1003165245056152, |
|
"learning_rate": 2.7977473567062487e-05, |
|
"loss": 0.1248, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 16.98159509202454, |
|
"grad_norm": 3.3991761207580566, |
|
"learning_rate": 2.7563989743599158e-05, |
|
"loss": 0.1296, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_cer": 1.0190552394499763, |
|
"eval_loss": 0.04494578763842583, |
|
"eval_runtime": 86.2502, |
|
"eval_samples_per_second": 79.304, |
|
"eval_steps_per_second": 9.913, |
|
"eval_wer": 0.9988304093567252, |
|
"step": 18020 |
|
}, |
|
{ |
|
"epoch": 17.45304388862671, |
|
"grad_norm": 3.5397164821624756, |
|
"learning_rate": 2.7116675667417045e-05, |
|
"loss": 0.1239, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 17.9249646059462, |
|
"grad_norm": 3.1016619205474854, |
|
"learning_rate": 2.663497648875447e-05, |
|
"loss": 0.1236, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_cer": 1.0170400663821717, |
|
"eval_loss": 0.04502090439200401, |
|
"eval_runtime": 67.5469, |
|
"eval_samples_per_second": 101.263, |
|
"eval_steps_per_second": 12.658, |
|
"eval_wer": 0.9988304093567252, |
|
"step": 19080 |
|
}, |
|
{ |
|
"epoch": 18.396413402548372, |
|
"grad_norm": 1.3774545192718506, |
|
"learning_rate": 2.6121086974660007e-05, |
|
"loss": 0.1168, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 18.86833411986786, |
|
"grad_norm": 3.927860736846924, |
|
"learning_rate": 2.557642889644962e-05, |
|
"loss": 0.1149, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_cer": 1.018136557610242, |
|
"eval_loss": 0.03580431640148163, |
|
"eval_runtime": 62.0438, |
|
"eval_samples_per_second": 110.245, |
|
"eval_steps_per_second": 13.781, |
|
"eval_wer": 0.9989766081871345, |
|
"step": 20140 |
|
}, |
|
{ |
|
"epoch": 19.339782916470032, |
|
"grad_norm": 1.4613689184188843, |
|
"learning_rate": 2.500368515269978e-05, |
|
"loss": 0.1097, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 19.811703633789524, |
|
"grad_norm": 1.7840094566345215, |
|
"learning_rate": 2.440214531030174e-05, |
|
"loss": 0.1149, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_cer": 1.0176327643432908, |
|
"eval_loss": 0.0317009836435318, |
|
"eval_runtime": 64.2537, |
|
"eval_samples_per_second": 106.453, |
|
"eval_steps_per_second": 13.307, |
|
"eval_wer": 0.9989766081871345, |
|
"step": 21200 |
|
}, |
|
{ |
|
"epoch": 20.283152430391695, |
|
"grad_norm": 9.317741394042969, |
|
"learning_rate": 2.3774592676935842e-05, |
|
"loss": 0.1096, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 20.755073147711183, |
|
"grad_norm": 2.2778842449188232, |
|
"learning_rate": 2.3122763494165503e-05, |
|
"loss": 0.106, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_cer": 1.0170104314841157, |
|
"eval_loss": 0.03687233105301857, |
|
"eval_runtime": 70.0875, |
|
"eval_samples_per_second": 97.592, |
|
"eval_steps_per_second": 12.199, |
|
"eval_wer": 0.9988304093567252, |
|
"step": 22260 |
|
}, |
|
{ |
|
"epoch": 21.226521944313355, |
|
"grad_norm": 5.864170551300049, |
|
"learning_rate": 2.2449830971856908e-05, |
|
"loss": 0.1042, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 21.698442661632846, |
|
"grad_norm": 1.5493875741958618, |
|
"learning_rate": 2.1754960402430945e-05, |
|
"loss": 0.102, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_cer": 1.018018018018018, |
|
"eval_loss": 0.034220367670059204, |
|
"eval_runtime": 60.8599, |
|
"eval_samples_per_second": 112.389, |
|
"eval_steps_per_second": 14.049, |
|
"eval_wer": 0.9988304093567252, |
|
"step": 23320 |
|
}, |
|
{ |
|
"epoch": 22.169891458235018, |
|
"grad_norm": 2.6867339611053467, |
|
"learning_rate": 2.1041400973479267e-05, |
|
"loss": 0.099, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 22.641812175554506, |
|
"grad_norm": 6.666200637817383, |
|
"learning_rate": 2.031112688042812e-05, |
|
"loss": 0.1011, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_cer": 1.0178994784257942, |
|
"eval_loss": 0.04105741158127785, |
|
"eval_runtime": 63.1164, |
|
"eval_samples_per_second": 108.371, |
|
"eval_steps_per_second": 13.546, |
|
"eval_wer": 0.9988304093567252, |
|
"step": 24380 |
|
}, |
|
{ |
|
"epoch": 23.113260972156677, |
|
"grad_norm": 2.3484959602355957, |
|
"learning_rate": 1.956766180067445e-05, |
|
"loss": 0.0991, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 23.58518168947617, |
|
"grad_norm": 3.396759510040283, |
|
"learning_rate": 1.881008353783706e-05, |
|
"loss": 0.0948, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_cer": 1.0163288288288288, |
|
"eval_loss": 0.031411658972501755, |
|
"eval_runtime": 60.6777, |
|
"eval_samples_per_second": 112.727, |
|
"eval_steps_per_second": 14.091, |
|
"eval_wer": 0.9988304093567252, |
|
"step": 25440 |
|
}, |
|
{ |
|
"epoch": 24.05663048607834, |
|
"grad_norm": 0.44371920824050903, |
|
"learning_rate": 1.8041963967312025e-05, |
|
"loss": 0.0963, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 24.52855120339783, |
|
"grad_norm": 2.3664116859436035, |
|
"learning_rate": 1.7265428235347026e-05, |
|
"loss": 0.0906, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"grad_norm": 2.171415090560913, |
|
"learning_rate": 1.6482624773072205e-05, |
|
"loss": 0.0934, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_cer": 1.017454954954955, |
|
"eval_loss": 0.030156882479786873, |
|
"eval_runtime": 61.4594, |
|
"eval_samples_per_second": 111.293, |
|
"eval_steps_per_second": 13.912, |
|
"eval_wer": 0.9988304093567252, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 25.47192071731949, |
|
"grad_norm": 3.737889051437378, |
|
"learning_rate": 1.5697295811286228e-05, |
|
"loss": 0.0844, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 25.94384143463898, |
|
"grad_norm": 5.188897609710693, |
|
"learning_rate": 1.4908467225519327e-05, |
|
"loss": 0.0843, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_cer": 1.0172475106685632, |
|
"eval_loss": 0.04401924088597298, |
|
"eval_runtime": 59.6879, |
|
"eval_samples_per_second": 114.596, |
|
"eval_steps_per_second": 14.325, |
|
"eval_wer": 0.9988304093567252, |
|
"step": 27560 |
|
}, |
|
{ |
|
"epoch": 26.41529023124115, |
|
"grad_norm": 3.3139688968658447, |
|
"learning_rate": 1.4119891882267897e-05, |
|
"loss": 0.0789, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 26.887210948560643, |
|
"grad_norm": 3.2081096172332764, |
|
"learning_rate": 1.3333751522495247e-05, |
|
"loss": 0.0833, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_cer": 1.016743717401612, |
|
"eval_loss": 0.034057337790727615, |
|
"eval_runtime": 60.1277, |
|
"eval_samples_per_second": 113.758, |
|
"eval_steps_per_second": 14.22, |
|
"eval_wer": 0.9988304093567252, |
|
"step": 28620 |
|
}, |
|
{ |
|
"epoch": 27.35865974516281, |
|
"grad_norm": 1.3224974870681763, |
|
"learning_rate": 1.2552221150328132e-05, |
|
"loss": 0.0749, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 27.830580462482303, |
|
"grad_norm": 0.9007667303085327, |
|
"learning_rate": 1.1779004344640104e-05, |
|
"loss": 0.0781, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_cer": 1.0157064959696538, |
|
"eval_loss": 0.05653638020157814, |
|
"eval_runtime": 63.0029, |
|
"eval_samples_per_second": 108.566, |
|
"eval_steps_per_second": 13.571, |
|
"eval_wer": 0.9988304093567252, |
|
"step": 29680 |
|
}, |
|
{ |
|
"epoch": 28.302029259084474, |
|
"grad_norm": 1.9431039094924927, |
|
"learning_rate": 1.101314200491722e-05, |
|
"loss": 0.072, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 28.773949976403966, |
|
"grad_norm": 0.08316487073898315, |
|
"learning_rate": 1.0258310052432775e-05, |
|
"loss": 0.0741, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_cer": 1.0160917496443813, |
|
"eval_loss": 0.03570393845438957, |
|
"eval_runtime": 61.5019, |
|
"eval_samples_per_second": 111.216, |
|
"eval_steps_per_second": 13.902, |
|
"eval_wer": 0.9988304093567252, |
|
"step": 30740 |
|
}, |
|
{ |
|
"epoch": 29.245398773006134, |
|
"grad_norm": 0.031967077404260635, |
|
"learning_rate": 9.516596870757504e-06, |
|
"loss": 0.0717, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 29.717319490325625, |
|
"grad_norm": 0.1671179085969925, |
|
"learning_rate": 8.79005454793263e-06, |
|
"loss": 0.0704, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_cer": 1.016239924134661, |
|
"eval_loss": 0.030619077384471893, |
|
"eval_runtime": 59.7711, |
|
"eval_samples_per_second": 114.437, |
|
"eval_steps_per_second": 14.305, |
|
"eval_wer": 0.9988304093567252, |
|
"step": 31800 |
|
}, |
|
{ |
|
"epoch": 30.188768286927797, |
|
"grad_norm": 4.69877815246582, |
|
"learning_rate": 8.080693198985019e-06, |
|
"loss": 0.0723, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 30.660689004247285, |
|
"grad_norm": 1.8107829093933105, |
|
"learning_rate": 7.39319559220429e-06, |
|
"loss": 0.0678, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_cer": 1.0159435751541015, |
|
"eval_loss": 0.02890847437083721, |
|
"eval_runtime": 62.096, |
|
"eval_samples_per_second": 110.152, |
|
"eval_steps_per_second": 13.769, |
|
"eval_wer": 0.9988304093567252, |
|
"step": 32860 |
|
}, |
|
{ |
|
"epoch": 31.132137800849456, |
|
"grad_norm": 2.0504305362701416, |
|
"learning_rate": 6.7239430275857026e-06, |
|
"loss": 0.0628, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 31.604058518168948, |
|
"grad_norm": 3.536367654800415, |
|
"learning_rate": 6.077587720237289e-06, |
|
"loss": 0.067, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_cer": 1.0158843053579896, |
|
"eval_loss": 0.027865121141076088, |
|
"eval_runtime": 60.0098, |
|
"eval_samples_per_second": 113.981, |
|
"eval_steps_per_second": 14.248, |
|
"eval_wer": 0.9988304093567252, |
|
"step": 33920 |
|
}, |
|
{ |
|
"epoch": 32.075507314771116, |
|
"grad_norm": 0.30863332748413086, |
|
"learning_rate": 5.4559179328316745e-06, |
|
"loss": 0.0617, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 32.54742803209061, |
|
"grad_norm": 1.2579513788223267, |
|
"learning_rate": 4.860653630940242e-06, |
|
"loss": 0.0641, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_cer": 1.0159732100521575, |
|
"eval_loss": 0.032531462609767914, |
|
"eval_runtime": 61.6714, |
|
"eval_samples_per_second": 110.91, |
|
"eval_steps_per_second": 13.864, |
|
"eval_wer": 0.9988304093567252, |
|
"step": 34980 |
|
}, |
|
{ |
|
"epoch": 33.01887682869278, |
|
"grad_norm": 1.1170421838760376, |
|
"learning_rate": 4.293441724427136e-06, |
|
"loss": 0.0582, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 33.49079754601227, |
|
"grad_norm": 2.8375914096832275, |
|
"learning_rate": 3.7558515109655663e-06, |
|
"loss": 0.0609, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 33.96271826333176, |
|
"grad_norm": 0.19227160513401031, |
|
"learning_rate": 3.249370334282695e-06, |
|
"loss": 0.0595, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_cer": 1.0166251778093884, |
|
"eval_loss": 0.033033788204193115, |
|
"eval_runtime": 59.4597, |
|
"eval_samples_per_second": 115.036, |
|
"eval_steps_per_second": 14.379, |
|
"eval_wer": 0.9988304093567252, |
|
"step": 36040 |
|
}, |
|
{ |
|
"epoch": 34.434167059933934, |
|
"grad_norm": 1.7270156145095825, |
|
"learning_rate": 2.775399469145443e-06, |
|
"loss": 0.0614, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 34.90608777725342, |
|
"grad_norm": 0.2758292257785797, |
|
"learning_rate": 2.336095963596342e-06, |
|
"loss": 0.055, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_cer": 1.0164177335229967, |
|
"eval_loss": 0.030884480103850365, |
|
"eval_runtime": 58.7794, |
|
"eval_samples_per_second": 116.367, |
|
"eval_steps_per_second": 14.546, |
|
"eval_wer": 0.9988304093567252, |
|
"step": 37100 |
|
}, |
|
{ |
|
"epoch": 35.37753657385559, |
|
"grad_norm": 2.685821771621704, |
|
"learning_rate": 1.9309149042102644e-06, |
|
"loss": 0.0564, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 35.849457291175085, |
|
"grad_norm": 2.9631125926971436, |
|
"learning_rate": 1.5618919095549582e-06, |
|
"loss": 0.055, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_cer": 1.016714082503556, |
|
"eval_loss": 0.03032144159078598, |
|
"eval_runtime": 71.6691, |
|
"eval_samples_per_second": 95.439, |
|
"eval_steps_per_second": 11.93, |
|
"eval_wer": 0.9988304093567252, |
|
"step": 38160 |
|
}, |
|
{ |
|
"epoch": 36.32090608777725, |
|
"grad_norm": 0.7306509613990784, |
|
"learning_rate": 1.2300479506647383e-06, |
|
"loss": 0.0554, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 36.792826805096745, |
|
"grad_norm": 1.8589800596237183, |
|
"learning_rate": 9.363011358278406e-07, |
|
"loss": 0.0555, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_cer": 1.0165955429113325, |
|
"eval_loss": 0.03112892434000969, |
|
"eval_runtime": 70.4338, |
|
"eval_samples_per_second": 97.112, |
|
"eval_steps_per_second": 12.139, |
|
"eval_wer": 0.9988304093567252, |
|
"step": 39220 |
|
}, |
|
{ |
|
"epoch": 37.26427560169891, |
|
"grad_norm": 1.8065155744552612, |
|
"learning_rate": 6.814641704683633e-07, |
|
"loss": 0.0568, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 37.736196319018404, |
|
"grad_norm": 1.0842024087905884, |
|
"learning_rate": 4.6663260289360743e-07, |
|
"loss": 0.0533, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_cer": 1.0165659080132765, |
|
"eval_loss": 0.031128019094467163, |
|
"eval_runtime": 62.073, |
|
"eval_samples_per_second": 110.193, |
|
"eval_steps_per_second": 13.774, |
|
"eval_wer": 0.9988304093567252, |
|
"step": 40280 |
|
}, |
|
{ |
|
"epoch": 38.20764511562058, |
|
"grad_norm": 1.8371340036392212, |
|
"learning_rate": 2.915399553636205e-07, |
|
"loss": 0.0571, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 38.679565832940064, |
|
"grad_norm": 1.5724213123321533, |
|
"learning_rate": 1.5714100953393373e-07, |
|
"loss": 0.0528, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_cer": 1.0165659080132765, |
|
"eval_loss": 0.031000742688775063, |
|
"eval_runtime": 61.7575, |
|
"eval_samples_per_second": 110.756, |
|
"eval_steps_per_second": 13.844, |
|
"eval_wer": 0.9988304093567252, |
|
"step": 41340 |
|
}, |
|
{ |
|
"epoch": 39.15101462954224, |
|
"grad_norm": 1.2510491609573364, |
|
"learning_rate": 6.380760518884532e-08, |
|
"loss": 0.0568, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 39.62293534686173, |
|
"grad_norm": 0.06529413163661957, |
|
"learning_rate": 1.1860630053380229e-08, |
|
"loss": 0.0509, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 39.96271826333176, |
|
"eval_cer": 1.0165659080132765, |
|
"eval_loss": 0.03096253238618374, |
|
"eval_runtime": 109.6526, |
|
"eval_samples_per_second": 62.379, |
|
"eval_steps_per_second": 7.797, |
|
"eval_wer": 0.9988304093567252, |
|
"step": 42360 |
|
}, |
|
{ |
|
"epoch": 39.96271826333176, |
|
"step": 42360, |
|
"total_flos": 2.2101401353322127e+19, |
|
"train_loss": 0.9936401144753547, |
|
"train_runtime": 36758.8562, |
|
"train_samples_per_second": 73.778, |
|
"train_steps_per_second": 1.152 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 42360, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 40, |
|
"save_steps": 400, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.2101401353322127e+19, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|