Hubert-kakeiken-W-closed_add / trainer_state.json
utakumi's picture
End of training
b37bc73 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 39.96271826333176,
"eval_steps": 100.0,
"global_step": 42360,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.4719207173194903,
"grad_norm": 60.370758056640625,
"learning_rate": 1.188e-06,
"loss": 28.5881,
"step": 500
},
{
"epoch": 0.9438414346389806,
"grad_norm": 47.990840911865234,
"learning_rate": 2.3880000000000003e-06,
"loss": 11.3302,
"step": 1000
},
{
"epoch": 1.0,
"eval_cer": 1.1283783783783783,
"eval_loss": 10.027889251708984,
"eval_runtime": 143.8913,
"eval_samples_per_second": 47.536,
"eval_steps_per_second": 5.942,
"eval_wer": 1.0,
"step": 1060
},
{
"epoch": 1.4152902312411515,
"grad_norm": 40.47557067871094,
"learning_rate": 3.588e-06,
"loss": 9.2112,
"step": 1500
},
{
"epoch": 1.8872109485606419,
"grad_norm": 25.79726219177246,
"learning_rate": 4.788e-06,
"loss": 7.0068,
"step": 2000
},
{
"epoch": 2.0,
"eval_cer": 1.1283487434803223,
"eval_loss": 5.3823137283325195,
"eval_runtime": 138.9276,
"eval_samples_per_second": 49.234,
"eval_steps_per_second": 6.154,
"eval_wer": 1.0,
"step": 2120
},
{
"epoch": 2.3586597451628126,
"grad_norm": 10.715571403503418,
"learning_rate": 5.988e-06,
"loss": 4.9098,
"step": 2500
},
{
"epoch": 2.830580462482303,
"grad_norm": 2.4069929122924805,
"learning_rate": 7.1880000000000005e-06,
"loss": 3.682,
"step": 3000
},
{
"epoch": 3.0,
"eval_cer": 1.1283487434803223,
"eval_loss": 3.189443349838257,
"eval_runtime": 133.8287,
"eval_samples_per_second": 51.11,
"eval_steps_per_second": 6.389,
"eval_wer": 1.0,
"step": 3180
},
{
"epoch": 3.302029259084474,
"grad_norm": 1.6513175964355469,
"learning_rate": 8.388e-06,
"loss": 3.1332,
"step": 3500
},
{
"epoch": 3.773949976403964,
"grad_norm": 1.9935938119888306,
"learning_rate": 9.588e-06,
"loss": 2.725,
"step": 4000
},
{
"epoch": 4.0,
"eval_cer": 1.1283783783783783,
"eval_loss": 2.161285400390625,
"eval_runtime": 133.493,
"eval_samples_per_second": 51.239,
"eval_steps_per_second": 6.405,
"eval_wer": 1.0,
"step": 4240
},
{
"epoch": 4.245398773006135,
"grad_norm": 2.0250744819641113,
"learning_rate": 1.0787999999999999e-05,
"loss": 2.2125,
"step": 4500
},
{
"epoch": 4.717319490325625,
"grad_norm": 6.084537506103516,
"learning_rate": 1.1988000000000001e-05,
"loss": 1.4317,
"step": 5000
},
{
"epoch": 5.0,
"eval_cer": 1.1232219061166429,
"eval_loss": 0.7817353010177612,
"eval_runtime": 140.0025,
"eval_samples_per_second": 48.856,
"eval_steps_per_second": 6.107,
"eval_wer": 1.0,
"step": 5300
},
{
"epoch": 5.188768286927796,
"grad_norm": 8.009575843811035,
"learning_rate": 1.3188e-05,
"loss": 0.8716,
"step": 5500
},
{
"epoch": 5.660689004247287,
"grad_norm": 6.703843116760254,
"learning_rate": 1.4388000000000002e-05,
"loss": 0.5734,
"step": 6000
},
{
"epoch": 6.0,
"eval_cer": 1.0337541488857278,
"eval_loss": 0.2728799879550934,
"eval_runtime": 137.2562,
"eval_samples_per_second": 49.834,
"eval_steps_per_second": 6.229,
"eval_wer": 0.9991228070175439,
"step": 6360
},
{
"epoch": 6.132137800849457,
"grad_norm": 6.002254009246826,
"learning_rate": 1.5588e-05,
"loss": 0.4288,
"step": 6500
},
{
"epoch": 6.604058518168948,
"grad_norm": 8.313450813293457,
"learning_rate": 1.6788e-05,
"loss": 0.3485,
"step": 7000
},
{
"epoch": 7.0,
"eval_cer": 1.0258712660028448,
"eval_loss": 0.16624058783054352,
"eval_runtime": 68.7385,
"eval_samples_per_second": 99.508,
"eval_steps_per_second": 12.438,
"eval_wer": 0.9988304093567252,
"step": 7420
},
{
"epoch": 7.0755073147711185,
"grad_norm": 2.5701239109039307,
"learning_rate": 1.7988e-05,
"loss": 0.2995,
"step": 7500
},
{
"epoch": 7.547428032090608,
"grad_norm": 5.836677551269531,
"learning_rate": 1.9188e-05,
"loss": 0.2631,
"step": 8000
},
{
"epoch": 8.0,
"eval_cer": 1.0223743480322427,
"eval_loss": 0.083324134349823,
"eval_runtime": 70.5926,
"eval_samples_per_second": 96.894,
"eval_steps_per_second": 12.112,
"eval_wer": 0.9988304093567252,
"step": 8480
},
{
"epoch": 8.018876828692779,
"grad_norm": 5.063877105712891,
"learning_rate": 2.0388e-05,
"loss": 0.2388,
"step": 8500
},
{
"epoch": 8.49079754601227,
"grad_norm": 3.660646677017212,
"learning_rate": 2.1588e-05,
"loss": 0.218,
"step": 9000
},
{
"epoch": 8.96271826333176,
"grad_norm": 6.175236225128174,
"learning_rate": 2.2788000000000003e-05,
"loss": 0.2068,
"step": 9500
},
{
"epoch": 9.0,
"eval_cer": 1.0209518729255571,
"eval_loss": 0.06159723177552223,
"eval_runtime": 68.413,
"eval_samples_per_second": 99.981,
"eval_steps_per_second": 12.498,
"eval_wer": 0.9989766081871345,
"step": 9540
},
{
"epoch": 9.43416705993393,
"grad_norm": 0.2773754894733429,
"learning_rate": 2.3988e-05,
"loss": 0.2023,
"step": 10000
},
{
"epoch": 9.906087777253422,
"grad_norm": 1.5312055349349976,
"learning_rate": 2.5188e-05,
"loss": 0.1846,
"step": 10500
},
{
"epoch": 10.0,
"eval_cer": 1.0198850165955429,
"eval_loss": 0.07861880213022232,
"eval_runtime": 59.5147,
"eval_samples_per_second": 114.929,
"eval_steps_per_second": 14.366,
"eval_wer": 0.9988304093567252,
"step": 10600
},
{
"epoch": 10.377536573855592,
"grad_norm": 6.2259297370910645,
"learning_rate": 2.6388000000000002e-05,
"loss": 0.1781,
"step": 11000
},
{
"epoch": 10.849457291175083,
"grad_norm": 5.571898460388184,
"learning_rate": 2.7585600000000002e-05,
"loss": 0.1792,
"step": 11500
},
{
"epoch": 11.0,
"eval_cer": 1.019825746799431,
"eval_loss": 0.0471838042140007,
"eval_runtime": 65.0557,
"eval_samples_per_second": 105.141,
"eval_steps_per_second": 13.143,
"eval_wer": 0.9989766081871345,
"step": 11660
},
{
"epoch": 11.320906087777253,
"grad_norm": 3.878492832183838,
"learning_rate": 2.87856e-05,
"loss": 0.1662,
"step": 12000
},
{
"epoch": 11.792826805096745,
"grad_norm": 6.27423095703125,
"learning_rate": 2.99856e-05,
"loss": 0.1669,
"step": 12500
},
{
"epoch": 12.0,
"eval_cer": 1.0206555239449977,
"eval_loss": 0.05153690651059151,
"eval_runtime": 65.9815,
"eval_samples_per_second": 103.665,
"eval_steps_per_second": 12.958,
"eval_wer": 0.9988304093567252,
"step": 12720
},
{
"epoch": 12.264275601698914,
"grad_norm": 3.428331136703491,
"learning_rate": 2.9979744753439937e-05,
"loss": 0.159,
"step": 13000
},
{
"epoch": 12.736196319018404,
"grad_norm": 3.60664701461792,
"learning_rate": 2.991821289408868e-05,
"loss": 0.1609,
"step": 13500
},
{
"epoch": 13.0,
"eval_cer": 1.0218705547652915,
"eval_loss": 0.05291549116373062,
"eval_runtime": 81.1439,
"eval_samples_per_second": 84.295,
"eval_steps_per_second": 10.537,
"eval_wer": 0.9988304093567252,
"step": 13780
},
{
"epoch": 13.207645115620576,
"grad_norm": 3.0601279735565186,
"learning_rate": 2.981532510892707e-05,
"loss": 0.156,
"step": 14000
},
{
"epoch": 13.679565832940066,
"grad_norm": 1.1705658435821533,
"learning_rate": 2.9671447959906427e-05,
"loss": 0.1508,
"step": 14500
},
{
"epoch": 14.0,
"eval_cer": 1.0183143669985775,
"eval_loss": 0.043218065053224564,
"eval_runtime": 72.3333,
"eval_samples_per_second": 94.562,
"eval_steps_per_second": 11.82,
"eval_wer": 0.9988304093567252,
"step": 14840
},
{
"epoch": 14.151014629542237,
"grad_norm": 3.3974997997283936,
"learning_rate": 2.9486979510030355e-05,
"loss": 0.1454,
"step": 15000
},
{
"epoch": 14.622935346861727,
"grad_norm": 1.8044121265411377,
"learning_rate": 2.9262430125717204e-05,
"loss": 0.1427,
"step": 15500
},
{
"epoch": 15.0,
"eval_cer": 1.0147878141299194,
"eval_loss": 0.08599487692117691,
"eval_runtime": 59.8056,
"eval_samples_per_second": 114.37,
"eval_steps_per_second": 14.296,
"eval_wer": 0.9988304093567252,
"step": 15900
},
{
"epoch": 15.094384143463898,
"grad_norm": 2.6493678092956543,
"learning_rate": 2.899842106477605e-05,
"loss": 0.1398,
"step": 16000
},
{
"epoch": 15.566304860783388,
"grad_norm": 4.24639892578125,
"learning_rate": 2.869568275758094e-05,
"loss": 0.1316,
"step": 16500
},
{
"epoch": 16.0,
"eval_cer": 1.0185218112849692,
"eval_loss": 0.0349772572517395,
"eval_runtime": 60.1896,
"eval_samples_per_second": 113.641,
"eval_steps_per_second": 14.205,
"eval_wer": 0.9988304093567252,
"step": 16960
},
{
"epoch": 16.037753657385558,
"grad_norm": 4.061686992645264,
"learning_rate": 2.8355052786198856e-05,
"loss": 0.1306,
"step": 17000
},
{
"epoch": 16.50967437470505,
"grad_norm": 2.1003165245056152,
"learning_rate": 2.7977473567062487e-05,
"loss": 0.1248,
"step": 17500
},
{
"epoch": 16.98159509202454,
"grad_norm": 3.3991761207580566,
"learning_rate": 2.7563989743599158e-05,
"loss": 0.1296,
"step": 18000
},
{
"epoch": 17.0,
"eval_cer": 1.0190552394499763,
"eval_loss": 0.04494578763842583,
"eval_runtime": 86.2502,
"eval_samples_per_second": 79.304,
"eval_steps_per_second": 9.913,
"eval_wer": 0.9988304093567252,
"step": 18020
},
{
"epoch": 17.45304388862671,
"grad_norm": 3.5397164821624756,
"learning_rate": 2.7116675667417045e-05,
"loss": 0.1239,
"step": 18500
},
{
"epoch": 17.9249646059462,
"grad_norm": 3.1016619205474854,
"learning_rate": 2.663497648875447e-05,
"loss": 0.1236,
"step": 19000
},
{
"epoch": 18.0,
"eval_cer": 1.0170400663821717,
"eval_loss": 0.04502090439200401,
"eval_runtime": 67.5469,
"eval_samples_per_second": 101.263,
"eval_steps_per_second": 12.658,
"eval_wer": 0.9988304093567252,
"step": 19080
},
{
"epoch": 18.396413402548372,
"grad_norm": 1.3774545192718506,
"learning_rate": 2.6121086974660007e-05,
"loss": 0.1168,
"step": 19500
},
{
"epoch": 18.86833411986786,
"grad_norm": 3.927860736846924,
"learning_rate": 2.557642889644962e-05,
"loss": 0.1149,
"step": 20000
},
{
"epoch": 19.0,
"eval_cer": 1.018136557610242,
"eval_loss": 0.03580431640148163,
"eval_runtime": 62.0438,
"eval_samples_per_second": 110.245,
"eval_steps_per_second": 13.781,
"eval_wer": 0.9989766081871345,
"step": 20140
},
{
"epoch": 19.339782916470032,
"grad_norm": 1.4613689184188843,
"learning_rate": 2.500368515269978e-05,
"loss": 0.1097,
"step": 20500
},
{
"epoch": 19.811703633789524,
"grad_norm": 1.7840094566345215,
"learning_rate": 2.440214531030174e-05,
"loss": 0.1149,
"step": 21000
},
{
"epoch": 20.0,
"eval_cer": 1.0176327643432908,
"eval_loss": 0.0317009836435318,
"eval_runtime": 64.2537,
"eval_samples_per_second": 106.453,
"eval_steps_per_second": 13.307,
"eval_wer": 0.9989766081871345,
"step": 21200
},
{
"epoch": 20.283152430391695,
"grad_norm": 9.317741394042969,
"learning_rate": 2.3774592676935842e-05,
"loss": 0.1096,
"step": 21500
},
{
"epoch": 20.755073147711183,
"grad_norm": 2.2778842449188232,
"learning_rate": 2.3122763494165503e-05,
"loss": 0.106,
"step": 22000
},
{
"epoch": 21.0,
"eval_cer": 1.0170104314841157,
"eval_loss": 0.03687233105301857,
"eval_runtime": 70.0875,
"eval_samples_per_second": 97.592,
"eval_steps_per_second": 12.199,
"eval_wer": 0.9988304093567252,
"step": 22260
},
{
"epoch": 21.226521944313355,
"grad_norm": 5.864170551300049,
"learning_rate": 2.2449830971856908e-05,
"loss": 0.1042,
"step": 22500
},
{
"epoch": 21.698442661632846,
"grad_norm": 1.5493875741958618,
"learning_rate": 2.1754960402430945e-05,
"loss": 0.102,
"step": 23000
},
{
"epoch": 22.0,
"eval_cer": 1.018018018018018,
"eval_loss": 0.034220367670059204,
"eval_runtime": 60.8599,
"eval_samples_per_second": 112.389,
"eval_steps_per_second": 14.049,
"eval_wer": 0.9988304093567252,
"step": 23320
},
{
"epoch": 22.169891458235018,
"grad_norm": 2.6867339611053467,
"learning_rate": 2.1041400973479267e-05,
"loss": 0.099,
"step": 23500
},
{
"epoch": 22.641812175554506,
"grad_norm": 6.666200637817383,
"learning_rate": 2.031112688042812e-05,
"loss": 0.1011,
"step": 24000
},
{
"epoch": 23.0,
"eval_cer": 1.0178994784257942,
"eval_loss": 0.04105741158127785,
"eval_runtime": 63.1164,
"eval_samples_per_second": 108.371,
"eval_steps_per_second": 13.546,
"eval_wer": 0.9988304093567252,
"step": 24380
},
{
"epoch": 23.113260972156677,
"grad_norm": 2.3484959602355957,
"learning_rate": 1.956766180067445e-05,
"loss": 0.0991,
"step": 24500
},
{
"epoch": 23.58518168947617,
"grad_norm": 3.396759510040283,
"learning_rate": 1.881008353783706e-05,
"loss": 0.0948,
"step": 25000
},
{
"epoch": 24.0,
"eval_cer": 1.0163288288288288,
"eval_loss": 0.031411658972501755,
"eval_runtime": 60.6777,
"eval_samples_per_second": 112.727,
"eval_steps_per_second": 14.091,
"eval_wer": 0.9988304093567252,
"step": 25440
},
{
"epoch": 24.05663048607834,
"grad_norm": 0.44371920824050903,
"learning_rate": 1.8041963967312025e-05,
"loss": 0.0963,
"step": 25500
},
{
"epoch": 24.52855120339783,
"grad_norm": 2.3664116859436035,
"learning_rate": 1.7265428235347026e-05,
"loss": 0.0906,
"step": 26000
},
{
"epoch": 25.0,
"grad_norm": 2.171415090560913,
"learning_rate": 1.6482624773072205e-05,
"loss": 0.0934,
"step": 26500
},
{
"epoch": 25.0,
"eval_cer": 1.017454954954955,
"eval_loss": 0.030156882479786873,
"eval_runtime": 61.4594,
"eval_samples_per_second": 111.293,
"eval_steps_per_second": 13.912,
"eval_wer": 0.9988304093567252,
"step": 26500
},
{
"epoch": 25.47192071731949,
"grad_norm": 3.737889051437378,
"learning_rate": 1.5697295811286228e-05,
"loss": 0.0844,
"step": 27000
},
{
"epoch": 25.94384143463898,
"grad_norm": 5.188897609710693,
"learning_rate": 1.4908467225519327e-05,
"loss": 0.0843,
"step": 27500
},
{
"epoch": 26.0,
"eval_cer": 1.0172475106685632,
"eval_loss": 0.04401924088597298,
"eval_runtime": 59.6879,
"eval_samples_per_second": 114.596,
"eval_steps_per_second": 14.325,
"eval_wer": 0.9988304093567252,
"step": 27560
},
{
"epoch": 26.41529023124115,
"grad_norm": 3.3139688968658447,
"learning_rate": 1.4119891882267897e-05,
"loss": 0.0789,
"step": 28000
},
{
"epoch": 26.887210948560643,
"grad_norm": 3.2081096172332764,
"learning_rate": 1.3333751522495247e-05,
"loss": 0.0833,
"step": 28500
},
{
"epoch": 27.0,
"eval_cer": 1.016743717401612,
"eval_loss": 0.034057337790727615,
"eval_runtime": 60.1277,
"eval_samples_per_second": 113.758,
"eval_steps_per_second": 14.22,
"eval_wer": 0.9988304093567252,
"step": 28620
},
{
"epoch": 27.35865974516281,
"grad_norm": 1.3224974870681763,
"learning_rate": 1.2552221150328132e-05,
"loss": 0.0749,
"step": 29000
},
{
"epoch": 27.830580462482303,
"grad_norm": 0.9007667303085327,
"learning_rate": 1.1779004344640104e-05,
"loss": 0.0781,
"step": 29500
},
{
"epoch": 28.0,
"eval_cer": 1.0157064959696538,
"eval_loss": 0.05653638020157814,
"eval_runtime": 63.0029,
"eval_samples_per_second": 108.566,
"eval_steps_per_second": 13.571,
"eval_wer": 0.9988304093567252,
"step": 29680
},
{
"epoch": 28.302029259084474,
"grad_norm": 1.9431039094924927,
"learning_rate": 1.101314200491722e-05,
"loss": 0.072,
"step": 30000
},
{
"epoch": 28.773949976403966,
"grad_norm": 0.08316487073898315,
"learning_rate": 1.0258310052432775e-05,
"loss": 0.0741,
"step": 30500
},
{
"epoch": 29.0,
"eval_cer": 1.0160917496443813,
"eval_loss": 0.03570393845438957,
"eval_runtime": 61.5019,
"eval_samples_per_second": 111.216,
"eval_steps_per_second": 13.902,
"eval_wer": 0.9988304093567252,
"step": 30740
},
{
"epoch": 29.245398773006134,
"grad_norm": 0.031967077404260635,
"learning_rate": 9.516596870757504e-06,
"loss": 0.0717,
"step": 31000
},
{
"epoch": 29.717319490325625,
"grad_norm": 0.1671179085969925,
"learning_rate": 8.79005454793263e-06,
"loss": 0.0704,
"step": 31500
},
{
"epoch": 30.0,
"eval_cer": 1.016239924134661,
"eval_loss": 0.030619077384471893,
"eval_runtime": 59.7711,
"eval_samples_per_second": 114.437,
"eval_steps_per_second": 14.305,
"eval_wer": 0.9988304093567252,
"step": 31800
},
{
"epoch": 30.188768286927797,
"grad_norm": 4.69877815246582,
"learning_rate": 8.080693198985019e-06,
"loss": 0.0723,
"step": 32000
},
{
"epoch": 30.660689004247285,
"grad_norm": 1.8107829093933105,
"learning_rate": 7.39319559220429e-06,
"loss": 0.0678,
"step": 32500
},
{
"epoch": 31.0,
"eval_cer": 1.0159435751541015,
"eval_loss": 0.02890847437083721,
"eval_runtime": 62.096,
"eval_samples_per_second": 110.152,
"eval_steps_per_second": 13.769,
"eval_wer": 0.9988304093567252,
"step": 32860
},
{
"epoch": 31.132137800849456,
"grad_norm": 2.0504305362701416,
"learning_rate": 6.7239430275857026e-06,
"loss": 0.0628,
"step": 33000
},
{
"epoch": 31.604058518168948,
"grad_norm": 3.536367654800415,
"learning_rate": 6.077587720237289e-06,
"loss": 0.067,
"step": 33500
},
{
"epoch": 32.0,
"eval_cer": 1.0158843053579896,
"eval_loss": 0.027865121141076088,
"eval_runtime": 60.0098,
"eval_samples_per_second": 113.981,
"eval_steps_per_second": 14.248,
"eval_wer": 0.9988304093567252,
"step": 33920
},
{
"epoch": 32.075507314771116,
"grad_norm": 0.30863332748413086,
"learning_rate": 5.4559179328316745e-06,
"loss": 0.0617,
"step": 34000
},
{
"epoch": 32.54742803209061,
"grad_norm": 1.2579513788223267,
"learning_rate": 4.860653630940242e-06,
"loss": 0.0641,
"step": 34500
},
{
"epoch": 33.0,
"eval_cer": 1.0159732100521575,
"eval_loss": 0.032531462609767914,
"eval_runtime": 61.6714,
"eval_samples_per_second": 110.91,
"eval_steps_per_second": 13.864,
"eval_wer": 0.9988304093567252,
"step": 34980
},
{
"epoch": 33.01887682869278,
"grad_norm": 1.1170421838760376,
"learning_rate": 4.293441724427136e-06,
"loss": 0.0582,
"step": 35000
},
{
"epoch": 33.49079754601227,
"grad_norm": 2.8375914096832275,
"learning_rate": 3.7558515109655663e-06,
"loss": 0.0609,
"step": 35500
},
{
"epoch": 33.96271826333176,
"grad_norm": 0.19227160513401031,
"learning_rate": 3.249370334282695e-06,
"loss": 0.0595,
"step": 36000
},
{
"epoch": 34.0,
"eval_cer": 1.0166251778093884,
"eval_loss": 0.033033788204193115,
"eval_runtime": 59.4597,
"eval_samples_per_second": 115.036,
"eval_steps_per_second": 14.379,
"eval_wer": 0.9988304093567252,
"step": 36040
},
{
"epoch": 34.434167059933934,
"grad_norm": 1.7270156145095825,
"learning_rate": 2.775399469145443e-06,
"loss": 0.0614,
"step": 36500
},
{
"epoch": 34.90608777725342,
"grad_norm": 0.2758292257785797,
"learning_rate": 2.336095963596342e-06,
"loss": 0.055,
"step": 37000
},
{
"epoch": 35.0,
"eval_cer": 1.0164177335229967,
"eval_loss": 0.030884480103850365,
"eval_runtime": 58.7794,
"eval_samples_per_second": 116.367,
"eval_steps_per_second": 14.546,
"eval_wer": 0.9988304093567252,
"step": 37100
},
{
"epoch": 35.37753657385559,
"grad_norm": 2.685821771621704,
"learning_rate": 1.9309149042102644e-06,
"loss": 0.0564,
"step": 37500
},
{
"epoch": 35.849457291175085,
"grad_norm": 2.9631125926971436,
"learning_rate": 1.5618919095549582e-06,
"loss": 0.055,
"step": 38000
},
{
"epoch": 36.0,
"eval_cer": 1.016714082503556,
"eval_loss": 0.03032144159078598,
"eval_runtime": 71.6691,
"eval_samples_per_second": 95.439,
"eval_steps_per_second": 11.93,
"eval_wer": 0.9988304093567252,
"step": 38160
},
{
"epoch": 36.32090608777725,
"grad_norm": 0.7306509613990784,
"learning_rate": 1.2300479506647383e-06,
"loss": 0.0554,
"step": 38500
},
{
"epoch": 36.792826805096745,
"grad_norm": 1.8589800596237183,
"learning_rate": 9.363011358278406e-07,
"loss": 0.0555,
"step": 39000
},
{
"epoch": 37.0,
"eval_cer": 1.0165955429113325,
"eval_loss": 0.03112892434000969,
"eval_runtime": 70.4338,
"eval_samples_per_second": 97.112,
"eval_steps_per_second": 12.139,
"eval_wer": 0.9988304093567252,
"step": 39220
},
{
"epoch": 37.26427560169891,
"grad_norm": 1.8065155744552612,
"learning_rate": 6.814641704683633e-07,
"loss": 0.0568,
"step": 39500
},
{
"epoch": 37.736196319018404,
"grad_norm": 1.0842024087905884,
"learning_rate": 4.6663260289360743e-07,
"loss": 0.0533,
"step": 40000
},
{
"epoch": 38.0,
"eval_cer": 1.0165659080132765,
"eval_loss": 0.031128019094467163,
"eval_runtime": 62.073,
"eval_samples_per_second": 110.193,
"eval_steps_per_second": 13.774,
"eval_wer": 0.9988304093567252,
"step": 40280
},
{
"epoch": 38.20764511562058,
"grad_norm": 1.8371340036392212,
"learning_rate": 2.915399553636205e-07,
"loss": 0.0571,
"step": 40500
},
{
"epoch": 38.679565832940064,
"grad_norm": 1.5724213123321533,
"learning_rate": 1.5714100953393373e-07,
"loss": 0.0528,
"step": 41000
},
{
"epoch": 39.0,
"eval_cer": 1.0165659080132765,
"eval_loss": 0.031000742688775063,
"eval_runtime": 61.7575,
"eval_samples_per_second": 110.756,
"eval_steps_per_second": 13.844,
"eval_wer": 0.9988304093567252,
"step": 41340
},
{
"epoch": 39.15101462954224,
"grad_norm": 1.2510491609573364,
"learning_rate": 6.380760518884532e-08,
"loss": 0.0568,
"step": 41500
},
{
"epoch": 39.62293534686173,
"grad_norm": 0.06529413163661957,
"learning_rate": 1.1860630053380229e-08,
"loss": 0.0509,
"step": 42000
},
{
"epoch": 39.96271826333176,
"eval_cer": 1.0165659080132765,
"eval_loss": 0.03096253238618374,
"eval_runtime": 109.6526,
"eval_samples_per_second": 62.379,
"eval_steps_per_second": 7.797,
"eval_wer": 0.9988304093567252,
"step": 42360
},
{
"epoch": 39.96271826333176,
"step": 42360,
"total_flos": 2.2101401353322127e+19,
"train_loss": 0.9936401144753547,
"train_runtime": 36758.8562,
"train_samples_per_second": 73.778,
"train_steps_per_second": 1.152
}
],
"logging_steps": 500,
"max_steps": 42360,
"num_input_tokens_seen": 0,
"num_train_epochs": 40,
"save_steps": 400,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.2101401353322127e+19,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}