mms-1b-swagen-baseline-model / trainer_state.json
csikasote's picture
End of training
7001040 verified
{
"best_metric": 0.22245945036411285,
"best_model_checkpoint": "/scratch/skscla001/speech/results/mms-1b-swagen-baseline-model/checkpoint-2200",
"epoch": 6.205250596658711,
"eval_steps": 100,
"global_step": 2600,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.2386634844868735,
"grad_norm": 19.838756561279297,
"learning_rate": 0.00028799999999999995,
"loss": 15.3971,
"step": 100
},
{
"epoch": 0.2386634844868735,
"eval_loss": 3.594007968902588,
"eval_runtime": 32.6156,
"eval_samples_per_second": 17.446,
"eval_steps_per_second": 4.384,
"eval_wer": 1.0050711917300565,
"step": 100
},
{
"epoch": 0.477326968973747,
"grad_norm": 6.821050643920898,
"learning_rate": 0.00029769045709703284,
"loss": 6.4342,
"step": 200
},
{
"epoch": 0.477326968973747,
"eval_loss": 2.992433547973633,
"eval_runtime": 32.5813,
"eval_samples_per_second": 17.464,
"eval_steps_per_second": 4.389,
"eval_wer": 0.9869319290033158,
"step": 200
},
{
"epoch": 0.7159904534606205,
"grad_norm": 5.072504997253418,
"learning_rate": 0.00029528468323977545,
"loss": 3.4197,
"step": 300
},
{
"epoch": 0.7159904534606205,
"eval_loss": 0.2737194001674652,
"eval_runtime": 32.4531,
"eval_samples_per_second": 17.533,
"eval_steps_per_second": 4.406,
"eval_wer": 0.20226253169494832,
"step": 300
},
{
"epoch": 0.954653937947494,
"grad_norm": 1.5491737127304077,
"learning_rate": 0.000292878909382518,
"loss": 0.56,
"step": 400
},
{
"epoch": 0.954653937947494,
"eval_loss": 0.2543294429779053,
"eval_runtime": 32.2994,
"eval_samples_per_second": 17.616,
"eval_steps_per_second": 4.427,
"eval_wer": 0.1962161107860347,
"step": 400
},
{
"epoch": 1.1933174224343674,
"grad_norm": 2.1193718910217285,
"learning_rate": 0.0002904731355252606,
"loss": 0.5187,
"step": 500
},
{
"epoch": 1.1933174224343674,
"eval_loss": 0.2419968843460083,
"eval_runtime": 32.686,
"eval_samples_per_second": 17.408,
"eval_steps_per_second": 4.375,
"eval_wer": 0.1929003315779208,
"step": 500
},
{
"epoch": 1.431980906921241,
"grad_norm": 1.6395008563995361,
"learning_rate": 0.0002880673616680032,
"loss": 0.5115,
"step": 600
},
{
"epoch": 1.431980906921241,
"eval_loss": 0.2393239438533783,
"eval_runtime": 32.6608,
"eval_samples_per_second": 17.421,
"eval_steps_per_second": 4.378,
"eval_wer": 0.19465574409986347,
"step": 600
},
{
"epoch": 1.6706443914081146,
"grad_norm": 1.2931002378463745,
"learning_rate": 0.00028566158781074576,
"loss": 0.5086,
"step": 700
},
{
"epoch": 1.6706443914081146,
"eval_loss": 0.23598293960094452,
"eval_runtime": 32.3728,
"eval_samples_per_second": 17.576,
"eval_steps_per_second": 4.417,
"eval_wer": 0.1891944606982641,
"step": 700
},
{
"epoch": 1.9093078758949882,
"grad_norm": 1.0657984018325806,
"learning_rate": 0.0002832558139534883,
"loss": 0.4801,
"step": 800
},
{
"epoch": 1.9093078758949882,
"eval_loss": 0.2332880049943924,
"eval_runtime": 32.406,
"eval_samples_per_second": 17.558,
"eval_steps_per_second": 4.413,
"eval_wer": 0.18743904817632143,
"step": 800
},
{
"epoch": 2.1479713603818618,
"grad_norm": 0.8465839624404907,
"learning_rate": 0.0002808740978348035,
"loss": 0.5281,
"step": 900
},
{
"epoch": 2.1479713603818618,
"eval_loss": 0.2355046272277832,
"eval_runtime": 32.703,
"eval_samples_per_second": 17.399,
"eval_steps_per_second": 4.373,
"eval_wer": 0.1958260191144919,
"step": 900
},
{
"epoch": 2.386634844868735,
"grad_norm": 1.4161683320999146,
"learning_rate": 0.00027849238171611865,
"loss": 0.4683,
"step": 1000
},
{
"epoch": 2.386634844868735,
"eval_loss": 0.23779602348804474,
"eval_runtime": 32.6391,
"eval_samples_per_second": 17.433,
"eval_steps_per_second": 4.381,
"eval_wer": 0.1956309732787205,
"step": 1000
},
{
"epoch": 2.6252983293556085,
"grad_norm": 0.9228078126907349,
"learning_rate": 0.0002760866078588612,
"loss": 0.4548,
"step": 1100
},
{
"epoch": 2.6252983293556085,
"eval_loss": 0.22832728922367096,
"eval_runtime": 32.4057,
"eval_samples_per_second": 17.559,
"eval_steps_per_second": 4.413,
"eval_wer": 0.18743904817632143,
"step": 1100
},
{
"epoch": 2.863961813842482,
"grad_norm": 1.0473681688308716,
"learning_rate": 0.00027368083400160383,
"loss": 0.4654,
"step": 1200
},
{
"epoch": 2.863961813842482,
"eval_loss": 0.23233628273010254,
"eval_runtime": 32.4947,
"eval_samples_per_second": 17.511,
"eval_steps_per_second": 4.401,
"eval_wer": 0.1891944606982641,
"step": 1200
},
{
"epoch": 3.1026252983293556,
"grad_norm": 1.9320465326309204,
"learning_rate": 0.0002712750601443464,
"loss": 0.453,
"step": 1300
},
{
"epoch": 3.1026252983293556,
"eval_loss": 0.22875255346298218,
"eval_runtime": 32.7978,
"eval_samples_per_second": 17.349,
"eval_steps_per_second": 4.36,
"eval_wer": 0.1897795982055783,
"step": 1300
},
{
"epoch": 3.341288782816229,
"grad_norm": 3.4785170555114746,
"learning_rate": 0.000268869286287089,
"loss": 0.4542,
"step": 1400
},
{
"epoch": 3.341288782816229,
"eval_loss": 0.2302933633327484,
"eval_runtime": 32.7775,
"eval_samples_per_second": 17.359,
"eval_steps_per_second": 4.363,
"eval_wer": 0.19016968987712113,
"step": 1400
},
{
"epoch": 3.579952267303103,
"grad_norm": 2.0417723655700684,
"learning_rate": 0.00026646351242983157,
"loss": 0.4621,
"step": 1500
},
{
"epoch": 3.579952267303103,
"eval_loss": 0.22530591487884521,
"eval_runtime": 32.5076,
"eval_samples_per_second": 17.504,
"eval_steps_per_second": 4.399,
"eval_wer": 0.1864638189974644,
"step": 1500
},
{
"epoch": 3.8186157517899764,
"grad_norm": 1.299824833869934,
"learning_rate": 0.00026405773857257413,
"loss": 0.4342,
"step": 1600
},
{
"epoch": 3.8186157517899764,
"eval_loss": 0.22665663063526154,
"eval_runtime": 32.7877,
"eval_samples_per_second": 17.354,
"eval_steps_per_second": 4.361,
"eval_wer": 0.18685391066900722,
"step": 1600
},
{
"epoch": 4.05727923627685,
"grad_norm": 1.9400004148483276,
"learning_rate": 0.00026165196471531675,
"loss": 0.466,
"step": 1700
},
{
"epoch": 4.05727923627685,
"eval_loss": 0.2283923178911209,
"eval_runtime": 32.8561,
"eval_samples_per_second": 17.318,
"eval_steps_per_second": 4.352,
"eval_wer": 0.1897795982055783,
"step": 1700
},
{
"epoch": 4.2959427207637235,
"grad_norm": 1.079958200454712,
"learning_rate": 0.0002592461908580593,
"loss": 0.4268,
"step": 1800
},
{
"epoch": 4.2959427207637235,
"eval_loss": 0.23249927163124084,
"eval_runtime": 32.7252,
"eval_samples_per_second": 17.387,
"eval_steps_per_second": 4.37,
"eval_wer": 0.1958260191144919,
"step": 1800
},
{
"epoch": 4.534606205250597,
"grad_norm": 3.2974212169647217,
"learning_rate": 0.0002568404170008019,
"loss": 0.4283,
"step": 1900
},
{
"epoch": 4.534606205250597,
"eval_loss": 0.22498926520347595,
"eval_runtime": 32.5783,
"eval_samples_per_second": 17.466,
"eval_steps_per_second": 4.389,
"eval_wer": 0.18860932319094986,
"step": 1900
},
{
"epoch": 4.77326968973747,
"grad_norm": 3.1521952152252197,
"learning_rate": 0.0002544346431435445,
"loss": 0.4407,
"step": 2000
},
{
"epoch": 4.77326968973747,
"eval_loss": 0.2249845415353775,
"eval_runtime": 32.4737,
"eval_samples_per_second": 17.522,
"eval_steps_per_second": 4.404,
"eval_wer": 0.18841427735517846,
"step": 2000
},
{
"epoch": 5.011933174224343,
"grad_norm": 1.2635365724563599,
"learning_rate": 0.00025202886928628706,
"loss": 0.4762,
"step": 2100
},
{
"epoch": 5.011933174224343,
"eval_loss": 0.22774527966976166,
"eval_runtime": 33.4042,
"eval_samples_per_second": 17.034,
"eval_steps_per_second": 4.281,
"eval_wer": 0.1893895065340355,
"step": 2100
},
{
"epoch": 5.250596658711217,
"grad_norm": 1.1228593587875366,
"learning_rate": 0.0002496230954290297,
"loss": 0.4289,
"step": 2200
},
{
"epoch": 5.250596658711217,
"eval_loss": 0.22245945036411285,
"eval_runtime": 32.7331,
"eval_samples_per_second": 17.383,
"eval_steps_per_second": 4.369,
"eval_wer": 0.18724400234055003,
"step": 2200
},
{
"epoch": 5.4892601431980905,
"grad_norm": 0.8405218124389648,
"learning_rate": 0.00024721732157177224,
"loss": 0.4391,
"step": 2300
},
{
"epoch": 5.4892601431980905,
"eval_loss": 0.22291946411132812,
"eval_runtime": 32.5165,
"eval_samples_per_second": 17.499,
"eval_steps_per_second": 4.398,
"eval_wer": 0.18841427735517846,
"step": 2300
},
{
"epoch": 5.727923627684964,
"grad_norm": 4.196171283721924,
"learning_rate": 0.0002448115477145148,
"loss": 0.4333,
"step": 2400
},
{
"epoch": 5.727923627684964,
"eval_loss": 0.22290098667144775,
"eval_runtime": 32.4851,
"eval_samples_per_second": 17.516,
"eval_steps_per_second": 4.402,
"eval_wer": 0.18782913984786426,
"step": 2400
},
{
"epoch": 5.966587112171838,
"grad_norm": 0.8724733591079712,
"learning_rate": 0.0002424057738572574,
"loss": 0.4351,
"step": 2500
},
{
"epoch": 5.966587112171838,
"eval_loss": 0.22788961231708527,
"eval_runtime": 32.9988,
"eval_samples_per_second": 17.243,
"eval_steps_per_second": 4.333,
"eval_wer": 0.19016968987712113,
"step": 2500
},
{
"epoch": 6.205250596658711,
"grad_norm": 3.7559807300567627,
"learning_rate": 0.00023999999999999998,
"loss": 0.4065,
"step": 2600
},
{
"epoch": 6.205250596658711,
"eval_loss": 0.22799032926559448,
"eval_runtime": 33.0675,
"eval_samples_per_second": 17.207,
"eval_steps_per_second": 4.324,
"eval_wer": 0.19387556075677784,
"step": 2600
},
{
"epoch": 6.205250596658711,
"step": 2600,
"total_flos": 1.068815361623146e+19,
"train_loss": 1.3819761775090145,
"train_runtime": 3531.6962,
"train_samples_per_second": 28.448,
"train_steps_per_second": 3.559
}
],
"logging_steps": 100,
"max_steps": 12570,
"num_input_tokens_seen": 0,
"num_train_epochs": 30,
"save_steps": 400,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 4,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 2
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.068815361623146e+19,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}