mms-1b-bigcgen-male-15hrs-model / trainer_state.json
csikasote's picture
End of training
a13adfb verified
{
"best_metric": 0.473453164100647,
"best_model_checkpoint": "/scratch/skscla001/speech/results/mms-1b-bigcgen-male-15hrs-model/checkpoint-1700",
"epoch": 2.169421487603306,
"eval_steps": 100,
"global_step": 2100,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.10330578512396695,
"grad_norm": 5.168842792510986,
"learning_rate": 0.000285,
"loss": 14.7965,
"step": 100
},
{
"epoch": 0.10330578512396695,
"eval_loss": 3.522738456726074,
"eval_runtime": 30.9773,
"eval_samples_per_second": 14.236,
"eval_steps_per_second": 3.583,
"eval_wer": 1.0007213272421256,
"step": 100
},
{
"epoch": 0.2066115702479339,
"grad_norm": 4.0135955810546875,
"learning_rate": 0.000299015203870076,
"loss": 6.2688,
"step": 200
},
{
"epoch": 0.2066115702479339,
"eval_loss": 2.770048141479492,
"eval_runtime": 30.9601,
"eval_samples_per_second": 14.244,
"eval_steps_per_second": 3.585,
"eval_wer": 1.032459725895648,
"step": 200
},
{
"epoch": 0.30991735537190085,
"grad_norm": 2.5909135341644287,
"learning_rate": 0.00029797857636489284,
"loss": 3.6179,
"step": 300
},
{
"epoch": 0.30991735537190085,
"eval_loss": 0.753160834312439,
"eval_runtime": 30.8437,
"eval_samples_per_second": 14.298,
"eval_steps_per_second": 3.599,
"eval_wer": 0.6068766530415965,
"step": 300
},
{
"epoch": 0.4132231404958678,
"grad_norm": 4.135544776916504,
"learning_rate": 0.0002969419488597097,
"loss": 1.7779,
"step": 400
},
{
"epoch": 0.4132231404958678,
"eval_loss": 0.650780200958252,
"eval_runtime": 30.8789,
"eval_samples_per_second": 14.282,
"eval_steps_per_second": 3.595,
"eval_wer": 0.5821110843952874,
"step": 400
},
{
"epoch": 0.5165289256198347,
"grad_norm": 2.8424060344696045,
"learning_rate": 0.0002959053213545266,
"loss": 1.5595,
"step": 500
},
{
"epoch": 0.5165289256198347,
"eval_loss": 0.6249143481254578,
"eval_runtime": 31.2101,
"eval_samples_per_second": 14.13,
"eval_steps_per_second": 3.557,
"eval_wer": 0.5578264005770618,
"step": 500
},
{
"epoch": 0.6198347107438017,
"grad_norm": 3.2064597606658936,
"learning_rate": 0.00029486869384934343,
"loss": 1.5884,
"step": 600
},
{
"epoch": 0.6198347107438017,
"eval_loss": 0.6142242550849915,
"eval_runtime": 30.7532,
"eval_samples_per_second": 14.34,
"eval_steps_per_second": 3.609,
"eval_wer": 0.5282519836499159,
"step": 600
},
{
"epoch": 0.7231404958677686,
"grad_norm": 3.0032007694244385,
"learning_rate": 0.0002938320663441603,
"loss": 1.5532,
"step": 700
},
{
"epoch": 0.7231404958677686,
"eval_loss": 0.5929429531097412,
"eval_runtime": 31.0147,
"eval_samples_per_second": 14.219,
"eval_steps_per_second": 3.579,
"eval_wer": 0.5171916326039914,
"step": 700
},
{
"epoch": 0.8264462809917356,
"grad_norm": 3.4867029190063477,
"learning_rate": 0.00029279543883897714,
"loss": 1.4021,
"step": 800
},
{
"epoch": 0.8264462809917356,
"eval_loss": 0.5995635390281677,
"eval_runtime": 31.1021,
"eval_samples_per_second": 14.179,
"eval_steps_per_second": 3.569,
"eval_wer": 0.5181534022601587,
"step": 800
},
{
"epoch": 0.9297520661157025,
"grad_norm": 1.893242359161377,
"learning_rate": 0.000291758811333794,
"loss": 1.507,
"step": 900
},
{
"epoch": 0.9297520661157025,
"eval_loss": 0.5824074149131775,
"eval_runtime": 31.2256,
"eval_samples_per_second": 14.123,
"eval_steps_per_second": 3.555,
"eval_wer": 0.5121423419091128,
"step": 900
},
{
"epoch": 1.0330578512396693,
"grad_norm": 5.389484882354736,
"learning_rate": 0.0002907221838286109,
"loss": 1.5374,
"step": 1000
},
{
"epoch": 1.0330578512396693,
"eval_loss": 0.5614578723907471,
"eval_runtime": 31.1303,
"eval_samples_per_second": 14.166,
"eval_steps_per_second": 3.566,
"eval_wer": 0.5061312815580669,
"step": 1000
},
{
"epoch": 1.1363636363636362,
"grad_norm": 3.9391493797302246,
"learning_rate": 0.0002896855563234278,
"loss": 1.4139,
"step": 1100
},
{
"epoch": 1.1363636363636362,
"eval_loss": 0.5456417798995972,
"eval_runtime": 30.8696,
"eval_samples_per_second": 14.286,
"eval_steps_per_second": 3.596,
"eval_wer": 0.5066121663861505,
"step": 1100
},
{
"epoch": 1.2396694214876034,
"grad_norm": 2.858807325363159,
"learning_rate": 0.0002886489288182446,
"loss": 1.4472,
"step": 1200
},
{
"epoch": 1.2396694214876034,
"eval_loss": 0.5177425146102905,
"eval_runtime": 31.4742,
"eval_samples_per_second": 14.011,
"eval_steps_per_second": 3.527,
"eval_wer": 0.48737677326280354,
"step": 1200
},
{
"epoch": 1.3429752066115703,
"grad_norm": 2.0307440757751465,
"learning_rate": 0.0002876123013130615,
"loss": 1.2958,
"step": 1300
},
{
"epoch": 1.3429752066115703,
"eval_loss": 0.5022083520889282,
"eval_runtime": 31.2404,
"eval_samples_per_second": 14.116,
"eval_steps_per_second": 3.553,
"eval_wer": 0.48713633084876173,
"step": 1300
},
{
"epoch": 1.4462809917355373,
"grad_norm": 1.2417099475860596,
"learning_rate": 0.0002865756738078783,
"loss": 1.3292,
"step": 1400
},
{
"epoch": 1.4462809917355373,
"eval_loss": 0.49844667315483093,
"eval_runtime": 31.0915,
"eval_samples_per_second": 14.184,
"eval_steps_per_second": 3.57,
"eval_wer": 0.48713633084876173,
"step": 1400
},
{
"epoch": 1.549586776859504,
"grad_norm": 1.5981268882751465,
"learning_rate": 0.0002855390463026952,
"loss": 1.2062,
"step": 1500
},
{
"epoch": 1.549586776859504,
"eval_loss": 0.48859983682632446,
"eval_runtime": 31.0946,
"eval_samples_per_second": 14.183,
"eval_steps_per_second": 3.57,
"eval_wer": 0.4799230584275066,
"step": 1500
},
{
"epoch": 1.6528925619834711,
"grad_norm": 10.061776161193848,
"learning_rate": 0.0002845024187975121,
"loss": 1.1623,
"step": 1600
},
{
"epoch": 1.6528925619834711,
"eval_loss": 0.4811255931854248,
"eval_runtime": 31.525,
"eval_samples_per_second": 13.989,
"eval_steps_per_second": 3.521,
"eval_wer": 0.48232748256792496,
"step": 1600
},
{
"epoch": 1.756198347107438,
"grad_norm": 3.169590473175049,
"learning_rate": 0.0002834657912923289,
"loss": 1.2759,
"step": 1700
},
{
"epoch": 1.756198347107438,
"eval_loss": 0.473453164100647,
"eval_runtime": 31.3702,
"eval_samples_per_second": 14.058,
"eval_steps_per_second": 3.538,
"eval_wer": 0.46766049531137294,
"step": 1700
},
{
"epoch": 1.859504132231405,
"grad_norm": 3.227973222732544,
"learning_rate": 0.0002824291637871458,
"loss": 1.1852,
"step": 1800
},
{
"epoch": 1.859504132231405,
"eval_loss": 0.4986236095428467,
"eval_runtime": 31.0563,
"eval_samples_per_second": 14.2,
"eval_steps_per_second": 3.574,
"eval_wer": 0.4669391680692474,
"step": 1800
},
{
"epoch": 1.962809917355372,
"grad_norm": 3.3289122581481934,
"learning_rate": 0.0002813925362819626,
"loss": 1.0712,
"step": 1900
},
{
"epoch": 1.962809917355372,
"eval_loss": 0.5045417547225952,
"eval_runtime": 31.1996,
"eval_samples_per_second": 14.135,
"eval_steps_per_second": 3.558,
"eval_wer": 0.4844914642943015,
"step": 1900
},
{
"epoch": 2.0661157024793386,
"grad_norm": 2.1973116397857666,
"learning_rate": 0.0002803559087767795,
"loss": 1.2023,
"step": 2000
},
{
"epoch": 2.0661157024793386,
"eval_loss": 0.475466787815094,
"eval_runtime": 31.3451,
"eval_samples_per_second": 14.069,
"eval_steps_per_second": 3.541,
"eval_wer": 0.4782399615292138,
"step": 2000
},
{
"epoch": 2.169421487603306,
"grad_norm": 1.9049010276794434,
"learning_rate": 0.0002793192812715964,
"loss": 1.2275,
"step": 2100
},
{
"epoch": 2.169421487603306,
"eval_loss": 0.47560110688209534,
"eval_runtime": 31.4981,
"eval_samples_per_second": 14.001,
"eval_steps_per_second": 3.524,
"eval_wer": 0.47054580427987497,
"step": 2100
},
{
"epoch": 2.169421487603306,
"step": 2100,
"total_flos": 1.0989151242100367e+19,
"train_loss": 2.3535848345075334,
"train_runtime": 3049.1563,
"train_samples_per_second": 76.192,
"train_steps_per_second": 9.524
}
],
"logging_steps": 100,
"max_steps": 29040,
"num_input_tokens_seen": 0,
"num_train_epochs": 30,
"save_steps": 400,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 4,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 3
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.0989151242100367e+19,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}