|
{ |
|
"best_metric": 0.473453164100647, |
|
"best_model_checkpoint": "/scratch/skscla001/speech/results/mms-1b-bigcgen-male-15hrs-model/checkpoint-1700", |
|
"epoch": 2.169421487603306, |
|
"eval_steps": 100, |
|
"global_step": 2100, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.10330578512396695, |
|
"grad_norm": 5.168842792510986, |
|
"learning_rate": 0.000285, |
|
"loss": 14.7965, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.10330578512396695, |
|
"eval_loss": 3.522738456726074, |
|
"eval_runtime": 30.9773, |
|
"eval_samples_per_second": 14.236, |
|
"eval_steps_per_second": 3.583, |
|
"eval_wer": 1.0007213272421256, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.2066115702479339, |
|
"grad_norm": 4.0135955810546875, |
|
"learning_rate": 0.000299015203870076, |
|
"loss": 6.2688, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.2066115702479339, |
|
"eval_loss": 2.770048141479492, |
|
"eval_runtime": 30.9601, |
|
"eval_samples_per_second": 14.244, |
|
"eval_steps_per_second": 3.585, |
|
"eval_wer": 1.032459725895648, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.30991735537190085, |
|
"grad_norm": 2.5909135341644287, |
|
"learning_rate": 0.00029797857636489284, |
|
"loss": 3.6179, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.30991735537190085, |
|
"eval_loss": 0.753160834312439, |
|
"eval_runtime": 30.8437, |
|
"eval_samples_per_second": 14.298, |
|
"eval_steps_per_second": 3.599, |
|
"eval_wer": 0.6068766530415965, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.4132231404958678, |
|
"grad_norm": 4.135544776916504, |
|
"learning_rate": 0.0002969419488597097, |
|
"loss": 1.7779, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.4132231404958678, |
|
"eval_loss": 0.650780200958252, |
|
"eval_runtime": 30.8789, |
|
"eval_samples_per_second": 14.282, |
|
"eval_steps_per_second": 3.595, |
|
"eval_wer": 0.5821110843952874, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.5165289256198347, |
|
"grad_norm": 2.8424060344696045, |
|
"learning_rate": 0.0002959053213545266, |
|
"loss": 1.5595, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.5165289256198347, |
|
"eval_loss": 0.6249143481254578, |
|
"eval_runtime": 31.2101, |
|
"eval_samples_per_second": 14.13, |
|
"eval_steps_per_second": 3.557, |
|
"eval_wer": 0.5578264005770618, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.6198347107438017, |
|
"grad_norm": 3.2064597606658936, |
|
"learning_rate": 0.00029486869384934343, |
|
"loss": 1.5884, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.6198347107438017, |
|
"eval_loss": 0.6142242550849915, |
|
"eval_runtime": 30.7532, |
|
"eval_samples_per_second": 14.34, |
|
"eval_steps_per_second": 3.609, |
|
"eval_wer": 0.5282519836499159, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.7231404958677686, |
|
"grad_norm": 3.0032007694244385, |
|
"learning_rate": 0.0002938320663441603, |
|
"loss": 1.5532, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.7231404958677686, |
|
"eval_loss": 0.5929429531097412, |
|
"eval_runtime": 31.0147, |
|
"eval_samples_per_second": 14.219, |
|
"eval_steps_per_second": 3.579, |
|
"eval_wer": 0.5171916326039914, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.8264462809917356, |
|
"grad_norm": 3.4867029190063477, |
|
"learning_rate": 0.00029279543883897714, |
|
"loss": 1.4021, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.8264462809917356, |
|
"eval_loss": 0.5995635390281677, |
|
"eval_runtime": 31.1021, |
|
"eval_samples_per_second": 14.179, |
|
"eval_steps_per_second": 3.569, |
|
"eval_wer": 0.5181534022601587, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.9297520661157025, |
|
"grad_norm": 1.893242359161377, |
|
"learning_rate": 0.000291758811333794, |
|
"loss": 1.507, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.9297520661157025, |
|
"eval_loss": 0.5824074149131775, |
|
"eval_runtime": 31.2256, |
|
"eval_samples_per_second": 14.123, |
|
"eval_steps_per_second": 3.555, |
|
"eval_wer": 0.5121423419091128, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.0330578512396693, |
|
"grad_norm": 5.389484882354736, |
|
"learning_rate": 0.0002907221838286109, |
|
"loss": 1.5374, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.0330578512396693, |
|
"eval_loss": 0.5614578723907471, |
|
"eval_runtime": 31.1303, |
|
"eval_samples_per_second": 14.166, |
|
"eval_steps_per_second": 3.566, |
|
"eval_wer": 0.5061312815580669, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.1363636363636362, |
|
"grad_norm": 3.9391493797302246, |
|
"learning_rate": 0.0002896855563234278, |
|
"loss": 1.4139, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.1363636363636362, |
|
"eval_loss": 0.5456417798995972, |
|
"eval_runtime": 30.8696, |
|
"eval_samples_per_second": 14.286, |
|
"eval_steps_per_second": 3.596, |
|
"eval_wer": 0.5066121663861505, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.2396694214876034, |
|
"grad_norm": 2.858807325363159, |
|
"learning_rate": 0.0002886489288182446, |
|
"loss": 1.4472, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.2396694214876034, |
|
"eval_loss": 0.5177425146102905, |
|
"eval_runtime": 31.4742, |
|
"eval_samples_per_second": 14.011, |
|
"eval_steps_per_second": 3.527, |
|
"eval_wer": 0.48737677326280354, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.3429752066115703, |
|
"grad_norm": 2.0307440757751465, |
|
"learning_rate": 0.0002876123013130615, |
|
"loss": 1.2958, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.3429752066115703, |
|
"eval_loss": 0.5022083520889282, |
|
"eval_runtime": 31.2404, |
|
"eval_samples_per_second": 14.116, |
|
"eval_steps_per_second": 3.553, |
|
"eval_wer": 0.48713633084876173, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.4462809917355373, |
|
"grad_norm": 1.2417099475860596, |
|
"learning_rate": 0.0002865756738078783, |
|
"loss": 1.3292, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.4462809917355373, |
|
"eval_loss": 0.49844667315483093, |
|
"eval_runtime": 31.0915, |
|
"eval_samples_per_second": 14.184, |
|
"eval_steps_per_second": 3.57, |
|
"eval_wer": 0.48713633084876173, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.549586776859504, |
|
"grad_norm": 1.5981268882751465, |
|
"learning_rate": 0.0002855390463026952, |
|
"loss": 1.2062, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.549586776859504, |
|
"eval_loss": 0.48859983682632446, |
|
"eval_runtime": 31.0946, |
|
"eval_samples_per_second": 14.183, |
|
"eval_steps_per_second": 3.57, |
|
"eval_wer": 0.4799230584275066, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.6528925619834711, |
|
"grad_norm": 10.061776161193848, |
|
"learning_rate": 0.0002845024187975121, |
|
"loss": 1.1623, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.6528925619834711, |
|
"eval_loss": 0.4811255931854248, |
|
"eval_runtime": 31.525, |
|
"eval_samples_per_second": 13.989, |
|
"eval_steps_per_second": 3.521, |
|
"eval_wer": 0.48232748256792496, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.756198347107438, |
|
"grad_norm": 3.169590473175049, |
|
"learning_rate": 0.0002834657912923289, |
|
"loss": 1.2759, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.756198347107438, |
|
"eval_loss": 0.473453164100647, |
|
"eval_runtime": 31.3702, |
|
"eval_samples_per_second": 14.058, |
|
"eval_steps_per_second": 3.538, |
|
"eval_wer": 0.46766049531137294, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.859504132231405, |
|
"grad_norm": 3.227973222732544, |
|
"learning_rate": 0.0002824291637871458, |
|
"loss": 1.1852, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.859504132231405, |
|
"eval_loss": 0.4986236095428467, |
|
"eval_runtime": 31.0563, |
|
"eval_samples_per_second": 14.2, |
|
"eval_steps_per_second": 3.574, |
|
"eval_wer": 0.4669391680692474, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.962809917355372, |
|
"grad_norm": 3.3289122581481934, |
|
"learning_rate": 0.0002813925362819626, |
|
"loss": 1.0712, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.962809917355372, |
|
"eval_loss": 0.5045417547225952, |
|
"eval_runtime": 31.1996, |
|
"eval_samples_per_second": 14.135, |
|
"eval_steps_per_second": 3.558, |
|
"eval_wer": 0.4844914642943015, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 2.0661157024793386, |
|
"grad_norm": 2.1973116397857666, |
|
"learning_rate": 0.0002803559087767795, |
|
"loss": 1.2023, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.0661157024793386, |
|
"eval_loss": 0.475466787815094, |
|
"eval_runtime": 31.3451, |
|
"eval_samples_per_second": 14.069, |
|
"eval_steps_per_second": 3.541, |
|
"eval_wer": 0.4782399615292138, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.169421487603306, |
|
"grad_norm": 1.9049010276794434, |
|
"learning_rate": 0.0002793192812715964, |
|
"loss": 1.2275, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 2.169421487603306, |
|
"eval_loss": 0.47560110688209534, |
|
"eval_runtime": 31.4981, |
|
"eval_samples_per_second": 14.001, |
|
"eval_steps_per_second": 3.524, |
|
"eval_wer": 0.47054580427987497, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 2.169421487603306, |
|
"step": 2100, |
|
"total_flos": 1.0989151242100367e+19, |
|
"train_loss": 2.3535848345075334, |
|
"train_runtime": 3049.1563, |
|
"train_samples_per_second": 76.192, |
|
"train_steps_per_second": 9.524 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 29040, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 30, |
|
"save_steps": 400, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 4, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 3 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.0989151242100367e+19, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|