{
  "best_metric": 0.1408591866493225,
  "best_model_checkpoint": "./mms-1b-bem-male-sv/checkpoint-4400",
  "epoch": 5.0,
  "eval_steps": 200,
  "global_step": 4580,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.2183406113537118,
      "eval_loss": 0.1927431970834732,
      "eval_runtime": 127.6058,
      "eval_samples_per_second": 7.97,
      "eval_steps_per_second": 1.003,
      "eval_wer": 0.4257075471698113,
      "step": 200
    },
    {
      "epoch": 0.4366812227074236,
      "eval_loss": 0.1712573915719986,
      "eval_runtime": 126.4823,
      "eval_samples_per_second": 8.041,
      "eval_steps_per_second": 1.012,
      "eval_wer": 0.3884958071278826,
      "step": 400
    },
    {
      "epoch": 0.5458515283842795,
      "grad_norm": 1.9703088998794556,
      "learning_rate": 0.000992,
      "loss": 2.0358,
      "step": 500
    },
    {
      "epoch": 0.6550218340611353,
      "eval_loss": 0.17603513598442078,
      "eval_runtime": 127.0067,
      "eval_samples_per_second": 8.007,
      "eval_steps_per_second": 1.008,
      "eval_wer": 0.39072327044025157,
      "step": 600
    },
    {
      "epoch": 0.8733624454148472,
      "eval_loss": 0.18193688988685608,
      "eval_runtime": 126.7253,
      "eval_samples_per_second": 8.025,
      "eval_steps_per_second": 1.01,
      "eval_wer": 0.4143081761006289,
      "step": 800
    },
    {
      "epoch": 1.091703056768559,
      "grad_norm": 4.094836235046387,
      "learning_rate": 0.0008784313725490196,
      "loss": 0.519,
      "step": 1000
    },
    {
      "epoch": 1.091703056768559,
      "eval_loss": 0.16109386086463928,
      "eval_runtime": 127.0612,
      "eval_samples_per_second": 8.004,
      "eval_steps_per_second": 1.007,
      "eval_wer": 0.38692348008385746,
      "step": 1000
    },
    {
      "epoch": 1.3100436681222707,
      "eval_loss": 0.15501286089420319,
      "eval_runtime": 126.7082,
      "eval_samples_per_second": 8.026,
      "eval_steps_per_second": 1.01,
      "eval_wer": 0.3735587002096436,
      "step": 1200
    },
    {
      "epoch": 1.5283842794759825,
      "eval_loss": 0.15383152663707733,
      "eval_runtime": 127.2176,
      "eval_samples_per_second": 7.994,
      "eval_steps_per_second": 1.006,
      "eval_wer": 0.3770964360587002,
      "step": 1400
    },
    {
      "epoch": 1.6375545851528384,
      "grad_norm": 1.7256165742874146,
      "learning_rate": 0.0007558823529411764,
      "loss": 0.4764,
      "step": 1500
    },
    {
      "epoch": 1.7467248908296944,
      "eval_loss": 0.1743510365486145,
      "eval_runtime": 127.1222,
      "eval_samples_per_second": 8.0,
      "eval_steps_per_second": 1.007,
      "eval_wer": 0.417583857442348,
      "step": 1600
    },
    {
      "epoch": 1.965065502183406,
      "eval_loss": 0.15977127850055695,
      "eval_runtime": 127.3519,
      "eval_samples_per_second": 7.986,
      "eval_steps_per_second": 1.005,
      "eval_wer": 0.38836477987421386,
      "step": 1800
    },
    {
      "epoch": 2.183406113537118,
      "grad_norm": 2.758544683456421,
      "learning_rate": 0.0006333333333333333,
      "loss": 0.4501,
      "step": 2000
    },
    {
      "epoch": 2.183406113537118,
      "eval_loss": 0.15066786110401154,
      "eval_runtime": 127.104,
      "eval_samples_per_second": 8.001,
      "eval_steps_per_second": 1.007,
      "eval_wer": 0.3577044025157233,
      "step": 2000
    },
    {
      "epoch": 2.4017467248908297,
      "eval_loss": 0.15350954234600067,
      "eval_runtime": 127.7067,
      "eval_samples_per_second": 7.964,
      "eval_steps_per_second": 1.002,
      "eval_wer": 0.37631027253668764,
      "step": 2200
    },
    {
      "epoch": 2.6200873362445414,
      "eval_loss": 0.15018954873085022,
      "eval_runtime": 127.3689,
      "eval_samples_per_second": 7.985,
      "eval_steps_per_second": 1.005,
      "eval_wer": 0.36491090146750527,
      "step": 2400
    },
    {
      "epoch": 2.7292576419213974,
      "grad_norm": 0.7292295694351196,
      "learning_rate": 0.0005107843137254902,
      "loss": 0.4422,
      "step": 2500
    },
    {
      "epoch": 2.8384279475982535,
      "eval_loss": 0.14573481678962708,
      "eval_runtime": 127.0532,
      "eval_samples_per_second": 8.005,
      "eval_steps_per_second": 1.007,
      "eval_wer": 0.35023584905660377,
      "step": 2600
    },
    {
      "epoch": 3.056768558951965,
      "eval_loss": 0.1484854817390442,
      "eval_runtime": 128.0576,
      "eval_samples_per_second": 7.942,
      "eval_steps_per_second": 1.0,
      "eval_wer": 0.3579664570230608,
      "step": 2800
    },
    {
      "epoch": 3.2751091703056767,
      "grad_norm": 0.6407122015953064,
      "learning_rate": 0.0003884803921568628,
      "loss": 0.4217,
      "step": 3000
    },
    {
      "epoch": 3.2751091703056767,
      "eval_loss": 0.148036390542984,
      "eval_runtime": 128.302,
      "eval_samples_per_second": 7.927,
      "eval_steps_per_second": 0.998,
      "eval_wer": 0.3546907756813417,
      "step": 3000
    },
    {
      "epoch": 3.493449781659389,
      "eval_loss": 0.14975149929523468,
      "eval_runtime": 127.0597,
      "eval_samples_per_second": 8.004,
      "eval_steps_per_second": 1.007,
      "eval_wer": 0.3666142557651992,
      "step": 3200
    },
    {
      "epoch": 3.7117903930131004,
      "eval_loss": 0.14578010141849518,
      "eval_runtime": 127.8692,
      "eval_samples_per_second": 7.953,
      "eval_steps_per_second": 1.001,
      "eval_wer": 0.3494496855345912,
      "step": 3400
    },
    {
      "epoch": 3.8209606986899565,
      "grad_norm": 1.9009268283843994,
      "learning_rate": 0.0002659313725490196,
      "loss": 0.4144,
      "step": 3500
    },
    {
      "epoch": 3.930131004366812,
      "eval_loss": 0.1427353024482727,
      "eval_runtime": 127.8119,
      "eval_samples_per_second": 7.957,
      "eval_steps_per_second": 1.001,
      "eval_wer": 0.35744234800838576,
      "step": 3600
    },
    {
      "epoch": 4.148471615720524,
      "eval_loss": 0.14451348781585693,
      "eval_runtime": 127.6381,
      "eval_samples_per_second": 7.968,
      "eval_steps_per_second": 1.003,
      "eval_wer": 0.3594077568134172,
      "step": 3800
    },
    {
      "epoch": 4.366812227074236,
      "grad_norm": 1.5231894254684448,
      "learning_rate": 0.00014338235294117645,
      "loss": 0.3926,
      "step": 4000
    },
    {
      "epoch": 4.366812227074236,
      "eval_loss": 0.14618775248527527,
      "eval_runtime": 127.9874,
      "eval_samples_per_second": 7.946,
      "eval_steps_per_second": 1.0,
      "eval_wer": 0.3666142557651992,
      "step": 4000
    },
    {
      "epoch": 4.585152838427947,
      "eval_loss": 0.14320309460163116,
      "eval_runtime": 128.1452,
      "eval_samples_per_second": 7.936,
      "eval_steps_per_second": 0.999,
      "eval_wer": 0.3527253668763103,
      "step": 4200
    },
    {
      "epoch": 4.8034934497816595,
      "eval_loss": 0.1408591866493225,
      "eval_runtime": 127.7676,
      "eval_samples_per_second": 7.96,
      "eval_steps_per_second": 1.002,
      "eval_wer": 0.3498427672955975,
      "step": 4400
    },
    {
      "epoch": 4.9126637554585155,
      "grad_norm": 6.477123737335205,
      "learning_rate": 2.0833333333333333e-05,
      "loss": 0.3928,
      "step": 4500
    },
    {
      "epoch": 5.0,
      "step": 4580,
      "total_flos": 2.179329538942206e+19,
      "train_loss": 0.6123242174173547,
      "train_runtime": 13424.2744,
      "train_samples_per_second": 2.729,
      "train_steps_per_second": 0.341
    }
  ],
  "logging_steps": 500,
  "max_steps": 4580,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 200,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.179329538942206e+19,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}