{ "best_metric": 0.1408591866493225, "best_model_checkpoint": "./mms-1b-bem-male-sv/checkpoint-4400", "epoch": 5.0, "eval_steps": 200, "global_step": 4580, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.2183406113537118, "eval_loss": 0.1927431970834732, "eval_runtime": 127.6058, "eval_samples_per_second": 7.97, "eval_steps_per_second": 1.003, "eval_wer": 0.4257075471698113, "step": 200 }, { "epoch": 0.4366812227074236, "eval_loss": 0.1712573915719986, "eval_runtime": 126.4823, "eval_samples_per_second": 8.041, "eval_steps_per_second": 1.012, "eval_wer": 0.3884958071278826, "step": 400 }, { "epoch": 0.5458515283842795, "grad_norm": 1.9703088998794556, "learning_rate": 0.000992, "loss": 2.0358, "step": 500 }, { "epoch": 0.6550218340611353, "eval_loss": 0.17603513598442078, "eval_runtime": 127.0067, "eval_samples_per_second": 8.007, "eval_steps_per_second": 1.008, "eval_wer": 0.39072327044025157, "step": 600 }, { "epoch": 0.8733624454148472, "eval_loss": 0.18193688988685608, "eval_runtime": 126.7253, "eval_samples_per_second": 8.025, "eval_steps_per_second": 1.01, "eval_wer": 0.4143081761006289, "step": 800 }, { "epoch": 1.091703056768559, "grad_norm": 4.094836235046387, "learning_rate": 0.0008784313725490196, "loss": 0.519, "step": 1000 }, { "epoch": 1.091703056768559, "eval_loss": 0.16109386086463928, "eval_runtime": 127.0612, "eval_samples_per_second": 8.004, "eval_steps_per_second": 1.007, "eval_wer": 0.38692348008385746, "step": 1000 }, { "epoch": 1.3100436681222707, "eval_loss": 0.15501286089420319, "eval_runtime": 126.7082, "eval_samples_per_second": 8.026, "eval_steps_per_second": 1.01, "eval_wer": 0.3735587002096436, "step": 1200 }, { "epoch": 1.5283842794759825, "eval_loss": 0.15383152663707733, "eval_runtime": 127.2176, "eval_samples_per_second": 7.994, "eval_steps_per_second": 1.006, "eval_wer": 0.3770964360587002, "step": 1400 }, { "epoch": 1.6375545851528384, "grad_norm": 1.7256165742874146, "learning_rate": 0.0007558823529411764, "loss": 0.4764, "step": 1500 }, { "epoch": 1.7467248908296944, "eval_loss": 0.1743510365486145, "eval_runtime": 127.1222, "eval_samples_per_second": 8.0, "eval_steps_per_second": 1.007, "eval_wer": 0.417583857442348, "step": 1600 }, { "epoch": 1.965065502183406, "eval_loss": 0.15977127850055695, "eval_runtime": 127.3519, "eval_samples_per_second": 7.986, "eval_steps_per_second": 1.005, "eval_wer": 0.38836477987421386, "step": 1800 }, { "epoch": 2.183406113537118, "grad_norm": 2.758544683456421, "learning_rate": 0.0006333333333333333, "loss": 0.4501, "step": 2000 }, { "epoch": 2.183406113537118, "eval_loss": 0.15066786110401154, "eval_runtime": 127.104, "eval_samples_per_second": 8.001, "eval_steps_per_second": 1.007, "eval_wer": 0.3577044025157233, "step": 2000 }, { "epoch": 2.4017467248908297, "eval_loss": 0.15350954234600067, "eval_runtime": 127.7067, "eval_samples_per_second": 7.964, "eval_steps_per_second": 1.002, "eval_wer": 0.37631027253668764, "step": 2200 }, { "epoch": 2.6200873362445414, "eval_loss": 0.15018954873085022, "eval_runtime": 127.3689, "eval_samples_per_second": 7.985, "eval_steps_per_second": 1.005, "eval_wer": 0.36491090146750527, "step": 2400 }, { "epoch": 2.7292576419213974, "grad_norm": 0.7292295694351196, "learning_rate": 0.0005107843137254902, "loss": 0.4422, "step": 2500 }, { "epoch": 2.8384279475982535, "eval_loss": 0.14573481678962708, "eval_runtime": 127.0532, "eval_samples_per_second": 8.005, "eval_steps_per_second": 1.007, "eval_wer": 0.35023584905660377, "step": 2600 }, { "epoch": 3.056768558951965, "eval_loss": 0.1484854817390442, "eval_runtime": 128.0576, "eval_samples_per_second": 7.942, "eval_steps_per_second": 1.0, "eval_wer": 0.3579664570230608, "step": 2800 }, { "epoch": 3.2751091703056767, "grad_norm": 0.6407122015953064, "learning_rate": 0.0003884803921568628, "loss": 0.4217, "step": 3000 }, { "epoch": 3.2751091703056767, "eval_loss": 0.148036390542984, "eval_runtime": 128.302, "eval_samples_per_second": 7.927, "eval_steps_per_second": 0.998, "eval_wer": 0.3546907756813417, "step": 3000 }, { "epoch": 3.493449781659389, "eval_loss": 0.14975149929523468, "eval_runtime": 127.0597, "eval_samples_per_second": 8.004, "eval_steps_per_second": 1.007, "eval_wer": 0.3666142557651992, "step": 3200 }, { "epoch": 3.7117903930131004, "eval_loss": 0.14578010141849518, "eval_runtime": 127.8692, "eval_samples_per_second": 7.953, "eval_steps_per_second": 1.001, "eval_wer": 0.3494496855345912, "step": 3400 }, { "epoch": 3.8209606986899565, "grad_norm": 1.9009268283843994, "learning_rate": 0.0002659313725490196, "loss": 0.4144, "step": 3500 }, { "epoch": 3.930131004366812, "eval_loss": 0.1427353024482727, "eval_runtime": 127.8119, "eval_samples_per_second": 7.957, "eval_steps_per_second": 1.001, "eval_wer": 0.35744234800838576, "step": 3600 }, { "epoch": 4.148471615720524, "eval_loss": 0.14451348781585693, "eval_runtime": 127.6381, "eval_samples_per_second": 7.968, "eval_steps_per_second": 1.003, "eval_wer": 0.3594077568134172, "step": 3800 }, { "epoch": 4.366812227074236, "grad_norm": 1.5231894254684448, "learning_rate": 0.00014338235294117645, "loss": 0.3926, "step": 4000 }, { "epoch": 4.366812227074236, "eval_loss": 0.14618775248527527, "eval_runtime": 127.9874, "eval_samples_per_second": 7.946, "eval_steps_per_second": 1.0, "eval_wer": 0.3666142557651992, "step": 4000 }, { "epoch": 4.585152838427947, "eval_loss": 0.14320309460163116, "eval_runtime": 128.1452, "eval_samples_per_second": 7.936, "eval_steps_per_second": 0.999, "eval_wer": 0.3527253668763103, "step": 4200 }, { "epoch": 4.8034934497816595, "eval_loss": 0.1408591866493225, "eval_runtime": 127.7676, "eval_samples_per_second": 7.96, "eval_steps_per_second": 1.002, "eval_wer": 0.3498427672955975, "step": 4400 }, { "epoch": 4.9126637554585155, "grad_norm": 6.477123737335205, "learning_rate": 2.0833333333333333e-05, "loss": 0.3928, "step": 4500 }, { "epoch": 5.0, "step": 4580, "total_flos": 2.179329538942206e+19, "train_loss": 0.6123242174173547, "train_runtime": 13424.2744, "train_samples_per_second": 2.729, "train_steps_per_second": 0.341 } ], "logging_steps": 500, "max_steps": 4580, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.179329538942206e+19, "train_batch_size": 8, "trial_name": null, "trial_params": null }