{ "best_metric": 0.43837225437164307, "best_model_checkpoint": "/scratch/skscla001/speech/results/mms-1b-bigcgen-male-10hrs-model/checkpoint-1900", "epoch": 3.560371517027864, "eval_steps": 100, "global_step": 2300, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.15479876160990713, "grad_norm": 5.460351943969727, "learning_rate": 0.00028799999999999995, "loss": 12.1631, "step": 100 }, { "epoch": 0.15479876160990713, "eval_loss": 1.2138845920562744, "eval_runtime": 30.7834, "eval_samples_per_second": 14.326, "eval_steps_per_second": 3.606, "eval_wer": 0.8800192353931233, "step": 100 }, { "epoch": 0.30959752321981426, "grad_norm": 9.577868461608887, "learning_rate": 0.00029850622406639004, "loss": 1.7593, "step": 200 }, { "epoch": 0.30959752321981426, "eval_loss": 0.6127354502677917, "eval_runtime": 30.6448, "eval_samples_per_second": 14.391, "eval_steps_per_second": 3.622, "eval_wer": 0.5688867516229863, "step": 200 }, { "epoch": 0.46439628482972134, "grad_norm": 3.2317442893981934, "learning_rate": 0.00029696576763485477, "loss": 1.6361, "step": 300 }, { "epoch": 0.46439628482972134, "eval_loss": 0.592118501663208, "eval_runtime": 30.6328, "eval_samples_per_second": 14.396, "eval_steps_per_second": 3.624, "eval_wer": 0.5732147150757394, "step": 300 }, { "epoch": 0.6191950464396285, "grad_norm": 2.212291955947876, "learning_rate": 0.0002954097510373444, "loss": 1.6146, "step": 400 }, { "epoch": 0.6191950464396285, "eval_loss": 0.5587197542190552, "eval_runtime": 30.5318, "eval_samples_per_second": 14.444, "eval_steps_per_second": 3.636, "eval_wer": 0.5547006491945179, "step": 400 }, { "epoch": 0.7739938080495357, "grad_norm": 36.353111267089844, "learning_rate": 0.000293853734439834, "loss": 1.3783, "step": 500 }, { "epoch": 0.7739938080495357, "eval_loss": 0.5519681572914124, "eval_runtime": 30.9645, "eval_samples_per_second": 14.242, "eval_steps_per_second": 3.585, "eval_wer": 0.5313777350324598, "step": 500 }, { "epoch": 0.9287925696594427, "grad_norm": 10.191967010498047, "learning_rate": 0.0002922977178423236, "loss": 1.36, "step": 600 }, { "epoch": 0.9287925696594427, "eval_loss": 0.5443902611732483, "eval_runtime": 30.4904, "eval_samples_per_second": 14.464, "eval_steps_per_second": 3.64, "eval_wer": 0.5277710988218322, "step": 600 }, { "epoch": 1.08359133126935, "grad_norm": 2.354079246520996, "learning_rate": 0.00029074170124481326, "loss": 1.3447, "step": 700 }, { "epoch": 1.08359133126935, "eval_loss": 0.5393995046615601, "eval_runtime": 30.4654, "eval_samples_per_second": 14.475, "eval_steps_per_second": 3.643, "eval_wer": 0.5126232267371964, "step": 700 }, { "epoch": 1.238390092879257, "grad_norm": 22.778234481811523, "learning_rate": 0.0002891856846473029, "loss": 1.3265, "step": 800 }, { "epoch": 1.238390092879257, "eval_loss": 0.5084973573684692, "eval_runtime": 30.5735, "eval_samples_per_second": 14.424, "eval_steps_per_second": 3.631, "eval_wer": 0.5027650877614811, "step": 800 }, { "epoch": 1.3931888544891642, "grad_norm": 1.102784276008606, "learning_rate": 0.00028762966804979253, "loss": 1.2625, "step": 900 }, { "epoch": 1.3931888544891642, "eval_loss": 0.4822230637073517, "eval_runtime": 30.9, "eval_samples_per_second": 14.272, "eval_steps_per_second": 3.592, "eval_wer": 0.5008415484491464, "step": 900 }, { "epoch": 1.5479876160990713, "grad_norm": 28.76813316345215, "learning_rate": 0.00028607365145228217, "loss": 1.2793, "step": 1000 }, { "epoch": 1.5479876160990713, "eval_loss": 0.5092038512229919, "eval_runtime": 30.5673, "eval_samples_per_second": 14.427, "eval_steps_per_second": 3.631, "eval_wer": 0.5037268574176484, "step": 1000 }, { "epoch": 1.7027863777089784, "grad_norm": 2.1807544231414795, "learning_rate": 0.00028453319502074685, "loss": 1.266, "step": 1100 }, { "epoch": 1.7027863777089784, "eval_loss": 0.4713282585144043, "eval_runtime": 30.5711, "eval_samples_per_second": 14.425, "eval_steps_per_second": 3.631, "eval_wer": 0.49579225775426783, "step": 1100 }, { "epoch": 1.8575851393188856, "grad_norm": 1.2101130485534668, "learning_rate": 0.0002829771784232365, "loss": 1.2451, "step": 1200 }, { "epoch": 1.8575851393188856, "eval_loss": 0.45441821217536926, "eval_runtime": 30.4675, "eval_samples_per_second": 14.474, "eval_steps_per_second": 3.643, "eval_wer": 0.4779995191151719, "step": 1200 }, { "epoch": 2.0123839009287927, "grad_norm": 5.769093036651611, "learning_rate": 0.0002814211618257261, "loss": 1.3066, "step": 1300 }, { "epoch": 2.0123839009287927, "eval_loss": 0.4491290748119354, "eval_runtime": 30.9343, "eval_samples_per_second": 14.256, "eval_steps_per_second": 3.588, "eval_wer": 0.47367155566241886, "step": 1300 }, { "epoch": 2.1671826625387, "grad_norm": 10.747196197509766, "learning_rate": 0.00027986514522821576, "loss": 1.2102, "step": 1400 }, { "epoch": 2.1671826625387, "eval_loss": 0.4510246217250824, "eval_runtime": 30.8465, "eval_samples_per_second": 14.297, "eval_steps_per_second": 3.598, "eval_wer": 0.4784804039432556, "step": 1400 }, { "epoch": 2.321981424148607, "grad_norm": 1.3166229724884033, "learning_rate": 0.0002783091286307054, "loss": 1.2384, "step": 1500 }, { "epoch": 2.321981424148607, "eval_loss": 0.4534485936164856, "eval_runtime": 30.6292, "eval_samples_per_second": 14.398, "eval_steps_per_second": 3.624, "eval_wer": 0.47559509497475355, "step": 1500 }, { "epoch": 2.476780185758514, "grad_norm": 1.1447139978408813, "learning_rate": 0.00027675311203319503, "loss": 1.2143, "step": 1600 }, { "epoch": 2.476780185758514, "eval_loss": 0.4538140892982483, "eval_runtime": 30.689, "eval_samples_per_second": 14.37, "eval_steps_per_second": 3.617, "eval_wer": 0.473431113248377, "step": 1600 }, { "epoch": 2.6315789473684212, "grad_norm": 1.3231621980667114, "learning_rate": 0.0002751970954356846, "loss": 1.0998, "step": 1700 }, { "epoch": 2.6315789473684212, "eval_loss": 0.44723573327064514, "eval_runtime": 31.1806, "eval_samples_per_second": 14.143, "eval_steps_per_second": 3.56, "eval_wer": 0.4683818225534984, "step": 1700 }, { "epoch": 2.7863777089783284, "grad_norm": 2.1241629123687744, "learning_rate": 0.00027364107883817425, "loss": 1.0608, "step": 1800 }, { "epoch": 2.7863777089783284, "eval_loss": 0.4533312916755676, "eval_runtime": 31.1676, "eval_samples_per_second": 14.149, "eval_steps_per_second": 3.561, "eval_wer": 0.461649434960327, "step": 1800 }, { "epoch": 2.9411764705882355, "grad_norm": 1.158493161201477, "learning_rate": 0.0002720850622406639, "loss": 1.1756, "step": 1900 }, { "epoch": 2.9411764705882355, "eval_loss": 0.43837225437164307, "eval_runtime": 31.1732, "eval_samples_per_second": 14.147, "eval_steps_per_second": 3.561, "eval_wer": 0.46140899254628515, "step": 1900 }, { "epoch": 3.0959752321981426, "grad_norm": 1.2850475311279297, "learning_rate": 0.0002705290456431535, "loss": 1.0873, "step": 2000 }, { "epoch": 3.0959752321981426, "eval_loss": 0.4458366930484772, "eval_runtime": 31.0493, "eval_samples_per_second": 14.203, "eval_steps_per_second": 3.575, "eval_wer": 0.46381341668670356, "step": 2000 }, { "epoch": 3.2507739938080498, "grad_norm": 0.9354444146156311, "learning_rate": 0.00026897302904564316, "loss": 1.0788, "step": 2100 }, { "epoch": 3.2507739938080498, "eval_loss": 0.44000744819641113, "eval_runtime": 31.515, "eval_samples_per_second": 13.993, "eval_steps_per_second": 3.522, "eval_wer": 0.45732147150757396, "step": 2100 }, { "epoch": 3.405572755417957, "grad_norm": 2.473445177078247, "learning_rate": 0.0002674170124481328, "loss": 1.1188, "step": 2200 }, { "epoch": 3.405572755417957, "eval_loss": 0.44116678833961487, "eval_runtime": 31.1851, "eval_samples_per_second": 14.141, "eval_steps_per_second": 3.559, "eval_wer": 0.4650156287569127, "step": 2200 }, { "epoch": 3.560371517027864, "grad_norm": 5.2535858154296875, "learning_rate": 0.0002658609958506224, "loss": 1.2589, "step": 2300 }, { "epoch": 3.560371517027864, "eval_loss": 0.44131311774253845, "eval_runtime": 30.9569, "eval_samples_per_second": 14.246, "eval_steps_per_second": 3.586, "eval_wer": 0.4553979321952392, "step": 2300 }, { "epoch": 3.560371517027864, "step": 2300, "total_flos": 1.198392334375229e+19, "train_loss": 1.7602186650815217, "train_runtime": 3322.857, "train_samples_per_second": 46.641, "train_steps_per_second": 5.832 } ], "logging_steps": 100, "max_steps": 19380, "num_input_tokens_seen": 0, "num_train_epochs": 30, "save_steps": 400, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 4, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 1 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.198392334375229e+19, "train_batch_size": 4, "trial_name": null, "trial_params": null }