{ "best_metric": null, "best_model_checkpoint": null, "epoch": 15.0, "eval_steps": 100, "global_step": 2370, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.6329113924050633, "eval_loss": 3.940936803817749, "eval_runtime": 314.2197, "eval_samples_per_second": 8.045, "eval_steps_per_second": 1.006, "eval_wer": 1.0, "step": 100 }, { "epoch": 1.2658227848101267, "eval_loss": 3.044066905975342, "eval_runtime": 313.2217, "eval_samples_per_second": 8.071, "eval_steps_per_second": 1.009, "eval_wer": 1.0, "step": 200 }, { "epoch": 1.8987341772151898, "eval_loss": 2.9164648056030273, "eval_runtime": 317.0806, "eval_samples_per_second": 7.973, "eval_steps_per_second": 0.997, "eval_wer": 1.0, "step": 300 }, { "epoch": 2.5316455696202533, "eval_loss": 1.4924770593643188, "eval_runtime": 316.4362, "eval_samples_per_second": 7.989, "eval_steps_per_second": 0.999, "eval_wer": 1.9968354430379747, "step": 400 }, { "epoch": 3.1645569620253164, "grad_norm": 0.8539223670959473, "learning_rate": 0.0002982, "loss": 3.7012, "step": 500 }, { "epoch": 3.1645569620253164, "eval_loss": 0.30101653933525085, "eval_runtime": 312.8326, "eval_samples_per_second": 8.081, "eval_steps_per_second": 1.01, "eval_wer": 1.9446202531645569, "step": 500 }, { "epoch": 3.7974683544303796, "eval_loss": 0.17126257717609406, "eval_runtime": 314.312, "eval_samples_per_second": 8.043, "eval_steps_per_second": 1.005, "eval_wer": 1.8259493670886076, "step": 600 }, { "epoch": 4.430379746835443, "eval_loss": 0.09897234290838242, "eval_runtime": 312.6958, "eval_samples_per_second": 8.085, "eval_steps_per_second": 1.011, "eval_wer": 1.6162974683544304, "step": 700 }, { "epoch": 5.063291139240507, "eval_loss": 0.06915320456027985, "eval_runtime": 317.6492, "eval_samples_per_second": 7.958, "eval_steps_per_second": 0.995, "eval_wer": 1.5439082278481013, "step": 800 }, { "epoch": 5.69620253164557, "eval_loss": 0.046260952949523926, "eval_runtime": 321.8259, "eval_samples_per_second": 7.855, "eval_steps_per_second": 0.982, "eval_wer": 1.423259493670886, "step": 900 }, { "epoch": 6.329113924050633, "grad_norm": 0.26903435587882996, "learning_rate": 0.00022026737967914436, "loss": 0.1686, "step": 1000 }, { "epoch": 6.329113924050633, "eval_loss": 0.038907092064619064, "eval_runtime": 320.4174, "eval_samples_per_second": 7.89, "eval_steps_per_second": 0.986, "eval_wer": 1.3469145569620253, "step": 1000 }, { "epoch": 6.962025316455696, "eval_loss": 0.029044821858406067, "eval_runtime": 320.649, "eval_samples_per_second": 7.884, "eval_steps_per_second": 0.986, "eval_wer": 1.3101265822784811, "step": 1100 }, { "epoch": 7.594936708860759, "eval_loss": 0.020351797342300415, "eval_runtime": 316.4006, "eval_samples_per_second": 7.99, "eval_steps_per_second": 0.999, "eval_wer": 1.1993670886075949, "step": 1200 }, { "epoch": 8.227848101265822, "eval_loss": 0.016085166484117508, "eval_runtime": 321.3591, "eval_samples_per_second": 7.867, "eval_steps_per_second": 0.983, "eval_wer": 1.1839398734177216, "step": 1300 }, { "epoch": 8.860759493670885, "eval_loss": 0.014270616695284843, "eval_runtime": 314.8714, "eval_samples_per_second": 8.029, "eval_steps_per_second": 1.004, "eval_wer": 1.1499208860759493, "step": 1400 }, { "epoch": 9.49367088607595, "grad_norm": 1.2659544944763184, "learning_rate": 0.00014005347593582887, "loss": 0.0553, "step": 1500 }, { "epoch": 9.49367088607595, "eval_loss": 0.011028471402823925, "eval_runtime": 316.4518, "eval_samples_per_second": 7.989, "eval_steps_per_second": 0.999, "eval_wer": 1.1459651898734178, "step": 1500 }, { "epoch": 10.126582278481013, "eval_loss": 0.008157163858413696, "eval_runtime": 315.7243, "eval_samples_per_second": 8.007, "eval_steps_per_second": 1.001, "eval_wer": 1.0953322784810127, "step": 1600 }, { "epoch": 10.759493670886076, "eval_loss": 0.008831300772726536, "eval_runtime": 315.0431, "eval_samples_per_second": 8.024, "eval_steps_per_second": 1.003, "eval_wer": 1.1119462025316456, "step": 1700 }, { "epoch": 11.39240506329114, "eval_loss": 0.005905392114073038, "eval_runtime": 319.3821, "eval_samples_per_second": 7.915, "eval_steps_per_second": 0.989, "eval_wer": 1.0573575949367089, "step": 1800 }, { "epoch": 12.025316455696203, "eval_loss": 0.005364276003092527, "eval_runtime": 316.4189, "eval_samples_per_second": 7.989, "eval_steps_per_second": 0.999, "eval_wer": 1.0510284810126582, "step": 1900 }, { "epoch": 12.658227848101266, "grad_norm": 0.34013208746910095, "learning_rate": 5.983957219251336e-05, "loss": 0.0295, "step": 2000 }, { "epoch": 12.658227848101266, "eval_loss": 0.004200654104351997, "eval_runtime": 316.6068, "eval_samples_per_second": 7.985, "eval_steps_per_second": 0.998, "eval_wer": 1.0356012658227849, "step": 2000 }, { "epoch": 13.291139240506329, "eval_loss": 0.003917561378329992, "eval_runtime": 317.6825, "eval_samples_per_second": 7.958, "eval_steps_per_second": 0.995, "eval_wer": 1.035996835443038, "step": 2100 }, { "epoch": 13.924050632911392, "eval_loss": 0.0033297832123935223, "eval_runtime": 317.8207, "eval_samples_per_second": 7.954, "eval_steps_per_second": 0.994, "eval_wer": 1.0268987341772151, "step": 2200 }, { "epoch": 14.556962025316455, "eval_loss": 0.0030845776200294495, "eval_runtime": 316.4074, "eval_samples_per_second": 7.99, "eval_steps_per_second": 0.999, "eval_wer": 1.0237341772151898, "step": 2300 }, { "epoch": 15.0, "step": 2370, "total_flos": 7.984243073297488e+18, "train_loss": 0.8373936769831533, "train_runtime": 15099.3203, "train_samples_per_second": 2.511, "train_steps_per_second": 0.157 } ], "logging_steps": 500, "max_steps": 2370, "num_input_tokens_seen": 0, "num_train_epochs": 15, "save_steps": 400, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 7.984243073297488e+18, "train_batch_size": 8, "trial_name": null, "trial_params": null }