|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 15.0, |
|
"eval_steps": 100, |
|
"global_step": 2370, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.6329113924050633, |
|
"eval_loss": 3.940936803817749, |
|
"eval_runtime": 314.2197, |
|
"eval_samples_per_second": 8.045, |
|
"eval_steps_per_second": 1.006, |
|
"eval_wer": 1.0, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.2658227848101267, |
|
"eval_loss": 3.044066905975342, |
|
"eval_runtime": 313.2217, |
|
"eval_samples_per_second": 8.071, |
|
"eval_steps_per_second": 1.009, |
|
"eval_wer": 1.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.8987341772151898, |
|
"eval_loss": 2.9164648056030273, |
|
"eval_runtime": 317.0806, |
|
"eval_samples_per_second": 7.973, |
|
"eval_steps_per_second": 0.997, |
|
"eval_wer": 1.0, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.5316455696202533, |
|
"eval_loss": 1.4924770593643188, |
|
"eval_runtime": 316.4362, |
|
"eval_samples_per_second": 7.989, |
|
"eval_steps_per_second": 0.999, |
|
"eval_wer": 1.9968354430379747, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 3.1645569620253164, |
|
"grad_norm": 0.8539223670959473, |
|
"learning_rate": 0.0002982, |
|
"loss": 3.7012, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 3.1645569620253164, |
|
"eval_loss": 0.30101653933525085, |
|
"eval_runtime": 312.8326, |
|
"eval_samples_per_second": 8.081, |
|
"eval_steps_per_second": 1.01, |
|
"eval_wer": 1.9446202531645569, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 3.7974683544303796, |
|
"eval_loss": 0.17126257717609406, |
|
"eval_runtime": 314.312, |
|
"eval_samples_per_second": 8.043, |
|
"eval_steps_per_second": 1.005, |
|
"eval_wer": 1.8259493670886076, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 4.430379746835443, |
|
"eval_loss": 0.09897234290838242, |
|
"eval_runtime": 312.6958, |
|
"eval_samples_per_second": 8.085, |
|
"eval_steps_per_second": 1.011, |
|
"eval_wer": 1.6162974683544304, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 5.063291139240507, |
|
"eval_loss": 0.06915320456027985, |
|
"eval_runtime": 317.6492, |
|
"eval_samples_per_second": 7.958, |
|
"eval_steps_per_second": 0.995, |
|
"eval_wer": 1.5439082278481013, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 5.69620253164557, |
|
"eval_loss": 0.046260952949523926, |
|
"eval_runtime": 321.8259, |
|
"eval_samples_per_second": 7.855, |
|
"eval_steps_per_second": 0.982, |
|
"eval_wer": 1.423259493670886, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 6.329113924050633, |
|
"grad_norm": 0.26903435587882996, |
|
"learning_rate": 0.00022026737967914436, |
|
"loss": 0.1686, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 6.329113924050633, |
|
"eval_loss": 0.038907092064619064, |
|
"eval_runtime": 320.4174, |
|
"eval_samples_per_second": 7.89, |
|
"eval_steps_per_second": 0.986, |
|
"eval_wer": 1.3469145569620253, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 6.962025316455696, |
|
"eval_loss": 0.029044821858406067, |
|
"eval_runtime": 320.649, |
|
"eval_samples_per_second": 7.884, |
|
"eval_steps_per_second": 0.986, |
|
"eval_wer": 1.3101265822784811, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 7.594936708860759, |
|
"eval_loss": 0.020351797342300415, |
|
"eval_runtime": 316.4006, |
|
"eval_samples_per_second": 7.99, |
|
"eval_steps_per_second": 0.999, |
|
"eval_wer": 1.1993670886075949, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 8.227848101265822, |
|
"eval_loss": 0.016085166484117508, |
|
"eval_runtime": 321.3591, |
|
"eval_samples_per_second": 7.867, |
|
"eval_steps_per_second": 0.983, |
|
"eval_wer": 1.1839398734177216, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 8.860759493670885, |
|
"eval_loss": 0.014270616695284843, |
|
"eval_runtime": 314.8714, |
|
"eval_samples_per_second": 8.029, |
|
"eval_steps_per_second": 1.004, |
|
"eval_wer": 1.1499208860759493, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 9.49367088607595, |
|
"grad_norm": 1.2659544944763184, |
|
"learning_rate": 0.00014005347593582887, |
|
"loss": 0.0553, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 9.49367088607595, |
|
"eval_loss": 0.011028471402823925, |
|
"eval_runtime": 316.4518, |
|
"eval_samples_per_second": 7.989, |
|
"eval_steps_per_second": 0.999, |
|
"eval_wer": 1.1459651898734178, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 10.126582278481013, |
|
"eval_loss": 0.008157163858413696, |
|
"eval_runtime": 315.7243, |
|
"eval_samples_per_second": 8.007, |
|
"eval_steps_per_second": 1.001, |
|
"eval_wer": 1.0953322784810127, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 10.759493670886076, |
|
"eval_loss": 0.008831300772726536, |
|
"eval_runtime": 315.0431, |
|
"eval_samples_per_second": 8.024, |
|
"eval_steps_per_second": 1.003, |
|
"eval_wer": 1.1119462025316456, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 11.39240506329114, |
|
"eval_loss": 0.005905392114073038, |
|
"eval_runtime": 319.3821, |
|
"eval_samples_per_second": 7.915, |
|
"eval_steps_per_second": 0.989, |
|
"eval_wer": 1.0573575949367089, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 12.025316455696203, |
|
"eval_loss": 0.005364276003092527, |
|
"eval_runtime": 316.4189, |
|
"eval_samples_per_second": 7.989, |
|
"eval_steps_per_second": 0.999, |
|
"eval_wer": 1.0510284810126582, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 12.658227848101266, |
|
"grad_norm": 0.34013208746910095, |
|
"learning_rate": 5.983957219251336e-05, |
|
"loss": 0.0295, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 12.658227848101266, |
|
"eval_loss": 0.004200654104351997, |
|
"eval_runtime": 316.6068, |
|
"eval_samples_per_second": 7.985, |
|
"eval_steps_per_second": 0.998, |
|
"eval_wer": 1.0356012658227849, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 13.291139240506329, |
|
"eval_loss": 0.003917561378329992, |
|
"eval_runtime": 317.6825, |
|
"eval_samples_per_second": 7.958, |
|
"eval_steps_per_second": 0.995, |
|
"eval_wer": 1.035996835443038, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 13.924050632911392, |
|
"eval_loss": 0.0033297832123935223, |
|
"eval_runtime": 317.8207, |
|
"eval_samples_per_second": 7.954, |
|
"eval_steps_per_second": 0.994, |
|
"eval_wer": 1.0268987341772151, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 14.556962025316455, |
|
"eval_loss": 0.0030845776200294495, |
|
"eval_runtime": 316.4074, |
|
"eval_samples_per_second": 7.99, |
|
"eval_steps_per_second": 0.999, |
|
"eval_wer": 1.0237341772151898, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"step": 2370, |
|
"total_flos": 7.984243073297488e+18, |
|
"train_loss": 0.8373936769831533, |
|
"train_runtime": 15099.3203, |
|
"train_samples_per_second": 2.511, |
|
"train_steps_per_second": 0.157 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2370, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 15, |
|
"save_steps": 400, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 7.984243073297488e+18, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|