|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 27.11864406779661, |
|
"global_step": 8000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.36, |
|
"eval_loss": 1.4595407247543335, |
|
"eval_runtime": 81.6295, |
|
"eval_samples_per_second": 25.665, |
|
"eval_steps_per_second": 3.21, |
|
"eval_wer": 1.0039260592180599, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 0.0002982, |
|
"loss": 4.7778, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"eval_loss": 0.8082281947135925, |
|
"eval_runtime": 80.8384, |
|
"eval_samples_per_second": 25.916, |
|
"eval_steps_per_second": 3.241, |
|
"eval_wer": 1.0115055346529254, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"learning_rate": 0.00028011999999999997, |
|
"loss": 0.6408, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"eval_loss": 0.7031667828559875, |
|
"eval_runtime": 84.117, |
|
"eval_samples_per_second": 24.906, |
|
"eval_steps_per_second": 3.115, |
|
"eval_wer": 1.0078521184361198, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 5.08, |
|
"learning_rate": 0.00026011999999999997, |
|
"loss": 0.3937, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 5.42, |
|
"eval_loss": 0.6889204382896423, |
|
"eval_runtime": 80.1903, |
|
"eval_samples_per_second": 26.125, |
|
"eval_steps_per_second": 3.267, |
|
"eval_wer": 1.0432957085991603, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 6.78, |
|
"learning_rate": 0.00024011999999999997, |
|
"loss": 0.3, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 6.78, |
|
"eval_loss": 0.6820310354232788, |
|
"eval_runtime": 80.2232, |
|
"eval_samples_per_second": 26.115, |
|
"eval_steps_per_second": 3.266, |
|
"eval_wer": 1.0068706036316049, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 8.14, |
|
"eval_loss": 0.6669920086860657, |
|
"eval_runtime": 79.89, |
|
"eval_samples_per_second": 26.224, |
|
"eval_steps_per_second": 3.28, |
|
"eval_wer": 1.0196302960902994, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 8.47, |
|
"learning_rate": 0.00022011999999999997, |
|
"loss": 0.226, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 9.49, |
|
"eval_loss": 0.7215595841407776, |
|
"eval_runtime": 80.0002, |
|
"eval_samples_per_second": 26.187, |
|
"eval_steps_per_second": 3.275, |
|
"eval_wer": 1.0422051365941436, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 10.17, |
|
"learning_rate": 0.00020012, |
|
"loss": 0.197, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 10.85, |
|
"eval_loss": 0.7669464945793152, |
|
"eval_runtime": 80.1105, |
|
"eval_samples_per_second": 26.151, |
|
"eval_steps_per_second": 3.27, |
|
"eval_wer": 1.053383499645564, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 11.86, |
|
"learning_rate": 0.00018012, |
|
"loss": 0.165, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 12.2, |
|
"eval_loss": 0.7517344951629639, |
|
"eval_runtime": 79.716, |
|
"eval_samples_per_second": 26.281, |
|
"eval_steps_per_second": 3.287, |
|
"eval_wer": 1.0199574676918044, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 13.56, |
|
"learning_rate": 0.00016011999999999998, |
|
"loss": 0.1486, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 13.56, |
|
"eval_loss": 0.7124771475791931, |
|
"eval_runtime": 79.8981, |
|
"eval_samples_per_second": 26.221, |
|
"eval_steps_per_second": 3.279, |
|
"eval_wer": 1.0357162331642946, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 14.92, |
|
"eval_loss": 0.7447456121444702, |
|
"eval_runtime": 82.2103, |
|
"eval_samples_per_second": 25.483, |
|
"eval_steps_per_second": 3.187, |
|
"eval_wer": 1.0347347183597797, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 15.25, |
|
"learning_rate": 0.00014012, |
|
"loss": 0.122, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 16.27, |
|
"eval_loss": 0.6899322271347046, |
|
"eval_runtime": 79.6624, |
|
"eval_samples_per_second": 26.298, |
|
"eval_steps_per_second": 3.289, |
|
"eval_wer": 1.0440045804024212, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 16.95, |
|
"learning_rate": 0.00012011999999999998, |
|
"loss": 0.1069, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 17.63, |
|
"eval_loss": 0.7212241291999817, |
|
"eval_runtime": 80.3141, |
|
"eval_samples_per_second": 26.085, |
|
"eval_steps_per_second": 3.262, |
|
"eval_wer": 1.0350073613610338, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 18.64, |
|
"learning_rate": 0.00010011999999999998, |
|
"loss": 0.0961, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 18.98, |
|
"eval_loss": 0.7417359352111816, |
|
"eval_runtime": 80.0211, |
|
"eval_samples_per_second": 26.181, |
|
"eval_steps_per_second": 3.274, |
|
"eval_wer": 1.0408419215878728, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 20.34, |
|
"learning_rate": 8.012e-05, |
|
"loss": 0.086, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 20.34, |
|
"eval_loss": 0.7402010560035706, |
|
"eval_runtime": 80.0522, |
|
"eval_samples_per_second": 26.17, |
|
"eval_steps_per_second": 3.273, |
|
"eval_wer": 1.0355526473635421, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 21.69, |
|
"eval_loss": 0.7760854959487915, |
|
"eval_runtime": 80.138, |
|
"eval_samples_per_second": 26.142, |
|
"eval_steps_per_second": 3.269, |
|
"eval_wer": 1.0419870221931402, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 22.03, |
|
"learning_rate": 6.0119999999999994e-05, |
|
"loss": 0.0756, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 23.05, |
|
"eval_loss": 0.7345677614212036, |
|
"eval_runtime": 80.4841, |
|
"eval_samples_per_second": 26.03, |
|
"eval_steps_per_second": 3.255, |
|
"eval_wer": 1.036915862369813, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 23.73, |
|
"learning_rate": 4.012e-05, |
|
"loss": 0.0666, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 24.41, |
|
"eval_loss": 0.7506045699119568, |
|
"eval_runtime": 82.6434, |
|
"eval_samples_per_second": 25.35, |
|
"eval_steps_per_second": 3.17, |
|
"eval_wer": 1.0449315666066852, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 25.42, |
|
"learning_rate": 2.0119999999999997e-05, |
|
"loss": 0.0595, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 25.76, |
|
"eval_loss": 0.7319227457046509, |
|
"eval_runtime": 79.8082, |
|
"eval_samples_per_second": 26.25, |
|
"eval_steps_per_second": 3.283, |
|
"eval_wer": 1.0476034680189759, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 27.12, |
|
"learning_rate": 1.6e-07, |
|
"loss": 0.054, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 27.12, |
|
"eval_loss": 0.7346429228782654, |
|
"eval_runtime": 79.9162, |
|
"eval_samples_per_second": 26.215, |
|
"eval_steps_per_second": 3.278, |
|
"eval_wer": 1.0478761110202301, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 27.12, |
|
"step": 8000, |
|
"total_flos": 1.592873144248711e+19, |
|
"train_loss": 0.46973063707351687, |
|
"train_runtime": 10600.4383, |
|
"train_samples_per_second": 12.075, |
|
"train_steps_per_second": 0.755 |
|
} |
|
], |
|
"max_steps": 8000, |
|
"num_train_epochs": 28, |
|
"total_flos": 1.592873144248711e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|