|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 16.949152542372882, |
|
"global_step": 5000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.34, |
|
"eval_loss": 4.256950378417969, |
|
"eval_runtime": 93.7021, |
|
"eval_samples_per_second": 22.358, |
|
"eval_steps_per_second": 2.796, |
|
"eval_wer": 0.9999454713997492, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_loss": 3.3221993446350098, |
|
"eval_runtime": 95.7275, |
|
"eval_samples_per_second": 21.885, |
|
"eval_steps_per_second": 2.737, |
|
"eval_wer": 1.0005452860025084, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"eval_loss": 2.375530242919922, |
|
"eval_runtime": 93.1661, |
|
"eval_samples_per_second": 22.487, |
|
"eval_steps_per_second": 2.812, |
|
"eval_wer": 1.0, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"eval_loss": 1.407051682472229, |
|
"eval_runtime": 93.2199, |
|
"eval_samples_per_second": 22.474, |
|
"eval_steps_per_second": 2.811, |
|
"eval_wer": 1.0036534162168058, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 0.00028143749999999995, |
|
"loss": 3.8379, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"eval_loss": 1.2223190069198608, |
|
"eval_runtime": 92.5816, |
|
"eval_samples_per_second": 22.629, |
|
"eval_steps_per_second": 2.83, |
|
"eval_wer": 1.009106276241889, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"eval_loss": 1.0884724855422974, |
|
"eval_runtime": 94.6436, |
|
"eval_samples_per_second": 22.136, |
|
"eval_steps_per_second": 2.768, |
|
"eval_wer": 1.0272643001254158, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"eval_loss": 0.9818190336227417, |
|
"eval_runtime": 92.6738, |
|
"eval_samples_per_second": 22.606, |
|
"eval_steps_per_second": 2.827, |
|
"eval_wer": 1.009760619444899, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"eval_loss": 0.9154127836227417, |
|
"eval_runtime": 93.1212, |
|
"eval_samples_per_second": 22.498, |
|
"eval_steps_per_second": 2.814, |
|
"eval_wer": 1.0109602486504172, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"eval_loss": 0.8868876695632935, |
|
"eval_runtime": 94.7308, |
|
"eval_samples_per_second": 22.115, |
|
"eval_steps_per_second": 2.766, |
|
"eval_wer": 1.0153770652707346, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"learning_rate": 0.0002501875, |
|
"loss": 0.5888, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"eval_loss": 0.897603452205658, |
|
"eval_runtime": 92.6891, |
|
"eval_samples_per_second": 22.602, |
|
"eval_steps_per_second": 2.827, |
|
"eval_wer": 1.0148863078684771, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"eval_loss": 0.8322562575340271, |
|
"eval_runtime": 91.8444, |
|
"eval_samples_per_second": 22.81, |
|
"eval_steps_per_second": 2.853, |
|
"eval_wer": 1.0124325208571896, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"eval_loss": 0.8487809300422668, |
|
"eval_runtime": 92.7251, |
|
"eval_samples_per_second": 22.594, |
|
"eval_steps_per_second": 2.826, |
|
"eval_wer": 1.0086700474398822, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 4.41, |
|
"eval_loss": 0.8659328818321228, |
|
"eval_runtime": 91.959, |
|
"eval_samples_per_second": 22.782, |
|
"eval_steps_per_second": 2.849, |
|
"eval_wer": 1.0069251322318555, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 4.75, |
|
"eval_loss": 0.8611888289451599, |
|
"eval_runtime": 92.7084, |
|
"eval_samples_per_second": 22.598, |
|
"eval_steps_per_second": 2.826, |
|
"eval_wer": 1.0042532308195649, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 5.08, |
|
"learning_rate": 0.00021893749999999998, |
|
"loss": 0.3706, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 5.08, |
|
"eval_loss": 0.8300378918647766, |
|
"eval_runtime": 91.9965, |
|
"eval_samples_per_second": 22.773, |
|
"eval_steps_per_second": 2.848, |
|
"eval_wer": 1.0182670810840286, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 5.42, |
|
"eval_loss": 0.8417208194732666, |
|
"eval_runtime": 94.3323, |
|
"eval_samples_per_second": 22.209, |
|
"eval_steps_per_second": 2.777, |
|
"eval_wer": 1.009106276241889, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 5.76, |
|
"eval_loss": 0.8261289000511169, |
|
"eval_runtime": 91.9157, |
|
"eval_samples_per_second": 22.793, |
|
"eval_steps_per_second": 2.85, |
|
"eval_wer": 1.008724576040133, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 6.1, |
|
"eval_loss": 0.854759931564331, |
|
"eval_runtime": 92.0581, |
|
"eval_samples_per_second": 22.757, |
|
"eval_steps_per_second": 2.846, |
|
"eval_wer": 1.006761546431103, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 6.44, |
|
"eval_loss": 0.7983754873275757, |
|
"eval_runtime": 92.3841, |
|
"eval_samples_per_second": 22.677, |
|
"eval_steps_per_second": 2.836, |
|
"eval_wer": 1.011014777250668, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 6.78, |
|
"learning_rate": 0.00018768749999999997, |
|
"loss": 0.2671, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 6.78, |
|
"eval_loss": 0.8387603163719177, |
|
"eval_runtime": 92.0056, |
|
"eval_samples_per_second": 22.77, |
|
"eval_steps_per_second": 2.848, |
|
"eval_wer": 1.0116691204536779, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 7.12, |
|
"eval_loss": 0.8498700857162476, |
|
"eval_runtime": 91.7074, |
|
"eval_samples_per_second": 22.844, |
|
"eval_steps_per_second": 2.857, |
|
"eval_wer": 1.0072523038333605, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 7.46, |
|
"eval_loss": 0.8480040431022644, |
|
"eval_runtime": 92.1173, |
|
"eval_samples_per_second": 22.743, |
|
"eval_steps_per_second": 2.844, |
|
"eval_wer": 1.0112328916516713, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 7.8, |
|
"eval_loss": 0.7929303646087646, |
|
"eval_runtime": 92.2635, |
|
"eval_samples_per_second": 22.707, |
|
"eval_steps_per_second": 2.84, |
|
"eval_wer": 1.0098696766454005, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 8.14, |
|
"eval_loss": 0.8658961057662964, |
|
"eval_runtime": 93.603, |
|
"eval_samples_per_second": 22.382, |
|
"eval_steps_per_second": 2.799, |
|
"eval_wer": 1.0088881618408856, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 8.47, |
|
"learning_rate": 0.0001564375, |
|
"loss": 0.2017, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 8.47, |
|
"eval_loss": 0.8583062291145325, |
|
"eval_runtime": 96.9455, |
|
"eval_samples_per_second": 21.61, |
|
"eval_steps_per_second": 2.703, |
|
"eval_wer": 1.0062707890288456, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 8.81, |
|
"eval_loss": 0.8326291441917419, |
|
"eval_runtime": 93.0622, |
|
"eval_samples_per_second": 22.512, |
|
"eval_steps_per_second": 2.815, |
|
"eval_wer": 1.0109602486504172, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 9.15, |
|
"eval_loss": 0.8759247064590454, |
|
"eval_runtime": 92.8699, |
|
"eval_samples_per_second": 22.558, |
|
"eval_steps_per_second": 2.821, |
|
"eval_wer": 1.0036534162168058, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 9.49, |
|
"eval_loss": 0.85763019323349, |
|
"eval_runtime": 91.9599, |
|
"eval_samples_per_second": 22.782, |
|
"eval_steps_per_second": 2.849, |
|
"eval_wer": 1.009978733845902, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 9.83, |
|
"eval_loss": 0.8777465224266052, |
|
"eval_runtime": 92.008, |
|
"eval_samples_per_second": 22.77, |
|
"eval_steps_per_second": 2.848, |
|
"eval_wer": 1.0224112547030917, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 10.17, |
|
"learning_rate": 0.0001251875, |
|
"loss": 0.1682, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 10.17, |
|
"eval_loss": 0.886458694934845, |
|
"eval_runtime": 92.2385, |
|
"eval_samples_per_second": 22.713, |
|
"eval_steps_per_second": 2.84, |
|
"eval_wer": 1.0280277005289273, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 10.51, |
|
"eval_loss": 0.9213446378707886, |
|
"eval_runtime": 93.1739, |
|
"eval_samples_per_second": 22.485, |
|
"eval_steps_per_second": 2.812, |
|
"eval_wer": 1.006761546431103, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 10.85, |
|
"eval_loss": 0.8880829215049744, |
|
"eval_runtime": 93.2784, |
|
"eval_samples_per_second": 22.46, |
|
"eval_steps_per_second": 2.809, |
|
"eval_wer": 1.0151589508697312, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 11.19, |
|
"eval_loss": 0.9088767170906067, |
|
"eval_runtime": 93.7626, |
|
"eval_samples_per_second": 22.344, |
|
"eval_steps_per_second": 2.794, |
|
"eval_wer": 1.010033262446153, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 11.53, |
|
"eval_loss": 0.8973812460899353, |
|
"eval_runtime": 92.2784, |
|
"eval_samples_per_second": 22.703, |
|
"eval_steps_per_second": 2.839, |
|
"eval_wer": 1.0127051638584437, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 11.86, |
|
"learning_rate": 9.393749999999999e-05, |
|
"loss": 0.1347, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 11.86, |
|
"eval_loss": 0.9128761887550354, |
|
"eval_runtime": 92.834, |
|
"eval_samples_per_second": 22.567, |
|
"eval_steps_per_second": 2.822, |
|
"eval_wer": 1.012323463656688, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 12.2, |
|
"eval_loss": 0.9939002394676208, |
|
"eval_runtime": 91.782, |
|
"eval_samples_per_second": 22.826, |
|
"eval_steps_per_second": 2.855, |
|
"eval_wer": 1.0169038660777578, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 12.54, |
|
"eval_loss": 0.913511335849762, |
|
"eval_runtime": 93.2623, |
|
"eval_samples_per_second": 22.464, |
|
"eval_steps_per_second": 2.809, |
|
"eval_wer": 1.0083428758383772, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 12.88, |
|
"eval_loss": 0.9228624105453491, |
|
"eval_runtime": 92.6142, |
|
"eval_samples_per_second": 22.621, |
|
"eval_steps_per_second": 2.829, |
|
"eval_wer": 1.0118327062544306, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 13.22, |
|
"eval_loss": 0.9609713554382324, |
|
"eval_runtime": 91.8115, |
|
"eval_samples_per_second": 22.818, |
|
"eval_steps_per_second": 2.854, |
|
"eval_wer": 1.010687605649163, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 13.56, |
|
"learning_rate": 6.26875e-05, |
|
"loss": 0.1049, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 13.56, |
|
"eval_loss": 0.9235594868659973, |
|
"eval_runtime": 92.7509, |
|
"eval_samples_per_second": 22.587, |
|
"eval_steps_per_second": 2.825, |
|
"eval_wer": 1.0098696766454005, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 13.9, |
|
"eval_loss": 0.8966746926307678, |
|
"eval_runtime": 94.847, |
|
"eval_samples_per_second": 22.088, |
|
"eval_steps_per_second": 2.762, |
|
"eval_wer": 1.0084519330388788, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 14.24, |
|
"eval_loss": 0.8979936242103577, |
|
"eval_runtime": 92.5697, |
|
"eval_samples_per_second": 22.632, |
|
"eval_steps_per_second": 2.83, |
|
"eval_wer": 1.0081247614373738, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 14.58, |
|
"eval_loss": 0.9023324251174927, |
|
"eval_runtime": 91.8786, |
|
"eval_samples_per_second": 22.802, |
|
"eval_steps_per_second": 2.852, |
|
"eval_wer": 1.0081247614373738, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 14.92, |
|
"eval_loss": 0.9215817451477051, |
|
"eval_runtime": 92.0777, |
|
"eval_samples_per_second": 22.753, |
|
"eval_steps_per_second": 2.845, |
|
"eval_wer": 1.0078521184361198, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 15.25, |
|
"learning_rate": 3.14375e-05, |
|
"loss": 0.0917, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 15.25, |
|
"eval_loss": 0.9442654252052307, |
|
"eval_runtime": 92.8739, |
|
"eval_samples_per_second": 22.557, |
|
"eval_steps_per_second": 2.821, |
|
"eval_wer": 1.0089972190413872, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 15.59, |
|
"eval_loss": 0.9389934539794922, |
|
"eval_runtime": 91.9159, |
|
"eval_samples_per_second": 22.793, |
|
"eval_steps_per_second": 2.85, |
|
"eval_wer": 1.009051747641638, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 15.93, |
|
"eval_loss": 0.9153040647506714, |
|
"eval_runtime": 92.1522, |
|
"eval_samples_per_second": 22.734, |
|
"eval_steps_per_second": 2.843, |
|
"eval_wer": 1.0082883472381263, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 16.27, |
|
"eval_loss": 0.9189886450767517, |
|
"eval_runtime": 92.2125, |
|
"eval_samples_per_second": 22.719, |
|
"eval_steps_per_second": 2.841, |
|
"eval_wer": 1.0091608048421397, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 16.61, |
|
"eval_loss": 0.9194196462631226, |
|
"eval_runtime": 92.6007, |
|
"eval_samples_per_second": 22.624, |
|
"eval_steps_per_second": 2.829, |
|
"eval_wer": 1.009051747641638, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 16.95, |
|
"learning_rate": 1.8749999999999998e-07, |
|
"loss": 0.0786, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 16.95, |
|
"eval_loss": 0.9201112985610962, |
|
"eval_runtime": 91.9659, |
|
"eval_samples_per_second": 22.78, |
|
"eval_steps_per_second": 2.849, |
|
"eval_wer": 1.009106276241889, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 16.95, |
|
"step": 5000, |
|
"total_flos": 1.0048485919914449e+19, |
|
"train_loss": 0.584431241607666, |
|
"train_runtime": 8680.4794, |
|
"train_samples_per_second": 9.216, |
|
"train_steps_per_second": 0.576 |
|
} |
|
], |
|
"max_steps": 5000, |
|
"num_train_epochs": 17, |
|
"total_flos": 1.0048485919914449e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|