{ "best_metric": null, "best_model_checkpoint": null, "epoch": 16.949152542372882, "global_step": 5000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.34, "eval_loss": 4.256950378417969, "eval_runtime": 93.7021, "eval_samples_per_second": 22.358, "eval_steps_per_second": 2.796, "eval_wer": 0.9999454713997492, "step": 100 }, { "epoch": 0.68, "eval_loss": 3.3221993446350098, "eval_runtime": 95.7275, "eval_samples_per_second": 21.885, "eval_steps_per_second": 2.737, "eval_wer": 1.0005452860025084, "step": 200 }, { "epoch": 1.02, "eval_loss": 2.375530242919922, "eval_runtime": 93.1661, "eval_samples_per_second": 22.487, "eval_steps_per_second": 2.812, "eval_wer": 1.0, "step": 300 }, { "epoch": 1.36, "eval_loss": 1.407051682472229, "eval_runtime": 93.2199, "eval_samples_per_second": 22.474, "eval_steps_per_second": 2.811, "eval_wer": 1.0036534162168058, "step": 400 }, { "epoch": 1.69, "learning_rate": 0.00028143749999999995, "loss": 3.8379, "step": 500 }, { "epoch": 1.69, "eval_loss": 1.2223190069198608, "eval_runtime": 92.5816, "eval_samples_per_second": 22.629, "eval_steps_per_second": 2.83, "eval_wer": 1.009106276241889, "step": 500 }, { "epoch": 2.03, "eval_loss": 1.0884724855422974, "eval_runtime": 94.6436, "eval_samples_per_second": 22.136, "eval_steps_per_second": 2.768, "eval_wer": 1.0272643001254158, "step": 600 }, { "epoch": 2.37, "eval_loss": 0.9818190336227417, "eval_runtime": 92.6738, "eval_samples_per_second": 22.606, "eval_steps_per_second": 2.827, "eval_wer": 1.009760619444899, "step": 700 }, { "epoch": 2.71, "eval_loss": 0.9154127836227417, "eval_runtime": 93.1212, "eval_samples_per_second": 22.498, "eval_steps_per_second": 2.814, "eval_wer": 1.0109602486504172, "step": 800 }, { "epoch": 3.05, "eval_loss": 0.8868876695632935, "eval_runtime": 94.7308, "eval_samples_per_second": 22.115, "eval_steps_per_second": 2.766, "eval_wer": 1.0153770652707346, "step": 900 }, { "epoch": 3.39, "learning_rate": 0.0002501875, "loss": 0.5888, "step": 1000 }, { "epoch": 3.39, "eval_loss": 0.897603452205658, "eval_runtime": 92.6891, "eval_samples_per_second": 22.602, "eval_steps_per_second": 2.827, "eval_wer": 1.0148863078684771, "step": 1000 }, { "epoch": 3.73, "eval_loss": 0.8322562575340271, "eval_runtime": 91.8444, "eval_samples_per_second": 22.81, "eval_steps_per_second": 2.853, "eval_wer": 1.0124325208571896, "step": 1100 }, { "epoch": 4.07, "eval_loss": 0.8487809300422668, "eval_runtime": 92.7251, "eval_samples_per_second": 22.594, "eval_steps_per_second": 2.826, "eval_wer": 1.0086700474398822, "step": 1200 }, { "epoch": 4.41, "eval_loss": 0.8659328818321228, "eval_runtime": 91.959, "eval_samples_per_second": 22.782, "eval_steps_per_second": 2.849, "eval_wer": 1.0069251322318555, "step": 1300 }, { "epoch": 4.75, "eval_loss": 0.8611888289451599, "eval_runtime": 92.7084, "eval_samples_per_second": 22.598, "eval_steps_per_second": 2.826, "eval_wer": 1.0042532308195649, "step": 1400 }, { "epoch": 5.08, "learning_rate": 0.00021893749999999998, "loss": 0.3706, "step": 1500 }, { "epoch": 5.08, "eval_loss": 0.8300378918647766, "eval_runtime": 91.9965, "eval_samples_per_second": 22.773, "eval_steps_per_second": 2.848, "eval_wer": 1.0182670810840286, "step": 1500 }, { "epoch": 5.42, "eval_loss": 0.8417208194732666, "eval_runtime": 94.3323, "eval_samples_per_second": 22.209, "eval_steps_per_second": 2.777, "eval_wer": 1.009106276241889, "step": 1600 }, { "epoch": 5.76, "eval_loss": 0.8261289000511169, "eval_runtime": 91.9157, "eval_samples_per_second": 22.793, "eval_steps_per_second": 2.85, "eval_wer": 1.008724576040133, "step": 1700 }, { "epoch": 6.1, "eval_loss": 0.854759931564331, "eval_runtime": 92.0581, "eval_samples_per_second": 22.757, "eval_steps_per_second": 2.846, "eval_wer": 1.006761546431103, "step": 1800 }, { "epoch": 6.44, "eval_loss": 0.7983754873275757, "eval_runtime": 92.3841, "eval_samples_per_second": 22.677, "eval_steps_per_second": 2.836, "eval_wer": 1.011014777250668, "step": 1900 }, { "epoch": 6.78, "learning_rate": 0.00018768749999999997, "loss": 0.2671, "step": 2000 }, { "epoch": 6.78, "eval_loss": 0.8387603163719177, "eval_runtime": 92.0056, "eval_samples_per_second": 22.77, "eval_steps_per_second": 2.848, "eval_wer": 1.0116691204536779, "step": 2000 }, { "epoch": 7.12, "eval_loss": 0.8498700857162476, "eval_runtime": 91.7074, "eval_samples_per_second": 22.844, "eval_steps_per_second": 2.857, "eval_wer": 1.0072523038333605, "step": 2100 }, { "epoch": 7.46, "eval_loss": 0.8480040431022644, "eval_runtime": 92.1173, "eval_samples_per_second": 22.743, "eval_steps_per_second": 2.844, "eval_wer": 1.0112328916516713, "step": 2200 }, { "epoch": 7.8, "eval_loss": 0.7929303646087646, "eval_runtime": 92.2635, "eval_samples_per_second": 22.707, "eval_steps_per_second": 2.84, "eval_wer": 1.0098696766454005, "step": 2300 }, { "epoch": 8.14, "eval_loss": 0.8658961057662964, "eval_runtime": 93.603, "eval_samples_per_second": 22.382, "eval_steps_per_second": 2.799, "eval_wer": 1.0088881618408856, "step": 2400 }, { "epoch": 8.47, "learning_rate": 0.0001564375, "loss": 0.2017, "step": 2500 }, { "epoch": 8.47, "eval_loss": 0.8583062291145325, "eval_runtime": 96.9455, "eval_samples_per_second": 21.61, "eval_steps_per_second": 2.703, "eval_wer": 1.0062707890288456, "step": 2500 }, { "epoch": 8.81, "eval_loss": 0.8326291441917419, "eval_runtime": 93.0622, "eval_samples_per_second": 22.512, "eval_steps_per_second": 2.815, "eval_wer": 1.0109602486504172, "step": 2600 }, { "epoch": 9.15, "eval_loss": 0.8759247064590454, "eval_runtime": 92.8699, "eval_samples_per_second": 22.558, "eval_steps_per_second": 2.821, "eval_wer": 1.0036534162168058, "step": 2700 }, { "epoch": 9.49, "eval_loss": 0.85763019323349, "eval_runtime": 91.9599, "eval_samples_per_second": 22.782, "eval_steps_per_second": 2.849, "eval_wer": 1.009978733845902, "step": 2800 }, { "epoch": 9.83, "eval_loss": 0.8777465224266052, "eval_runtime": 92.008, "eval_samples_per_second": 22.77, "eval_steps_per_second": 2.848, "eval_wer": 1.0224112547030917, "step": 2900 }, { "epoch": 10.17, "learning_rate": 0.0001251875, "loss": 0.1682, "step": 3000 }, { "epoch": 10.17, "eval_loss": 0.886458694934845, "eval_runtime": 92.2385, "eval_samples_per_second": 22.713, "eval_steps_per_second": 2.84, "eval_wer": 1.0280277005289273, "step": 3000 }, { "epoch": 10.51, "eval_loss": 0.9213446378707886, "eval_runtime": 93.1739, "eval_samples_per_second": 22.485, "eval_steps_per_second": 2.812, "eval_wer": 1.006761546431103, "step": 3100 }, { "epoch": 10.85, "eval_loss": 0.8880829215049744, "eval_runtime": 93.2784, "eval_samples_per_second": 22.46, "eval_steps_per_second": 2.809, "eval_wer": 1.0151589508697312, "step": 3200 }, { "epoch": 11.19, "eval_loss": 0.9088767170906067, "eval_runtime": 93.7626, "eval_samples_per_second": 22.344, "eval_steps_per_second": 2.794, "eval_wer": 1.010033262446153, "step": 3300 }, { "epoch": 11.53, "eval_loss": 0.8973812460899353, "eval_runtime": 92.2784, "eval_samples_per_second": 22.703, "eval_steps_per_second": 2.839, "eval_wer": 1.0127051638584437, "step": 3400 }, { "epoch": 11.86, "learning_rate": 9.393749999999999e-05, "loss": 0.1347, "step": 3500 }, { "epoch": 11.86, "eval_loss": 0.9128761887550354, "eval_runtime": 92.834, "eval_samples_per_second": 22.567, "eval_steps_per_second": 2.822, "eval_wer": 1.012323463656688, "step": 3500 }, { "epoch": 12.2, "eval_loss": 0.9939002394676208, "eval_runtime": 91.782, "eval_samples_per_second": 22.826, "eval_steps_per_second": 2.855, "eval_wer": 1.0169038660777578, "step": 3600 }, { "epoch": 12.54, "eval_loss": 0.913511335849762, "eval_runtime": 93.2623, "eval_samples_per_second": 22.464, "eval_steps_per_second": 2.809, "eval_wer": 1.0083428758383772, "step": 3700 }, { "epoch": 12.88, "eval_loss": 0.9228624105453491, "eval_runtime": 92.6142, "eval_samples_per_second": 22.621, "eval_steps_per_second": 2.829, "eval_wer": 1.0118327062544306, "step": 3800 }, { "epoch": 13.22, "eval_loss": 0.9609713554382324, "eval_runtime": 91.8115, "eval_samples_per_second": 22.818, "eval_steps_per_second": 2.854, "eval_wer": 1.010687605649163, "step": 3900 }, { "epoch": 13.56, "learning_rate": 6.26875e-05, "loss": 0.1049, "step": 4000 }, { "epoch": 13.56, "eval_loss": 0.9235594868659973, "eval_runtime": 92.7509, "eval_samples_per_second": 22.587, "eval_steps_per_second": 2.825, "eval_wer": 1.0098696766454005, "step": 4000 }, { "epoch": 13.9, "eval_loss": 0.8966746926307678, "eval_runtime": 94.847, "eval_samples_per_second": 22.088, "eval_steps_per_second": 2.762, "eval_wer": 1.0084519330388788, "step": 4100 }, { "epoch": 14.24, "eval_loss": 0.8979936242103577, "eval_runtime": 92.5697, "eval_samples_per_second": 22.632, "eval_steps_per_second": 2.83, "eval_wer": 1.0081247614373738, "step": 4200 }, { "epoch": 14.58, "eval_loss": 0.9023324251174927, "eval_runtime": 91.8786, "eval_samples_per_second": 22.802, "eval_steps_per_second": 2.852, "eval_wer": 1.0081247614373738, "step": 4300 }, { "epoch": 14.92, "eval_loss": 0.9215817451477051, "eval_runtime": 92.0777, "eval_samples_per_second": 22.753, "eval_steps_per_second": 2.845, "eval_wer": 1.0078521184361198, "step": 4400 }, { "epoch": 15.25, "learning_rate": 3.14375e-05, "loss": 0.0917, "step": 4500 }, { "epoch": 15.25, "eval_loss": 0.9442654252052307, "eval_runtime": 92.8739, "eval_samples_per_second": 22.557, "eval_steps_per_second": 2.821, "eval_wer": 1.0089972190413872, "step": 4500 }, { "epoch": 15.59, "eval_loss": 0.9389934539794922, "eval_runtime": 91.9159, "eval_samples_per_second": 22.793, "eval_steps_per_second": 2.85, "eval_wer": 1.009051747641638, "step": 4600 }, { "epoch": 15.93, "eval_loss": 0.9153040647506714, "eval_runtime": 92.1522, "eval_samples_per_second": 22.734, "eval_steps_per_second": 2.843, "eval_wer": 1.0082883472381263, "step": 4700 }, { "epoch": 16.27, "eval_loss": 0.9189886450767517, "eval_runtime": 92.2125, "eval_samples_per_second": 22.719, "eval_steps_per_second": 2.841, "eval_wer": 1.0091608048421397, "step": 4800 }, { "epoch": 16.61, "eval_loss": 0.9194196462631226, "eval_runtime": 92.6007, "eval_samples_per_second": 22.624, "eval_steps_per_second": 2.829, "eval_wer": 1.009051747641638, "step": 4900 }, { "epoch": 16.95, "learning_rate": 1.8749999999999998e-07, "loss": 0.0786, "step": 5000 }, { "epoch": 16.95, "eval_loss": 0.9201112985610962, "eval_runtime": 91.9659, "eval_samples_per_second": 22.78, "eval_steps_per_second": 2.849, "eval_wer": 1.009106276241889, "step": 5000 }, { "epoch": 16.95, "step": 5000, "total_flos": 1.0048485919914449e+19, "train_loss": 0.584431241607666, "train_runtime": 8680.4794, "train_samples_per_second": 9.216, "train_steps_per_second": 0.576 } ], "max_steps": 5000, "num_train_epochs": 17, "total_flos": 1.0048485919914449e+19, "trial_name": null, "trial_params": null }