|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 15.0, |
|
"global_step": 2100, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.36, |
|
"eval_loss": 4.132408142089844, |
|
"eval_runtime": 7.7892, |
|
"eval_samples_per_second": 33.893, |
|
"eval_steps_per_second": 4.237, |
|
"eval_wer": 1.0, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_loss": 3.357618570327759, |
|
"eval_runtime": 7.7699, |
|
"eval_samples_per_second": 33.977, |
|
"eval_steps_per_second": 4.247, |
|
"eval_wer": 1.0, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"eval_loss": 3.0935420989990234, |
|
"eval_runtime": 8.3088, |
|
"eval_samples_per_second": 31.773, |
|
"eval_steps_per_second": 3.972, |
|
"eval_wer": 1.0, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"eval_loss": 3.000765085220337, |
|
"eval_runtime": 7.8476, |
|
"eval_samples_per_second": 33.641, |
|
"eval_steps_per_second": 4.205, |
|
"eval_wer": 1.2494669509594882, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"eval_loss": 2.8665499687194824, |
|
"eval_runtime": 7.9301, |
|
"eval_samples_per_second": 33.291, |
|
"eval_steps_per_second": 4.161, |
|
"eval_wer": 1.0874200426439233, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"eval_loss": 2.7977161407470703, |
|
"eval_runtime": 7.8406, |
|
"eval_samples_per_second": 33.671, |
|
"eval_steps_per_second": 4.209, |
|
"eval_wer": 1.537313432835821, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"eval_loss": 2.618842601776123, |
|
"eval_runtime": 8.0288, |
|
"eval_samples_per_second": 32.882, |
|
"eval_steps_per_second": 4.11, |
|
"eval_wer": 1.2345415778251598, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"eval_loss": 2.3096766471862793, |
|
"eval_runtime": 8.0346, |
|
"eval_samples_per_second": 32.858, |
|
"eval_steps_per_second": 4.107, |
|
"eval_wer": 1.1279317697228144, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"eval_loss": 1.8073012828826904, |
|
"eval_runtime": 7.9049, |
|
"eval_samples_per_second": 33.397, |
|
"eval_steps_per_second": 4.175, |
|
"eval_wer": 1.251599147121535, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"learning_rate": 0.0002409, |
|
"loss": 3.5589, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"eval_loss": 1.3744713068008423, |
|
"eval_runtime": 7.9139, |
|
"eval_samples_per_second": 33.359, |
|
"eval_steps_per_second": 4.17, |
|
"eval_wer": 1.0895522388059702, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"eval_loss": 1.1971436738967896, |
|
"eval_runtime": 8.4443, |
|
"eval_samples_per_second": 31.264, |
|
"eval_steps_per_second": 3.908, |
|
"eval_wer": 1.2921108742004264, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 4.29, |
|
"eval_loss": 1.0361448526382446, |
|
"eval_runtime": 7.8907, |
|
"eval_samples_per_second": 33.457, |
|
"eval_steps_per_second": 4.182, |
|
"eval_wer": 0.9872068230277186, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 4.64, |
|
"eval_loss": 1.0113328695297241, |
|
"eval_runtime": 8.6808, |
|
"eval_samples_per_second": 30.412, |
|
"eval_steps_per_second": 3.802, |
|
"eval_wer": 1.1556503198294243, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 0.9761010408401489, |
|
"eval_runtime": 8.374, |
|
"eval_samples_per_second": 31.526, |
|
"eval_steps_per_second": 3.941, |
|
"eval_wer": 0.9509594882729211, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 5.36, |
|
"eval_loss": 0.8795022368431091, |
|
"eval_runtime": 8.0897, |
|
"eval_samples_per_second": 32.634, |
|
"eval_steps_per_second": 4.079, |
|
"eval_wer": 1.1279317697228144, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 5.71, |
|
"eval_loss": 0.8115519881248474, |
|
"eval_runtime": 7.9853, |
|
"eval_samples_per_second": 33.061, |
|
"eval_steps_per_second": 4.133, |
|
"eval_wer": 0.8869936034115139, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 6.07, |
|
"eval_loss": 0.7683095932006836, |
|
"eval_runtime": 8.0654, |
|
"eval_samples_per_second": 32.732, |
|
"eval_steps_per_second": 4.092, |
|
"eval_wer": 0.9275053304904051, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 6.43, |
|
"eval_loss": 0.7249290943145752, |
|
"eval_runtime": 7.8449, |
|
"eval_samples_per_second": 33.652, |
|
"eval_steps_per_second": 4.207, |
|
"eval_wer": 1.0255863539445629, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 6.79, |
|
"eval_loss": 0.7122178077697754, |
|
"eval_runtime": 8.1435, |
|
"eval_samples_per_second": 32.419, |
|
"eval_steps_per_second": 4.052, |
|
"eval_wer": 0.9211087420042644, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 7.14, |
|
"learning_rate": 0.00016634999999999998, |
|
"loss": 1.5095, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 7.14, |
|
"eval_loss": 0.7041318416595459, |
|
"eval_runtime": 9.0145, |
|
"eval_samples_per_second": 29.286, |
|
"eval_steps_per_second": 3.661, |
|
"eval_wer": 1.0319829424307037, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 7.5, |
|
"eval_loss": 0.678531289100647, |
|
"eval_runtime": 8.7862, |
|
"eval_samples_per_second": 30.047, |
|
"eval_steps_per_second": 3.756, |
|
"eval_wer": 0.8699360341151386, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 7.86, |
|
"eval_loss": 0.7056036591529846, |
|
"eval_runtime": 8.0033, |
|
"eval_samples_per_second": 32.986, |
|
"eval_steps_per_second": 4.123, |
|
"eval_wer": 0.9680170575692963, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 8.21, |
|
"eval_loss": 0.6487303972244263, |
|
"eval_runtime": 8.7104, |
|
"eval_samples_per_second": 30.309, |
|
"eval_steps_per_second": 3.789, |
|
"eval_wer": 0.8550106609808102, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 8.57, |
|
"eval_loss": 0.5972908139228821, |
|
"eval_runtime": 7.951, |
|
"eval_samples_per_second": 33.204, |
|
"eval_steps_per_second": 4.15, |
|
"eval_wer": 0.7889125799573561, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 8.93, |
|
"eval_loss": 0.5955255627632141, |
|
"eval_runtime": 7.9427, |
|
"eval_samples_per_second": 33.238, |
|
"eval_steps_per_second": 4.155, |
|
"eval_wer": 0.8443496801705757, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 9.29, |
|
"eval_loss": 0.5822768211364746, |
|
"eval_runtime": 7.8596, |
|
"eval_samples_per_second": 33.59, |
|
"eval_steps_per_second": 4.199, |
|
"eval_wer": 0.8017057569296375, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 9.64, |
|
"eval_loss": 0.5886873006820679, |
|
"eval_runtime": 7.8662, |
|
"eval_samples_per_second": 33.561, |
|
"eval_steps_per_second": 4.195, |
|
"eval_wer": 0.7569296375266524, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_loss": 0.5869713425636292, |
|
"eval_runtime": 7.8909, |
|
"eval_samples_per_second": 33.456, |
|
"eval_steps_per_second": 4.182, |
|
"eval_wer": 0.7569296375266524, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 10.36, |
|
"eval_loss": 0.5845889449119568, |
|
"eval_runtime": 7.8445, |
|
"eval_samples_per_second": 33.654, |
|
"eval_steps_per_second": 4.207, |
|
"eval_wer": 0.7484008528784648, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 10.71, |
|
"learning_rate": 9.18e-05, |
|
"loss": 1.1157, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 10.71, |
|
"eval_loss": 0.5864734053611755, |
|
"eval_runtime": 8.0229, |
|
"eval_samples_per_second": 32.906, |
|
"eval_steps_per_second": 4.113, |
|
"eval_wer": 0.7547974413646056, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 11.07, |
|
"eval_loss": 0.5586370825767517, |
|
"eval_runtime": 7.8673, |
|
"eval_samples_per_second": 33.557, |
|
"eval_steps_per_second": 4.195, |
|
"eval_wer": 0.7334754797441365, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 11.43, |
|
"eval_loss": 0.5573432445526123, |
|
"eval_runtime": 7.8679, |
|
"eval_samples_per_second": 33.554, |
|
"eval_steps_per_second": 4.194, |
|
"eval_wer": 0.744136460554371, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 11.79, |
|
"eval_loss": 0.5594019889831543, |
|
"eval_runtime": 7.9618, |
|
"eval_samples_per_second": 33.158, |
|
"eval_steps_per_second": 4.145, |
|
"eval_wer": 0.7292110874200426, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 12.14, |
|
"eval_loss": 0.5614868998527527, |
|
"eval_runtime": 7.8272, |
|
"eval_samples_per_second": 33.729, |
|
"eval_steps_per_second": 4.216, |
|
"eval_wer": 0.7569296375266524, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 12.5, |
|
"eval_loss": 0.5569693446159363, |
|
"eval_runtime": 8.0892, |
|
"eval_samples_per_second": 32.636, |
|
"eval_steps_per_second": 4.08, |
|
"eval_wer": 0.7654584221748401, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 12.86, |
|
"eval_loss": 0.5408880710601807, |
|
"eval_runtime": 7.8701, |
|
"eval_samples_per_second": 33.545, |
|
"eval_steps_per_second": 4.193, |
|
"eval_wer": 0.7121535181236673, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 13.21, |
|
"eval_loss": 0.5358032584190369, |
|
"eval_runtime": 7.8977, |
|
"eval_samples_per_second": 33.427, |
|
"eval_steps_per_second": 4.178, |
|
"eval_wer": 0.6652452025586354, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 13.57, |
|
"eval_loss": 0.5394359827041626, |
|
"eval_runtime": 7.8219, |
|
"eval_samples_per_second": 33.751, |
|
"eval_steps_per_second": 4.219, |
|
"eval_wer": 0.6823027718550106, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 13.93, |
|
"eval_loss": 0.5434439778327942, |
|
"eval_runtime": 7.8516, |
|
"eval_samples_per_second": 33.624, |
|
"eval_steps_per_second": 4.203, |
|
"eval_wer": 0.6993603411513859, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 14.29, |
|
"learning_rate": 1.74e-05, |
|
"loss": 0.8658, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 14.29, |
|
"eval_loss": 0.5396074056625366, |
|
"eval_runtime": 7.9269, |
|
"eval_samples_per_second": 33.304, |
|
"eval_steps_per_second": 4.163, |
|
"eval_wer": 0.6823027718550106, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 14.64, |
|
"eval_loss": 0.5431792736053467, |
|
"eval_runtime": 7.8451, |
|
"eval_samples_per_second": 33.651, |
|
"eval_steps_per_second": 4.206, |
|
"eval_wer": 0.6780383795309168, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_loss": 0.5424522757530212, |
|
"eval_runtime": 7.9063, |
|
"eval_samples_per_second": 33.391, |
|
"eval_steps_per_second": 4.174, |
|
"eval_wer": 0.6865671641791045, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"step": 2100, |
|
"total_flos": 2.38289603930769e+18, |
|
"train_loss": 1.7177187274751209, |
|
"train_runtime": 3292.1945, |
|
"train_samples_per_second": 20.344, |
|
"train_steps_per_second": 0.638 |
|
} |
|
], |
|
"max_steps": 2100, |
|
"num_train_epochs": 15, |
|
"total_flos": 2.38289603930769e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|