|
{ |
|
"best_metric": 1.0742337703704834, |
|
"best_model_checkpoint": "./checkpoint-200", |
|
"epoch": 71.42857142857143, |
|
"global_step": 500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 1.4e-07, |
|
"loss": 3.2633, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 3.4000000000000003e-07, |
|
"loss": 3.235, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 4.29, |
|
"learning_rate": 5.4e-07, |
|
"loss": 3.164, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 5.71, |
|
"learning_rate": 7.4e-07, |
|
"loss": 2.9792, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 7.14, |
|
"learning_rate": 9.400000000000001e-07, |
|
"loss": 2.7533, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 8.57, |
|
"learning_rate": 1.14e-06, |
|
"loss": 2.5502, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 1.34e-06, |
|
"loss": 2.3845, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 11.43, |
|
"learning_rate": 1.54e-06, |
|
"loss": 2.161, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 12.86, |
|
"learning_rate": 1.74e-06, |
|
"loss": 1.9855, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 14.29, |
|
"learning_rate": 1.94e-06, |
|
"loss": 1.8225, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 14.29, |
|
"eval_loss": 1.760784387588501, |
|
"eval_runtime": 425.4331, |
|
"eval_samples_per_second": 1.203, |
|
"eval_steps_per_second": 0.075, |
|
"eval_wer": 105.31930992736078, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 15.71, |
|
"learning_rate": 2.1400000000000003e-06, |
|
"loss": 1.6373, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 17.14, |
|
"learning_rate": 2.3400000000000005e-06, |
|
"loss": 1.4756, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 18.57, |
|
"learning_rate": 2.5400000000000002e-06, |
|
"loss": 1.3534, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 2.7400000000000004e-06, |
|
"loss": 1.2259, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 21.43, |
|
"learning_rate": 2.9400000000000002e-06, |
|
"loss": 1.1304, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 22.86, |
|
"learning_rate": 3.1400000000000004e-06, |
|
"loss": 1.0195, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 24.29, |
|
"learning_rate": 3.3400000000000006e-06, |
|
"loss": 0.9428, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 25.71, |
|
"learning_rate": 3.54e-06, |
|
"loss": 0.8721, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 27.14, |
|
"learning_rate": 3.74e-06, |
|
"loss": 0.7904, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 28.57, |
|
"learning_rate": 3.94e-06, |
|
"loss": 0.7281, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 28.57, |
|
"eval_loss": 1.0742337703704834, |
|
"eval_runtime": 174.027, |
|
"eval_samples_per_second": 2.942, |
|
"eval_steps_per_second": 0.184, |
|
"eval_wer": 69.61259079903148, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"learning_rate": 4.14e-06, |
|
"loss": 0.6704, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 31.43, |
|
"learning_rate": 4.34e-06, |
|
"loss": 0.6118, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 32.86, |
|
"learning_rate": 4.540000000000001e-06, |
|
"loss": 0.5494, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 34.29, |
|
"learning_rate": 4.74e-06, |
|
"loss": 0.5024, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 35.71, |
|
"learning_rate": 4.94e-06, |
|
"loss": 0.4511, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 37.14, |
|
"learning_rate": 5.140000000000001e-06, |
|
"loss": 0.4026, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 38.57, |
|
"learning_rate": 5.3400000000000005e-06, |
|
"loss": 0.3507, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"learning_rate": 5.540000000000001e-06, |
|
"loss": 0.3108, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 41.43, |
|
"learning_rate": 5.74e-06, |
|
"loss": 0.2687, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 42.86, |
|
"learning_rate": 5.94e-06, |
|
"loss": 0.2329, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 42.86, |
|
"eval_loss": 1.1192402839660645, |
|
"eval_runtime": 175.174, |
|
"eval_samples_per_second": 2.923, |
|
"eval_steps_per_second": 0.183, |
|
"eval_wer": 67.02481840193705, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 44.29, |
|
"learning_rate": 6.1400000000000005e-06, |
|
"loss": 0.1932, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 45.71, |
|
"learning_rate": 6.34e-06, |
|
"loss": 0.1642, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 47.14, |
|
"learning_rate": 6.540000000000001e-06, |
|
"loss": 0.134, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 48.57, |
|
"learning_rate": 6.740000000000001e-06, |
|
"loss": 0.1047, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"learning_rate": 6.9400000000000005e-06, |
|
"loss": 0.0869, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 51.43, |
|
"learning_rate": 7.14e-06, |
|
"loss": 0.0648, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 52.86, |
|
"learning_rate": 7.340000000000001e-06, |
|
"loss": 0.0517, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 54.29, |
|
"learning_rate": 7.540000000000001e-06, |
|
"loss": 0.0391, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 55.71, |
|
"learning_rate": 7.74e-06, |
|
"loss": 0.0311, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 57.14, |
|
"learning_rate": 7.94e-06, |
|
"loss": 0.0247, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 57.14, |
|
"eval_loss": 1.3494515419006348, |
|
"eval_runtime": 166.7411, |
|
"eval_samples_per_second": 3.071, |
|
"eval_steps_per_second": 0.192, |
|
"eval_wer": 66.37409200968523, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 58.57, |
|
"learning_rate": 8.14e-06, |
|
"loss": 0.0195, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"learning_rate": 8.34e-06, |
|
"loss": 0.0157, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 61.43, |
|
"learning_rate": 8.540000000000001e-06, |
|
"loss": 0.0129, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 62.86, |
|
"learning_rate": 8.740000000000001e-06, |
|
"loss": 0.0112, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 64.29, |
|
"learning_rate": 8.94e-06, |
|
"loss": 0.0097, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 65.71, |
|
"learning_rate": 9.14e-06, |
|
"loss": 0.0085, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 67.14, |
|
"learning_rate": 9.340000000000002e-06, |
|
"loss": 0.0075, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 68.57, |
|
"learning_rate": 9.54e-06, |
|
"loss": 0.0069, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"learning_rate": 9.74e-06, |
|
"loss": 0.0062, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 71.43, |
|
"learning_rate": 9.940000000000001e-06, |
|
"loss": 0.0057, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 71.43, |
|
"eval_loss": 1.5055396556854248, |
|
"eval_runtime": 189.4887, |
|
"eval_samples_per_second": 2.702, |
|
"eval_steps_per_second": 0.169, |
|
"eval_wer": 67.28964891041163, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 71.43, |
|
"step": 500, |
|
"total_flos": 1.99723386175488e+18, |
|
"train_loss": 0.8564610816463828, |
|
"train_runtime": 2065.7487, |
|
"train_samples_per_second": 15.491, |
|
"train_steps_per_second": 0.242 |
|
} |
|
], |
|
"max_steps": 500, |
|
"num_train_epochs": 72, |
|
"total_flos": 1.99723386175488e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|