{ "best_metric": 99.44547134935306, "best_model_checkpoint": "./output_dir/checkpoint-1000", "epoch": 5.0761421319796955, "eval_steps": 1000, "global_step": 5000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.10152284263959391, "grad_norm": 45.86384582519531, "learning_rate": 1.9000000000000002e-06, "loss": 2.9237, "step": 100 }, { "epoch": 0.20304568527918782, "grad_norm": 45.55335998535156, "learning_rate": 3.900000000000001e-06, "loss": 2.3206, "step": 200 }, { "epoch": 0.30456852791878175, "grad_norm": 48.86751174926758, "learning_rate": 5.9e-06, "loss": 2.3718, "step": 300 }, { "epoch": 0.40609137055837563, "grad_norm": 57.0309944152832, "learning_rate": 7.9e-06, "loss": 2.2004, "step": 400 }, { "epoch": 0.5076142131979695, "grad_norm": 50.58991241455078, "learning_rate": 9.9e-06, "loss": 2.1753, "step": 500 }, { "epoch": 0.6091370558375635, "grad_norm": 55.411651611328125, "learning_rate": 9.78888888888889e-06, "loss": 2.1806, "step": 600 }, { "epoch": 0.7106598984771574, "grad_norm": 52.69567108154297, "learning_rate": 9.566666666666668e-06, "loss": 2.1058, "step": 700 }, { "epoch": 0.8121827411167513, "grad_norm": 54.5142822265625, "learning_rate": 9.344444444444446e-06, "loss": 1.9591, "step": 800 }, { "epoch": 0.9137055837563451, "grad_norm": 34.65861511230469, "learning_rate": 9.124444444444444e-06, "loss": 2.0829, "step": 900 }, { "epoch": 1.015228426395939, "grad_norm": 47.46334457397461, "learning_rate": 8.902222222222224e-06, "loss": 1.9297, "step": 1000 }, { "epoch": 1.015228426395939, "eval_loss": 1.9972329139709473, "eval_runtime": 143.8629, "eval_samples_per_second": 3.476, "eval_steps_per_second": 1.738, "eval_wer": 99.44547134935306, "step": 1000 }, { "epoch": 1.116751269035533, "grad_norm": 35.19301986694336, "learning_rate": 8.68e-06, "loss": 1.3307, "step": 1100 }, { "epoch": 1.218274111675127, "grad_norm": 32.941097259521484, "learning_rate": 8.457777777777778e-06, "loss": 1.3622, "step": 1200 }, { "epoch": 1.3197969543147208, "grad_norm": 60.84815216064453, "learning_rate": 8.235555555555557e-06, "loss": 1.3763, "step": 1300 }, { "epoch": 1.4213197969543148, "grad_norm": 41.705345153808594, "learning_rate": 8.013333333333333e-06, "loss": 1.3411, "step": 1400 }, { "epoch": 1.5228426395939088, "grad_norm": 46.46513366699219, "learning_rate": 7.791111111111111e-06, "loss": 1.3745, "step": 1500 }, { "epoch": 1.6243654822335025, "grad_norm": 52.48996353149414, "learning_rate": 7.56888888888889e-06, "loss": 1.4233, "step": 1600 }, { "epoch": 1.7258883248730963, "grad_norm": 71.2730712890625, "learning_rate": 7.346666666666668e-06, "loss": 1.2765, "step": 1700 }, { "epoch": 1.8274111675126905, "grad_norm": 41.1866455078125, "learning_rate": 7.124444444444445e-06, "loss": 1.4133, "step": 1800 }, { "epoch": 1.9289340101522843, "grad_norm": 40.041481018066406, "learning_rate": 6.902222222222223e-06, "loss": 1.2927, "step": 1900 }, { "epoch": 2.030456852791878, "grad_norm": 29.876537322998047, "learning_rate": 6.680000000000001e-06, "loss": 1.1678, "step": 2000 }, { "epoch": 2.030456852791878, "eval_loss": 1.9988778829574585, "eval_runtime": 150.1664, "eval_samples_per_second": 3.33, "eval_steps_per_second": 1.665, "eval_wer": 99.44547134935306, "step": 2000 }, { "epoch": 2.1319796954314723, "grad_norm": 31.42411231994629, "learning_rate": 6.457777777777778e-06, "loss": 0.6302, "step": 2100 }, { "epoch": 2.233502538071066, "grad_norm": 29.325563430786133, "learning_rate": 6.235555555555556e-06, "loss": 0.7149, "step": 2200 }, { "epoch": 2.33502538071066, "grad_norm": 28.083486557006836, "learning_rate": 6.013333333333335e-06, "loss": 0.7032, "step": 2300 }, { "epoch": 2.436548223350254, "grad_norm": 59.06522750854492, "learning_rate": 5.791111111111112e-06, "loss": 0.7279, "step": 2400 }, { "epoch": 2.5380710659898478, "grad_norm": 25.313156127929688, "learning_rate": 5.56888888888889e-06, "loss": 0.7762, "step": 2500 }, { "epoch": 2.6395939086294415, "grad_norm": 42.13996887207031, "learning_rate": 5.346666666666667e-06, "loss": 0.6803, "step": 2600 }, { "epoch": 2.7411167512690353, "grad_norm": 40.32999038696289, "learning_rate": 5.124444444444445e-06, "loss": 0.7569, "step": 2700 }, { "epoch": 2.8426395939086295, "grad_norm": 26.812973022460938, "learning_rate": 4.902222222222222e-06, "loss": 0.7796, "step": 2800 }, { "epoch": 2.9441624365482233, "grad_norm": 35.564395904541016, "learning_rate": 4.680000000000001e-06, "loss": 0.7115, "step": 2900 }, { "epoch": 3.045685279187817, "grad_norm": 26.95085906982422, "learning_rate": 4.457777777777778e-06, "loss": 0.5123, "step": 3000 }, { "epoch": 3.045685279187817, "eval_loss": 2.0867390632629395, "eval_runtime": 147.2269, "eval_samples_per_second": 3.396, "eval_steps_per_second": 1.698, "eval_wer": 102.40295748613678, "step": 3000 }, { "epoch": 3.1472081218274113, "grad_norm": 10.427111625671387, "learning_rate": 4.235555555555556e-06, "loss": 0.3293, "step": 3100 }, { "epoch": 3.248730964467005, "grad_norm": 25.181922912597656, "learning_rate": 4.013333333333334e-06, "loss": 0.3875, "step": 3200 }, { "epoch": 3.350253807106599, "grad_norm": 25.681047439575195, "learning_rate": 3.7911111111111114e-06, "loss": 0.336, "step": 3300 }, { "epoch": 3.451776649746193, "grad_norm": 22.395158767700195, "learning_rate": 3.568888888888889e-06, "loss": 0.3491, "step": 3400 }, { "epoch": 3.553299492385787, "grad_norm": 24.35469627380371, "learning_rate": 3.346666666666667e-06, "loss": 0.3499, "step": 3500 }, { "epoch": 3.6548223350253806, "grad_norm": 43.59064865112305, "learning_rate": 3.124444444444445e-06, "loss": 0.3475, "step": 3600 }, { "epoch": 3.7563451776649748, "grad_norm": 3.5348989963531494, "learning_rate": 2.9022222222222223e-06, "loss": 0.3032, "step": 3700 }, { "epoch": 3.8578680203045685, "grad_norm": 41.54439926147461, "learning_rate": 2.68e-06, "loss": 0.3408, "step": 3800 }, { "epoch": 3.9593908629441623, "grad_norm": 20.793079376220703, "learning_rate": 2.457777777777778e-06, "loss": 0.2949, "step": 3900 }, { "epoch": 4.060913705583756, "grad_norm": 17.149005889892578, "learning_rate": 2.235555555555556e-06, "loss": 0.2025, "step": 4000 }, { "epoch": 4.060913705583756, "eval_loss": 2.169760227203369, "eval_runtime": 148.2971, "eval_samples_per_second": 3.372, "eval_steps_per_second": 1.686, "eval_wer": 103.32717190388169, "step": 4000 }, { "epoch": 4.16243654822335, "grad_norm": 1.9773640632629395, "learning_rate": 2.0133333333333337e-06, "loss": 0.1643, "step": 4100 }, { "epoch": 4.2639593908629445, "grad_norm": 10.312366485595703, "learning_rate": 1.7911111111111113e-06, "loss": 0.1461, "step": 4200 }, { "epoch": 4.365482233502538, "grad_norm": 1.339500069618225, "learning_rate": 1.568888888888889e-06, "loss": 0.1546, "step": 4300 }, { "epoch": 4.467005076142132, "grad_norm": 23.366968154907227, "learning_rate": 1.3466666666666668e-06, "loss": 0.1184, "step": 4400 }, { "epoch": 4.568527918781726, "grad_norm": 3.0241127014160156, "learning_rate": 1.1244444444444446e-06, "loss": 0.18, "step": 4500 }, { "epoch": 4.67005076142132, "grad_norm": 21.15237045288086, "learning_rate": 9.022222222222222e-07, "loss": 0.2008, "step": 4600 }, { "epoch": 4.771573604060913, "grad_norm": 4.779166221618652, "learning_rate": 6.800000000000001e-07, "loss": 0.1546, "step": 4700 }, { "epoch": 4.873096446700508, "grad_norm": 1.9334161281585693, "learning_rate": 4.5777777777777784e-07, "loss": 0.1429, "step": 4800 }, { "epoch": 4.974619289340102, "grad_norm": 18.270540237426758, "learning_rate": 2.3555555555555556e-07, "loss": 0.1373, "step": 4900 }, { "epoch": 5.0761421319796955, "grad_norm": 1.0400763750076294, "learning_rate": 1.3333333333333334e-08, "loss": 0.0843, "step": 5000 }, { "epoch": 5.0761421319796955, "eval_loss": 2.2213687896728516, "eval_runtime": 142.0959, "eval_samples_per_second": 3.519, "eval_steps_per_second": 1.759, "eval_wer": 104.25138632162663, "step": 5000 }, { "epoch": 5.0761421319796955, "step": 5000, "total_flos": 2.8844110761984e+18, "train_loss": 0.9465061870574951, "train_runtime": 2908.7621, "train_samples_per_second": 3.438, "train_steps_per_second": 1.719 } ], "logging_steps": 100, "max_steps": 5000, "num_input_tokens_seen": 0, "num_train_epochs": 6, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.8844110761984e+18, "train_batch_size": 2, "trial_name": null, "trial_params": null }