diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,5605 @@ +{ + "best_metric": 21.3524811218986, + "best_model_checkpoint": "tamil_models/whisper-medium-ta_alldata_multigpu/checkpoint-8100", + "epoch": 2.9779411764705883, + "global_step": 8100, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 2.8562520089993833e-06, + "loss": 1.5919, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 4.805947323355403e-06, + "loss": 0.7788, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 5.712504017998767e-06, + "loss": 0.5415, + "step": 30 + }, + { + "epoch": 0.01, + "learning_rate": 6.309637647321797e-06, + "loss": 0.403, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 6.755642637711422e-06, + "loss": 0.3221, + "step": 50 + }, + { + "epoch": 0.02, + "learning_rate": 7.111770744693953e-06, + "loss": 0.2477, + "step": 60 + }, + { + "epoch": 0.03, + "learning_rate": 7.4082398170603955e-06, + "loss": 0.2032, + "step": 70 + }, + { + "epoch": 0.03, + "learning_rate": 7.662199332354785e-06, + "loss": 0.1913, + "step": 80 + }, + { + "epoch": 0.03, + "learning_rate": 7.884325004773166e-06, + "loss": 0.1803, + "step": 90 + }, + { + "epoch": 0.04, + "learning_rate": 8.0817159331721e-06, + "loss": 0.1755, + "step": 100 + }, + { + "epoch": 0.04, + "eval_loss": 0.20166015625, + "eval_runtime": 284.2904, + "eval_samples_per_second": 9.744, + "eval_steps_per_second": 0.077, + "eval_wer": 39.73840345199569, + "step": 100 + }, + { + "epoch": 0.04, + "learning_rate": 8.259332961677815e-06, + "loss": 0.1628, + "step": 110 + }, + { + "epoch": 0.04, + "learning_rate": 8.420779589879445e-06, + "loss": 0.1622, + "step": 120 + }, + { + "epoch": 0.05, + "learning_rate": 8.56875602699815e-06, + "loss": 0.1555, + "step": 130 + }, + { + "epoch": 0.05, + "learning_rate": 8.705337952067442e-06, + "loss": 0.1506, + "step": 140 + }, + { + "epoch": 0.06, + "learning_rate": 8.832155400481049e-06, + "loss": 0.1489, + "step": 150 + }, + { + "epoch": 0.06, + "learning_rate": 8.95051185954238e-06, + "loss": 0.1453, + "step": 160 + }, + { + "epoch": 0.06, + "learning_rate": 9.061466059049972e-06, + "loss": 0.143, + "step": 170 + }, + { + "epoch": 0.07, + "learning_rate": 9.16588965632118e-06, + "loss": 0.1402, + "step": 180 + }, + { + "epoch": 0.07, + "learning_rate": 9.264508868515998e-06, + "loss": 0.1386, + "step": 190 + }, + { + "epoch": 0.07, + "learning_rate": 9.357935131416414e-06, + "loss": 0.1374, + "step": 200 + }, + { + "epoch": 0.07, + "eval_loss": 0.1690673828125, + "eval_runtime": 294.2297, + "eval_samples_per_second": 9.414, + "eval_steps_per_second": 0.075, + "eval_wer": 36.370685005393746, + "step": 200 + }, + { + "epoch": 0.08, + "learning_rate": 9.446688082452126e-06, + "loss": 0.1323, + "step": 210 + }, + { + "epoch": 0.08, + "learning_rate": 9.531213064471803e-06, + "loss": 0.1343, + "step": 220 + }, + { + "epoch": 0.08, + "learning_rate": 9.611894646710806e-06, + "loss": 0.1317, + "step": 230 + }, + { + "epoch": 0.09, + "learning_rate": 9.689067203278456e-06, + "loss": 0.125, + "step": 240 + }, + { + "epoch": 0.09, + "learning_rate": 9.76302328564421e-06, + "loss": 0.1323, + "step": 250 + }, + { + "epoch": 0.1, + "learning_rate": 9.834020319129184e-06, + "loss": 0.1286, + "step": 260 + }, + { + "epoch": 0.1, + "learning_rate": 9.902286010551205e-06, + "loss": 0.1225, + "step": 270 + }, + { + "epoch": 0.1, + "learning_rate": 9.968022753693337e-06, + "loss": 0.1282, + "step": 280 + }, + { + "epoch": 0.11, + "learning_rate": 9.998556998557e-06, + "loss": 0.1244, + "step": 290 + }, + { + "epoch": 0.11, + "learning_rate": 9.994949494949497e-06, + "loss": 0.1231, + "step": 300 + }, + { + "epoch": 0.11, + "eval_loss": 0.1517333984375, + "eval_runtime": 240.2543, + "eval_samples_per_second": 11.529, + "eval_steps_per_second": 0.092, + "eval_wer": 33.087243797195256, + "step": 300 + }, + { + "epoch": 0.11, + "learning_rate": 9.991341991341992e-06, + "loss": 0.1244, + "step": 310 + }, + { + "epoch": 0.12, + "learning_rate": 9.987734487734489e-06, + "loss": 0.1163, + "step": 320 + }, + { + "epoch": 0.12, + "learning_rate": 9.984126984126986e-06, + "loss": 0.1194, + "step": 330 + }, + { + "epoch": 0.12, + "learning_rate": 9.980519480519481e-06, + "loss": 0.1191, + "step": 340 + }, + { + "epoch": 0.13, + "learning_rate": 9.976911976911978e-06, + "loss": 0.115, + "step": 350 + }, + { + "epoch": 0.13, + "learning_rate": 9.973304473304473e-06, + "loss": 0.1119, + "step": 360 + }, + { + "epoch": 0.14, + "learning_rate": 9.96969696969697e-06, + "loss": 0.1171, + "step": 370 + }, + { + "epoch": 0.14, + "learning_rate": 9.966089466089467e-06, + "loss": 0.1159, + "step": 380 + }, + { + "epoch": 0.14, + "learning_rate": 9.962481962481964e-06, + "loss": 0.1134, + "step": 390 + }, + { + "epoch": 0.15, + "learning_rate": 9.95887445887446e-06, + "loss": 0.116, + "step": 400 + }, + { + "epoch": 0.15, + "eval_loss": 0.1417236328125, + "eval_runtime": 254.6878, + "eval_samples_per_second": 10.876, + "eval_steps_per_second": 0.086, + "eval_wer": 31.47586299892125, + "step": 400 + }, + { + "epoch": 0.15, + "learning_rate": 9.955266955266956e-06, + "loss": 0.1126, + "step": 410 + }, + { + "epoch": 0.15, + "learning_rate": 9.951659451659453e-06, + "loss": 0.1114, + "step": 420 + }, + { + "epoch": 0.16, + "learning_rate": 9.94805194805195e-06, + "loss": 0.1116, + "step": 430 + }, + { + "epoch": 0.16, + "learning_rate": 9.944444444444445e-06, + "loss": 0.111, + "step": 440 + }, + { + "epoch": 0.17, + "learning_rate": 9.940836940836942e-06, + "loss": 0.1122, + "step": 450 + }, + { + "epoch": 0.17, + "learning_rate": 9.937229437229437e-06, + "loss": 0.1081, + "step": 460 + }, + { + "epoch": 0.17, + "learning_rate": 9.933621933621934e-06, + "loss": 0.1116, + "step": 470 + }, + { + "epoch": 0.18, + "learning_rate": 9.93001443001443e-06, + "loss": 0.1067, + "step": 480 + }, + { + "epoch": 0.18, + "learning_rate": 9.926406926406928e-06, + "loss": 0.1075, + "step": 490 + }, + { + "epoch": 0.18, + "learning_rate": 9.922799422799425e-06, + "loss": 0.1073, + "step": 500 + }, + { + "epoch": 0.18, + "eval_loss": 0.134765625, + "eval_runtime": 864.1715, + "eval_samples_per_second": 3.205, + "eval_steps_per_second": 0.025, + "eval_wer": 30.818500539374327, + "step": 500 + }, + { + "epoch": 0.19, + "learning_rate": 9.91919191919192e-06, + "loss": 0.1085, + "step": 510 + }, + { + "epoch": 0.19, + "learning_rate": 9.915584415584417e-06, + "loss": 0.1079, + "step": 520 + }, + { + "epoch": 0.19, + "learning_rate": 9.911976911976914e-06, + "loss": 0.1068, + "step": 530 + }, + { + "epoch": 0.2, + "learning_rate": 9.908369408369409e-06, + "loss": 0.1096, + "step": 540 + }, + { + "epoch": 0.2, + "learning_rate": 9.904761904761906e-06, + "loss": 0.104, + "step": 550 + }, + { + "epoch": 0.21, + "learning_rate": 9.901154401154402e-06, + "loss": 0.1055, + "step": 560 + }, + { + "epoch": 0.21, + "learning_rate": 9.897546897546898e-06, + "loss": 0.1014, + "step": 570 + }, + { + "epoch": 0.21, + "learning_rate": 9.893939393939395e-06, + "loss": 0.1086, + "step": 580 + }, + { + "epoch": 0.22, + "learning_rate": 9.890331890331891e-06, + "loss": 0.106, + "step": 590 + }, + { + "epoch": 0.22, + "learning_rate": 9.886724386724388e-06, + "loss": 0.1037, + "step": 600 + }, + { + "epoch": 0.22, + "eval_loss": 0.1295166015625, + "eval_runtime": 258.1898, + "eval_samples_per_second": 10.729, + "eval_steps_per_second": 0.085, + "eval_wer": 29.72289644012945, + "step": 600 + }, + { + "epoch": 0.22, + "learning_rate": 9.883116883116885e-06, + "loss": 0.1027, + "step": 610 + }, + { + "epoch": 0.23, + "learning_rate": 9.87950937950938e-06, + "loss": 0.1015, + "step": 620 + }, + { + "epoch": 0.23, + "learning_rate": 9.875901875901877e-06, + "loss": 0.1002, + "step": 630 + }, + { + "epoch": 0.24, + "learning_rate": 9.872294372294373e-06, + "loss": 0.1027, + "step": 640 + }, + { + "epoch": 0.24, + "learning_rate": 9.86868686868687e-06, + "loss": 0.102, + "step": 650 + }, + { + "epoch": 0.24, + "learning_rate": 9.865079365079366e-06, + "loss": 0.0981, + "step": 660 + }, + { + "epoch": 0.25, + "learning_rate": 9.861471861471862e-06, + "loss": 0.0992, + "step": 670 + }, + { + "epoch": 0.25, + "learning_rate": 9.857864357864358e-06, + "loss": 0.1041, + "step": 680 + }, + { + "epoch": 0.25, + "learning_rate": 9.854256854256855e-06, + "loss": 0.0979, + "step": 690 + }, + { + "epoch": 0.26, + "learning_rate": 9.850649350649352e-06, + "loss": 0.0997, + "step": 700 + }, + { + "epoch": 0.26, + "eval_loss": 0.1251220703125, + "eval_runtime": 253.4804, + "eval_samples_per_second": 10.928, + "eval_steps_per_second": 0.087, + "eval_wer": 29.26442826321467, + "step": 700 + }, + { + "epoch": 0.26, + "learning_rate": 9.847041847041849e-06, + "loss": 0.0963, + "step": 710 + }, + { + "epoch": 0.26, + "learning_rate": 9.843434343434344e-06, + "loss": 0.0971, + "step": 720 + }, + { + "epoch": 0.27, + "learning_rate": 9.839826839826841e-06, + "loss": 0.0997, + "step": 730 + }, + { + "epoch": 0.27, + "learning_rate": 9.836219336219336e-06, + "loss": 0.0979, + "step": 740 + }, + { + "epoch": 0.28, + "learning_rate": 9.832611832611833e-06, + "loss": 0.099, + "step": 750 + }, + { + "epoch": 0.28, + "learning_rate": 9.82900432900433e-06, + "loss": 0.0974, + "step": 760 + }, + { + "epoch": 0.28, + "learning_rate": 9.825396825396825e-06, + "loss": 0.0955, + "step": 770 + }, + { + "epoch": 0.29, + "learning_rate": 9.821789321789322e-06, + "loss": 0.0953, + "step": 780 + }, + { + "epoch": 0.29, + "learning_rate": 9.81818181818182e-06, + "loss": 0.0965, + "step": 790 + }, + { + "epoch": 0.29, + "learning_rate": 9.814574314574316e-06, + "loss": 0.0982, + "step": 800 + }, + { + "epoch": 0.29, + "eval_loss": 0.12176513671875, + "eval_runtime": 245.3351, + "eval_samples_per_second": 11.291, + "eval_steps_per_second": 0.09, + "eval_wer": 28.56324163969795, + "step": 800 + }, + { + "epoch": 0.3, + "learning_rate": 9.810966810966811e-06, + "loss": 0.0949, + "step": 810 + }, + { + "epoch": 0.3, + "learning_rate": 9.807359307359308e-06, + "loss": 0.0963, + "step": 820 + }, + { + "epoch": 0.31, + "learning_rate": 9.803751803751805e-06, + "loss": 0.0973, + "step": 830 + }, + { + "epoch": 0.31, + "learning_rate": 9.8001443001443e-06, + "loss": 0.0965, + "step": 840 + }, + { + "epoch": 0.31, + "learning_rate": 9.796536796536797e-06, + "loss": 0.0938, + "step": 850 + }, + { + "epoch": 0.32, + "learning_rate": 9.792929292929294e-06, + "loss": 0.0939, + "step": 860 + }, + { + "epoch": 0.32, + "learning_rate": 9.789321789321791e-06, + "loss": 0.0963, + "step": 870 + }, + { + "epoch": 0.32, + "learning_rate": 9.785714285714286e-06, + "loss": 0.0978, + "step": 880 + }, + { + "epoch": 0.33, + "learning_rate": 9.782106782106783e-06, + "loss": 0.0923, + "step": 890 + }, + { + "epoch": 0.33, + "learning_rate": 9.77849927849928e-06, + "loss": 0.098, + "step": 900 + }, + { + "epoch": 0.33, + "eval_loss": 0.1192626953125, + "eval_runtime": 520.2114, + "eval_samples_per_second": 5.325, + "eval_steps_per_second": 0.042, + "eval_wer": 28.030609492988134, + "step": 900 + }, + { + "epoch": 0.33, + "learning_rate": 9.774891774891775e-06, + "loss": 0.0914, + "step": 910 + }, + { + "epoch": 0.34, + "learning_rate": 9.771284271284272e-06, + "loss": 0.0944, + "step": 920 + }, + { + "epoch": 0.34, + "learning_rate": 9.767676767676767e-06, + "loss": 0.0966, + "step": 930 + }, + { + "epoch": 0.35, + "learning_rate": 9.764069264069264e-06, + "loss": 0.0903, + "step": 940 + }, + { + "epoch": 0.35, + "learning_rate": 9.760461760461761e-06, + "loss": 0.0952, + "step": 950 + }, + { + "epoch": 0.35, + "learning_rate": 9.756854256854258e-06, + "loss": 0.0913, + "step": 960 + }, + { + "epoch": 0.36, + "learning_rate": 9.753246753246755e-06, + "loss": 0.0943, + "step": 970 + }, + { + "epoch": 0.36, + "learning_rate": 9.74963924963925e-06, + "loss": 0.0939, + "step": 980 + }, + { + "epoch": 0.36, + "learning_rate": 9.746031746031747e-06, + "loss": 0.0946, + "step": 990 + }, + { + "epoch": 0.37, + "learning_rate": 9.742424242424244e-06, + "loss": 0.0941, + "step": 1000 + }, + { + "epoch": 0.37, + "eval_loss": 0.11590576171875, + "eval_runtime": 241.569, + "eval_samples_per_second": 11.467, + "eval_steps_per_second": 0.091, + "eval_wer": 27.4608953613808, + "step": 1000 + }, + { + "epoch": 0.37, + "learning_rate": 9.738816738816739e-06, + "loss": 0.0898, + "step": 1010 + }, + { + "epoch": 0.38, + "learning_rate": 9.735209235209236e-06, + "loss": 0.0899, + "step": 1020 + }, + { + "epoch": 0.38, + "learning_rate": 9.731601731601731e-06, + "loss": 0.093, + "step": 1030 + }, + { + "epoch": 0.38, + "learning_rate": 9.727994227994228e-06, + "loss": 0.0897, + "step": 1040 + }, + { + "epoch": 0.39, + "learning_rate": 9.724386724386725e-06, + "loss": 0.095, + "step": 1050 + }, + { + "epoch": 0.39, + "learning_rate": 9.720779220779222e-06, + "loss": 0.0965, + "step": 1060 + }, + { + "epoch": 0.39, + "learning_rate": 9.717171717171719e-06, + "loss": 0.0911, + "step": 1070 + }, + { + "epoch": 0.4, + "learning_rate": 9.713564213564214e-06, + "loss": 0.0903, + "step": 1080 + }, + { + "epoch": 0.4, + "learning_rate": 9.70995670995671e-06, + "loss": 0.0952, + "step": 1090 + }, + { + "epoch": 0.4, + "learning_rate": 9.706349206349208e-06, + "loss": 0.0941, + "step": 1100 + }, + { + "epoch": 0.4, + "eval_loss": 0.11358642578125, + "eval_runtime": 255.0901, + "eval_samples_per_second": 10.859, + "eval_steps_per_second": 0.086, + "eval_wer": 26.965345199568503, + "step": 1100 + }, + { + "epoch": 0.41, + "learning_rate": 9.702741702741703e-06, + "loss": 0.0898, + "step": 1110 + }, + { + "epoch": 0.41, + "learning_rate": 9.6991341991342e-06, + "loss": 0.0917, + "step": 1120 + }, + { + "epoch": 0.42, + "learning_rate": 9.695526695526695e-06, + "loss": 0.0895, + "step": 1130 + }, + { + "epoch": 0.42, + "learning_rate": 9.691919191919192e-06, + "loss": 0.0915, + "step": 1140 + }, + { + "epoch": 0.42, + "learning_rate": 9.688311688311689e-06, + "loss": 0.089, + "step": 1150 + }, + { + "epoch": 0.43, + "learning_rate": 9.684704184704186e-06, + "loss": 0.0887, + "step": 1160 + }, + { + "epoch": 0.43, + "learning_rate": 9.681096681096683e-06, + "loss": 0.0909, + "step": 1170 + }, + { + "epoch": 0.43, + "learning_rate": 9.67748917748918e-06, + "loss": 0.0897, + "step": 1180 + }, + { + "epoch": 0.44, + "learning_rate": 9.673881673881675e-06, + "loss": 0.0917, + "step": 1190 + }, + { + "epoch": 0.44, + "learning_rate": 9.670274170274172e-06, + "loss": 0.0868, + "step": 1200 + }, + { + "epoch": 0.44, + "eval_loss": 0.11199951171875, + "eval_runtime": 247.8554, + "eval_samples_per_second": 11.176, + "eval_steps_per_second": 0.089, + "eval_wer": 27.164239482200646, + "step": 1200 + }, + { + "epoch": 0.44, + "learning_rate": 9.666666666666667e-06, + "loss": 0.091, + "step": 1210 + }, + { + "epoch": 0.45, + "learning_rate": 9.663059163059164e-06, + "loss": 0.0903, + "step": 1220 + }, + { + "epoch": 0.45, + "learning_rate": 9.65945165945166e-06, + "loss": 0.0856, + "step": 1230 + }, + { + "epoch": 0.46, + "learning_rate": 9.655844155844156e-06, + "loss": 0.0907, + "step": 1240 + }, + { + "epoch": 0.46, + "learning_rate": 9.652236652236653e-06, + "loss": 0.0886, + "step": 1250 + }, + { + "epoch": 0.46, + "learning_rate": 9.64862914862915e-06, + "loss": 0.0841, + "step": 1260 + }, + { + "epoch": 0.47, + "learning_rate": 9.645021645021646e-06, + "loss": 0.0855, + "step": 1270 + }, + { + "epoch": 0.47, + "learning_rate": 9.641414141414143e-06, + "loss": 0.0843, + "step": 1280 + }, + { + "epoch": 0.47, + "learning_rate": 9.637806637806638e-06, + "loss": 0.0901, + "step": 1290 + }, + { + "epoch": 0.48, + "learning_rate": 9.634199134199135e-06, + "loss": 0.0899, + "step": 1300 + }, + { + "epoch": 0.48, + "eval_loss": 0.10968017578125, + "eval_runtime": 245.0559, + "eval_samples_per_second": 11.304, + "eval_steps_per_second": 0.09, + "eval_wer": 26.581040992448756, + "step": 1300 + }, + { + "epoch": 0.48, + "learning_rate": 9.63059163059163e-06, + "loss": 0.0857, + "step": 1310 + }, + { + "epoch": 0.49, + "learning_rate": 9.626984126984127e-06, + "loss": 0.0879, + "step": 1320 + }, + { + "epoch": 0.49, + "learning_rate": 9.623376623376624e-06, + "loss": 0.0909, + "step": 1330 + }, + { + "epoch": 0.49, + "learning_rate": 9.61976911976912e-06, + "loss": 0.0859, + "step": 1340 + }, + { + "epoch": 0.5, + "learning_rate": 9.616161616161616e-06, + "loss": 0.0844, + "step": 1350 + }, + { + "epoch": 0.5, + "learning_rate": 9.612554112554113e-06, + "loss": 0.0878, + "step": 1360 + }, + { + "epoch": 0.5, + "learning_rate": 9.60894660894661e-06, + "loss": 0.0867, + "step": 1370 + }, + { + "epoch": 0.51, + "learning_rate": 9.605339105339107e-06, + "loss": 0.0885, + "step": 1380 + }, + { + "epoch": 0.51, + "learning_rate": 9.601731601731602e-06, + "loss": 0.0856, + "step": 1390 + }, + { + "epoch": 0.51, + "learning_rate": 9.5981240981241e-06, + "loss": 0.0888, + "step": 1400 + }, + { + "epoch": 0.51, + "eval_loss": 0.1080322265625, + "eval_runtime": 243.3367, + "eval_samples_per_second": 11.383, + "eval_steps_per_second": 0.09, + "eval_wer": 26.007955771305287, + "step": 1400 + }, + { + "epoch": 0.52, + "learning_rate": 9.594516594516594e-06, + "loss": 0.0872, + "step": 1410 + }, + { + "epoch": 0.52, + "learning_rate": 9.590909090909091e-06, + "loss": 0.0861, + "step": 1420 + }, + { + "epoch": 0.53, + "learning_rate": 9.587301587301588e-06, + "loss": 0.0852, + "step": 1430 + }, + { + "epoch": 0.53, + "learning_rate": 9.583694083694083e-06, + "loss": 0.0866, + "step": 1440 + }, + { + "epoch": 0.53, + "learning_rate": 9.58008658008658e-06, + "loss": 0.0842, + "step": 1450 + }, + { + "epoch": 0.54, + "learning_rate": 9.576479076479077e-06, + "loss": 0.0861, + "step": 1460 + }, + { + "epoch": 0.54, + "learning_rate": 9.572871572871574e-06, + "loss": 0.086, + "step": 1470 + }, + { + "epoch": 0.54, + "learning_rate": 9.569264069264071e-06, + "loss": 0.0831, + "step": 1480 + }, + { + "epoch": 0.55, + "learning_rate": 9.565656565656566e-06, + "loss": 0.0858, + "step": 1490 + }, + { + "epoch": 0.55, + "learning_rate": 9.562049062049063e-06, + "loss": 0.0871, + "step": 1500 + }, + { + "epoch": 0.55, + "eval_loss": 0.10699462890625, + "eval_runtime": 701.1447, + "eval_samples_per_second": 3.951, + "eval_steps_per_second": 0.031, + "eval_wer": 25.974244875943903, + "step": 1500 + }, + { + "epoch": 0.56, + "learning_rate": 9.558441558441558e-06, + "loss": 0.0805, + "step": 1510 + }, + { + "epoch": 0.56, + "learning_rate": 9.554834054834055e-06, + "loss": 0.0857, + "step": 1520 + }, + { + "epoch": 0.56, + "learning_rate": 9.551226551226552e-06, + "loss": 0.0855, + "step": 1530 + }, + { + "epoch": 0.57, + "learning_rate": 9.547619047619049e-06, + "loss": 0.0855, + "step": 1540 + }, + { + "epoch": 0.57, + "learning_rate": 9.544011544011544e-06, + "loss": 0.0872, + "step": 1550 + }, + { + "epoch": 0.57, + "learning_rate": 9.540404040404041e-06, + "loss": 0.0866, + "step": 1560 + }, + { + "epoch": 0.58, + "learning_rate": 9.536796536796538e-06, + "loss": 0.0852, + "step": 1570 + }, + { + "epoch": 0.58, + "learning_rate": 9.533189033189035e-06, + "loss": 0.0878, + "step": 1580 + }, + { + "epoch": 0.58, + "learning_rate": 9.52958152958153e-06, + "loss": 0.0855, + "step": 1590 + }, + { + "epoch": 0.59, + "learning_rate": 9.525974025974027e-06, + "loss": 0.0848, + "step": 1600 + }, + { + "epoch": 0.59, + "eval_loss": 0.10595703125, + "eval_runtime": 254.3389, + "eval_samples_per_second": 10.891, + "eval_steps_per_second": 0.086, + "eval_wer": 25.589940668824163, + "step": 1600 + }, + { + "epoch": 0.59, + "learning_rate": 9.522366522366522e-06, + "loss": 0.0893, + "step": 1610 + }, + { + "epoch": 0.6, + "learning_rate": 9.518759018759019e-06, + "loss": 0.0822, + "step": 1620 + }, + { + "epoch": 0.6, + "learning_rate": 9.515151515151516e-06, + "loss": 0.0813, + "step": 1630 + }, + { + "epoch": 0.6, + "learning_rate": 9.511544011544013e-06, + "loss": 0.0844, + "step": 1640 + }, + { + "epoch": 0.61, + "learning_rate": 9.507936507936508e-06, + "loss": 0.0823, + "step": 1650 + }, + { + "epoch": 0.61, + "learning_rate": 9.504329004329005e-06, + "loss": 0.0868, + "step": 1660 + }, + { + "epoch": 0.61, + "learning_rate": 9.500721500721502e-06, + "loss": 0.0851, + "step": 1670 + }, + { + "epoch": 0.62, + "learning_rate": 9.497113997113999e-06, + "loss": 0.0818, + "step": 1680 + }, + { + "epoch": 0.62, + "learning_rate": 9.493506493506494e-06, + "loss": 0.0835, + "step": 1690 + }, + { + "epoch": 0.62, + "learning_rate": 9.48989898989899e-06, + "loss": 0.0835, + "step": 1700 + }, + { + "epoch": 0.62, + "eval_loss": 0.1044921875, + "eval_runtime": 1330.4645, + "eval_samples_per_second": 2.082, + "eval_steps_per_second": 0.017, + "eval_wer": 25.573085221143472, + "step": 1700 + }, + { + "epoch": 0.63, + "learning_rate": 9.486291486291486e-06, + "loss": 0.0841, + "step": 1710 + }, + { + "epoch": 0.63, + "learning_rate": 9.482683982683983e-06, + "loss": 0.0858, + "step": 1720 + }, + { + "epoch": 0.64, + "learning_rate": 9.47907647907648e-06, + "loss": 0.0845, + "step": 1730 + }, + { + "epoch": 0.64, + "learning_rate": 9.475468975468977e-06, + "loss": 0.0854, + "step": 1740 + }, + { + "epoch": 0.64, + "learning_rate": 9.471861471861472e-06, + "loss": 0.083, + "step": 1750 + }, + { + "epoch": 0.65, + "learning_rate": 9.468253968253969e-06, + "loss": 0.0834, + "step": 1760 + }, + { + "epoch": 0.65, + "learning_rate": 9.464646464646466e-06, + "loss": 0.0811, + "step": 1770 + }, + { + "epoch": 0.65, + "learning_rate": 9.461038961038963e-06, + "loss": 0.0882, + "step": 1780 + }, + { + "epoch": 0.66, + "learning_rate": 9.457431457431458e-06, + "loss": 0.0829, + "step": 1790 + }, + { + "epoch": 0.66, + "learning_rate": 9.453823953823955e-06, + "loss": 0.08, + "step": 1800 + }, + { + "epoch": 0.66, + "eval_loss": 0.10369873046875, + "eval_runtime": 244.3089, + "eval_samples_per_second": 11.338, + "eval_steps_per_second": 0.09, + "eval_wer": 25.13484358144552, + "step": 1800 + }, + { + "epoch": 0.67, + "learning_rate": 9.45021645021645e-06, + "loss": 0.0853, + "step": 1810 + }, + { + "epoch": 0.67, + "learning_rate": 9.446608946608947e-06, + "loss": 0.0826, + "step": 1820 + }, + { + "epoch": 0.67, + "learning_rate": 9.443001443001444e-06, + "loss": 0.0828, + "step": 1830 + }, + { + "epoch": 0.68, + "learning_rate": 9.43939393939394e-06, + "loss": 0.0797, + "step": 1840 + }, + { + "epoch": 0.68, + "learning_rate": 9.435786435786437e-06, + "loss": 0.0814, + "step": 1850 + }, + { + "epoch": 0.68, + "learning_rate": 9.432178932178933e-06, + "loss": 0.0827, + "step": 1860 + }, + { + "epoch": 0.69, + "learning_rate": 9.42857142857143e-06, + "loss": 0.0789, + "step": 1870 + }, + { + "epoch": 0.69, + "learning_rate": 9.424963924963926e-06, + "loss": 0.0838, + "step": 1880 + }, + { + "epoch": 0.69, + "learning_rate": 9.421356421356422e-06, + "loss": 0.0853, + "step": 1890 + }, + { + "epoch": 0.7, + "learning_rate": 9.417748917748919e-06, + "loss": 0.0819, + "step": 1900 + }, + { + "epoch": 0.7, + "eval_loss": 0.10162353515625, + "eval_runtime": 705.4564, + "eval_samples_per_second": 3.927, + "eval_steps_per_second": 0.031, + "eval_wer": 24.976402373247033, + "step": 1900 + }, + { + "epoch": 0.7, + "learning_rate": 9.414141414141414e-06, + "loss": 0.0821, + "step": 1910 + }, + { + "epoch": 0.71, + "learning_rate": 9.41053391053391e-06, + "loss": 0.0813, + "step": 1920 + }, + { + "epoch": 0.71, + "learning_rate": 9.406926406926408e-06, + "loss": 0.0814, + "step": 1930 + }, + { + "epoch": 0.71, + "learning_rate": 9.403318903318904e-06, + "loss": 0.0856, + "step": 1940 + }, + { + "epoch": 0.72, + "learning_rate": 9.399711399711401e-06, + "loss": 0.0842, + "step": 1950 + }, + { + "epoch": 0.72, + "learning_rate": 9.396103896103896e-06, + "loss": 0.0806, + "step": 1960 + }, + { + "epoch": 0.72, + "learning_rate": 9.392496392496393e-06, + "loss": 0.0794, + "step": 1970 + }, + { + "epoch": 0.73, + "learning_rate": 9.38888888888889e-06, + "loss": 0.084, + "step": 1980 + }, + { + "epoch": 0.73, + "learning_rate": 9.385281385281385e-06, + "loss": 0.0853, + "step": 1990 + }, + { + "epoch": 0.74, + "learning_rate": 9.381673881673882e-06, + "loss": 0.0801, + "step": 2000 + }, + { + "epoch": 0.74, + "eval_loss": 0.100830078125, + "eval_runtime": 693.4811, + "eval_samples_per_second": 3.994, + "eval_steps_per_second": 0.032, + "eval_wer": 24.96628910463862, + "step": 2000 + }, + { + "epoch": 0.74, + "learning_rate": 9.378066378066378e-06, + "loss": 0.0807, + "step": 2010 + }, + { + "epoch": 0.74, + "learning_rate": 9.374458874458874e-06, + "loss": 0.0807, + "step": 2020 + }, + { + "epoch": 0.75, + "learning_rate": 9.370851370851371e-06, + "loss": 0.08, + "step": 2030 + }, + { + "epoch": 0.75, + "learning_rate": 9.367243867243868e-06, + "loss": 0.0829, + "step": 2040 + }, + { + "epoch": 0.75, + "learning_rate": 9.363636363636365e-06, + "loss": 0.0805, + "step": 2050 + }, + { + "epoch": 0.76, + "learning_rate": 9.36002886002886e-06, + "loss": 0.0807, + "step": 2060 + }, + { + "epoch": 0.76, + "learning_rate": 9.356421356421357e-06, + "loss": 0.0826, + "step": 2070 + }, + { + "epoch": 0.76, + "learning_rate": 9.352813852813854e-06, + "loss": 0.0796, + "step": 2080 + }, + { + "epoch": 0.77, + "learning_rate": 9.34920634920635e-06, + "loss": 0.0786, + "step": 2090 + }, + { + "epoch": 0.77, + "learning_rate": 9.345598845598846e-06, + "loss": 0.0793, + "step": 2100 + }, + { + "epoch": 0.77, + "eval_loss": 0.09967041015625, + "eval_runtime": 817.1155, + "eval_samples_per_second": 3.39, + "eval_steps_per_second": 0.027, + "eval_wer": 24.504449838187703, + "step": 2100 + }, + { + "epoch": 0.78, + "learning_rate": 9.341991341991343e-06, + "loss": 0.0808, + "step": 2110 + }, + { + "epoch": 0.78, + "learning_rate": 9.338383838383838e-06, + "loss": 0.0814, + "step": 2120 + }, + { + "epoch": 0.78, + "learning_rate": 9.334776334776335e-06, + "loss": 0.0832, + "step": 2130 + }, + { + "epoch": 0.79, + "learning_rate": 9.331168831168832e-06, + "loss": 0.0812, + "step": 2140 + }, + { + "epoch": 0.79, + "learning_rate": 9.327561327561329e-06, + "loss": 0.0776, + "step": 2150 + }, + { + "epoch": 0.79, + "learning_rate": 9.323953823953826e-06, + "loss": 0.0807, + "step": 2160 + }, + { + "epoch": 0.8, + "learning_rate": 9.320346320346321e-06, + "loss": 0.0831, + "step": 2170 + }, + { + "epoch": 0.8, + "learning_rate": 9.316738816738818e-06, + "loss": 0.0795, + "step": 2180 + }, + { + "epoch": 0.81, + "learning_rate": 9.313131313131313e-06, + "loss": 0.079, + "step": 2190 + }, + { + "epoch": 0.81, + "learning_rate": 9.30952380952381e-06, + "loss": 0.081, + "step": 2200 + }, + { + "epoch": 0.81, + "eval_loss": 0.09979248046875, + "eval_runtime": 515.1218, + "eval_samples_per_second": 5.377, + "eval_steps_per_second": 0.043, + "eval_wer": 24.514563106796118, + "step": 2200 + }, + { + "epoch": 0.81, + "learning_rate": 9.305916305916307e-06, + "loss": 0.083, + "step": 2210 + }, + { + "epoch": 0.82, + "learning_rate": 9.302308802308802e-06, + "loss": 0.0823, + "step": 2220 + }, + { + "epoch": 0.82, + "learning_rate": 9.298701298701299e-06, + "loss": 0.0809, + "step": 2230 + }, + { + "epoch": 0.82, + "learning_rate": 9.295093795093796e-06, + "loss": 0.0795, + "step": 2240 + }, + { + "epoch": 0.83, + "learning_rate": 9.291486291486293e-06, + "loss": 0.0786, + "step": 2250 + }, + { + "epoch": 0.83, + "learning_rate": 9.28787878787879e-06, + "loss": 0.0838, + "step": 2260 + }, + { + "epoch": 0.83, + "learning_rate": 9.284271284271285e-06, + "loss": 0.0788, + "step": 2270 + }, + { + "epoch": 0.84, + "learning_rate": 9.280663780663782e-06, + "loss": 0.0788, + "step": 2280 + }, + { + "epoch": 0.84, + "learning_rate": 9.277056277056277e-06, + "loss": 0.0765, + "step": 2290 + }, + { + "epoch": 0.85, + "learning_rate": 9.273448773448774e-06, + "loss": 0.079, + "step": 2300 + }, + { + "epoch": 0.85, + "eval_loss": 0.0982666015625, + "eval_runtime": 568.3193, + "eval_samples_per_second": 4.874, + "eval_steps_per_second": 0.039, + "eval_wer": 24.733683926645092, + "step": 2300 + }, + { + "epoch": 0.85, + "learning_rate": 9.26984126984127e-06, + "loss": 0.0772, + "step": 2310 + }, + { + "epoch": 0.85, + "learning_rate": 9.266233766233766e-06, + "loss": 0.0808, + "step": 2320 + }, + { + "epoch": 0.86, + "learning_rate": 9.262626262626263e-06, + "loss": 0.0786, + "step": 2330 + }, + { + "epoch": 0.86, + "learning_rate": 9.25901875901876e-06, + "loss": 0.0787, + "step": 2340 + }, + { + "epoch": 0.86, + "learning_rate": 9.255411255411257e-06, + "loss": 0.0798, + "step": 2350 + }, + { + "epoch": 0.87, + "learning_rate": 9.251803751803754e-06, + "loss": 0.0808, + "step": 2360 + }, + { + "epoch": 0.87, + "learning_rate": 9.248196248196249e-06, + "loss": 0.0753, + "step": 2370 + }, + { + "epoch": 0.88, + "learning_rate": 9.244588744588746e-06, + "loss": 0.0801, + "step": 2380 + }, + { + "epoch": 0.88, + "learning_rate": 9.240981240981241e-06, + "loss": 0.0794, + "step": 2390 + }, + { + "epoch": 0.88, + "learning_rate": 9.237373737373738e-06, + "loss": 0.0758, + "step": 2400 + }, + { + "epoch": 0.88, + "eval_loss": 0.09796142578125, + "eval_runtime": 249.0453, + "eval_samples_per_second": 11.122, + "eval_steps_per_second": 0.088, + "eval_wer": 24.747168284789645, + "step": 2400 + }, + { + "epoch": 0.89, + "learning_rate": 9.233766233766235e-06, + "loss": 0.0798, + "step": 2410 + }, + { + "epoch": 0.89, + "learning_rate": 9.230158730158732e-06, + "loss": 0.082, + "step": 2420 + }, + { + "epoch": 0.89, + "learning_rate": 9.226551226551227e-06, + "loss": 0.0796, + "step": 2430 + }, + { + "epoch": 0.9, + "learning_rate": 9.222943722943724e-06, + "loss": 0.0787, + "step": 2440 + }, + { + "epoch": 0.9, + "learning_rate": 9.21933621933622e-06, + "loss": 0.075, + "step": 2450 + }, + { + "epoch": 0.9, + "learning_rate": 9.215728715728717e-06, + "loss": 0.0778, + "step": 2460 + }, + { + "epoch": 0.91, + "learning_rate": 9.212121212121213e-06, + "loss": 0.077, + "step": 2470 + }, + { + "epoch": 0.91, + "learning_rate": 9.20851370851371e-06, + "loss": 0.0747, + "step": 2480 + }, + { + "epoch": 0.92, + "learning_rate": 9.204906204906205e-06, + "loss": 0.075, + "step": 2490 + }, + { + "epoch": 0.92, + "learning_rate": 9.201298701298702e-06, + "loss": 0.0806, + "step": 2500 + }, + { + "epoch": 0.92, + "eval_loss": 0.0966796875, + "eval_runtime": 487.9447, + "eval_samples_per_second": 5.677, + "eval_steps_per_second": 0.045, + "eval_wer": 23.77629449838188, + "step": 2500 + }, + { + "epoch": 0.92, + "learning_rate": 9.197691197691199e-06, + "loss": 0.0786, + "step": 2510 + }, + { + "epoch": 0.93, + "learning_rate": 9.194083694083695e-06, + "loss": 0.0789, + "step": 2520 + }, + { + "epoch": 0.93, + "learning_rate": 9.19047619047619e-06, + "loss": 0.0765, + "step": 2530 + }, + { + "epoch": 0.93, + "learning_rate": 9.186868686868688e-06, + "loss": 0.0769, + "step": 2540 + }, + { + "epoch": 0.94, + "learning_rate": 9.183261183261184e-06, + "loss": 0.0773, + "step": 2550 + }, + { + "epoch": 0.94, + "learning_rate": 9.179653679653681e-06, + "loss": 0.076, + "step": 2560 + }, + { + "epoch": 0.94, + "learning_rate": 9.176046176046177e-06, + "loss": 0.0751, + "step": 2570 + }, + { + "epoch": 0.95, + "learning_rate": 9.172438672438673e-06, + "loss": 0.0764, + "step": 2580 + }, + { + "epoch": 0.95, + "learning_rate": 9.168831168831169e-06, + "loss": 0.0767, + "step": 2590 + }, + { + "epoch": 0.96, + "learning_rate": 9.165223665223666e-06, + "loss": 0.077, + "step": 2600 + }, + { + "epoch": 0.96, + "eval_loss": 0.095703125, + "eval_runtime": 770.8972, + "eval_samples_per_second": 3.593, + "eval_steps_per_second": 0.029, + "eval_wer": 24.029126213592235, + "step": 2600 + }, + { + "epoch": 0.96, + "learning_rate": 9.161616161616162e-06, + "loss": 0.0777, + "step": 2610 + }, + { + "epoch": 0.96, + "learning_rate": 9.15800865800866e-06, + "loss": 0.0773, + "step": 2620 + }, + { + "epoch": 0.97, + "learning_rate": 9.154401154401155e-06, + "loss": 0.0801, + "step": 2630 + }, + { + "epoch": 0.97, + "learning_rate": 9.150793650793651e-06, + "loss": 0.0799, + "step": 2640 + }, + { + "epoch": 0.97, + "learning_rate": 9.147186147186148e-06, + "loss": 0.0778, + "step": 2650 + }, + { + "epoch": 0.98, + "learning_rate": 9.143578643578645e-06, + "loss": 0.0757, + "step": 2660 + }, + { + "epoch": 0.98, + "learning_rate": 9.13997113997114e-06, + "loss": 0.0797, + "step": 2670 + }, + { + "epoch": 0.99, + "learning_rate": 9.136363636363637e-06, + "loss": 0.0756, + "step": 2680 + }, + { + "epoch": 0.99, + "learning_rate": 9.132756132756132e-06, + "loss": 0.0759, + "step": 2690 + }, + { + "epoch": 0.99, + "learning_rate": 9.12914862914863e-06, + "loss": 0.0797, + "step": 2700 + }, + { + "epoch": 0.99, + "eval_loss": 0.09466552734375, + "eval_runtime": 554.2007, + "eval_samples_per_second": 4.998, + "eval_steps_per_second": 0.04, + "eval_wer": 23.678532901833872, + "step": 2700 + }, + { + "epoch": 1.0, + "learning_rate": 9.125541125541126e-06, + "loss": 0.0775, + "step": 2710 + }, + { + "epoch": 1.0, + "learning_rate": 9.121933621933623e-06, + "loss": 0.0774, + "step": 2720 + }, + { + "epoch": 1.0, + "learning_rate": 9.11832611832612e-06, + "loss": 0.0691, + "step": 2730 + }, + { + "epoch": 1.01, + "learning_rate": 9.114718614718615e-06, + "loss": 0.067, + "step": 2740 + }, + { + "epoch": 1.01, + "learning_rate": 9.111111111111112e-06, + "loss": 0.0684, + "step": 2750 + }, + { + "epoch": 1.01, + "learning_rate": 9.107503607503609e-06, + "loss": 0.0688, + "step": 2760 + }, + { + "epoch": 1.02, + "learning_rate": 9.103896103896104e-06, + "loss": 0.0667, + "step": 2770 + }, + { + "epoch": 1.02, + "learning_rate": 9.100288600288601e-06, + "loss": 0.0664, + "step": 2780 + }, + { + "epoch": 1.03, + "learning_rate": 9.096681096681096e-06, + "loss": 0.0654, + "step": 2790 + }, + { + "epoch": 1.03, + "learning_rate": 9.093073593073593e-06, + "loss": 0.0697, + "step": 2800 + }, + { + "epoch": 1.03, + "eval_loss": 0.09423828125, + "eval_runtime": 378.3675, + "eval_samples_per_second": 7.321, + "eval_steps_per_second": 0.058, + "eval_wer": 23.570658036677454, + "step": 2800 + }, + { + "epoch": 1.03, + "learning_rate": 9.08946608946609e-06, + "loss": 0.0675, + "step": 2810 + }, + { + "epoch": 1.04, + "learning_rate": 9.085858585858587e-06, + "loss": 0.0684, + "step": 2820 + }, + { + "epoch": 1.04, + "learning_rate": 9.082251082251084e-06, + "loss": 0.0697, + "step": 2830 + }, + { + "epoch": 1.04, + "learning_rate": 9.078643578643579e-06, + "loss": 0.0678, + "step": 2840 + }, + { + "epoch": 1.05, + "learning_rate": 9.075036075036076e-06, + "loss": 0.0673, + "step": 2850 + }, + { + "epoch": 1.05, + "learning_rate": 9.071428571428573e-06, + "loss": 0.0691, + "step": 2860 + }, + { + "epoch": 1.06, + "learning_rate": 9.067821067821068e-06, + "loss": 0.0657, + "step": 2870 + }, + { + "epoch": 1.06, + "learning_rate": 9.064213564213565e-06, + "loss": 0.067, + "step": 2880 + }, + { + "epoch": 1.06, + "learning_rate": 9.06060606060606e-06, + "loss": 0.0695, + "step": 2890 + }, + { + "epoch": 1.07, + "learning_rate": 9.056998556998557e-06, + "loss": 0.0685, + "step": 2900 + }, + { + "epoch": 1.07, + "eval_loss": 0.09454345703125, + "eval_runtime": 252.9239, + "eval_samples_per_second": 10.952, + "eval_steps_per_second": 0.087, + "eval_wer": 23.5571736785329, + "step": 2900 + }, + { + "epoch": 1.07, + "learning_rate": 9.053391053391054e-06, + "loss": 0.0688, + "step": 2910 + }, + { + "epoch": 1.07, + "learning_rate": 9.049783549783551e-06, + "loss": 0.0657, + "step": 2920 + }, + { + "epoch": 1.08, + "learning_rate": 9.046176046176048e-06, + "loss": 0.068, + "step": 2930 + }, + { + "epoch": 1.08, + "learning_rate": 9.042568542568543e-06, + "loss": 0.0692, + "step": 2940 + }, + { + "epoch": 1.08, + "learning_rate": 9.03896103896104e-06, + "loss": 0.0681, + "step": 2950 + }, + { + "epoch": 1.09, + "learning_rate": 9.035353535353537e-06, + "loss": 0.0699, + "step": 2960 + }, + { + "epoch": 1.09, + "learning_rate": 9.031746031746032e-06, + "loss": 0.0725, + "step": 2970 + }, + { + "epoch": 1.1, + "learning_rate": 9.028138528138529e-06, + "loss": 0.0665, + "step": 2980 + }, + { + "epoch": 1.1, + "learning_rate": 9.024531024531026e-06, + "loss": 0.0666, + "step": 2990 + }, + { + "epoch": 1.1, + "learning_rate": 9.020923520923521e-06, + "loss": 0.0685, + "step": 3000 + }, + { + "epoch": 1.1, + "eval_loss": 0.094970703125, + "eval_runtime": 448.4694, + "eval_samples_per_second": 6.177, + "eval_steps_per_second": 0.049, + "eval_wer": 23.722357065803667, + "step": 3000 + }, + { + "epoch": 1.11, + "learning_rate": 9.017316017316018e-06, + "loss": 0.0692, + "step": 3010 + }, + { + "epoch": 1.11, + "learning_rate": 9.013708513708515e-06, + "loss": 0.067, + "step": 3020 + }, + { + "epoch": 1.11, + "learning_rate": 9.010101010101012e-06, + "loss": 0.0689, + "step": 3030 + }, + { + "epoch": 1.12, + "learning_rate": 9.006493506493509e-06, + "loss": 0.0692, + "step": 3040 + }, + { + "epoch": 1.12, + "learning_rate": 9.002886002886004e-06, + "loss": 0.0696, + "step": 3050 + }, + { + "epoch": 1.12, + "learning_rate": 8.9992784992785e-06, + "loss": 0.0676, + "step": 3060 + }, + { + "epoch": 1.13, + "learning_rate": 8.995670995670996e-06, + "loss": 0.0696, + "step": 3070 + }, + { + "epoch": 1.13, + "learning_rate": 8.992063492063493e-06, + "loss": 0.0702, + "step": 3080 + }, + { + "epoch": 1.14, + "learning_rate": 8.98845598845599e-06, + "loss": 0.0669, + "step": 3090 + }, + { + "epoch": 1.14, + "learning_rate": 8.984848484848485e-06, + "loss": 0.0669, + "step": 3100 + }, + { + "epoch": 1.14, + "eval_loss": 0.0938720703125, + "eval_runtime": 367.7769, + "eval_samples_per_second": 7.532, + "eval_steps_per_second": 0.06, + "eval_wer": 23.59762675296656, + "step": 3100 + }, + { + "epoch": 1.14, + "learning_rate": 8.981240981240982e-06, + "loss": 0.0677, + "step": 3110 + }, + { + "epoch": 1.15, + "learning_rate": 8.977633477633479e-06, + "loss": 0.0684, + "step": 3120 + }, + { + "epoch": 1.15, + "learning_rate": 8.974025974025975e-06, + "loss": 0.0677, + "step": 3130 + }, + { + "epoch": 1.15, + "learning_rate": 8.970418470418472e-06, + "loss": 0.0658, + "step": 3140 + }, + { + "epoch": 1.16, + "learning_rate": 8.966810966810968e-06, + "loss": 0.0689, + "step": 3150 + }, + { + "epoch": 1.16, + "learning_rate": 8.963203463203464e-06, + "loss": 0.0653, + "step": 3160 + }, + { + "epoch": 1.17, + "learning_rate": 8.95959595959596e-06, + "loss": 0.0668, + "step": 3170 + }, + { + "epoch": 1.17, + "learning_rate": 8.955988455988457e-06, + "loss": 0.066, + "step": 3180 + }, + { + "epoch": 1.17, + "learning_rate": 8.952380952380953e-06, + "loss": 0.0668, + "step": 3190 + }, + { + "epoch": 1.18, + "learning_rate": 8.948773448773449e-06, + "loss": 0.0678, + "step": 3200 + }, + { + "epoch": 1.18, + "eval_loss": 0.09356689453125, + "eval_runtime": 288.5179, + "eval_samples_per_second": 9.601, + "eval_steps_per_second": 0.076, + "eval_wer": 23.415587918015103, + "step": 3200 + }, + { + "epoch": 1.18, + "learning_rate": 8.945165945165946e-06, + "loss": 0.0679, + "step": 3210 + }, + { + "epoch": 1.18, + "learning_rate": 8.941558441558442e-06, + "loss": 0.0677, + "step": 3220 + }, + { + "epoch": 1.19, + "learning_rate": 8.93795093795094e-06, + "loss": 0.0694, + "step": 3230 + }, + { + "epoch": 1.19, + "learning_rate": 8.934343434343436e-06, + "loss": 0.0678, + "step": 3240 + }, + { + "epoch": 1.19, + "learning_rate": 8.930735930735931e-06, + "loss": 0.0682, + "step": 3250 + }, + { + "epoch": 1.2, + "learning_rate": 8.927128427128428e-06, + "loss": 0.0674, + "step": 3260 + }, + { + "epoch": 1.2, + "learning_rate": 8.923520923520924e-06, + "loss": 0.0688, + "step": 3270 + }, + { + "epoch": 1.21, + "learning_rate": 8.91991341991342e-06, + "loss": 0.0684, + "step": 3280 + }, + { + "epoch": 1.21, + "learning_rate": 8.916305916305917e-06, + "loss": 0.0659, + "step": 3290 + }, + { + "epoch": 1.21, + "learning_rate": 8.912698412698414e-06, + "loss": 0.0692, + "step": 3300 + }, + { + "epoch": 1.21, + "eval_loss": 0.09393310546875, + "eval_runtime": 478.9048, + "eval_samples_per_second": 5.784, + "eval_steps_per_second": 0.046, + "eval_wer": 23.358279395900755, + "step": 3300 + }, + { + "epoch": 1.22, + "learning_rate": 8.90909090909091e-06, + "loss": 0.0698, + "step": 3310 + }, + { + "epoch": 1.22, + "learning_rate": 8.905483405483406e-06, + "loss": 0.069, + "step": 3320 + }, + { + "epoch": 1.22, + "learning_rate": 8.901875901875903e-06, + "loss": 0.0668, + "step": 3330 + }, + { + "epoch": 1.23, + "learning_rate": 8.8982683982684e-06, + "loss": 0.0663, + "step": 3340 + }, + { + "epoch": 1.23, + "learning_rate": 8.894660894660895e-06, + "loss": 0.0676, + "step": 3350 + }, + { + "epoch": 1.24, + "learning_rate": 8.891053391053392e-06, + "loss": 0.0657, + "step": 3360 + }, + { + "epoch": 1.24, + "learning_rate": 8.887445887445887e-06, + "loss": 0.0667, + "step": 3370 + }, + { + "epoch": 1.24, + "learning_rate": 8.883838383838384e-06, + "loss": 0.0657, + "step": 3380 + }, + { + "epoch": 1.25, + "learning_rate": 8.880230880230881e-06, + "loss": 0.0647, + "step": 3390 + }, + { + "epoch": 1.25, + "learning_rate": 8.876623376623378e-06, + "loss": 0.0664, + "step": 3400 + }, + { + "epoch": 1.25, + "eval_loss": 0.09320068359375, + "eval_runtime": 804.4129, + "eval_samples_per_second": 3.444, + "eval_steps_per_second": 0.027, + "eval_wer": 23.52683387270766, + "step": 3400 + }, + { + "epoch": 1.25, + "learning_rate": 8.873015873015873e-06, + "loss": 0.0668, + "step": 3410 + }, + { + "epoch": 1.26, + "learning_rate": 8.86940836940837e-06, + "loss": 0.0662, + "step": 3420 + }, + { + "epoch": 1.26, + "learning_rate": 8.865800865800867e-06, + "loss": 0.0656, + "step": 3430 + }, + { + "epoch": 1.26, + "learning_rate": 8.862193362193364e-06, + "loss": 0.0668, + "step": 3440 + }, + { + "epoch": 1.27, + "learning_rate": 8.85858585858586e-06, + "loss": 0.0688, + "step": 3450 + }, + { + "epoch": 1.27, + "learning_rate": 8.854978354978356e-06, + "loss": 0.0667, + "step": 3460 + }, + { + "epoch": 1.28, + "learning_rate": 8.851370851370851e-06, + "loss": 0.0703, + "step": 3470 + }, + { + "epoch": 1.28, + "learning_rate": 8.847763347763348e-06, + "loss": 0.0703, + "step": 3480 + }, + { + "epoch": 1.28, + "learning_rate": 8.844155844155845e-06, + "loss": 0.0678, + "step": 3490 + }, + { + "epoch": 1.29, + "learning_rate": 8.840548340548342e-06, + "loss": 0.0684, + "step": 3500 + }, + { + "epoch": 1.29, + "eval_loss": 0.09197998046875, + "eval_runtime": 577.6791, + "eval_samples_per_second": 4.795, + "eval_steps_per_second": 0.038, + "eval_wer": 23.43918554476807, + "step": 3500 + }, + { + "epoch": 1.29, + "learning_rate": 8.836940836940837e-06, + "loss": 0.0679, + "step": 3510 + }, + { + "epoch": 1.29, + "learning_rate": 8.833333333333334e-06, + "loss": 0.068, + "step": 3520 + }, + { + "epoch": 1.3, + "learning_rate": 8.829725829725831e-06, + "loss": 0.0673, + "step": 3530 + }, + { + "epoch": 1.3, + "learning_rate": 8.826118326118328e-06, + "loss": 0.0646, + "step": 3540 + }, + { + "epoch": 1.31, + "learning_rate": 8.822510822510823e-06, + "loss": 0.0667, + "step": 3550 + }, + { + "epoch": 1.31, + "learning_rate": 8.81890331890332e-06, + "loss": 0.0664, + "step": 3560 + }, + { + "epoch": 1.31, + "learning_rate": 8.815295815295815e-06, + "loss": 0.0669, + "step": 3570 + }, + { + "epoch": 1.32, + "learning_rate": 8.811688311688312e-06, + "loss": 0.0681, + "step": 3580 + }, + { + "epoch": 1.32, + "learning_rate": 8.808080808080809e-06, + "loss": 0.0689, + "step": 3590 + }, + { + "epoch": 1.32, + "learning_rate": 8.804473304473306e-06, + "loss": 0.0681, + "step": 3600 + }, + { + "epoch": 1.32, + "eval_loss": 0.0921630859375, + "eval_runtime": 316.0563, + "eval_samples_per_second": 8.764, + "eval_steps_per_second": 0.07, + "eval_wer": 22.82564724919094, + "step": 3600 + }, + { + "epoch": 1.33, + "learning_rate": 8.800865800865803e-06, + "loss": 0.0683, + "step": 3610 + }, + { + "epoch": 1.33, + "learning_rate": 8.797258297258298e-06, + "loss": 0.0662, + "step": 3620 + }, + { + "epoch": 1.33, + "learning_rate": 8.793650793650795e-06, + "loss": 0.0705, + "step": 3630 + }, + { + "epoch": 1.34, + "learning_rate": 8.79004329004329e-06, + "loss": 0.0668, + "step": 3640 + }, + { + "epoch": 1.34, + "learning_rate": 8.786435786435787e-06, + "loss": 0.0649, + "step": 3650 + }, + { + "epoch": 1.35, + "learning_rate": 8.782828282828284e-06, + "loss": 0.0676, + "step": 3660 + }, + { + "epoch": 1.35, + "learning_rate": 8.779220779220779e-06, + "loss": 0.067, + "step": 3670 + }, + { + "epoch": 1.35, + "learning_rate": 8.775613275613276e-06, + "loss": 0.068, + "step": 3680 + }, + { + "epoch": 1.36, + "learning_rate": 8.772005772005773e-06, + "loss": 0.0656, + "step": 3690 + }, + { + "epoch": 1.36, + "learning_rate": 8.76839826839827e-06, + "loss": 0.0668, + "step": 3700 + }, + { + "epoch": 1.36, + "eval_loss": 0.09124755859375, + "eval_runtime": 443.766, + "eval_samples_per_second": 6.242, + "eval_steps_per_second": 0.05, + "eval_wer": 22.866100323624593, + "step": 3700 + }, + { + "epoch": 1.36, + "learning_rate": 8.764790764790767e-06, + "loss": 0.0656, + "step": 3710 + }, + { + "epoch": 1.37, + "learning_rate": 8.761183261183262e-06, + "loss": 0.0672, + "step": 3720 + }, + { + "epoch": 1.37, + "learning_rate": 8.757575757575759e-06, + "loss": 0.0691, + "step": 3730 + }, + { + "epoch": 1.38, + "learning_rate": 8.753968253968254e-06, + "loss": 0.0658, + "step": 3740 + }, + { + "epoch": 1.38, + "learning_rate": 8.75036075036075e-06, + "loss": 0.067, + "step": 3750 + }, + { + "epoch": 1.38, + "learning_rate": 8.746753246753248e-06, + "loss": 0.0659, + "step": 3760 + }, + { + "epoch": 1.39, + "learning_rate": 8.743145743145743e-06, + "loss": 0.0644, + "step": 3770 + }, + { + "epoch": 1.39, + "learning_rate": 8.73953823953824e-06, + "loss": 0.0664, + "step": 3780 + }, + { + "epoch": 1.39, + "learning_rate": 8.735930735930737e-06, + "loss": 0.0674, + "step": 3790 + }, + { + "epoch": 1.4, + "learning_rate": 8.732323232323234e-06, + "loss": 0.0678, + "step": 3800 + }, + { + "epoch": 1.4, + "eval_loss": 0.09130859375, + "eval_runtime": 455.9937, + "eval_samples_per_second": 6.075, + "eval_steps_per_second": 0.048, + "eval_wer": 23.580771305285868, + "step": 3800 + }, + { + "epoch": 1.4, + "learning_rate": 8.72871572871573e-06, + "loss": 0.0658, + "step": 3810 + }, + { + "epoch": 1.4, + "learning_rate": 8.725108225108226e-06, + "loss": 0.0646, + "step": 3820 + }, + { + "epoch": 1.41, + "learning_rate": 8.721500721500722e-06, + "loss": 0.0649, + "step": 3830 + }, + { + "epoch": 1.41, + "learning_rate": 8.717893217893218e-06, + "loss": 0.067, + "step": 3840 + }, + { + "epoch": 1.42, + "learning_rate": 8.714285714285715e-06, + "loss": 0.0656, + "step": 3850 + }, + { + "epoch": 1.42, + "learning_rate": 8.710678210678211e-06, + "loss": 0.0685, + "step": 3860 + }, + { + "epoch": 1.42, + "learning_rate": 8.707070707070707e-06, + "loss": 0.0652, + "step": 3870 + }, + { + "epoch": 1.43, + "learning_rate": 8.703463203463204e-06, + "loss": 0.0686, + "step": 3880 + }, + { + "epoch": 1.43, + "learning_rate": 8.6998556998557e-06, + "loss": 0.0669, + "step": 3890 + }, + { + "epoch": 1.43, + "learning_rate": 8.696248196248197e-06, + "loss": 0.064, + "step": 3900 + }, + { + "epoch": 1.43, + "eval_loss": 0.09088134765625, + "eval_runtime": 323.8053, + "eval_samples_per_second": 8.555, + "eval_steps_per_second": 0.068, + "eval_wer": 23.15601402373247, + "step": 3900 + }, + { + "epoch": 1.44, + "learning_rate": 8.692640692640694e-06, + "loss": 0.0667, + "step": 3910 + }, + { + "epoch": 1.44, + "learning_rate": 8.68903318903319e-06, + "loss": 0.0635, + "step": 3920 + }, + { + "epoch": 1.44, + "learning_rate": 8.685425685425686e-06, + "loss": 0.067, + "step": 3930 + }, + { + "epoch": 1.45, + "learning_rate": 8.681818181818182e-06, + "loss": 0.0644, + "step": 3940 + }, + { + "epoch": 1.45, + "learning_rate": 8.678210678210678e-06, + "loss": 0.0629, + "step": 3950 + }, + { + "epoch": 1.46, + "learning_rate": 8.674603174603175e-06, + "loss": 0.0667, + "step": 3960 + }, + { + "epoch": 1.46, + "learning_rate": 8.670995670995672e-06, + "loss": 0.0664, + "step": 3970 + }, + { + "epoch": 1.46, + "learning_rate": 8.667388167388167e-06, + "loss": 0.0658, + "step": 3980 + }, + { + "epoch": 1.47, + "learning_rate": 8.663780663780664e-06, + "loss": 0.0682, + "step": 3990 + }, + { + "epoch": 1.47, + "learning_rate": 8.660173160173161e-06, + "loss": 0.0663, + "step": 4000 + }, + { + "epoch": 1.47, + "eval_loss": 0.09112548828125, + "eval_runtime": 454.1676, + "eval_samples_per_second": 6.099, + "eval_steps_per_second": 0.048, + "eval_wer": 23.826860841423947, + "step": 4000 + }, + { + "epoch": 1.47, + "learning_rate": 8.657287157287158e-06, + "loss": 0.0664, + "step": 4010 + }, + { + "epoch": 1.48, + "learning_rate": 8.653679653679655e-06, + "loss": 0.0658, + "step": 4020 + }, + { + "epoch": 1.48, + "learning_rate": 8.650072150072152e-06, + "loss": 0.0678, + "step": 4030 + }, + { + "epoch": 1.49, + "learning_rate": 8.646464646464647e-06, + "loss": 0.0699, + "step": 4040 + }, + { + "epoch": 1.49, + "learning_rate": 8.642857142857144e-06, + "loss": 0.0662, + "step": 4050 + }, + { + "epoch": 1.49, + "learning_rate": 8.639249639249639e-06, + "loss": 0.0677, + "step": 4060 + }, + { + "epoch": 1.5, + "learning_rate": 8.635642135642136e-06, + "loss": 0.0651, + "step": 4070 + }, + { + "epoch": 1.5, + "learning_rate": 8.632034632034633e-06, + "loss": 0.0681, + "step": 4080 + }, + { + "epoch": 1.5, + "learning_rate": 8.62842712842713e-06, + "loss": 0.0658, + "step": 4090 + }, + { + "epoch": 1.51, + "learning_rate": 8.624819624819626e-06, + "loss": 0.0657, + "step": 4100 + }, + { + "epoch": 1.51, + "eval_loss": 0.0904541015625, + "eval_runtime": 483.7028, + "eval_samples_per_second": 5.727, + "eval_steps_per_second": 0.045, + "eval_wer": 22.562702265372167, + "step": 4100 + }, + { + "epoch": 1.51, + "learning_rate": 8.621212121212122e-06, + "loss": 0.0668, + "step": 4110 + }, + { + "epoch": 1.51, + "learning_rate": 8.617604617604618e-06, + "loss": 0.066, + "step": 4120 + }, + { + "epoch": 1.52, + "learning_rate": 8.613997113997115e-06, + "loss": 0.0646, + "step": 4130 + }, + { + "epoch": 1.52, + "learning_rate": 8.61038961038961e-06, + "loss": 0.0657, + "step": 4140 + }, + { + "epoch": 1.53, + "learning_rate": 8.606782106782107e-06, + "loss": 0.0658, + "step": 4150 + }, + { + "epoch": 1.53, + "learning_rate": 8.603174603174604e-06, + "loss": 0.0666, + "step": 4160 + }, + { + "epoch": 1.53, + "learning_rate": 8.5995670995671e-06, + "loss": 0.0677, + "step": 4170 + }, + { + "epoch": 1.54, + "learning_rate": 8.595959595959596e-06, + "loss": 0.0667, + "step": 4180 + }, + { + "epoch": 1.54, + "learning_rate": 8.592352092352093e-06, + "loss": 0.0671, + "step": 4190 + }, + { + "epoch": 1.54, + "learning_rate": 8.58874458874459e-06, + "loss": 0.0679, + "step": 4200 + }, + { + "epoch": 1.54, + "eval_loss": 0.090576171875, + "eval_runtime": 527.5145, + "eval_samples_per_second": 5.251, + "eval_steps_per_second": 0.042, + "eval_wer": 22.808791801510246, + "step": 4200 + }, + { + "epoch": 1.55, + "learning_rate": 8.585137085137087e-06, + "loss": 0.0684, + "step": 4210 + }, + { + "epoch": 1.55, + "learning_rate": 8.581529581529582e-06, + "loss": 0.0677, + "step": 4220 + }, + { + "epoch": 1.56, + "learning_rate": 8.57792207792208e-06, + "loss": 0.0646, + "step": 4230 + }, + { + "epoch": 1.56, + "learning_rate": 8.574314574314574e-06, + "loss": 0.068, + "step": 4240 + }, + { + "epoch": 1.56, + "learning_rate": 8.570707070707071e-06, + "loss": 0.0668, + "step": 4250 + }, + { + "epoch": 1.57, + "learning_rate": 8.567099567099568e-06, + "loss": 0.0656, + "step": 4260 + }, + { + "epoch": 1.57, + "learning_rate": 8.563492063492063e-06, + "loss": 0.0654, + "step": 4270 + }, + { + "epoch": 1.57, + "learning_rate": 8.55988455988456e-06, + "loss": 0.0648, + "step": 4280 + }, + { + "epoch": 1.58, + "learning_rate": 8.556277056277057e-06, + "loss": 0.067, + "step": 4290 + }, + { + "epoch": 1.58, + "learning_rate": 8.552669552669554e-06, + "loss": 0.0675, + "step": 4300 + }, + { + "epoch": 1.58, + "eval_loss": 0.090087890625, + "eval_runtime": 250.2835, + "eval_samples_per_second": 11.067, + "eval_steps_per_second": 0.088, + "eval_wer": 22.515507011866234, + "step": 4300 + }, + { + "epoch": 1.58, + "learning_rate": 8.549062049062051e-06, + "loss": 0.0657, + "step": 4310 + }, + { + "epoch": 1.59, + "learning_rate": 8.545454545454546e-06, + "loss": 0.0648, + "step": 4320 + }, + { + "epoch": 1.59, + "learning_rate": 8.541847041847043e-06, + "loss": 0.0679, + "step": 4330 + }, + { + "epoch": 1.6, + "learning_rate": 8.538239538239538e-06, + "loss": 0.0682, + "step": 4340 + }, + { + "epoch": 1.6, + "learning_rate": 8.534632034632035e-06, + "loss": 0.0693, + "step": 4350 + }, + { + "epoch": 1.6, + "learning_rate": 8.531024531024532e-06, + "loss": 0.0637, + "step": 4360 + }, + { + "epoch": 1.61, + "learning_rate": 8.527417027417027e-06, + "loss": 0.068, + "step": 4370 + }, + { + "epoch": 1.61, + "learning_rate": 8.523809523809524e-06, + "loss": 0.0688, + "step": 4380 + }, + { + "epoch": 1.61, + "learning_rate": 8.520202020202021e-06, + "loss": 0.0643, + "step": 4390 + }, + { + "epoch": 1.62, + "learning_rate": 8.516594516594518e-06, + "loss": 0.0673, + "step": 4400 + }, + { + "epoch": 1.62, + "eval_loss": 0.0897216796875, + "eval_runtime": 333.8313, + "eval_samples_per_second": 8.298, + "eval_steps_per_second": 0.066, + "eval_wer": 22.43797195253506, + "step": 4400 + }, + { + "epoch": 1.62, + "learning_rate": 8.512987012987015e-06, + "loss": 0.0634, + "step": 4410 + }, + { + "epoch": 1.62, + "learning_rate": 8.50937950937951e-06, + "loss": 0.0683, + "step": 4420 + }, + { + "epoch": 1.63, + "learning_rate": 8.505772005772007e-06, + "loss": 0.0673, + "step": 4430 + }, + { + "epoch": 1.63, + "learning_rate": 8.502164502164502e-06, + "loss": 0.0677, + "step": 4440 + }, + { + "epoch": 1.64, + "learning_rate": 8.498556998556999e-06, + "loss": 0.0676, + "step": 4450 + }, + { + "epoch": 1.64, + "learning_rate": 8.494949494949496e-06, + "loss": 0.0686, + "step": 4460 + }, + { + "epoch": 1.64, + "learning_rate": 8.491341991341993e-06, + "loss": 0.0678, + "step": 4470 + }, + { + "epoch": 1.65, + "learning_rate": 8.487734487734488e-06, + "loss": 0.0663, + "step": 4480 + }, + { + "epoch": 1.65, + "learning_rate": 8.484126984126985e-06, + "loss": 0.0659, + "step": 4490 + }, + { + "epoch": 1.65, + "learning_rate": 8.480519480519482e-06, + "loss": 0.0639, + "step": 4500 + }, + { + "epoch": 1.65, + "eval_loss": 0.08935546875, + "eval_runtime": 1011.4705, + "eval_samples_per_second": 2.739, + "eval_steps_per_second": 0.022, + "eval_wer": 22.55596008629989, + "step": 4500 + }, + { + "epoch": 1.66, + "learning_rate": 8.476911976911979e-06, + "loss": 0.0664, + "step": 4510 + }, + { + "epoch": 1.66, + "learning_rate": 8.473304473304474e-06, + "loss": 0.0662, + "step": 4520 + }, + { + "epoch": 1.67, + "learning_rate": 8.46969696969697e-06, + "loss": 0.067, + "step": 4530 + }, + { + "epoch": 1.67, + "learning_rate": 8.466089466089466e-06, + "loss": 0.0661, + "step": 4540 + }, + { + "epoch": 1.67, + "learning_rate": 8.462481962481963e-06, + "loss": 0.0655, + "step": 4550 + }, + { + "epoch": 1.68, + "learning_rate": 8.45887445887446e-06, + "loss": 0.0655, + "step": 4560 + }, + { + "epoch": 1.68, + "learning_rate": 8.455266955266957e-06, + "loss": 0.0641, + "step": 4570 + }, + { + "epoch": 1.68, + "learning_rate": 8.451659451659452e-06, + "loss": 0.0658, + "step": 4580 + }, + { + "epoch": 1.69, + "learning_rate": 8.448051948051949e-06, + "loss": 0.0646, + "step": 4590 + }, + { + "epoch": 1.69, + "learning_rate": 8.444444444444446e-06, + "loss": 0.0675, + "step": 4600 + }, + { + "epoch": 1.69, + "eval_loss": 0.088623046875, + "eval_runtime": 296.69, + "eval_samples_per_second": 9.336, + "eval_steps_per_second": 0.074, + "eval_wer": 22.431229773462782, + "step": 4600 + }, + { + "epoch": 1.69, + "learning_rate": 8.440836940836943e-06, + "loss": 0.0646, + "step": 4610 + }, + { + "epoch": 1.7, + "learning_rate": 8.437229437229438e-06, + "loss": 0.0634, + "step": 4620 + }, + { + "epoch": 1.7, + "learning_rate": 8.433621933621935e-06, + "loss": 0.0665, + "step": 4630 + }, + { + "epoch": 1.71, + "learning_rate": 8.43001443001443e-06, + "loss": 0.0659, + "step": 4640 + }, + { + "epoch": 1.71, + "learning_rate": 8.426406926406927e-06, + "loss": 0.0661, + "step": 4650 + }, + { + "epoch": 1.71, + "learning_rate": 8.422799422799424e-06, + "loss": 0.0665, + "step": 4660 + }, + { + "epoch": 1.72, + "learning_rate": 8.41919191919192e-06, + "loss": 0.0633, + "step": 4670 + }, + { + "epoch": 1.72, + "learning_rate": 8.415584415584416e-06, + "loss": 0.0644, + "step": 4680 + }, + { + "epoch": 1.72, + "learning_rate": 8.411976911976913e-06, + "loss": 0.0632, + "step": 4690 + }, + { + "epoch": 1.73, + "learning_rate": 8.40836940836941e-06, + "loss": 0.0621, + "step": 4700 + }, + { + "epoch": 1.73, + "eval_loss": 0.08856201171875, + "eval_runtime": 349.513, + "eval_samples_per_second": 7.925, + "eval_steps_per_second": 0.063, + "eval_wer": 22.518878101402372, + "step": 4700 + }, + { + "epoch": 1.73, + "learning_rate": 8.404761904761905e-06, + "loss": 0.0652, + "step": 4710 + }, + { + "epoch": 1.74, + "learning_rate": 8.401154401154402e-06, + "loss": 0.0629, + "step": 4720 + }, + { + "epoch": 1.74, + "learning_rate": 8.397546897546899e-06, + "loss": 0.0657, + "step": 4730 + }, + { + "epoch": 1.74, + "learning_rate": 8.393939393939394e-06, + "loss": 0.0645, + "step": 4740 + }, + { + "epoch": 1.75, + "learning_rate": 8.39033189033189e-06, + "loss": 0.0667, + "step": 4750 + }, + { + "epoch": 1.75, + "learning_rate": 8.386724386724387e-06, + "loss": 0.0634, + "step": 4760 + }, + { + "epoch": 1.75, + "learning_rate": 8.383116883116884e-06, + "loss": 0.0666, + "step": 4770 + }, + { + "epoch": 1.76, + "learning_rate": 8.379509379509381e-06, + "loss": 0.0663, + "step": 4780 + }, + { + "epoch": 1.76, + "learning_rate": 8.375901875901876e-06, + "loss": 0.0686, + "step": 4790 + }, + { + "epoch": 1.76, + "learning_rate": 8.372294372294373e-06, + "loss": 0.0658, + "step": 4800 + }, + { + "epoch": 1.76, + "eval_loss": 0.08856201171875, + "eval_runtime": 691.7141, + "eval_samples_per_second": 4.005, + "eval_steps_per_second": 0.032, + "eval_wer": 22.249190938511326, + "step": 4800 + }, + { + "epoch": 1.77, + "learning_rate": 8.368686868686869e-06, + "loss": 0.0649, + "step": 4810 + }, + { + "epoch": 1.77, + "learning_rate": 8.365079365079365e-06, + "loss": 0.064, + "step": 4820 + }, + { + "epoch": 1.78, + "learning_rate": 8.361471861471862e-06, + "loss": 0.0639, + "step": 4830 + }, + { + "epoch": 1.78, + "learning_rate": 8.357864357864358e-06, + "loss": 0.0658, + "step": 4840 + }, + { + "epoch": 1.78, + "learning_rate": 8.354256854256854e-06, + "loss": 0.0712, + "step": 4850 + }, + { + "epoch": 1.79, + "learning_rate": 8.350649350649351e-06, + "loss": 0.0664, + "step": 4860 + }, + { + "epoch": 1.79, + "learning_rate": 8.347041847041848e-06, + "loss": 0.0611, + "step": 4870 + }, + { + "epoch": 1.79, + "learning_rate": 8.343434343434345e-06, + "loss": 0.0675, + "step": 4880 + }, + { + "epoch": 1.8, + "learning_rate": 8.33982683982684e-06, + "loss": 0.0659, + "step": 4890 + }, + { + "epoch": 1.8, + "learning_rate": 8.336219336219337e-06, + "loss": 0.0654, + "step": 4900 + }, + { + "epoch": 1.8, + "eval_loss": 0.08856201171875, + "eval_runtime": 583.1687, + "eval_samples_per_second": 4.75, + "eval_steps_per_second": 0.038, + "eval_wer": 22.090749730312837, + "step": 4900 + }, + { + "epoch": 1.81, + "learning_rate": 8.332611832611832e-06, + "loss": 0.0679, + "step": 4910 + }, + { + "epoch": 1.81, + "learning_rate": 8.32900432900433e-06, + "loss": 0.0667, + "step": 4920 + }, + { + "epoch": 1.81, + "learning_rate": 8.325396825396826e-06, + "loss": 0.0661, + "step": 4930 + }, + { + "epoch": 1.82, + "learning_rate": 8.321789321789321e-06, + "loss": 0.0635, + "step": 4940 + }, + { + "epoch": 1.82, + "learning_rate": 8.318181818181818e-06, + "loss": 0.0645, + "step": 4950 + }, + { + "epoch": 1.82, + "learning_rate": 8.314574314574315e-06, + "loss": 0.0663, + "step": 4960 + }, + { + "epoch": 1.83, + "learning_rate": 8.310966810966812e-06, + "loss": 0.0646, + "step": 4970 + }, + { + "epoch": 1.83, + "learning_rate": 8.307359307359309e-06, + "loss": 0.0631, + "step": 4980 + }, + { + "epoch": 1.83, + "learning_rate": 8.303751803751804e-06, + "loss": 0.0659, + "step": 4990 + }, + { + "epoch": 1.84, + "learning_rate": 8.300144300144301e-06, + "loss": 0.0639, + "step": 5000 + }, + { + "epoch": 1.84, + "eval_loss": 0.0875244140625, + "eval_runtime": 638.4901, + "eval_samples_per_second": 4.338, + "eval_steps_per_second": 0.034, + "eval_wer": 22.249190938511326, + "step": 5000 + }, + { + "epoch": 1.84, + "learning_rate": 8.297258297258298e-06, + "loss": 0.0649, + "step": 5010 + }, + { + "epoch": 1.85, + "learning_rate": 8.293650793650794e-06, + "loss": 0.0653, + "step": 5020 + }, + { + "epoch": 1.85, + "learning_rate": 8.29004329004329e-06, + "loss": 0.0672, + "step": 5030 + }, + { + "epoch": 1.85, + "learning_rate": 8.286435786435787e-06, + "loss": 0.0646, + "step": 5040 + }, + { + "epoch": 1.86, + "learning_rate": 8.282828282828283e-06, + "loss": 0.0639, + "step": 5050 + }, + { + "epoch": 1.86, + "learning_rate": 8.27922077922078e-06, + "loss": 0.0671, + "step": 5060 + }, + { + "epoch": 1.86, + "learning_rate": 8.275613275613277e-06, + "loss": 0.0695, + "step": 5070 + }, + { + "epoch": 1.87, + "learning_rate": 8.272005772005772e-06, + "loss": 0.0654, + "step": 5080 + }, + { + "epoch": 1.87, + "learning_rate": 8.26839826839827e-06, + "loss": 0.0654, + "step": 5090 + }, + { + "epoch": 1.88, + "learning_rate": 8.264790764790766e-06, + "loss": 0.0631, + "step": 5100 + }, + { + "epoch": 1.88, + "eval_loss": 0.08734130859375, + "eval_runtime": 259.0875, + "eval_samples_per_second": 10.691, + "eval_steps_per_second": 0.085, + "eval_wer": 22.239077669902912, + "step": 5100 + }, + { + "epoch": 1.88, + "learning_rate": 8.261183261183261e-06, + "loss": 0.0656, + "step": 5110 + }, + { + "epoch": 1.88, + "learning_rate": 8.257575757575758e-06, + "loss": 0.0644, + "step": 5120 + }, + { + "epoch": 1.89, + "learning_rate": 8.253968253968254e-06, + "loss": 0.0634, + "step": 5130 + }, + { + "epoch": 1.89, + "learning_rate": 8.25036075036075e-06, + "loss": 0.0659, + "step": 5140 + }, + { + "epoch": 1.89, + "learning_rate": 8.246753246753247e-06, + "loss": 0.0656, + "step": 5150 + }, + { + "epoch": 1.9, + "learning_rate": 8.243145743145744e-06, + "loss": 0.0633, + "step": 5160 + }, + { + "epoch": 1.9, + "learning_rate": 8.239538239538241e-06, + "loss": 0.0665, + "step": 5170 + }, + { + "epoch": 1.9, + "learning_rate": 8.235930735930736e-06, + "loss": 0.0652, + "step": 5180 + }, + { + "epoch": 1.91, + "learning_rate": 8.232323232323233e-06, + "loss": 0.0626, + "step": 5190 + }, + { + "epoch": 1.91, + "learning_rate": 8.22871572871573e-06, + "loss": 0.0645, + "step": 5200 + }, + { + "epoch": 1.91, + "eval_loss": 0.0869140625, + "eval_runtime": 493.2946, + "eval_samples_per_second": 5.615, + "eval_steps_per_second": 0.045, + "eval_wer": 22.31324163969795, + "step": 5200 + }, + { + "epoch": 1.92, + "learning_rate": 8.225108225108225e-06, + "loss": 0.0644, + "step": 5210 + }, + { + "epoch": 1.92, + "learning_rate": 8.221500721500722e-06, + "loss": 0.0649, + "step": 5220 + }, + { + "epoch": 1.92, + "learning_rate": 8.217893217893217e-06, + "loss": 0.0658, + "step": 5230 + }, + { + "epoch": 1.93, + "learning_rate": 8.214285714285714e-06, + "loss": 0.066, + "step": 5240 + }, + { + "epoch": 1.93, + "learning_rate": 8.210678210678211e-06, + "loss": 0.0634, + "step": 5250 + }, + { + "epoch": 1.93, + "learning_rate": 8.207070707070708e-06, + "loss": 0.065, + "step": 5260 + }, + { + "epoch": 1.94, + "learning_rate": 8.203463203463205e-06, + "loss": 0.0633, + "step": 5270 + }, + { + "epoch": 1.94, + "learning_rate": 8.199855699855702e-06, + "loss": 0.0628, + "step": 5280 + }, + { + "epoch": 1.94, + "learning_rate": 8.196248196248197e-06, + "loss": 0.065, + "step": 5290 + }, + { + "epoch": 1.95, + "learning_rate": 8.192640692640694e-06, + "loss": 0.0665, + "step": 5300 + }, + { + "epoch": 1.95, + "eval_loss": 0.0872802734375, + "eval_runtime": 673.3399, + "eval_samples_per_second": 4.114, + "eval_steps_per_second": 0.033, + "eval_wer": 22.198624595469255, + "step": 5300 + }, + { + "epoch": 1.95, + "learning_rate": 8.18903318903319e-06, + "loss": 0.0622, + "step": 5310 + }, + { + "epoch": 1.96, + "learning_rate": 8.185425685425686e-06, + "loss": 0.0666, + "step": 5320 + }, + { + "epoch": 1.96, + "learning_rate": 8.181818181818183e-06, + "loss": 0.0666, + "step": 5330 + }, + { + "epoch": 1.96, + "learning_rate": 8.178210678210678e-06, + "loss": 0.0631, + "step": 5340 + }, + { + "epoch": 1.97, + "learning_rate": 8.174603174603175e-06, + "loss": 0.0632, + "step": 5350 + }, + { + "epoch": 1.97, + "learning_rate": 8.170995670995672e-06, + "loss": 0.0623, + "step": 5360 + }, + { + "epoch": 1.97, + "learning_rate": 8.167388167388169e-06, + "loss": 0.0657, + "step": 5370 + }, + { + "epoch": 1.98, + "learning_rate": 8.163780663780666e-06, + "loss": 0.0651, + "step": 5380 + }, + { + "epoch": 1.98, + "learning_rate": 8.160173160173161e-06, + "loss": 0.0655, + "step": 5390 + }, + { + "epoch": 1.99, + "learning_rate": 8.156565656565658e-06, + "loss": 0.0641, + "step": 5400 + }, + { + "epoch": 1.99, + "eval_loss": 0.0867919921875, + "eval_runtime": 648.7299, + "eval_samples_per_second": 4.27, + "eval_steps_per_second": 0.034, + "eval_wer": 22.228964401294498, + "step": 5400 + }, + { + "epoch": 1.99, + "learning_rate": 8.152958152958153e-06, + "loss": 0.0657, + "step": 5410 + }, + { + "epoch": 1.99, + "learning_rate": 8.14935064935065e-06, + "loss": 0.065, + "step": 5420 + }, + { + "epoch": 2.0, + "learning_rate": 8.145743145743147e-06, + "loss": 0.0652, + "step": 5430 + }, + { + "epoch": 2.0, + "learning_rate": 8.142135642135642e-06, + "loss": 0.064, + "step": 5440 + }, + { + "epoch": 2.0, + "learning_rate": 8.138528138528139e-06, + "loss": 0.0557, + "step": 5450 + }, + { + "epoch": 2.01, + "learning_rate": 8.134920634920636e-06, + "loss": 0.0552, + "step": 5460 + }, + { + "epoch": 2.01, + "learning_rate": 8.131313131313133e-06, + "loss": 0.0555, + "step": 5470 + }, + { + "epoch": 2.01, + "learning_rate": 8.12770562770563e-06, + "loss": 0.0545, + "step": 5480 + }, + { + "epoch": 2.02, + "learning_rate": 8.124098124098125e-06, + "loss": 0.0565, + "step": 5490 + }, + { + "epoch": 2.02, + "learning_rate": 8.120490620490622e-06, + "loss": 0.0558, + "step": 5500 + }, + { + "epoch": 2.02, + "eval_loss": 0.0870361328125, + "eval_runtime": 693.0613, + "eval_samples_per_second": 3.997, + "eval_steps_per_second": 0.032, + "eval_wer": 22.063781014023732, + "step": 5500 + }, + { + "epoch": 2.03, + "learning_rate": 8.116883116883117e-06, + "loss": 0.0541, + "step": 5510 + }, + { + "epoch": 2.03, + "learning_rate": 8.113275613275614e-06, + "loss": 0.0544, + "step": 5520 + }, + { + "epoch": 2.03, + "learning_rate": 8.10966810966811e-06, + "loss": 0.0532, + "step": 5530 + }, + { + "epoch": 2.04, + "learning_rate": 8.106060606060606e-06, + "loss": 0.054, + "step": 5540 + }, + { + "epoch": 2.04, + "learning_rate": 8.102453102453103e-06, + "loss": 0.0555, + "step": 5550 + }, + { + "epoch": 2.04, + "learning_rate": 8.0988455988456e-06, + "loss": 0.0543, + "step": 5560 + }, + { + "epoch": 2.05, + "learning_rate": 8.095238095238097e-06, + "loss": 0.0558, + "step": 5570 + }, + { + "epoch": 2.05, + "learning_rate": 8.091630591630593e-06, + "loss": 0.0523, + "step": 5580 + }, + { + "epoch": 2.06, + "learning_rate": 8.088023088023089e-06, + "loss": 0.0546, + "step": 5590 + }, + { + "epoch": 2.06, + "learning_rate": 8.084415584415586e-06, + "loss": 0.0556, + "step": 5600 + }, + { + "epoch": 2.06, + "eval_loss": 0.08795166015625, + "eval_runtime": 712.0699, + "eval_samples_per_second": 3.89, + "eval_steps_per_second": 0.031, + "eval_wer": 21.96264832793959, + "step": 5600 + }, + { + "epoch": 2.06, + "learning_rate": 8.08080808080808e-06, + "loss": 0.0568, + "step": 5610 + }, + { + "epoch": 2.07, + "learning_rate": 8.077200577200578e-06, + "loss": 0.053, + "step": 5620 + }, + { + "epoch": 2.07, + "learning_rate": 8.073593073593075e-06, + "loss": 0.0541, + "step": 5630 + }, + { + "epoch": 2.07, + "learning_rate": 8.069985569985571e-06, + "loss": 0.0538, + "step": 5640 + }, + { + "epoch": 2.08, + "learning_rate": 8.066378066378067e-06, + "loss": 0.053, + "step": 5650 + }, + { + "epoch": 2.08, + "learning_rate": 8.062770562770564e-06, + "loss": 0.055, + "step": 5660 + }, + { + "epoch": 2.08, + "learning_rate": 8.05916305916306e-06, + "loss": 0.0539, + "step": 5670 + }, + { + "epoch": 2.09, + "learning_rate": 8.055555555555557e-06, + "loss": 0.056, + "step": 5680 + }, + { + "epoch": 2.09, + "learning_rate": 8.051948051948052e-06, + "loss": 0.0557, + "step": 5690 + }, + { + "epoch": 2.1, + "learning_rate": 8.04834054834055e-06, + "loss": 0.0562, + "step": 5700 + }, + { + "epoch": 2.1, + "eval_loss": 0.08782958984375, + "eval_runtime": 270.8387, + "eval_samples_per_second": 10.227, + "eval_steps_per_second": 0.081, + "eval_wer": 22.10423408845739, + "step": 5700 + }, + { + "epoch": 2.1, + "learning_rate": 8.044733044733045e-06, + "loss": 0.0554, + "step": 5710 + }, + { + "epoch": 2.1, + "learning_rate": 8.041125541125541e-06, + "loss": 0.0524, + "step": 5720 + }, + { + "epoch": 2.11, + "learning_rate": 8.037518037518038e-06, + "loss": 0.0554, + "step": 5730 + }, + { + "epoch": 2.11, + "learning_rate": 8.033910533910535e-06, + "loss": 0.0561, + "step": 5740 + }, + { + "epoch": 2.11, + "learning_rate": 8.03030303030303e-06, + "loss": 0.0535, + "step": 5750 + }, + { + "epoch": 2.12, + "learning_rate": 8.026695526695527e-06, + "loss": 0.0548, + "step": 5760 + }, + { + "epoch": 2.12, + "learning_rate": 8.023088023088024e-06, + "loss": 0.0528, + "step": 5770 + }, + { + "epoch": 2.12, + "learning_rate": 8.019480519480521e-06, + "loss": 0.057, + "step": 5780 + }, + { + "epoch": 2.13, + "learning_rate": 8.015873015873016e-06, + "loss": 0.0579, + "step": 5790 + }, + { + "epoch": 2.13, + "learning_rate": 8.012265512265513e-06, + "loss": 0.0547, + "step": 5800 + }, + { + "epoch": 2.13, + "eval_loss": 0.0888671875, + "eval_runtime": 251.3262, + "eval_samples_per_second": 11.022, + "eval_steps_per_second": 0.088, + "eval_wer": 22.05029665587918, + "step": 5800 + }, + { + "epoch": 2.14, + "learning_rate": 8.008658008658008e-06, + "loss": 0.0538, + "step": 5810 + }, + { + "epoch": 2.14, + "learning_rate": 8.005050505050505e-06, + "loss": 0.0565, + "step": 5820 + }, + { + "epoch": 2.14, + "learning_rate": 8.001443001443002e-06, + "loss": 0.0551, + "step": 5830 + }, + { + "epoch": 2.15, + "learning_rate": 7.997835497835499e-06, + "loss": 0.0537, + "step": 5840 + }, + { + "epoch": 2.15, + "learning_rate": 7.994227994227994e-06, + "loss": 0.0545, + "step": 5850 + }, + { + "epoch": 2.15, + "learning_rate": 7.990620490620491e-06, + "loss": 0.0547, + "step": 5860 + }, + { + "epoch": 2.16, + "learning_rate": 7.987012987012988e-06, + "loss": 0.0549, + "step": 5870 + }, + { + "epoch": 2.16, + "learning_rate": 7.983405483405483e-06, + "loss": 0.0533, + "step": 5880 + }, + { + "epoch": 2.17, + "learning_rate": 7.97979797979798e-06, + "loss": 0.0535, + "step": 5890 + }, + { + "epoch": 2.17, + "learning_rate": 7.976190476190477e-06, + "loss": 0.0553, + "step": 5900 + }, + { + "epoch": 2.17, + "eval_loss": 0.08807373046875, + "eval_runtime": 528.7421, + "eval_samples_per_second": 5.239, + "eval_steps_per_second": 0.042, + "eval_wer": 22.218851132686083, + "step": 5900 + }, + { + "epoch": 2.17, + "learning_rate": 7.972582972582972e-06, + "loss": 0.0531, + "step": 5910 + }, + { + "epoch": 2.18, + "learning_rate": 7.96897546897547e-06, + "loss": 0.0553, + "step": 5920 + }, + { + "epoch": 2.18, + "learning_rate": 7.965367965367966e-06, + "loss": 0.0548, + "step": 5930 + }, + { + "epoch": 2.18, + "learning_rate": 7.961760461760463e-06, + "loss": 0.0533, + "step": 5940 + }, + { + "epoch": 2.19, + "learning_rate": 7.95815295815296e-06, + "loss": 0.0531, + "step": 5950 + }, + { + "epoch": 2.19, + "learning_rate": 7.954545454545455e-06, + "loss": 0.0536, + "step": 5960 + }, + { + "epoch": 2.19, + "learning_rate": 7.950937950937952e-06, + "loss": 0.0564, + "step": 5970 + }, + { + "epoch": 2.2, + "learning_rate": 7.947330447330447e-06, + "loss": 0.0542, + "step": 5980 + }, + { + "epoch": 2.2, + "learning_rate": 7.943722943722944e-06, + "loss": 0.0551, + "step": 5990 + }, + { + "epoch": 2.21, + "learning_rate": 7.940115440115441e-06, + "loss": 0.0547, + "step": 6000 + }, + { + "epoch": 2.21, + "eval_loss": 0.08795166015625, + "eval_runtime": 556.2644, + "eval_samples_per_second": 4.98, + "eval_steps_per_second": 0.04, + "eval_wer": 21.98287486515642, + "step": 6000 + }, + { + "epoch": 2.21, + "learning_rate": 7.936868686868688e-06, + "loss": 0.0552, + "step": 6010 + }, + { + "epoch": 2.21, + "learning_rate": 7.933621933621934e-06, + "loss": 0.0556, + "step": 6020 + }, + { + "epoch": 2.22, + "learning_rate": 7.930014430014431e-06, + "loss": 0.0536, + "step": 6030 + }, + { + "epoch": 2.22, + "learning_rate": 7.926406926406926e-06, + "loss": 0.056, + "step": 6040 + }, + { + "epoch": 2.22, + "learning_rate": 7.922799422799423e-06, + "loss": 0.0556, + "step": 6050 + }, + { + "epoch": 2.23, + "learning_rate": 7.91919191919192e-06, + "loss": 0.0551, + "step": 6060 + }, + { + "epoch": 2.23, + "learning_rate": 7.915584415584417e-06, + "loss": 0.0552, + "step": 6070 + }, + { + "epoch": 2.24, + "learning_rate": 7.911976911976912e-06, + "loss": 0.0558, + "step": 6080 + }, + { + "epoch": 2.24, + "learning_rate": 7.90836940836941e-06, + "loss": 0.0531, + "step": 6090 + }, + { + "epoch": 2.24, + "learning_rate": 7.904761904761904e-06, + "loss": 0.0544, + "step": 6100 + }, + { + "epoch": 2.24, + "eval_loss": 0.08709716796875, + "eval_runtime": 263.4486, + "eval_samples_per_second": 10.514, + "eval_steps_per_second": 0.084, + "eval_wer": 22.110976267529665, + "step": 6100 + }, + { + "epoch": 2.25, + "learning_rate": 7.901154401154401e-06, + "loss": 0.0523, + "step": 6110 + }, + { + "epoch": 2.25, + "learning_rate": 7.897546897546898e-06, + "loss": 0.0552, + "step": 6120 + }, + { + "epoch": 2.25, + "learning_rate": 7.893939393939395e-06, + "loss": 0.0545, + "step": 6130 + }, + { + "epoch": 2.26, + "learning_rate": 7.890331890331892e-06, + "loss": 0.0554, + "step": 6140 + }, + { + "epoch": 2.26, + "learning_rate": 7.886724386724387e-06, + "loss": 0.0555, + "step": 6150 + }, + { + "epoch": 2.26, + "learning_rate": 7.883116883116884e-06, + "loss": 0.0544, + "step": 6160 + }, + { + "epoch": 2.27, + "learning_rate": 7.879509379509381e-06, + "loss": 0.0523, + "step": 6170 + }, + { + "epoch": 2.27, + "learning_rate": 7.875901875901876e-06, + "loss": 0.0547, + "step": 6180 + }, + { + "epoch": 2.28, + "learning_rate": 7.872294372294373e-06, + "loss": 0.0536, + "step": 6190 + }, + { + "epoch": 2.28, + "learning_rate": 7.868686868686868e-06, + "loss": 0.0573, + "step": 6200 + }, + { + "epoch": 2.28, + "eval_loss": 0.08660888671875, + "eval_runtime": 367.0765, + "eval_samples_per_second": 7.546, + "eval_steps_per_second": 0.06, + "eval_wer": 21.635652642934197, + "step": 6200 + }, + { + "epoch": 2.28, + "learning_rate": 7.865079365079365e-06, + "loss": 0.0541, + "step": 6210 + }, + { + "epoch": 2.29, + "learning_rate": 7.861471861471862e-06, + "loss": 0.053, + "step": 6220 + }, + { + "epoch": 2.29, + "learning_rate": 7.857864357864359e-06, + "loss": 0.0558, + "step": 6230 + }, + { + "epoch": 2.29, + "learning_rate": 7.854256854256856e-06, + "loss": 0.0543, + "step": 6240 + }, + { + "epoch": 2.3, + "learning_rate": 7.850649350649351e-06, + "loss": 0.0527, + "step": 6250 + }, + { + "epoch": 2.3, + "learning_rate": 7.847041847041848e-06, + "loss": 0.0535, + "step": 6260 + }, + { + "epoch": 2.31, + "learning_rate": 7.843434343434345e-06, + "loss": 0.0537, + "step": 6270 + }, + { + "epoch": 2.31, + "learning_rate": 7.83982683982684e-06, + "loss": 0.0563, + "step": 6280 + }, + { + "epoch": 2.31, + "learning_rate": 7.836219336219337e-06, + "loss": 0.0555, + "step": 6290 + }, + { + "epoch": 2.32, + "learning_rate": 7.832611832611832e-06, + "loss": 0.0562, + "step": 6300 + }, + { + "epoch": 2.32, + "eval_loss": 0.08721923828125, + "eval_runtime": 637.4835, + "eval_samples_per_second": 4.345, + "eval_steps_per_second": 0.035, + "eval_wer": 21.97950377562028, + "step": 6300 + }, + { + "epoch": 2.32, + "learning_rate": 7.829004329004329e-06, + "loss": 0.0571, + "step": 6310 + }, + { + "epoch": 2.32, + "learning_rate": 7.825396825396826e-06, + "loss": 0.0566, + "step": 6320 + }, + { + "epoch": 2.33, + "learning_rate": 7.821789321789323e-06, + "loss": 0.0547, + "step": 6330 + }, + { + "epoch": 2.33, + "learning_rate": 7.81818181818182e-06, + "loss": 0.0557, + "step": 6340 + }, + { + "epoch": 2.33, + "learning_rate": 7.814574314574315e-06, + "loss": 0.0564, + "step": 6350 + }, + { + "epoch": 2.34, + "learning_rate": 7.810966810966812e-06, + "loss": 0.0552, + "step": 6360 + }, + { + "epoch": 2.34, + "learning_rate": 7.807359307359309e-06, + "loss": 0.0539, + "step": 6370 + }, + { + "epoch": 2.35, + "learning_rate": 7.803751803751804e-06, + "loss": 0.0536, + "step": 6380 + }, + { + "epoch": 2.35, + "learning_rate": 7.8001443001443e-06, + "loss": 0.0543, + "step": 6390 + }, + { + "epoch": 2.35, + "learning_rate": 7.796536796536796e-06, + "loss": 0.0551, + "step": 6400 + }, + { + "epoch": 2.35, + "eval_loss": 0.0872802734375, + "eval_runtime": 686.505, + "eval_samples_per_second": 4.035, + "eval_steps_per_second": 0.032, + "eval_wer": 21.976132686084142, + "step": 6400 + }, + { + "epoch": 2.36, + "learning_rate": 7.792929292929293e-06, + "loss": 0.0549, + "step": 6410 + }, + { + "epoch": 2.36, + "learning_rate": 7.78932178932179e-06, + "loss": 0.055, + "step": 6420 + }, + { + "epoch": 2.36, + "learning_rate": 7.785714285714287e-06, + "loss": 0.0545, + "step": 6430 + }, + { + "epoch": 2.37, + "learning_rate": 7.782106782106784e-06, + "loss": 0.0532, + "step": 6440 + }, + { + "epoch": 2.37, + "learning_rate": 7.77849927849928e-06, + "loss": 0.0558, + "step": 6450 + }, + { + "epoch": 2.38, + "learning_rate": 7.774891774891776e-06, + "loss": 0.0538, + "step": 6460 + }, + { + "epoch": 2.38, + "learning_rate": 7.771284271284273e-06, + "loss": 0.0534, + "step": 6470 + }, + { + "epoch": 2.38, + "learning_rate": 7.767676767676768e-06, + "loss": 0.0516, + "step": 6480 + }, + { + "epoch": 2.39, + "learning_rate": 7.764069264069265e-06, + "loss": 0.0537, + "step": 6490 + }, + { + "epoch": 2.39, + "learning_rate": 7.760461760461762e-06, + "loss": 0.0556, + "step": 6500 + }, + { + "epoch": 2.39, + "eval_loss": 0.08697509765625, + "eval_runtime": 549.3253, + "eval_samples_per_second": 5.043, + "eval_steps_per_second": 0.04, + "eval_wer": 22.01321467098166, + "step": 6500 + }, + { + "epoch": 2.39, + "learning_rate": 7.756854256854257e-06, + "loss": 0.0535, + "step": 6510 + }, + { + "epoch": 2.4, + "learning_rate": 7.753246753246754e-06, + "loss": 0.0539, + "step": 6520 + }, + { + "epoch": 2.4, + "learning_rate": 7.74963924963925e-06, + "loss": 0.0531, + "step": 6530 + }, + { + "epoch": 2.4, + "learning_rate": 7.746031746031747e-06, + "loss": 0.0546, + "step": 6540 + }, + { + "epoch": 2.41, + "learning_rate": 7.742424242424244e-06, + "loss": 0.0552, + "step": 6550 + }, + { + "epoch": 2.41, + "learning_rate": 7.73881673881674e-06, + "loss": 0.0565, + "step": 6560 + }, + { + "epoch": 2.42, + "learning_rate": 7.735209235209236e-06, + "loss": 0.0551, + "step": 6570 + }, + { + "epoch": 2.42, + "learning_rate": 7.731601731601732e-06, + "loss": 0.0551, + "step": 6580 + }, + { + "epoch": 2.42, + "learning_rate": 7.727994227994229e-06, + "loss": 0.0534, + "step": 6590 + }, + { + "epoch": 2.43, + "learning_rate": 7.724386724386725e-06, + "loss": 0.0558, + "step": 6600 + }, + { + "epoch": 2.43, + "eval_loss": 0.0870361328125, + "eval_runtime": 256.8196, + "eval_samples_per_second": 10.786, + "eval_steps_per_second": 0.086, + "eval_wer": 21.672734627831716, + "step": 6600 + }, + { + "epoch": 2.43, + "learning_rate": 7.72077922077922e-06, + "loss": 0.0564, + "step": 6610 + }, + { + "epoch": 2.43, + "learning_rate": 7.717171717171717e-06, + "loss": 0.0572, + "step": 6620 + }, + { + "epoch": 2.44, + "learning_rate": 7.713564213564214e-06, + "loss": 0.0569, + "step": 6630 + }, + { + "epoch": 2.44, + "learning_rate": 7.709956709956711e-06, + "loss": 0.0538, + "step": 6640 + }, + { + "epoch": 2.44, + "learning_rate": 7.706349206349208e-06, + "loss": 0.0541, + "step": 6650 + }, + { + "epoch": 2.45, + "learning_rate": 7.702741702741703e-06, + "loss": 0.0542, + "step": 6660 + }, + { + "epoch": 2.45, + "learning_rate": 7.6991341991342e-06, + "loss": 0.0532, + "step": 6670 + }, + { + "epoch": 2.46, + "learning_rate": 7.695526695526695e-06, + "loss": 0.0552, + "step": 6680 + }, + { + "epoch": 2.46, + "learning_rate": 7.691919191919192e-06, + "loss": 0.0553, + "step": 6690 + }, + { + "epoch": 2.46, + "learning_rate": 7.68831168831169e-06, + "loss": 0.0565, + "step": 6700 + }, + { + "epoch": 2.46, + "eval_loss": 0.08868408203125, + "eval_runtime": 462.3577, + "eval_samples_per_second": 5.991, + "eval_steps_per_second": 0.048, + "eval_wer": 22.646979503775622, + "step": 6700 + }, + { + "epoch": 2.47, + "learning_rate": 7.684704184704186e-06, + "loss": 0.0555, + "step": 6710 + }, + { + "epoch": 2.47, + "learning_rate": 7.681096681096681e-06, + "loss": 0.0566, + "step": 6720 + }, + { + "epoch": 2.47, + "learning_rate": 7.677489177489178e-06, + "loss": 0.0541, + "step": 6730 + }, + { + "epoch": 2.48, + "learning_rate": 7.673881673881675e-06, + "loss": 0.055, + "step": 6740 + }, + { + "epoch": 2.48, + "learning_rate": 7.670274170274172e-06, + "loss": 0.055, + "step": 6750 + }, + { + "epoch": 2.49, + "learning_rate": 7.666666666666667e-06, + "loss": 0.0544, + "step": 6760 + }, + { + "epoch": 2.49, + "learning_rate": 7.663059163059164e-06, + "loss": 0.0553, + "step": 6770 + }, + { + "epoch": 2.49, + "learning_rate": 7.65945165945166e-06, + "loss": 0.0538, + "step": 6780 + }, + { + "epoch": 2.5, + "learning_rate": 7.655844155844156e-06, + "loss": 0.0567, + "step": 6790 + }, + { + "epoch": 2.5, + "learning_rate": 7.652236652236653e-06, + "loss": 0.0549, + "step": 6800 + }, + { + "epoch": 2.5, + "eval_loss": 0.08734130859375, + "eval_runtime": 650.8788, + "eval_samples_per_second": 4.256, + "eval_steps_per_second": 0.034, + "eval_wer": 21.794093851132686, + "step": 6800 + }, + { + "epoch": 2.5, + "learning_rate": 7.64862914862915e-06, + "loss": 0.0559, + "step": 6810 + }, + { + "epoch": 2.51, + "learning_rate": 7.645021645021645e-06, + "loss": 0.0557, + "step": 6820 + }, + { + "epoch": 2.51, + "learning_rate": 7.641414141414142e-06, + "loss": 0.0532, + "step": 6830 + }, + { + "epoch": 2.51, + "learning_rate": 7.637806637806639e-06, + "loss": 0.0537, + "step": 6840 + }, + { + "epoch": 2.52, + "learning_rate": 7.634199134199136e-06, + "loss": 0.0572, + "step": 6850 + }, + { + "epoch": 2.52, + "learning_rate": 7.630591630591631e-06, + "loss": 0.0559, + "step": 6860 + }, + { + "epoch": 2.53, + "learning_rate": 7.626984126984127e-06, + "loss": 0.0558, + "step": 6870 + }, + { + "epoch": 2.53, + "learning_rate": 7.623376623376624e-06, + "loss": 0.0549, + "step": 6880 + }, + { + "epoch": 2.53, + "learning_rate": 7.61976911976912e-06, + "loss": 0.0533, + "step": 6890 + }, + { + "epoch": 2.54, + "learning_rate": 7.616161616161617e-06, + "loss": 0.0559, + "step": 6900 + }, + { + "epoch": 2.54, + "eval_loss": 0.08636474609375, + "eval_runtime": 684.4605, + "eval_samples_per_second": 4.047, + "eval_steps_per_second": 0.032, + "eval_wer": 22.185140237324703, + "step": 6900 + }, + { + "epoch": 2.54, + "learning_rate": 7.612554112554114e-06, + "loss": 0.0555, + "step": 6910 + }, + { + "epoch": 2.54, + "learning_rate": 7.608946608946609e-06, + "loss": 0.0552, + "step": 6920 + }, + { + "epoch": 2.55, + "learning_rate": 7.605339105339106e-06, + "loss": 0.0545, + "step": 6930 + }, + { + "epoch": 2.55, + "learning_rate": 7.601731601731602e-06, + "loss": 0.0522, + "step": 6940 + }, + { + "epoch": 2.56, + "learning_rate": 7.598124098124099e-06, + "loss": 0.0545, + "step": 6950 + }, + { + "epoch": 2.56, + "learning_rate": 7.594516594516596e-06, + "loss": 0.0562, + "step": 6960 + }, + { + "epoch": 2.56, + "learning_rate": 7.590909090909091e-06, + "loss": 0.0544, + "step": 6970 + }, + { + "epoch": 2.57, + "learning_rate": 7.587301587301588e-06, + "loss": 0.0545, + "step": 6980 + }, + { + "epoch": 2.57, + "learning_rate": 7.583694083694084e-06, + "loss": 0.0544, + "step": 6990 + }, + { + "epoch": 2.57, + "learning_rate": 7.580086580086581e-06, + "loss": 0.0552, + "step": 7000 + }, + { + "epoch": 2.57, + "eval_loss": 0.0867919921875, + "eval_runtime": 359.998, + "eval_samples_per_second": 7.694, + "eval_steps_per_second": 0.061, + "eval_wer": 21.85477346278317, + "step": 7000 + }, + { + "epoch": 2.58, + "learning_rate": 7.576479076479078e-06, + "loss": 0.0548, + "step": 7010 + }, + { + "epoch": 2.58, + "learning_rate": 7.573593073593074e-06, + "loss": 0.0534, + "step": 7020 + }, + { + "epoch": 2.58, + "learning_rate": 7.56998556998557e-06, + "loss": 0.055, + "step": 7030 + }, + { + "epoch": 2.59, + "learning_rate": 7.566378066378067e-06, + "loss": 0.0547, + "step": 7040 + }, + { + "epoch": 2.59, + "learning_rate": 7.562770562770564e-06, + "loss": 0.0539, + "step": 7050 + }, + { + "epoch": 2.6, + "learning_rate": 7.559163059163059e-06, + "loss": 0.0554, + "step": 7060 + }, + { + "epoch": 2.6, + "learning_rate": 7.555555555555556e-06, + "loss": 0.0587, + "step": 7070 + }, + { + "epoch": 2.6, + "learning_rate": 7.551948051948052e-06, + "loss": 0.0554, + "step": 7080 + }, + { + "epoch": 2.61, + "learning_rate": 7.548340548340549e-06, + "loss": 0.0551, + "step": 7090 + }, + { + "epoch": 2.61, + "learning_rate": 7.544733044733046e-06, + "loss": 0.0546, + "step": 7100 + }, + { + "epoch": 2.61, + "eval_loss": 0.08642578125, + "eval_runtime": 281.6212, + "eval_samples_per_second": 9.836, + "eval_steps_per_second": 0.078, + "eval_wer": 21.794093851132686, + "step": 7100 + }, + { + "epoch": 2.61, + "learning_rate": 7.541125541125541e-06, + "loss": 0.0537, + "step": 7110 + }, + { + "epoch": 2.62, + "learning_rate": 7.537518037518038e-06, + "loss": 0.0562, + "step": 7120 + }, + { + "epoch": 2.62, + "learning_rate": 7.533910533910534e-06, + "loss": 0.0545, + "step": 7130 + }, + { + "epoch": 2.62, + "learning_rate": 7.530303030303031e-06, + "loss": 0.0555, + "step": 7140 + }, + { + "epoch": 2.63, + "learning_rate": 7.526695526695528e-06, + "loss": 0.0557, + "step": 7150 + }, + { + "epoch": 2.63, + "learning_rate": 7.523088023088023e-06, + "loss": 0.0525, + "step": 7160 + }, + { + "epoch": 2.64, + "learning_rate": 7.51948051948052e-06, + "loss": 0.0566, + "step": 7170 + }, + { + "epoch": 2.64, + "learning_rate": 7.515873015873016e-06, + "loss": 0.0545, + "step": 7180 + }, + { + "epoch": 2.64, + "learning_rate": 7.512265512265513e-06, + "loss": 0.0562, + "step": 7190 + }, + { + "epoch": 2.65, + "learning_rate": 7.50865800865801e-06, + "loss": 0.0549, + "step": 7200 + }, + { + "epoch": 2.65, + "eval_loss": 0.08612060546875, + "eval_runtime": 784.1899, + "eval_samples_per_second": 3.532, + "eval_steps_per_second": 0.028, + "eval_wer": 22.357065803667744, + "step": 7200 + }, + { + "epoch": 2.65, + "learning_rate": 7.505050505050505e-06, + "loss": 0.0548, + "step": 7210 + }, + { + "epoch": 2.65, + "learning_rate": 7.501443001443002e-06, + "loss": 0.056, + "step": 7220 + }, + { + "epoch": 2.66, + "learning_rate": 7.497835497835498e-06, + "loss": 0.0544, + "step": 7230 + }, + { + "epoch": 2.66, + "learning_rate": 7.494227994227995e-06, + "loss": 0.054, + "step": 7240 + }, + { + "epoch": 2.67, + "learning_rate": 7.490620490620492e-06, + "loss": 0.0558, + "step": 7250 + }, + { + "epoch": 2.67, + "learning_rate": 7.487012987012988e-06, + "loss": 0.0549, + "step": 7260 + }, + { + "epoch": 2.67, + "learning_rate": 7.483405483405484e-06, + "loss": 0.0555, + "step": 7270 + }, + { + "epoch": 2.68, + "learning_rate": 7.47979797979798e-06, + "loss": 0.0559, + "step": 7280 + }, + { + "epoch": 2.68, + "learning_rate": 7.476190476190477e-06, + "loss": 0.0574, + "step": 7290 + }, + { + "epoch": 2.68, + "learning_rate": 7.472582972582974e-06, + "loss": 0.0567, + "step": 7300 + }, + { + "epoch": 2.68, + "eval_loss": 0.0860595703125, + "eval_runtime": 523.0183, + "eval_samples_per_second": 5.296, + "eval_steps_per_second": 0.042, + "eval_wer": 21.709816612729234, + "step": 7300 + }, + { + "epoch": 2.69, + "learning_rate": 7.46897546897547e-06, + "loss": 0.0541, + "step": 7310 + }, + { + "epoch": 2.69, + "learning_rate": 7.465367965367966e-06, + "loss": 0.0547, + "step": 7320 + }, + { + "epoch": 2.69, + "learning_rate": 7.461760461760462e-06, + "loss": 0.0558, + "step": 7330 + }, + { + "epoch": 2.7, + "learning_rate": 7.458152958152959e-06, + "loss": 0.0536, + "step": 7340 + }, + { + "epoch": 2.7, + "learning_rate": 7.454545454545456e-06, + "loss": 0.0539, + "step": 7350 + }, + { + "epoch": 2.71, + "learning_rate": 7.450937950937952e-06, + "loss": 0.0583, + "step": 7360 + }, + { + "epoch": 2.71, + "learning_rate": 7.447330447330448e-06, + "loss": 0.0541, + "step": 7370 + }, + { + "epoch": 2.71, + "learning_rate": 7.443722943722944e-06, + "loss": 0.0545, + "step": 7380 + }, + { + "epoch": 2.72, + "learning_rate": 7.440115440115441e-06, + "loss": 0.0528, + "step": 7390 + }, + { + "epoch": 2.72, + "learning_rate": 7.4365079365079376e-06, + "loss": 0.0548, + "step": 7400 + }, + { + "epoch": 2.72, + "eval_loss": 0.0858154296875, + "eval_runtime": 282.1778, + "eval_samples_per_second": 9.817, + "eval_steps_per_second": 0.078, + "eval_wer": 21.83791801510248, + "step": 7400 + }, + { + "epoch": 2.72, + "learning_rate": 7.432900432900434e-06, + "loss": 0.0563, + "step": 7410 + }, + { + "epoch": 2.73, + "learning_rate": 7.42929292929293e-06, + "loss": 0.0534, + "step": 7420 + }, + { + "epoch": 2.73, + "learning_rate": 7.425685425685426e-06, + "loss": 0.0571, + "step": 7430 + }, + { + "epoch": 2.74, + "learning_rate": 7.422077922077923e-06, + "loss": 0.0548, + "step": 7440 + }, + { + "epoch": 2.74, + "learning_rate": 7.4184704184704195e-06, + "loss": 0.0545, + "step": 7450 + }, + { + "epoch": 2.74, + "learning_rate": 7.4148629148629155e-06, + "loss": 0.0542, + "step": 7460 + }, + { + "epoch": 2.75, + "learning_rate": 7.411255411255412e-06, + "loss": 0.0547, + "step": 7470 + }, + { + "epoch": 2.75, + "learning_rate": 7.407647907647908e-06, + "loss": 0.0567, + "step": 7480 + }, + { + "epoch": 2.75, + "learning_rate": 7.4040404040404045e-06, + "loss": 0.0563, + "step": 7490 + }, + { + "epoch": 2.76, + "learning_rate": 7.400432900432901e-06, + "loss": 0.055, + "step": 7500 + }, + { + "epoch": 2.76, + "eval_loss": 0.08563232421875, + "eval_runtime": 382.1657, + "eval_samples_per_second": 7.248, + "eval_steps_per_second": 0.058, + "eval_wer": 21.986245954692556, + "step": 7500 + }, + { + "epoch": 2.76, + "learning_rate": 7.3968253968253975e-06, + "loss": 0.0533, + "step": 7510 + }, + { + "epoch": 2.76, + "learning_rate": 7.3932178932178935e-06, + "loss": 0.0546, + "step": 7520 + }, + { + "epoch": 2.77, + "learning_rate": 7.3896103896103896e-06, + "loss": 0.0551, + "step": 7530 + }, + { + "epoch": 2.77, + "learning_rate": 7.3860028860028865e-06, + "loss": 0.0537, + "step": 7540 + }, + { + "epoch": 2.78, + "learning_rate": 7.382395382395383e-06, + "loss": 0.0547, + "step": 7550 + }, + { + "epoch": 2.78, + "learning_rate": 7.378787878787879e-06, + "loss": 0.0556, + "step": 7560 + }, + { + "epoch": 2.78, + "learning_rate": 7.375180375180376e-06, + "loss": 0.0547, + "step": 7570 + }, + { + "epoch": 2.79, + "learning_rate": 7.3715728715728715e-06, + "loss": 0.0539, + "step": 7580 + }, + { + "epoch": 2.79, + "learning_rate": 7.367965367965368e-06, + "loss": 0.0554, + "step": 7590 + }, + { + "epoch": 2.79, + "learning_rate": 7.364357864357865e-06, + "loss": 0.0541, + "step": 7600 + }, + { + "epoch": 2.79, + "eval_loss": 0.08575439453125, + "eval_runtime": 628.8874, + "eval_samples_per_second": 4.405, + "eval_steps_per_second": 0.035, + "eval_wer": 21.810949298813377, + "step": 7600 + }, + { + "epoch": 2.8, + "learning_rate": 7.360750360750361e-06, + "loss": 0.0561, + "step": 7610 + }, + { + "epoch": 2.8, + "learning_rate": 7.357142857142858e-06, + "loss": 0.0545, + "step": 7620 + }, + { + "epoch": 2.81, + "learning_rate": 7.353535353535353e-06, + "loss": 0.0544, + "step": 7630 + }, + { + "epoch": 2.81, + "learning_rate": 7.34992784992785e-06, + "loss": 0.0543, + "step": 7640 + }, + { + "epoch": 2.81, + "learning_rate": 7.346320346320347e-06, + "loss": 0.0549, + "step": 7650 + }, + { + "epoch": 2.82, + "learning_rate": 7.342712842712843e-06, + "loss": 0.0536, + "step": 7660 + }, + { + "epoch": 2.82, + "learning_rate": 7.33910533910534e-06, + "loss": 0.056, + "step": 7670 + }, + { + "epoch": 2.82, + "learning_rate": 7.335497835497835e-06, + "loss": 0.0545, + "step": 7680 + }, + { + "epoch": 2.83, + "learning_rate": 7.331890331890332e-06, + "loss": 0.054, + "step": 7690 + }, + { + "epoch": 2.83, + "learning_rate": 7.328282828282829e-06, + "loss": 0.0554, + "step": 7700 + }, + { + "epoch": 2.83, + "eval_loss": 0.08563232421875, + "eval_runtime": 906.4345, + "eval_samples_per_second": 3.056, + "eval_steps_per_second": 0.024, + "eval_wer": 21.85477346278317, + "step": 7700 + }, + { + "epoch": 2.83, + "learning_rate": 7.324675324675325e-06, + "loss": 0.0578, + "step": 7710 + }, + { + "epoch": 2.84, + "learning_rate": 7.321067821067822e-06, + "loss": 0.0541, + "step": 7720 + }, + { + "epoch": 2.84, + "learning_rate": 7.317460317460317e-06, + "loss": 0.0557, + "step": 7730 + }, + { + "epoch": 2.85, + "learning_rate": 7.313852813852814e-06, + "loss": 0.0536, + "step": 7740 + }, + { + "epoch": 2.85, + "learning_rate": 7.310245310245311e-06, + "loss": 0.0538, + "step": 7750 + }, + { + "epoch": 2.85, + "learning_rate": 7.306637806637807e-06, + "loss": 0.0533, + "step": 7760 + }, + { + "epoch": 2.86, + "learning_rate": 7.303030303030304e-06, + "loss": 0.0549, + "step": 7770 + }, + { + "epoch": 2.86, + "learning_rate": 7.299422799422799e-06, + "loss": 0.0543, + "step": 7780 + }, + { + "epoch": 2.86, + "learning_rate": 7.295815295815296e-06, + "loss": 0.0561, + "step": 7790 + }, + { + "epoch": 2.87, + "learning_rate": 7.292207792207793e-06, + "loss": 0.0534, + "step": 7800 + }, + { + "epoch": 2.87, + "eval_loss": 0.08599853515625, + "eval_runtime": 594.8444, + "eval_samples_per_second": 4.657, + "eval_steps_per_second": 0.037, + "eval_wer": 21.39630528586839, + "step": 7800 + }, + { + "epoch": 2.87, + "learning_rate": 7.288600288600289e-06, + "loss": 0.0573, + "step": 7810 + }, + { + "epoch": 2.88, + "learning_rate": 7.284992784992786e-06, + "loss": 0.0568, + "step": 7820 + }, + { + "epoch": 2.88, + "learning_rate": 7.281385281385281e-06, + "loss": 0.0554, + "step": 7830 + }, + { + "epoch": 2.88, + "learning_rate": 7.277777777777778e-06, + "loss": 0.0552, + "step": 7840 + }, + { + "epoch": 2.89, + "learning_rate": 7.274170274170275e-06, + "loss": 0.0531, + "step": 7850 + }, + { + "epoch": 2.89, + "learning_rate": 7.270562770562771e-06, + "loss": 0.0539, + "step": 7860 + }, + { + "epoch": 2.89, + "learning_rate": 7.266955266955268e-06, + "loss": 0.0555, + "step": 7870 + }, + { + "epoch": 2.9, + "learning_rate": 7.263347763347765e-06, + "loss": 0.0538, + "step": 7880 + }, + { + "epoch": 2.9, + "learning_rate": 7.25974025974026e-06, + "loss": 0.0552, + "step": 7890 + }, + { + "epoch": 2.9, + "learning_rate": 7.256132756132757e-06, + "loss": 0.0544, + "step": 7900 + }, + { + "epoch": 2.9, + "eval_loss": 0.08575439453125, + "eval_runtime": 276.18, + "eval_samples_per_second": 10.03, + "eval_steps_per_second": 0.08, + "eval_wer": 21.419902912621357, + "step": 7900 + }, + { + "epoch": 2.91, + "learning_rate": 7.252525252525253e-06, + "loss": 0.0526, + "step": 7910 + }, + { + "epoch": 2.91, + "learning_rate": 7.24891774891775e-06, + "loss": 0.0554, + "step": 7920 + }, + { + "epoch": 2.92, + "learning_rate": 7.245310245310247e-06, + "loss": 0.0576, + "step": 7930 + }, + { + "epoch": 2.92, + "learning_rate": 7.241702741702742e-06, + "loss": 0.0552, + "step": 7940 + }, + { + "epoch": 2.92, + "learning_rate": 7.238095238095239e-06, + "loss": 0.0557, + "step": 7950 + }, + { + "epoch": 2.93, + "learning_rate": 7.234487734487735e-06, + "loss": 0.0517, + "step": 7960 + }, + { + "epoch": 2.93, + "learning_rate": 7.230880230880232e-06, + "loss": 0.0537, + "step": 7970 + }, + { + "epoch": 2.93, + "learning_rate": 7.227272727272729e-06, + "loss": 0.0576, + "step": 7980 + }, + { + "epoch": 2.94, + "learning_rate": 7.223665223665224e-06, + "loss": 0.0547, + "step": 7990 + }, + { + "epoch": 2.94, + "learning_rate": 7.220057720057721e-06, + "loss": 0.0556, + "step": 8000 + }, + { + "epoch": 2.94, + "eval_loss": 0.085205078125, + "eval_runtime": 353.3226, + "eval_samples_per_second": 7.84, + "eval_steps_per_second": 0.062, + "eval_wer": 21.64576591154261, + "step": 8000 + }, + { + "epoch": 2.94, + "learning_rate": 7.216450216450217e-06, + "loss": 0.0547, + "step": 8010 + }, + { + "epoch": 2.95, + "learning_rate": 7.213564213564213e-06, + "loss": 0.0531, + "step": 8020 + }, + { + "epoch": 2.95, + "learning_rate": 7.20995670995671e-06, + "loss": 0.0545, + "step": 8030 + }, + { + "epoch": 2.96, + "learning_rate": 7.206349206349207e-06, + "loss": 0.0566, + "step": 8040 + }, + { + "epoch": 2.96, + "learning_rate": 7.202741702741703e-06, + "loss": 0.0556, + "step": 8050 + }, + { + "epoch": 2.96, + "learning_rate": 7.1991341991342e-06, + "loss": 0.0538, + "step": 8060 + }, + { + "epoch": 2.97, + "learning_rate": 7.195526695526695e-06, + "loss": 0.0523, + "step": 8070 + }, + { + "epoch": 2.97, + "learning_rate": 7.191919191919192e-06, + "loss": 0.0556, + "step": 8080 + }, + { + "epoch": 2.97, + "learning_rate": 7.188311688311689e-06, + "loss": 0.0569, + "step": 8090 + }, + { + "epoch": 2.98, + "learning_rate": 7.184704184704185e-06, + "loss": 0.0544, + "step": 8100 + }, + { + "epoch": 2.98, + "eval_loss": 0.08526611328125, + "eval_runtime": 716.9844, + "eval_samples_per_second": 3.863, + "eval_steps_per_second": 0.031, + "eval_wer": 21.3524811218986, + "step": 8100 + } + ], + "max_steps": 28000, + "num_train_epochs": 11, + "total_flos": 2.1159834348111245e+21, + "trial_name": null, + "trial_params": null +}