{ "best_metric": 37.96787634887283, "best_model_checkpoint": "./whisper-tiny-ro/checkpoint-5000", "epoch": 17.73049645390071, "eval_steps": 1000, "global_step": 10000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.044326241134751775, "grad_norm": 28.326566696166992, "learning_rate": 2.2e-07, "loss": 1.8024, "step": 25 }, { "epoch": 0.08865248226950355, "grad_norm": 22.18955421447754, "learning_rate": 4.7000000000000005e-07, "loss": 1.7794, "step": 50 }, { "epoch": 0.13297872340425532, "grad_norm": 21.889328002929688, "learning_rate": 7.2e-07, "loss": 1.63, "step": 75 }, { "epoch": 0.1773049645390071, "grad_norm": 19.02008819580078, "learning_rate": 9.7e-07, "loss": 1.5239, "step": 100 }, { "epoch": 0.22163120567375885, "grad_norm": 16.531150817871094, "learning_rate": 1.2200000000000002e-06, "loss": 1.3903, "step": 125 }, { "epoch": 0.26595744680851063, "grad_norm": 16.245573043823242, "learning_rate": 1.4700000000000001e-06, "loss": 1.2517, "step": 150 }, { "epoch": 0.3102836879432624, "grad_norm": 17.484891891479492, "learning_rate": 1.72e-06, "loss": 1.1449, "step": 175 }, { "epoch": 0.3546099290780142, "grad_norm": 13.317365646362305, "learning_rate": 1.97e-06, "loss": 1.0231, "step": 200 }, { "epoch": 0.39893617021276595, "grad_norm": 16.296846389770508, "learning_rate": 2.2200000000000003e-06, "loss": 1.0033, "step": 225 }, { "epoch": 0.4432624113475177, "grad_norm": 14.858762741088867, "learning_rate": 2.47e-06, "loss": 0.9183, "step": 250 }, { "epoch": 0.4875886524822695, "grad_norm": 15.132709503173828, "learning_rate": 2.7200000000000002e-06, "loss": 0.9142, "step": 275 }, { "epoch": 0.5319148936170213, "grad_norm": 15.102398872375488, "learning_rate": 2.97e-06, "loss": 0.8795, "step": 300 }, { "epoch": 0.5762411347517731, "grad_norm": 15.617897033691406, "learning_rate": 3.2200000000000005e-06, "loss": 0.863, "step": 325 }, { "epoch": 0.6205673758865248, "grad_norm": 15.325774192810059, "learning_rate": 3.4700000000000002e-06, "loss": 0.8094, "step": 350 }, { "epoch": 0.6648936170212766, "grad_norm": 13.950435638427734, "learning_rate": 3.7200000000000004e-06, "loss": 0.8471, "step": 375 }, { "epoch": 0.7092198581560284, "grad_norm": 17.703575134277344, "learning_rate": 3.97e-06, "loss": 0.83, "step": 400 }, { "epoch": 0.7535460992907801, "grad_norm": 13.622574806213379, "learning_rate": 4.22e-06, "loss": 0.7605, "step": 425 }, { "epoch": 0.7978723404255319, "grad_norm": 13.574337005615234, "learning_rate": 4.47e-06, "loss": 0.7464, "step": 450 }, { "epoch": 0.8421985815602837, "grad_norm": 12.981876373291016, "learning_rate": 4.7200000000000005e-06, "loss": 0.767, "step": 475 }, { "epoch": 0.8865248226950354, "grad_norm": 15.00900936126709, "learning_rate": 4.970000000000001e-06, "loss": 0.7617, "step": 500 }, { "epoch": 0.9308510638297872, "grad_norm": 16.31970977783203, "learning_rate": 4.988421052631579e-06, "loss": 0.6962, "step": 525 }, { "epoch": 0.975177304964539, "grad_norm": 10.05798625946045, "learning_rate": 4.9752631578947375e-06, "loss": 0.6676, "step": 550 }, { "epoch": 1.0195035460992907, "grad_norm": 12.080132484436035, "learning_rate": 4.962105263157895e-06, "loss": 0.6351, "step": 575 }, { "epoch": 1.0638297872340425, "grad_norm": 12.87156867980957, "learning_rate": 4.948947368421053e-06, "loss": 0.6078, "step": 600 }, { "epoch": 1.1081560283687943, "grad_norm": 11.931558609008789, "learning_rate": 4.935789473684211e-06, "loss": 0.5755, "step": 625 }, { "epoch": 1.1524822695035462, "grad_norm": 12.827286720275879, "learning_rate": 4.922631578947369e-06, "loss": 0.5679, "step": 650 }, { "epoch": 1.196808510638298, "grad_norm": 12.644274711608887, "learning_rate": 4.909473684210527e-06, "loss": 0.6122, "step": 675 }, { "epoch": 1.2411347517730495, "grad_norm": 12.461518287658691, "learning_rate": 4.896315789473685e-06, "loss": 0.5522, "step": 700 }, { "epoch": 1.2854609929078014, "grad_norm": 14.115540504455566, "learning_rate": 4.8831578947368425e-06, "loss": 0.5764, "step": 725 }, { "epoch": 1.3297872340425532, "grad_norm": 13.1589994430542, "learning_rate": 4.87e-06, "loss": 0.5421, "step": 750 }, { "epoch": 1.374113475177305, "grad_norm": 12.696803092956543, "learning_rate": 4.856842105263158e-06, "loss": 0.5616, "step": 775 }, { "epoch": 1.4184397163120568, "grad_norm": 14.510184288024902, "learning_rate": 4.843684210526316e-06, "loss": 0.5725, "step": 800 }, { "epoch": 1.4627659574468086, "grad_norm": 11.529364585876465, "learning_rate": 4.830526315789474e-06, "loss": 0.5627, "step": 825 }, { "epoch": 1.5070921985815602, "grad_norm": 12.159563064575195, "learning_rate": 4.8173684210526324e-06, "loss": 0.5452, "step": 850 }, { "epoch": 1.5514184397163122, "grad_norm": 10.232617378234863, "learning_rate": 4.80421052631579e-06, "loss": 0.5192, "step": 875 }, { "epoch": 1.5957446808510638, "grad_norm": 10.81043529510498, "learning_rate": 4.791052631578948e-06, "loss": 0.5151, "step": 900 }, { "epoch": 1.6400709219858156, "grad_norm": 14.97497272491455, "learning_rate": 4.777894736842106e-06, "loss": 0.5263, "step": 925 }, { "epoch": 1.6843971631205674, "grad_norm": 14.701244354248047, "learning_rate": 4.764736842105264e-06, "loss": 0.524, "step": 950 }, { "epoch": 1.728723404255319, "grad_norm": 13.469274520874023, "learning_rate": 4.7515789473684216e-06, "loss": 0.5084, "step": 975 }, { "epoch": 1.773049645390071, "grad_norm": 11.251127243041992, "learning_rate": 4.738421052631579e-06, "loss": 0.5444, "step": 1000 }, { "epoch": 1.773049645390071, "eval_loss": 0.604159414768219, "eval_runtime": 587.0739, "eval_samples_per_second": 6.636, "eval_steps_per_second": 0.83, "eval_wer": 48.87994586701805, "step": 1000 }, { "epoch": 1.8173758865248226, "grad_norm": 13.032508850097656, "learning_rate": 4.725263157894737e-06, "loss": 0.5314, "step": 1025 }, { "epoch": 1.8617021276595744, "grad_norm": 12.22535228729248, "learning_rate": 4.712105263157895e-06, "loss": 0.4761, "step": 1050 }, { "epoch": 1.9060283687943262, "grad_norm": 15.19352912902832, "learning_rate": 4.698947368421053e-06, "loss": 0.4742, "step": 1075 }, { "epoch": 1.950354609929078, "grad_norm": 10.927416801452637, "learning_rate": 4.685789473684211e-06, "loss": 0.5012, "step": 1100 }, { "epoch": 1.9946808510638299, "grad_norm": 10.421246528625488, "learning_rate": 4.672631578947369e-06, "loss": 0.5225, "step": 1125 }, { "epoch": 2.0390070921985815, "grad_norm": 9.38261604309082, "learning_rate": 4.6594736842105265e-06, "loss": 0.4173, "step": 1150 }, { "epoch": 2.0833333333333335, "grad_norm": 10.129746437072754, "learning_rate": 4.646315789473684e-06, "loss": 0.4153, "step": 1175 }, { "epoch": 2.127659574468085, "grad_norm": 11.529908180236816, "learning_rate": 4.633157894736842e-06, "loss": 0.3981, "step": 1200 }, { "epoch": 2.171985815602837, "grad_norm": 11.076881408691406, "learning_rate": 4.620000000000001e-06, "loss": 0.4152, "step": 1225 }, { "epoch": 2.2163120567375887, "grad_norm": 9.994644165039062, "learning_rate": 4.606842105263158e-06, "loss": 0.3926, "step": 1250 }, { "epoch": 2.2606382978723403, "grad_norm": 10.582009315490723, "learning_rate": 4.5936842105263165e-06, "loss": 0.4378, "step": 1275 }, { "epoch": 2.3049645390070923, "grad_norm": 9.771284103393555, "learning_rate": 4.580526315789474e-06, "loss": 0.3882, "step": 1300 }, { "epoch": 2.349290780141844, "grad_norm": 11.286993026733398, "learning_rate": 4.567368421052632e-06, "loss": 0.3686, "step": 1325 }, { "epoch": 2.393617021276596, "grad_norm": 13.008705139160156, "learning_rate": 4.55421052631579e-06, "loss": 0.3899, "step": 1350 }, { "epoch": 2.4379432624113475, "grad_norm": 10.24173355102539, "learning_rate": 4.541052631578948e-06, "loss": 0.4028, "step": 1375 }, { "epoch": 2.482269503546099, "grad_norm": 10.5569486618042, "learning_rate": 4.527894736842106e-06, "loss": 0.3634, "step": 1400 }, { "epoch": 2.526595744680851, "grad_norm": 10.904850006103516, "learning_rate": 4.514736842105263e-06, "loss": 0.4181, "step": 1425 }, { "epoch": 2.5709219858156027, "grad_norm": 10.719099998474121, "learning_rate": 4.501578947368421e-06, "loss": 0.3803, "step": 1450 }, { "epoch": 2.6152482269503547, "grad_norm": 10.873899459838867, "learning_rate": 4.488421052631579e-06, "loss": 0.3983, "step": 1475 }, { "epoch": 2.6595744680851063, "grad_norm": 8.574480056762695, "learning_rate": 4.475263157894737e-06, "loss": 0.3894, "step": 1500 }, { "epoch": 2.703900709219858, "grad_norm": 10.148545265197754, "learning_rate": 4.462105263157895e-06, "loss": 0.3672, "step": 1525 }, { "epoch": 2.74822695035461, "grad_norm": 11.479018211364746, "learning_rate": 4.448947368421053e-06, "loss": 0.3824, "step": 1550 }, { "epoch": 2.7925531914893615, "grad_norm": 10.652966499328613, "learning_rate": 4.435789473684211e-06, "loss": 0.3849, "step": 1575 }, { "epoch": 2.8368794326241136, "grad_norm": 10.057666778564453, "learning_rate": 4.422631578947369e-06, "loss": 0.3702, "step": 1600 }, { "epoch": 2.881205673758865, "grad_norm": 9.985100746154785, "learning_rate": 4.409473684210527e-06, "loss": 0.4052, "step": 1625 }, { "epoch": 2.925531914893617, "grad_norm": 9.165911674499512, "learning_rate": 4.396315789473685e-06, "loss": 0.3697, "step": 1650 }, { "epoch": 2.969858156028369, "grad_norm": 10.057464599609375, "learning_rate": 4.383157894736842e-06, "loss": 0.3663, "step": 1675 }, { "epoch": 3.0141843971631204, "grad_norm": 10.039346694946289, "learning_rate": 4.3700000000000005e-06, "loss": 0.3443, "step": 1700 }, { "epoch": 3.0585106382978724, "grad_norm": 9.51621150970459, "learning_rate": 4.356842105263158e-06, "loss": 0.2917, "step": 1725 }, { "epoch": 3.102836879432624, "grad_norm": 10.015137672424316, "learning_rate": 4.343684210526316e-06, "loss": 0.3045, "step": 1750 }, { "epoch": 3.147163120567376, "grad_norm": 10.268891334533691, "learning_rate": 4.330526315789474e-06, "loss": 0.2821, "step": 1775 }, { "epoch": 3.1914893617021276, "grad_norm": 9.120494842529297, "learning_rate": 4.317368421052632e-06, "loss": 0.2956, "step": 1800 }, { "epoch": 3.2358156028368796, "grad_norm": 10.420275688171387, "learning_rate": 4.30421052631579e-06, "loss": 0.3086, "step": 1825 }, { "epoch": 3.280141843971631, "grad_norm": 9.254629135131836, "learning_rate": 4.291052631578947e-06, "loss": 0.3083, "step": 1850 }, { "epoch": 3.324468085106383, "grad_norm": 9.865363121032715, "learning_rate": 4.277894736842106e-06, "loss": 0.3071, "step": 1875 }, { "epoch": 3.368794326241135, "grad_norm": 11.790287017822266, "learning_rate": 4.264736842105264e-06, "loss": 0.3014, "step": 1900 }, { "epoch": 3.4131205673758864, "grad_norm": 10.183505058288574, "learning_rate": 4.251578947368421e-06, "loss": 0.293, "step": 1925 }, { "epoch": 3.4574468085106385, "grad_norm": 9.69072151184082, "learning_rate": 4.23842105263158e-06, "loss": 0.2978, "step": 1950 }, { "epoch": 3.50177304964539, "grad_norm": 10.96455192565918, "learning_rate": 4.225263157894737e-06, "loss": 0.311, "step": 1975 }, { "epoch": 3.546099290780142, "grad_norm": 11.342255592346191, "learning_rate": 4.212105263157895e-06, "loss": 0.3042, "step": 2000 }, { "epoch": 3.546099290780142, "eval_loss": 0.5099755525588989, "eval_runtime": 581.1553, "eval_samples_per_second": 6.704, "eval_steps_per_second": 0.838, "eval_wer": 41.17311870080843, "step": 2000 }, { "epoch": 3.5904255319148937, "grad_norm": 10.30219554901123, "learning_rate": 4.198947368421053e-06, "loss": 0.3039, "step": 2025 }, { "epoch": 3.6347517730496453, "grad_norm": 9.825774192810059, "learning_rate": 4.185789473684211e-06, "loss": 0.292, "step": 2050 }, { "epoch": 3.6790780141843973, "grad_norm": 9.612593650817871, "learning_rate": 4.172631578947369e-06, "loss": 0.3133, "step": 2075 }, { "epoch": 3.723404255319149, "grad_norm": 9.864873886108398, "learning_rate": 4.159473684210526e-06, "loss": 0.2786, "step": 2100 }, { "epoch": 3.7677304964539005, "grad_norm": 9.14278507232666, "learning_rate": 4.1463157894736845e-06, "loss": 0.2992, "step": 2125 }, { "epoch": 3.8120567375886525, "grad_norm": 10.981643676757812, "learning_rate": 4.133157894736842e-06, "loss": 0.2987, "step": 2150 }, { "epoch": 3.8563829787234045, "grad_norm": 10.71380615234375, "learning_rate": 4.12e-06, "loss": 0.3088, "step": 2175 }, { "epoch": 3.900709219858156, "grad_norm": 11.365142822265625, "learning_rate": 4.106842105263158e-06, "loss": 0.302, "step": 2200 }, { "epoch": 3.9450354609929077, "grad_norm": 11.918941497802734, "learning_rate": 4.093684210526316e-06, "loss": 0.31, "step": 2225 }, { "epoch": 3.9893617021276597, "grad_norm": 10.240377426147461, "learning_rate": 4.0805263157894745e-06, "loss": 0.3048, "step": 2250 }, { "epoch": 4.033687943262412, "grad_norm": 7.562131404876709, "learning_rate": 4.067368421052632e-06, "loss": 0.2673, "step": 2275 }, { "epoch": 4.078014184397163, "grad_norm": 7.741388320922852, "learning_rate": 4.05421052631579e-06, "loss": 0.2364, "step": 2300 }, { "epoch": 4.122340425531915, "grad_norm": 8.797900199890137, "learning_rate": 4.041052631578948e-06, "loss": 0.2524, "step": 2325 }, { "epoch": 4.166666666666667, "grad_norm": 9.751541137695312, "learning_rate": 4.027894736842105e-06, "loss": 0.2565, "step": 2350 }, { "epoch": 4.210992907801418, "grad_norm": 7.299990653991699, "learning_rate": 4.014736842105264e-06, "loss": 0.2405, "step": 2375 }, { "epoch": 4.25531914893617, "grad_norm": 8.407694816589355, "learning_rate": 4.001578947368421e-06, "loss": 0.226, "step": 2400 }, { "epoch": 4.299645390070922, "grad_norm": 8.874945640563965, "learning_rate": 3.9884210526315795e-06, "loss": 0.2366, "step": 2425 }, { "epoch": 4.343971631205674, "grad_norm": 8.172481536865234, "learning_rate": 3.975263157894737e-06, "loss": 0.2325, "step": 2450 }, { "epoch": 4.388297872340425, "grad_norm": 8.563679695129395, "learning_rate": 3.962105263157895e-06, "loss": 0.2266, "step": 2475 }, { "epoch": 4.432624113475177, "grad_norm": 7.646442413330078, "learning_rate": 3.948947368421053e-06, "loss": 0.2477, "step": 2500 }, { "epoch": 4.476950354609929, "grad_norm": 8.14061164855957, "learning_rate": 3.93578947368421e-06, "loss": 0.2372, "step": 2525 }, { "epoch": 4.5212765957446805, "grad_norm": 6.697457790374756, "learning_rate": 3.9226315789473694e-06, "loss": 0.2312, "step": 2550 }, { "epoch": 4.565602836879433, "grad_norm": 9.178577423095703, "learning_rate": 3.909473684210527e-06, "loss": 0.239, "step": 2575 }, { "epoch": 4.609929078014185, "grad_norm": 7.986817836761475, "learning_rate": 3.896315789473684e-06, "loss": 0.2266, "step": 2600 }, { "epoch": 4.654255319148936, "grad_norm": 9.992223739624023, "learning_rate": 3.883157894736843e-06, "loss": 0.2422, "step": 2625 }, { "epoch": 4.698581560283688, "grad_norm": 8.259024620056152, "learning_rate": 3.87e-06, "loss": 0.2382, "step": 2650 }, { "epoch": 4.74290780141844, "grad_norm": 8.913894653320312, "learning_rate": 3.8568421052631585e-06, "loss": 0.2404, "step": 2675 }, { "epoch": 4.787234042553192, "grad_norm": 8.490303993225098, "learning_rate": 3.843684210526316e-06, "loss": 0.251, "step": 2700 }, { "epoch": 4.831560283687943, "grad_norm": 8.170136451721191, "learning_rate": 3.830526315789474e-06, "loss": 0.2195, "step": 2725 }, { "epoch": 4.875886524822695, "grad_norm": 7.071116924285889, "learning_rate": 3.817368421052632e-06, "loss": 0.2237, "step": 2750 }, { "epoch": 4.920212765957447, "grad_norm": 7.995920181274414, "learning_rate": 3.8042105263157898e-06, "loss": 0.2377, "step": 2775 }, { "epoch": 4.964539007092198, "grad_norm": 8.777873039245605, "learning_rate": 3.7910526315789477e-06, "loss": 0.2211, "step": 2800 }, { "epoch": 5.00886524822695, "grad_norm": 7.402454376220703, "learning_rate": 3.7778947368421056e-06, "loss": 0.2341, "step": 2825 }, { "epoch": 5.053191489361702, "grad_norm": 7.576868057250977, "learning_rate": 3.764736842105263e-06, "loss": 0.2006, "step": 2850 }, { "epoch": 5.097517730496454, "grad_norm": 6.49124002456665, "learning_rate": 3.751578947368421e-06, "loss": 0.1835, "step": 2875 }, { "epoch": 5.141843971631205, "grad_norm": 5.912723064422607, "learning_rate": 3.7384210526315793e-06, "loss": 0.1962, "step": 2900 }, { "epoch": 5.1861702127659575, "grad_norm": 7.608515739440918, "learning_rate": 3.7252631578947372e-06, "loss": 0.18, "step": 2925 }, { "epoch": 5.2304964539007095, "grad_norm": 8.52009105682373, "learning_rate": 3.712105263157895e-06, "loss": 0.184, "step": 2950 }, { "epoch": 5.274822695035461, "grad_norm": 9.251614570617676, "learning_rate": 3.698947368421053e-06, "loss": 0.2098, "step": 2975 }, { "epoch": 5.319148936170213, "grad_norm": 8.74129581451416, "learning_rate": 3.685789473684211e-06, "loss": 0.1817, "step": 3000 }, { "epoch": 5.319148936170213, "eval_loss": 0.4850045144557953, "eval_runtime": 588.1549, "eval_samples_per_second": 6.624, "eval_steps_per_second": 0.828, "eval_wer": 40.756437195056805, "step": 3000 }, { "epoch": 5.363475177304965, "grad_norm": 9.787571907043457, "learning_rate": 3.672631578947369e-06, "loss": 0.2017, "step": 3025 }, { "epoch": 5.407801418439716, "grad_norm": 9.773175239562988, "learning_rate": 3.6594736842105268e-06, "loss": 0.1818, "step": 3050 }, { "epoch": 5.452127659574468, "grad_norm": 9.025221824645996, "learning_rate": 3.6463157894736847e-06, "loss": 0.1926, "step": 3075 }, { "epoch": 5.49645390070922, "grad_norm": 7.631556987762451, "learning_rate": 3.633157894736842e-06, "loss": 0.2017, "step": 3100 }, { "epoch": 5.540780141843972, "grad_norm": 8.103202819824219, "learning_rate": 3.62e-06, "loss": 0.1808, "step": 3125 }, { "epoch": 5.585106382978723, "grad_norm": 6.022019386291504, "learning_rate": 3.606842105263158e-06, "loss": 0.2017, "step": 3150 }, { "epoch": 5.629432624113475, "grad_norm": 6.922440528869629, "learning_rate": 3.593684210526316e-06, "loss": 0.2009, "step": 3175 }, { "epoch": 5.673758865248227, "grad_norm": 8.611794471740723, "learning_rate": 3.580526315789474e-06, "loss": 0.1731, "step": 3200 }, { "epoch": 5.718085106382979, "grad_norm": 7.4870147705078125, "learning_rate": 3.567368421052632e-06, "loss": 0.1797, "step": 3225 }, { "epoch": 5.76241134751773, "grad_norm": 8.757158279418945, "learning_rate": 3.55421052631579e-06, "loss": 0.1847, "step": 3250 }, { "epoch": 5.806737588652482, "grad_norm": 8.361138343811035, "learning_rate": 3.541052631578948e-06, "loss": 0.1738, "step": 3275 }, { "epoch": 5.851063829787234, "grad_norm": 8.07181453704834, "learning_rate": 3.527894736842106e-06, "loss": 0.2007, "step": 3300 }, { "epoch": 5.8953900709219855, "grad_norm": 7.998460292816162, "learning_rate": 3.5147368421052638e-06, "loss": 0.1848, "step": 3325 }, { "epoch": 5.939716312056738, "grad_norm": 7.463223934173584, "learning_rate": 3.5015789473684213e-06, "loss": 0.1894, "step": 3350 }, { "epoch": 5.98404255319149, "grad_norm": 8.993099212646484, "learning_rate": 3.488421052631579e-06, "loss": 0.1898, "step": 3375 }, { "epoch": 6.028368794326241, "grad_norm": 6.37155294418335, "learning_rate": 3.475263157894737e-06, "loss": 0.1772, "step": 3400 }, { "epoch": 6.072695035460993, "grad_norm": 6.82436990737915, "learning_rate": 3.462105263157895e-06, "loss": 0.1555, "step": 3425 }, { "epoch": 6.117021276595745, "grad_norm": 7.0470428466796875, "learning_rate": 3.448947368421053e-06, "loss": 0.1517, "step": 3450 }, { "epoch": 6.161347517730497, "grad_norm": 6.5624494552612305, "learning_rate": 3.435789473684211e-06, "loss": 0.149, "step": 3475 }, { "epoch": 6.205673758865248, "grad_norm": 7.491029262542725, "learning_rate": 3.4226315789473687e-06, "loss": 0.1599, "step": 3500 }, { "epoch": 6.25, "grad_norm": 6.855647563934326, "learning_rate": 3.409473684210526e-06, "loss": 0.1587, "step": 3525 }, { "epoch": 6.294326241134752, "grad_norm": 8.074361801147461, "learning_rate": 3.396315789473684e-06, "loss": 0.1674, "step": 3550 }, { "epoch": 6.338652482269503, "grad_norm": 5.963619709014893, "learning_rate": 3.3831578947368424e-06, "loss": 0.1499, "step": 3575 }, { "epoch": 6.382978723404255, "grad_norm": 8.021512985229492, "learning_rate": 3.3700000000000003e-06, "loss": 0.1488, "step": 3600 }, { "epoch": 6.427304964539007, "grad_norm": 5.919581413269043, "learning_rate": 3.3568421052631583e-06, "loss": 0.1498, "step": 3625 }, { "epoch": 6.471631205673759, "grad_norm": 6.950247287750244, "learning_rate": 3.343684210526316e-06, "loss": 0.1648, "step": 3650 }, { "epoch": 6.51595744680851, "grad_norm": 6.72702693939209, "learning_rate": 3.330526315789474e-06, "loss": 0.146, "step": 3675 }, { "epoch": 6.560283687943262, "grad_norm": 7.681860446929932, "learning_rate": 3.317368421052632e-06, "loss": 0.1542, "step": 3700 }, { "epoch": 6.6046099290780145, "grad_norm": 7.239710330963135, "learning_rate": 3.30421052631579e-06, "loss": 0.1499, "step": 3725 }, { "epoch": 6.648936170212766, "grad_norm": 7.310706615447998, "learning_rate": 3.291052631578948e-06, "loss": 0.1527, "step": 3750 }, { "epoch": 6.693262411347518, "grad_norm": 7.060523509979248, "learning_rate": 3.2778947368421053e-06, "loss": 0.1537, "step": 3775 }, { "epoch": 6.73758865248227, "grad_norm": 8.571366310119629, "learning_rate": 3.264736842105263e-06, "loss": 0.1598, "step": 3800 }, { "epoch": 6.781914893617021, "grad_norm": 6.046979904174805, "learning_rate": 3.251578947368421e-06, "loss": 0.1416, "step": 3825 }, { "epoch": 6.826241134751773, "grad_norm": 8.620864868164062, "learning_rate": 3.238421052631579e-06, "loss": 0.1454, "step": 3850 }, { "epoch": 6.870567375886525, "grad_norm": 8.330490112304688, "learning_rate": 3.225263157894737e-06, "loss": 0.1542, "step": 3875 }, { "epoch": 6.914893617021277, "grad_norm": 10.611557006835938, "learning_rate": 3.212105263157895e-06, "loss": 0.1676, "step": 3900 }, { "epoch": 6.959219858156028, "grad_norm": 6.815483570098877, "learning_rate": 3.198947368421053e-06, "loss": 0.1405, "step": 3925 }, { "epoch": 7.00354609929078, "grad_norm": 5.8308634757995605, "learning_rate": 3.185789473684211e-06, "loss": 0.1479, "step": 3950 }, { "epoch": 7.047872340425532, "grad_norm": 6.529901027679443, "learning_rate": 3.172631578947369e-06, "loss": 0.12, "step": 3975 }, { "epoch": 7.092198581560283, "grad_norm": 6.525743007659912, "learning_rate": 3.159473684210527e-06, "loss": 0.1214, "step": 4000 }, { "epoch": 7.092198581560283, "eval_loss": 0.48074454069137573, "eval_runtime": 589.8236, "eval_samples_per_second": 6.605, "eval_steps_per_second": 0.826, "eval_wer": 41.810605790804516, "step": 4000 }, { "epoch": 7.136524822695035, "grad_norm": 5.498377799987793, "learning_rate": 3.1463157894736844e-06, "loss": 0.1295, "step": 4025 }, { "epoch": 7.180851063829787, "grad_norm": 7.085293292999268, "learning_rate": 3.1331578947368423e-06, "loss": 0.1224, "step": 4050 }, { "epoch": 7.225177304964539, "grad_norm": 4.779361724853516, "learning_rate": 3.12e-06, "loss": 0.1245, "step": 4075 }, { "epoch": 7.2695035460992905, "grad_norm": 6.457351207733154, "learning_rate": 3.106842105263158e-06, "loss": 0.1192, "step": 4100 }, { "epoch": 7.3138297872340425, "grad_norm": 4.801368236541748, "learning_rate": 3.093684210526316e-06, "loss": 0.1322, "step": 4125 }, { "epoch": 7.358156028368795, "grad_norm": 6.449742794036865, "learning_rate": 3.080526315789474e-06, "loss": 0.1218, "step": 4150 }, { "epoch": 7.402482269503546, "grad_norm": 5.5234456062316895, "learning_rate": 3.067368421052632e-06, "loss": 0.1293, "step": 4175 }, { "epoch": 7.446808510638298, "grad_norm": 8.489788055419922, "learning_rate": 3.0542105263157893e-06, "loss": 0.1181, "step": 4200 }, { "epoch": 7.49113475177305, "grad_norm": 6.528730869293213, "learning_rate": 3.0410526315789472e-06, "loss": 0.1263, "step": 4225 }, { "epoch": 7.535460992907802, "grad_norm": 6.973687648773193, "learning_rate": 3.027894736842106e-06, "loss": 0.1179, "step": 4250 }, { "epoch": 7.579787234042553, "grad_norm": 6.633789539337158, "learning_rate": 3.0147368421052635e-06, "loss": 0.1314, "step": 4275 }, { "epoch": 7.624113475177305, "grad_norm": 7.518368721008301, "learning_rate": 3.0015789473684214e-06, "loss": 0.1305, "step": 4300 }, { "epoch": 7.668439716312057, "grad_norm": 5.739889144897461, "learning_rate": 2.9884210526315793e-06, "loss": 0.1295, "step": 4325 }, { "epoch": 7.712765957446808, "grad_norm": 6.737969398498535, "learning_rate": 2.975263157894737e-06, "loss": 0.1256, "step": 4350 }, { "epoch": 7.75709219858156, "grad_norm": 5.012901782989502, "learning_rate": 2.962105263157895e-06, "loss": 0.1312, "step": 4375 }, { "epoch": 7.801418439716312, "grad_norm": 5.5256853103637695, "learning_rate": 2.948947368421053e-06, "loss": 0.1242, "step": 4400 }, { "epoch": 7.845744680851064, "grad_norm": 6.629995346069336, "learning_rate": 2.935789473684211e-06, "loss": 0.125, "step": 4425 }, { "epoch": 7.890070921985815, "grad_norm": 5.227272987365723, "learning_rate": 2.9226315789473684e-06, "loss": 0.1191, "step": 4450 }, { "epoch": 7.934397163120567, "grad_norm": 5.111964225769043, "learning_rate": 2.9094736842105263e-06, "loss": 0.1145, "step": 4475 }, { "epoch": 7.9787234042553195, "grad_norm": 5.537423610687256, "learning_rate": 2.8963157894736842e-06, "loss": 0.1278, "step": 4500 }, { "epoch": 8.02304964539007, "grad_norm": 4.478297710418701, "learning_rate": 2.883157894736842e-06, "loss": 0.1155, "step": 4525 }, { "epoch": 8.067375886524824, "grad_norm": 5.642357349395752, "learning_rate": 2.87e-06, "loss": 0.1102, "step": 4550 }, { "epoch": 8.111702127659575, "grad_norm": 5.228881359100342, "learning_rate": 2.856842105263158e-06, "loss": 0.1009, "step": 4575 }, { "epoch": 8.156028368794326, "grad_norm": 5.18090295791626, "learning_rate": 2.8436842105263163e-06, "loss": 0.0992, "step": 4600 }, { "epoch": 8.200354609929079, "grad_norm": 6.664114475250244, "learning_rate": 2.830526315789474e-06, "loss": 0.1092, "step": 4625 }, { "epoch": 8.24468085106383, "grad_norm": 5.435600280761719, "learning_rate": 2.817368421052632e-06, "loss": 0.1011, "step": 4650 }, { "epoch": 8.289007092198581, "grad_norm": 5.778509140014648, "learning_rate": 2.80421052631579e-06, "loss": 0.1025, "step": 4675 }, { "epoch": 8.333333333333334, "grad_norm": 5.977304458618164, "learning_rate": 2.7910526315789475e-06, "loss": 0.1021, "step": 4700 }, { "epoch": 8.377659574468085, "grad_norm": 5.274112701416016, "learning_rate": 2.7778947368421054e-06, "loss": 0.0935, "step": 4725 }, { "epoch": 8.421985815602836, "grad_norm": 5.418082237243652, "learning_rate": 2.7647368421052633e-06, "loss": 0.0965, "step": 4750 }, { "epoch": 8.46631205673759, "grad_norm": 5.248587131500244, "learning_rate": 2.7515789473684212e-06, "loss": 0.1014, "step": 4775 }, { "epoch": 8.51063829787234, "grad_norm": 5.329669952392578, "learning_rate": 2.738421052631579e-06, "loss": 0.1052, "step": 4800 }, { "epoch": 8.554964539007091, "grad_norm": 5.469305038452148, "learning_rate": 2.725263157894737e-06, "loss": 0.0988, "step": 4825 }, { "epoch": 8.599290780141844, "grad_norm": 4.839619159698486, "learning_rate": 2.712105263157895e-06, "loss": 0.1038, "step": 4850 }, { "epoch": 8.643617021276595, "grad_norm": 5.6988420486450195, "learning_rate": 2.6989473684210524e-06, "loss": 0.1005, "step": 4875 }, { "epoch": 8.687943262411348, "grad_norm": 6.122032165527344, "learning_rate": 2.6857894736842104e-06, "loss": 0.1071, "step": 4900 }, { "epoch": 8.7322695035461, "grad_norm": 5.00734806060791, "learning_rate": 2.672631578947369e-06, "loss": 0.1009, "step": 4925 }, { "epoch": 8.77659574468085, "grad_norm": 6.199928283691406, "learning_rate": 2.6594736842105266e-06, "loss": 0.1041, "step": 4950 }, { "epoch": 8.820921985815604, "grad_norm": 6.134685516357422, "learning_rate": 2.6463157894736845e-06, "loss": 0.1044, "step": 4975 }, { "epoch": 8.865248226950355, "grad_norm": 5.176562786102295, "learning_rate": 2.6331578947368424e-06, "loss": 0.1066, "step": 5000 }, { "epoch": 8.865248226950355, "eval_loss": 0.4846879839897156, "eval_runtime": 580.4904, "eval_samples_per_second": 6.712, "eval_steps_per_second": 0.839, "eval_wer": 37.96787634887283, "step": 5000 }, { "epoch": 8.909574468085106, "grad_norm": 5.835010051727295, "learning_rate": 2.6200000000000003e-06, "loss": 0.1001, "step": 5025 }, { "epoch": 8.953900709219859, "grad_norm": 6.407568454742432, "learning_rate": 2.6068421052631582e-06, "loss": 0.0965, "step": 5050 }, { "epoch": 8.99822695035461, "grad_norm": 6.197821617126465, "learning_rate": 2.593684210526316e-06, "loss": 0.1048, "step": 5075 }, { "epoch": 9.042553191489361, "grad_norm": 4.808340072631836, "learning_rate": 2.580526315789474e-06, "loss": 0.085, "step": 5100 }, { "epoch": 9.086879432624114, "grad_norm": 4.098535060882568, "learning_rate": 2.5673684210526315e-06, "loss": 0.0815, "step": 5125 }, { "epoch": 9.131205673758865, "grad_norm": 4.34876012802124, "learning_rate": 2.5542105263157894e-06, "loss": 0.0884, "step": 5150 }, { "epoch": 9.175531914893616, "grad_norm": 4.318136692047119, "learning_rate": 2.5410526315789474e-06, "loss": 0.0826, "step": 5175 }, { "epoch": 9.21985815602837, "grad_norm": 5.460968017578125, "learning_rate": 2.5278947368421053e-06, "loss": 0.0799, "step": 5200 }, { "epoch": 9.26418439716312, "grad_norm": 4.200242042541504, "learning_rate": 2.514736842105263e-06, "loss": 0.0835, "step": 5225 }, { "epoch": 9.308510638297872, "grad_norm": 5.984395503997803, "learning_rate": 2.501578947368421e-06, "loss": 0.0881, "step": 5250 }, { "epoch": 9.352836879432624, "grad_norm": 4.829773426055908, "learning_rate": 2.488421052631579e-06, "loss": 0.0786, "step": 5275 }, { "epoch": 9.397163120567376, "grad_norm": 3.996610403060913, "learning_rate": 2.475263157894737e-06, "loss": 0.0865, "step": 5300 }, { "epoch": 9.441489361702128, "grad_norm": 6.336328029632568, "learning_rate": 2.462105263157895e-06, "loss": 0.0802, "step": 5325 }, { "epoch": 9.48581560283688, "grad_norm": 5.054424285888672, "learning_rate": 2.448947368421053e-06, "loss": 0.0928, "step": 5350 }, { "epoch": 9.53014184397163, "grad_norm": 6.595405101776123, "learning_rate": 2.4357894736842106e-06, "loss": 0.0858, "step": 5375 }, { "epoch": 9.574468085106384, "grad_norm": 5.797497272491455, "learning_rate": 2.4226315789473685e-06, "loss": 0.0846, "step": 5400 }, { "epoch": 9.618794326241135, "grad_norm": 5.1372551918029785, "learning_rate": 2.4094736842105265e-06, "loss": 0.0789, "step": 5425 }, { "epoch": 9.663120567375886, "grad_norm": 6.663181304931641, "learning_rate": 2.3963157894736844e-06, "loss": 0.0977, "step": 5450 }, { "epoch": 9.707446808510639, "grad_norm": 4.910397529602051, "learning_rate": 2.3831578947368423e-06, "loss": 0.087, "step": 5475 }, { "epoch": 9.75177304964539, "grad_norm": 5.87327241897583, "learning_rate": 2.37e-06, "loss": 0.076, "step": 5500 }, { "epoch": 9.796099290780141, "grad_norm": 4.994716167449951, "learning_rate": 2.356842105263158e-06, "loss": 0.083, "step": 5525 }, { "epoch": 9.840425531914894, "grad_norm": 5.601754665374756, "learning_rate": 2.343684210526316e-06, "loss": 0.0819, "step": 5550 }, { "epoch": 9.884751773049645, "grad_norm": 5.767611980438232, "learning_rate": 2.330526315789474e-06, "loss": 0.0831, "step": 5575 }, { "epoch": 9.929078014184396, "grad_norm": 6.679659366607666, "learning_rate": 2.317368421052632e-06, "loss": 0.084, "step": 5600 }, { "epoch": 9.97340425531915, "grad_norm": 3.7785329818725586, "learning_rate": 2.3042105263157897e-06, "loss": 0.0894, "step": 5625 }, { "epoch": 10.0177304964539, "grad_norm": 4.858386516571045, "learning_rate": 2.2910526315789476e-06, "loss": 0.0817, "step": 5650 }, { "epoch": 10.062056737588652, "grad_norm": 3.9162485599517822, "learning_rate": 2.277894736842105e-06, "loss": 0.0669, "step": 5675 }, { "epoch": 10.106382978723405, "grad_norm": 5.168406963348389, "learning_rate": 2.2647368421052635e-06, "loss": 0.0677, "step": 5700 }, { "epoch": 10.150709219858156, "grad_norm": 3.850172758102417, "learning_rate": 2.2515789473684214e-06, "loss": 0.0687, "step": 5725 }, { "epoch": 10.195035460992909, "grad_norm": 3.854781150817871, "learning_rate": 2.2384210526315793e-06, "loss": 0.0734, "step": 5750 }, { "epoch": 10.23936170212766, "grad_norm": 3.807837724685669, "learning_rate": 2.225263157894737e-06, "loss": 0.0654, "step": 5775 }, { "epoch": 10.28368794326241, "grad_norm": 6.461479187011719, "learning_rate": 2.2121052631578947e-06, "loss": 0.0692, "step": 5800 }, { "epoch": 10.328014184397164, "grad_norm": 5.051649570465088, "learning_rate": 2.1989473684210526e-06, "loss": 0.0705, "step": 5825 }, { "epoch": 10.372340425531915, "grad_norm": 4.43517541885376, "learning_rate": 2.1857894736842105e-06, "loss": 0.0738, "step": 5850 }, { "epoch": 10.416666666666666, "grad_norm": 3.5894205570220947, "learning_rate": 2.172631578947369e-06, "loss": 0.0699, "step": 5875 }, { "epoch": 10.460992907801419, "grad_norm": 4.5283203125, "learning_rate": 2.1594736842105267e-06, "loss": 0.0688, "step": 5900 }, { "epoch": 10.50531914893617, "grad_norm": 3.9678380489349365, "learning_rate": 2.1463157894736842e-06, "loss": 0.0678, "step": 5925 }, { "epoch": 10.549645390070921, "grad_norm": 6.319568157196045, "learning_rate": 2.133157894736842e-06, "loss": 0.0716, "step": 5950 }, { "epoch": 10.593971631205674, "grad_norm": 4.849029541015625, "learning_rate": 2.12e-06, "loss": 0.0746, "step": 5975 }, { "epoch": 10.638297872340425, "grad_norm": 3.9395298957824707, "learning_rate": 2.106842105263158e-06, "loss": 0.0673, "step": 6000 }, { "epoch": 10.638297872340425, "eval_loss": 0.4972631335258484, "eval_runtime": 583.012, "eval_samples_per_second": 6.683, "eval_steps_per_second": 0.835, "eval_wer": 39.709391360091175, "step": 6000 }, { "epoch": 10.682624113475176, "grad_norm": 4.806758880615234, "learning_rate": 2.0936842105263163e-06, "loss": 0.069, "step": 6025 }, { "epoch": 10.72695035460993, "grad_norm": 5.572425842285156, "learning_rate": 2.0805263157894738e-06, "loss": 0.0699, "step": 6050 }, { "epoch": 10.77127659574468, "grad_norm": 4.8004302978515625, "learning_rate": 2.0673684210526317e-06, "loss": 0.0712, "step": 6075 }, { "epoch": 10.815602836879432, "grad_norm": 4.476444721221924, "learning_rate": 2.0547368421052633e-06, "loss": 0.0746, "step": 6100 }, { "epoch": 10.859929078014185, "grad_norm": 4.727671146392822, "learning_rate": 2.0415789473684213e-06, "loss": 0.073, "step": 6125 }, { "epoch": 10.904255319148936, "grad_norm": 4.828220367431641, "learning_rate": 2.028421052631579e-06, "loss": 0.0698, "step": 6150 }, { "epoch": 10.948581560283689, "grad_norm": 5.050329685211182, "learning_rate": 2.015263157894737e-06, "loss": 0.0705, "step": 6175 }, { "epoch": 10.99290780141844, "grad_norm": 4.282689094543457, "learning_rate": 2.002105263157895e-06, "loss": 0.0718, "step": 6200 }, { "epoch": 11.037234042553191, "grad_norm": 4.424275875091553, "learning_rate": 1.988947368421053e-06, "loss": 0.0636, "step": 6225 }, { "epoch": 11.081560283687944, "grad_norm": 3.237255573272705, "learning_rate": 1.975789473684211e-06, "loss": 0.0544, "step": 6250 }, { "epoch": 11.125886524822695, "grad_norm": 3.363708972930908, "learning_rate": 1.9626315789473683e-06, "loss": 0.0582, "step": 6275 }, { "epoch": 11.170212765957446, "grad_norm": 4.743597030639648, "learning_rate": 1.949473684210526e-06, "loss": 0.0567, "step": 6300 }, { "epoch": 11.214539007092199, "grad_norm": 4.212203502655029, "learning_rate": 1.9363157894736845e-06, "loss": 0.0643, "step": 6325 }, { "epoch": 11.25886524822695, "grad_norm": 3.580488443374634, "learning_rate": 1.9231578947368424e-06, "loss": 0.057, "step": 6350 }, { "epoch": 11.303191489361701, "grad_norm": 3.409921646118164, "learning_rate": 1.9100000000000003e-06, "loss": 0.0567, "step": 6375 }, { "epoch": 11.347517730496454, "grad_norm": 3.3070523738861084, "learning_rate": 1.896842105263158e-06, "loss": 0.0562, "step": 6400 }, { "epoch": 11.391843971631205, "grad_norm": 4.054013252258301, "learning_rate": 1.883684210526316e-06, "loss": 0.0645, "step": 6425 }, { "epoch": 11.436170212765958, "grad_norm": 3.5053253173828125, "learning_rate": 1.8705263157894737e-06, "loss": 0.0632, "step": 6450 }, { "epoch": 11.48049645390071, "grad_norm": 3.654541492462158, "learning_rate": 1.8573684210526316e-06, "loss": 0.0586, "step": 6475 }, { "epoch": 11.52482269503546, "grad_norm": 4.121072769165039, "learning_rate": 1.8442105263157897e-06, "loss": 0.061, "step": 6500 }, { "epoch": 11.569148936170214, "grad_norm": 4.159468173980713, "learning_rate": 1.8310526315789476e-06, "loss": 0.0611, "step": 6525 }, { "epoch": 11.613475177304965, "grad_norm": 4.2946672439575195, "learning_rate": 1.8178947368421055e-06, "loss": 0.0609, "step": 6550 }, { "epoch": 11.657801418439716, "grad_norm": 3.857961416244507, "learning_rate": 1.8047368421052632e-06, "loss": 0.0613, "step": 6575 }, { "epoch": 11.702127659574469, "grad_norm": 3.1902592182159424, "learning_rate": 1.7915789473684211e-06, "loss": 0.0607, "step": 6600 }, { "epoch": 11.74645390070922, "grad_norm": 5.016479015350342, "learning_rate": 1.778421052631579e-06, "loss": 0.0586, "step": 6625 }, { "epoch": 11.790780141843971, "grad_norm": 4.0299601554870605, "learning_rate": 1.7652631578947371e-06, "loss": 0.059, "step": 6650 }, { "epoch": 11.835106382978724, "grad_norm": 4.405561447143555, "learning_rate": 1.752105263157895e-06, "loss": 0.0623, "step": 6675 }, { "epoch": 11.879432624113475, "grad_norm": 3.684788465499878, "learning_rate": 1.7389473684210527e-06, "loss": 0.0614, "step": 6700 }, { "epoch": 11.923758865248226, "grad_norm": 4.335251331329346, "learning_rate": 1.7257894736842107e-06, "loss": 0.0581, "step": 6725 }, { "epoch": 11.96808510638298, "grad_norm": 4.67876672744751, "learning_rate": 1.7126315789473686e-06, "loss": 0.0569, "step": 6750 }, { "epoch": 12.01241134751773, "grad_norm": 5.14631462097168, "learning_rate": 1.6994736842105265e-06, "loss": 0.0548, "step": 6775 }, { "epoch": 12.056737588652481, "grad_norm": 3.651719331741333, "learning_rate": 1.6863157894736842e-06, "loss": 0.0514, "step": 6800 }, { "epoch": 12.101063829787234, "grad_norm": 3.455418109893799, "learning_rate": 1.6731578947368423e-06, "loss": 0.0534, "step": 6825 }, { "epoch": 12.145390070921986, "grad_norm": 3.9486734867095947, "learning_rate": 1.6600000000000002e-06, "loss": 0.0495, "step": 6850 }, { "epoch": 12.189716312056738, "grad_norm": 2.9897313117980957, "learning_rate": 1.6468421052631581e-06, "loss": 0.051, "step": 6875 }, { "epoch": 12.23404255319149, "grad_norm": 4.208747863769531, "learning_rate": 1.633684210526316e-06, "loss": 0.0486, "step": 6900 }, { "epoch": 12.27836879432624, "grad_norm": 3.3527841567993164, "learning_rate": 1.6205263157894737e-06, "loss": 0.0524, "step": 6925 }, { "epoch": 12.322695035460994, "grad_norm": 3.6749916076660156, "learning_rate": 1.6073684210526316e-06, "loss": 0.0577, "step": 6950 }, { "epoch": 12.367021276595745, "grad_norm": 3.266439437866211, "learning_rate": 1.5942105263157895e-06, "loss": 0.0491, "step": 6975 }, { "epoch": 12.411347517730496, "grad_norm": 3.9574387073516846, "learning_rate": 1.5810526315789477e-06, "loss": 0.0537, "step": 7000 }, { "epoch": 12.411347517730496, "eval_loss": 0.5095303654670715, "eval_runtime": 586.2939, "eval_samples_per_second": 6.645, "eval_steps_per_second": 0.831, "eval_wer": 41.29064425371274, "step": 7000 }, { "epoch": 12.455673758865249, "grad_norm": 4.798894882202148, "learning_rate": 1.5678947368421056e-06, "loss": 0.0533, "step": 7025 }, { "epoch": 12.5, "grad_norm": 3.0210700035095215, "learning_rate": 1.5547368421052633e-06, "loss": 0.0483, "step": 7050 }, { "epoch": 12.544326241134751, "grad_norm": 4.632834434509277, "learning_rate": 1.5415789473684212e-06, "loss": 0.0509, "step": 7075 }, { "epoch": 12.588652482269504, "grad_norm": 4.397753715515137, "learning_rate": 1.528421052631579e-06, "loss": 0.0515, "step": 7100 }, { "epoch": 12.632978723404255, "grad_norm": 2.9680283069610596, "learning_rate": 1.5152631578947368e-06, "loss": 0.0475, "step": 7125 }, { "epoch": 12.677304964539006, "grad_norm": 3.9441206455230713, "learning_rate": 1.5021052631578947e-06, "loss": 0.055, "step": 7150 }, { "epoch": 12.72163120567376, "grad_norm": 3.183037519454956, "learning_rate": 1.4889473684210528e-06, "loss": 0.0522, "step": 7175 }, { "epoch": 12.76595744680851, "grad_norm": 3.4659500122070312, "learning_rate": 1.4757894736842107e-06, "loss": 0.0477, "step": 7200 }, { "epoch": 12.810283687943262, "grad_norm": 3.1689703464508057, "learning_rate": 1.4626315789473686e-06, "loss": 0.0512, "step": 7225 }, { "epoch": 12.854609929078014, "grad_norm": 5.009653568267822, "learning_rate": 1.4494736842105263e-06, "loss": 0.0487, "step": 7250 }, { "epoch": 12.898936170212766, "grad_norm": 3.3407084941864014, "learning_rate": 1.4363157894736842e-06, "loss": 0.0489, "step": 7275 }, { "epoch": 12.943262411347519, "grad_norm": 4.140749454498291, "learning_rate": 1.4231578947368421e-06, "loss": 0.047, "step": 7300 }, { "epoch": 12.98758865248227, "grad_norm": 3.0766468048095703, "learning_rate": 1.41e-06, "loss": 0.052, "step": 7325 }, { "epoch": 13.03191489361702, "grad_norm": 3.058790683746338, "learning_rate": 1.3968421052631582e-06, "loss": 0.0445, "step": 7350 }, { "epoch": 13.076241134751774, "grad_norm": 2.4315567016601562, "learning_rate": 1.3836842105263159e-06, "loss": 0.0447, "step": 7375 }, { "epoch": 13.120567375886525, "grad_norm": 2.692753314971924, "learning_rate": 1.3705263157894738e-06, "loss": 0.0398, "step": 7400 }, { "epoch": 13.164893617021276, "grad_norm": 3.2242069244384766, "learning_rate": 1.3573684210526317e-06, "loss": 0.0425, "step": 7425 }, { "epoch": 13.209219858156029, "grad_norm": 3.640981674194336, "learning_rate": 1.3442105263157896e-06, "loss": 0.043, "step": 7450 }, { "epoch": 13.25354609929078, "grad_norm": 2.966660261154175, "learning_rate": 1.3310526315789473e-06, "loss": 0.0422, "step": 7475 }, { "epoch": 13.297872340425531, "grad_norm": 2.7896602153778076, "learning_rate": 1.3178947368421054e-06, "loss": 0.044, "step": 7500 }, { "epoch": 13.342198581560284, "grad_norm": 3.0664894580841064, "learning_rate": 1.3047368421052633e-06, "loss": 0.0432, "step": 7525 }, { "epoch": 13.386524822695035, "grad_norm": 2.7736198902130127, "learning_rate": 1.2915789473684212e-06, "loss": 0.0416, "step": 7550 }, { "epoch": 13.430851063829786, "grad_norm": 3.5016989707946777, "learning_rate": 1.2784210526315791e-06, "loss": 0.0481, "step": 7575 }, { "epoch": 13.47517730496454, "grad_norm": 3.0631349086761475, "learning_rate": 1.2652631578947368e-06, "loss": 0.0441, "step": 7600 }, { "epoch": 13.51950354609929, "grad_norm": 3.7912166118621826, "learning_rate": 1.2521052631578948e-06, "loss": 0.047, "step": 7625 }, { "epoch": 13.563829787234042, "grad_norm": 3.7112090587615967, "learning_rate": 1.2389473684210527e-06, "loss": 0.0442, "step": 7650 }, { "epoch": 13.608156028368795, "grad_norm": 4.523186206817627, "learning_rate": 1.2257894736842106e-06, "loss": 0.0458, "step": 7675 }, { "epoch": 13.652482269503546, "grad_norm": 3.0612213611602783, "learning_rate": 1.2126315789473685e-06, "loss": 0.0461, "step": 7700 }, { "epoch": 13.696808510638299, "grad_norm": 2.902688980102539, "learning_rate": 1.1994736842105264e-06, "loss": 0.0446, "step": 7725 }, { "epoch": 13.74113475177305, "grad_norm": 2.876624822616577, "learning_rate": 1.1863157894736843e-06, "loss": 0.0433, "step": 7750 }, { "epoch": 13.785460992907801, "grad_norm": 3.696685314178467, "learning_rate": 1.1731578947368422e-06, "loss": 0.0464, "step": 7775 }, { "epoch": 13.829787234042554, "grad_norm": 3.2120165824890137, "learning_rate": 1.1600000000000001e-06, "loss": 0.0444, "step": 7800 }, { "epoch": 13.874113475177305, "grad_norm": 3.735292434692383, "learning_rate": 1.146842105263158e-06, "loss": 0.0417, "step": 7825 }, { "epoch": 13.918439716312056, "grad_norm": 3.8104641437530518, "learning_rate": 1.133684210526316e-06, "loss": 0.045, "step": 7850 }, { "epoch": 13.962765957446809, "grad_norm": 3.321183919906616, "learning_rate": 1.1205263157894736e-06, "loss": 0.046, "step": 7875 }, { "epoch": 14.00709219858156, "grad_norm": 3.0110223293304443, "learning_rate": 1.1073684210526318e-06, "loss": 0.0452, "step": 7900 }, { "epoch": 14.051418439716311, "grad_norm": 2.797724485397339, "learning_rate": 1.0942105263157895e-06, "loss": 0.0384, "step": 7925 }, { "epoch": 14.095744680851064, "grad_norm": 2.8559882640838623, "learning_rate": 1.0810526315789474e-06, "loss": 0.039, "step": 7950 }, { "epoch": 14.140070921985815, "grad_norm": 3.0210611820220947, "learning_rate": 1.0678947368421055e-06, "loss": 0.0359, "step": 7975 }, { "epoch": 14.184397163120567, "grad_norm": 3.4683313369750977, "learning_rate": 1.0547368421052632e-06, "loss": 0.0393, "step": 8000 }, { "epoch": 14.184397163120567, "eval_loss": 0.5176098942756653, "eval_runtime": 588.4879, "eval_samples_per_second": 6.62, "eval_steps_per_second": 0.828, "eval_wer": 41.376117383097686, "step": 8000 }, { "epoch": 14.22872340425532, "grad_norm": 3.2651422023773193, "learning_rate": 1.041578947368421e-06, "loss": 0.039, "step": 8025 }, { "epoch": 14.27304964539007, "grad_norm": 3.2940969467163086, "learning_rate": 1.028421052631579e-06, "loss": 0.0393, "step": 8050 }, { "epoch": 14.317375886524824, "grad_norm": 3.090914011001587, "learning_rate": 1.015263157894737e-06, "loss": 0.039, "step": 8075 }, { "epoch": 14.361702127659575, "grad_norm": 3.464435577392578, "learning_rate": 1.0021052631578948e-06, "loss": 0.0395, "step": 8100 }, { "epoch": 14.406028368794326, "grad_norm": 2.341763734817505, "learning_rate": 9.889473684210527e-07, "loss": 0.0392, "step": 8125 }, { "epoch": 14.450354609929079, "grad_norm": 2.7853071689605713, "learning_rate": 9.757894736842106e-07, "loss": 0.0399, "step": 8150 }, { "epoch": 14.49468085106383, "grad_norm": 3.5469071865081787, "learning_rate": 9.626315789473685e-07, "loss": 0.0404, "step": 8175 }, { "epoch": 14.539007092198581, "grad_norm": 3.5632236003875732, "learning_rate": 9.494736842105263e-07, "loss": 0.0409, "step": 8200 }, { "epoch": 14.583333333333334, "grad_norm": 2.9529061317443848, "learning_rate": 9.363157894736844e-07, "loss": 0.0411, "step": 8225 }, { "epoch": 14.627659574468085, "grad_norm": 2.856344223022461, "learning_rate": 9.231578947368422e-07, "loss": 0.0384, "step": 8250 }, { "epoch": 14.671985815602836, "grad_norm": 3.559720039367676, "learning_rate": 9.100000000000001e-07, "loss": 0.039, "step": 8275 }, { "epoch": 14.71631205673759, "grad_norm": 3.8412675857543945, "learning_rate": 8.968421052631579e-07, "loss": 0.0361, "step": 8300 }, { "epoch": 14.76063829787234, "grad_norm": 3.8791191577911377, "learning_rate": 8.836842105263159e-07, "loss": 0.0357, "step": 8325 }, { "epoch": 14.804964539007091, "grad_norm": 4.187379837036133, "learning_rate": 8.705263157894737e-07, "loss": 0.0428, "step": 8350 }, { "epoch": 14.849290780141844, "grad_norm": 4.432793617248535, "learning_rate": 8.573684210526316e-07, "loss": 0.0375, "step": 8375 }, { "epoch": 14.893617021276595, "grad_norm": 3.823516368865967, "learning_rate": 8.442105263157896e-07, "loss": 0.041, "step": 8400 }, { "epoch": 14.937943262411348, "grad_norm": 3.2699050903320312, "learning_rate": 8.310526315789474e-07, "loss": 0.0399, "step": 8425 }, { "epoch": 14.9822695035461, "grad_norm": 2.571930408477783, "learning_rate": 8.178947368421053e-07, "loss": 0.0405, "step": 8450 }, { "epoch": 15.02659574468085, "grad_norm": 2.3387529850006104, "learning_rate": 8.047368421052632e-07, "loss": 0.0367, "step": 8475 }, { "epoch": 15.070921985815604, "grad_norm": 3.301847219467163, "learning_rate": 7.915789473684212e-07, "loss": 0.0347, "step": 8500 }, { "epoch": 15.115248226950355, "grad_norm": 3.649311065673828, "learning_rate": 7.78421052631579e-07, "loss": 0.0368, "step": 8525 }, { "epoch": 15.159574468085106, "grad_norm": 2.7183964252471924, "learning_rate": 7.652631578947369e-07, "loss": 0.0357, "step": 8550 }, { "epoch": 15.203900709219859, "grad_norm": 4.076670169830322, "learning_rate": 7.521052631578949e-07, "loss": 0.0345, "step": 8575 }, { "epoch": 15.24822695035461, "grad_norm": 2.5695323944091797, "learning_rate": 7.389473684210527e-07, "loss": 0.0332, "step": 8600 }, { "epoch": 15.292553191489361, "grad_norm": 3.0496561527252197, "learning_rate": 7.257894736842106e-07, "loss": 0.0352, "step": 8625 }, { "epoch": 15.336879432624114, "grad_norm": 2.6376793384552, "learning_rate": 7.126315789473685e-07, "loss": 0.0381, "step": 8650 }, { "epoch": 15.381205673758865, "grad_norm": 2.617739200592041, "learning_rate": 6.994736842105264e-07, "loss": 0.0382, "step": 8675 }, { "epoch": 15.425531914893616, "grad_norm": 1.8115471601486206, "learning_rate": 6.863157894736842e-07, "loss": 0.0358, "step": 8700 }, { "epoch": 15.46985815602837, "grad_norm": 2.169344902038574, "learning_rate": 6.731578947368421e-07, "loss": 0.0336, "step": 8725 }, { "epoch": 15.51418439716312, "grad_norm": 2.6424083709716797, "learning_rate": 6.6e-07, "loss": 0.0358, "step": 8750 }, { "epoch": 15.558510638297872, "grad_norm": 3.1048264503479004, "learning_rate": 6.468421052631579e-07, "loss": 0.0351, "step": 8775 }, { "epoch": 15.602836879432624, "grad_norm": 2.3110456466674805, "learning_rate": 6.336842105263157e-07, "loss": 0.0338, "step": 8800 }, { "epoch": 15.647163120567376, "grad_norm": 3.729184150695801, "learning_rate": 6.205263157894738e-07, "loss": 0.0355, "step": 8825 }, { "epoch": 15.691489361702128, "grad_norm": 2.4826672077178955, "learning_rate": 6.073684210526317e-07, "loss": 0.0314, "step": 8850 }, { "epoch": 15.73581560283688, "grad_norm": 2.23388934135437, "learning_rate": 5.942105263157895e-07, "loss": 0.0369, "step": 8875 }, { "epoch": 15.78014184397163, "grad_norm": 2.6081252098083496, "learning_rate": 5.810526315789474e-07, "loss": 0.0335, "step": 8900 }, { "epoch": 15.824468085106384, "grad_norm": 2.6870105266571045, "learning_rate": 5.678947368421053e-07, "loss": 0.0355, "step": 8925 }, { "epoch": 15.868794326241135, "grad_norm": 2.663280725479126, "learning_rate": 5.547368421052632e-07, "loss": 0.0386, "step": 8950 }, { "epoch": 15.913120567375886, "grad_norm": 2.3229103088378906, "learning_rate": 5.415789473684211e-07, "loss": 0.0343, "step": 8975 }, { "epoch": 15.957446808510639, "grad_norm": 2.0813558101654053, "learning_rate": 5.284210526315789e-07, "loss": 0.0354, "step": 9000 }, { "epoch": 15.957446808510639, "eval_loss": 0.525884747505188, "eval_runtime": 589.6497, "eval_samples_per_second": 6.607, "eval_steps_per_second": 0.826, "eval_wer": 42.95380889632822, "step": 9000 }, { "epoch": 16.00177304964539, "grad_norm": 2.0741195678710938, "learning_rate": 5.152631578947369e-07, "loss": 0.0357, "step": 9025 }, { "epoch": 16.04609929078014, "grad_norm": 2.2553582191467285, "learning_rate": 5.021052631578948e-07, "loss": 0.0348, "step": 9050 }, { "epoch": 16.090425531914892, "grad_norm": 2.8926563262939453, "learning_rate": 4.889473684210526e-07, "loss": 0.0329, "step": 9075 }, { "epoch": 16.134751773049647, "grad_norm": 2.0093374252319336, "learning_rate": 4.757894736842106e-07, "loss": 0.0316, "step": 9100 }, { "epoch": 16.179078014184398, "grad_norm": 2.607196569442749, "learning_rate": 4.626315789473684e-07, "loss": 0.0343, "step": 9125 }, { "epoch": 16.22340425531915, "grad_norm": 3.0515787601470947, "learning_rate": 4.4947368421052637e-07, "loss": 0.0337, "step": 9150 }, { "epoch": 16.2677304964539, "grad_norm": 2.6530652046203613, "learning_rate": 4.363157894736843e-07, "loss": 0.0328, "step": 9175 }, { "epoch": 16.31205673758865, "grad_norm": 2.286144733428955, "learning_rate": 4.2315789473684214e-07, "loss": 0.0314, "step": 9200 }, { "epoch": 16.356382978723403, "grad_norm": 1.9126091003417969, "learning_rate": 4.1000000000000004e-07, "loss": 0.0337, "step": 9225 }, { "epoch": 16.400709219858157, "grad_norm": 2.4638893604278564, "learning_rate": 3.968421052631579e-07, "loss": 0.0326, "step": 9250 }, { "epoch": 16.44503546099291, "grad_norm": 1.9740127325057983, "learning_rate": 3.836842105263158e-07, "loss": 0.0316, "step": 9275 }, { "epoch": 16.48936170212766, "grad_norm": 2.466771125793457, "learning_rate": 3.7052631578947377e-07, "loss": 0.0323, "step": 9300 }, { "epoch": 16.53368794326241, "grad_norm": 3.461355686187744, "learning_rate": 3.573684210526316e-07, "loss": 0.0329, "step": 9325 }, { "epoch": 16.578014184397162, "grad_norm": 4.0049662590026855, "learning_rate": 3.4421052631578954e-07, "loss": 0.0307, "step": 9350 }, { "epoch": 16.622340425531917, "grad_norm": 3.518848419189453, "learning_rate": 3.310526315789474e-07, "loss": 0.0303, "step": 9375 }, { "epoch": 16.666666666666668, "grad_norm": 2.83296799659729, "learning_rate": 3.178947368421053e-07, "loss": 0.0357, "step": 9400 }, { "epoch": 16.71099290780142, "grad_norm": 2.5147087574005127, "learning_rate": 3.0473684210526316e-07, "loss": 0.0328, "step": 9425 }, { "epoch": 16.75531914893617, "grad_norm": 3.1541314125061035, "learning_rate": 2.9157894736842107e-07, "loss": 0.0344, "step": 9450 }, { "epoch": 16.79964539007092, "grad_norm": 2.6284327507019043, "learning_rate": 2.78421052631579e-07, "loss": 0.0317, "step": 9475 }, { "epoch": 16.843971631205672, "grad_norm": 2.6868457794189453, "learning_rate": 2.6526315789473684e-07, "loss": 0.0336, "step": 9500 }, { "epoch": 16.888297872340427, "grad_norm": 1.9752613306045532, "learning_rate": 2.5210526315789474e-07, "loss": 0.0321, "step": 9525 }, { "epoch": 16.93262411347518, "grad_norm": 2.544431447982788, "learning_rate": 2.3894736842105265e-07, "loss": 0.0334, "step": 9550 }, { "epoch": 16.97695035460993, "grad_norm": 3.398198366165161, "learning_rate": 2.2578947368421054e-07, "loss": 0.0342, "step": 9575 }, { "epoch": 17.02127659574468, "grad_norm": 2.5984408855438232, "learning_rate": 2.1263157894736842e-07, "loss": 0.0341, "step": 9600 }, { "epoch": 17.06560283687943, "grad_norm": 2.4616684913635254, "learning_rate": 1.9947368421052633e-07, "loss": 0.0297, "step": 9625 }, { "epoch": 17.109929078014183, "grad_norm": 1.7951653003692627, "learning_rate": 1.8631578947368424e-07, "loss": 0.0315, "step": 9650 }, { "epoch": 17.154255319148938, "grad_norm": 2.757528066635132, "learning_rate": 1.7315789473684212e-07, "loss": 0.0315, "step": 9675 }, { "epoch": 17.19858156028369, "grad_norm": 2.636103868484497, "learning_rate": 1.6e-07, "loss": 0.034, "step": 9700 }, { "epoch": 17.24290780141844, "grad_norm": 2.6068308353424072, "learning_rate": 1.468421052631579e-07, "loss": 0.0318, "step": 9725 }, { "epoch": 17.28723404255319, "grad_norm": 2.8780813217163086, "learning_rate": 1.3368421052631582e-07, "loss": 0.0291, "step": 9750 }, { "epoch": 17.331560283687942, "grad_norm": 2.056938409805298, "learning_rate": 1.205263157894737e-07, "loss": 0.0312, "step": 9775 }, { "epoch": 17.375886524822697, "grad_norm": 1.919609785079956, "learning_rate": 1.0736842105263159e-07, "loss": 0.0301, "step": 9800 }, { "epoch": 17.420212765957448, "grad_norm": 2.8991692066192627, "learning_rate": 9.421052631578948e-08, "loss": 0.0325, "step": 9825 }, { "epoch": 17.4645390070922, "grad_norm": 3.554067611694336, "learning_rate": 8.105263157894738e-08, "loss": 0.0325, "step": 9850 }, { "epoch": 17.50886524822695, "grad_norm": 2.3162522315979004, "learning_rate": 6.789473684210528e-08, "loss": 0.0295, "step": 9875 }, { "epoch": 17.5531914893617, "grad_norm": 1.9810761213302612, "learning_rate": 5.473684210526316e-08, "loss": 0.0316, "step": 9900 }, { "epoch": 17.597517730496453, "grad_norm": 2.424508571624756, "learning_rate": 4.1578947368421054e-08, "loss": 0.0299, "step": 9925 }, { "epoch": 17.641843971631207, "grad_norm": 3.3512706756591797, "learning_rate": 2.842105263157895e-08, "loss": 0.0316, "step": 9950 }, { "epoch": 17.68617021276596, "grad_norm": 2.7042930126190186, "learning_rate": 1.5263157894736843e-08, "loss": 0.0308, "step": 9975 }, { "epoch": 17.73049645390071, "grad_norm": 2.975961685180664, "learning_rate": 2.105263157894737e-09, "loss": 0.0317, "step": 10000 }, { "epoch": 17.73049645390071, "eval_loss": 0.5293700695037842, "eval_runtime": 590.1218, "eval_samples_per_second": 6.602, "eval_steps_per_second": 0.825, "eval_wer": 44.56711421346914, "step": 10000 }, { "epoch": 17.73049645390071, "step": 10000, "total_flos": 3.93818457710592e+18, "train_loss": 0.1984061565220356, "train_runtime": 10260.2689, "train_samples_per_second": 15.594, "train_steps_per_second": 0.975 }, { "epoch": 17.73049645390071, "eval_loss": 0.4848020076751709, "eval_runtime": 614.3795, "eval_samples_per_second": 6.341, "eval_steps_per_second": 0.793, "eval_wer": 37.98212187043698, "step": 10000 } ], "logging_steps": 25, "max_steps": 10000, "num_input_tokens_seen": 0, "num_train_epochs": 18, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.93818457710592e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }