{
  "best_metric": 37.96787634887283,
  "best_model_checkpoint": "./whisper-tiny-ro/checkpoint-5000",
  "epoch": 17.73049645390071,
  "eval_steps": 1000,
  "global_step": 10000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {"epoch": 0.044326241134751775, "grad_norm": 28.326566696166992, "learning_rate": 2.2e-07, "loss": 1.8024, "step": 25},
    {"epoch": 0.08865248226950355, "grad_norm": 22.18955421447754, "learning_rate": 4.7000000000000005e-07, "loss": 1.7794, "step": 50},
    {"epoch": 0.13297872340425532, "grad_norm": 21.889328002929688, "learning_rate": 7.2e-07, "loss": 1.63, "step": 75},
    {"epoch": 0.1773049645390071, "grad_norm": 19.02008819580078, "learning_rate": 9.7e-07, "loss": 1.5239, "step": 100},
    {"epoch": 0.22163120567375885, "grad_norm": 16.531150817871094, "learning_rate": 1.2200000000000002e-06, "loss": 1.3903, "step": 125},
    {"epoch": 0.26595744680851063, "grad_norm": 16.245573043823242, "learning_rate": 1.4700000000000001e-06, "loss": 1.2517, "step": 150},
    {"epoch": 0.3102836879432624, "grad_norm": 17.484891891479492, "learning_rate": 1.72e-06, "loss": 1.1449, "step": 175},
    {"epoch": 0.3546099290780142, "grad_norm": 13.317365646362305, "learning_rate": 1.97e-06, "loss": 1.0231, "step": 200},
    {"epoch": 0.39893617021276595, "grad_norm": 16.296846389770508, "learning_rate": 2.2200000000000003e-06, "loss": 1.0033, "step": 225},
    {"epoch": 0.4432624113475177, "grad_norm": 14.858762741088867, "learning_rate": 2.47e-06, "loss": 0.9183, "step": 250},
    {"epoch": 0.4875886524822695, "grad_norm": 15.132709503173828, "learning_rate": 2.7200000000000002e-06, "loss": 0.9142, "step": 275},
    {"epoch": 0.5319148936170213, "grad_norm": 15.102398872375488, "learning_rate": 2.97e-06, "loss": 0.8795, "step": 300},
    {"epoch": 0.5762411347517731, "grad_norm": 15.617897033691406, "learning_rate": 3.2200000000000005e-06, "loss": 0.863, "step": 325},
    {"epoch": 0.6205673758865248, "grad_norm": 15.325774192810059, "learning_rate": 3.4700000000000002e-06, "loss": 0.8094, "step": 350},
    {"epoch": 0.6648936170212766, "grad_norm": 13.950435638427734, "learning_rate": 3.7200000000000004e-06, "loss": 0.8471, "step": 375},
    {"epoch": 0.7092198581560284, "grad_norm": 17.703575134277344, "learning_rate": 3.97e-06, "loss": 0.83, "step": 400},
    {"epoch": 0.7535460992907801, "grad_norm": 13.622574806213379, "learning_rate": 4.22e-06, "loss": 0.7605, "step": 425},
    {"epoch": 0.7978723404255319, "grad_norm": 13.574337005615234, "learning_rate": 4.47e-06, "loss": 0.7464, "step": 450},
    {"epoch": 0.8421985815602837, "grad_norm": 12.981876373291016, "learning_rate": 4.7200000000000005e-06, "loss": 0.767, "step": 475},
    {"epoch": 0.8865248226950354, "grad_norm": 15.00900936126709, "learning_rate": 4.970000000000001e-06, "loss": 0.7617, "step": 500},
    {"epoch": 0.9308510638297872, "grad_norm": 16.31970977783203, "learning_rate": 4.988421052631579e-06, "loss": 0.6962, "step": 525},
    {"epoch": 0.975177304964539, "grad_norm": 10.05798625946045, "learning_rate": 4.9752631578947375e-06, "loss": 0.6676, "step": 550},
    {"epoch": 1.0195035460992907, "grad_norm": 12.080132484436035, "learning_rate": 4.962105263157895e-06, "loss": 0.6351, "step": 575},
    {"epoch": 1.0638297872340425, "grad_norm": 12.87156867980957, "learning_rate": 4.948947368421053e-06, "loss": 0.6078, "step": 600},
    {"epoch": 1.1081560283687943, "grad_norm": 11.931558609008789, "learning_rate": 4.935789473684211e-06, "loss": 0.5755, "step": 625},
    {"epoch": 1.1524822695035462, "grad_norm": 12.827286720275879, "learning_rate": 4.922631578947369e-06, "loss": 0.5679, "step": 650},
    {"epoch": 1.196808510638298, "grad_norm": 12.644274711608887, "learning_rate": 4.909473684210527e-06, "loss": 0.6122, "step": 675},
    {"epoch": 1.2411347517730495, "grad_norm": 12.461518287658691, "learning_rate": 4.896315789473685e-06, "loss": 0.5522, "step": 700},
    {"epoch": 1.2854609929078014, "grad_norm": 14.115540504455566, "learning_rate": 4.8831578947368425e-06, "loss": 0.5764, "step": 725},
    {"epoch": 1.3297872340425532, "grad_norm": 13.1589994430542, "learning_rate": 4.87e-06, "loss": 0.5421, "step": 750},
    {"epoch": 1.374113475177305, "grad_norm": 12.696803092956543, "learning_rate": 4.856842105263158e-06, "loss": 0.5616, "step": 775},
    {"epoch": 1.4184397163120568, "grad_norm": 14.510184288024902, "learning_rate": 4.843684210526316e-06, "loss": 0.5725, "step": 800},
    {"epoch": 1.4627659574468086, "grad_norm": 11.529364585876465, "learning_rate": 4.830526315789474e-06, "loss": 0.5627, "step": 825},
    {"epoch": 1.5070921985815602, "grad_norm": 12.159563064575195, "learning_rate": 4.8173684210526324e-06, "loss": 0.5452, "step": 850},
    {"epoch": 1.5514184397163122, "grad_norm": 10.232617378234863, "learning_rate": 4.80421052631579e-06, "loss": 0.5192, "step": 875},
    {"epoch": 1.5957446808510638, "grad_norm": 10.81043529510498, "learning_rate": 4.791052631578948e-06, "loss": 0.5151, "step": 900},
    {"epoch": 1.6400709219858156, "grad_norm": 14.97497272491455, "learning_rate": 4.777894736842106e-06, "loss": 0.5263, "step": 925},
    {"epoch": 1.6843971631205674, "grad_norm": 14.701244354248047, "learning_rate": 4.764736842105264e-06, "loss": 0.524, "step": 950},
    {"epoch": 1.728723404255319, "grad_norm": 13.469274520874023, "learning_rate": 4.7515789473684216e-06, "loss": 0.5084, "step": 975},
    {"epoch": 1.773049645390071, "grad_norm": 11.251127243041992, "learning_rate": 4.738421052631579e-06, "loss": 0.5444, "step": 1000},
    {"epoch": 1.773049645390071, "eval_loss": 0.604159414768219, "eval_runtime": 587.0739, "eval_samples_per_second": 6.636, "eval_steps_per_second": 0.83, "eval_wer": 48.87994586701805, "step": 1000},
    {"epoch": 1.8173758865248226, "grad_norm": 13.032508850097656, "learning_rate": 4.725263157894737e-06, "loss": 0.5314, "step": 1025},
    {"epoch": 1.8617021276595744, "grad_norm": 12.22535228729248, "learning_rate": 4.712105263157895e-06, "loss": 0.4761, "step": 1050},
    {"epoch": 1.9060283687943262, "grad_norm": 15.19352912902832, "learning_rate": 4.698947368421053e-06, "loss": 0.4742, "step": 1075},
    {"epoch": 1.950354609929078, "grad_norm": 10.927416801452637, "learning_rate": 4.685789473684211e-06, "loss": 0.5012, "step": 1100},
    {"epoch": 1.9946808510638299, "grad_norm": 10.421246528625488, "learning_rate": 4.672631578947369e-06, "loss": 0.5225, "step": 1125},
    {"epoch": 2.0390070921985815, "grad_norm": 9.38261604309082, "learning_rate": 4.6594736842105265e-06, "loss": 0.4173, "step": 1150},
    {"epoch": 2.0833333333333335, "grad_norm": 10.129746437072754, "learning_rate": 4.646315789473684e-06, "loss": 0.4153, "step": 1175},
    {"epoch": 2.127659574468085, "grad_norm": 11.529908180236816, "learning_rate": 4.633157894736842e-06, "loss": 0.3981, "step": 1200},
    {"epoch": 2.171985815602837, "grad_norm": 11.076881408691406, "learning_rate": 4.620000000000001e-06, "loss": 0.4152, "step": 1225},
    {"epoch": 2.2163120567375887, "grad_norm": 9.994644165039062, "learning_rate": 4.606842105263158e-06, "loss": 0.3926, "step": 1250},
    {"epoch": 2.2606382978723403, "grad_norm": 10.582009315490723, "learning_rate": 4.5936842105263165e-06, "loss": 0.4378, "step": 1275},
    {"epoch": 2.3049645390070923, "grad_norm": 9.771284103393555, "learning_rate": 4.580526315789474e-06, "loss": 0.3882, "step": 1300},
    {"epoch": 2.349290780141844, "grad_norm": 11.286993026733398, "learning_rate": 4.567368421052632e-06, "loss": 0.3686, "step": 1325},
    {"epoch": 2.393617021276596, "grad_norm": 13.008705139160156, "learning_rate": 4.55421052631579e-06, "loss": 0.3899, "step": 1350},
    {"epoch": 2.4379432624113475, "grad_norm": 10.24173355102539, "learning_rate": 4.541052631578948e-06, "loss": 0.4028, "step": 1375},
    {"epoch": 2.482269503546099, "grad_norm": 10.5569486618042, "learning_rate": 4.527894736842106e-06, "loss": 0.3634, "step": 1400},
    {"epoch": 2.526595744680851, "grad_norm": 10.904850006103516, "learning_rate": 4.514736842105263e-06, "loss": 0.4181, "step": 1425},
    {"epoch": 2.5709219858156027, "grad_norm": 10.719099998474121, "learning_rate": 4.501578947368421e-06, "loss": 0.3803, "step": 1450},
    {"epoch": 2.6152482269503547, "grad_norm": 10.873899459838867, "learning_rate": 4.488421052631579e-06, "loss": 0.3983, "step": 1475},
    {"epoch": 2.6595744680851063, "grad_norm": 8.574480056762695, "learning_rate": 4.475263157894737e-06, "loss": 0.3894, "step": 1500},
    {"epoch": 2.703900709219858, "grad_norm": 10.148545265197754, "learning_rate": 4.462105263157895e-06, "loss": 0.3672, "step": 1525},
    {"epoch": 2.74822695035461, "grad_norm": 11.479018211364746, "learning_rate": 4.448947368421053e-06, "loss": 0.3824, "step": 1550},
    {"epoch": 2.7925531914893615, "grad_norm": 10.652966499328613, "learning_rate": 4.435789473684211e-06, "loss": 0.3849, "step": 1575},
    {"epoch": 2.8368794326241136, "grad_norm": 10.057666778564453, "learning_rate": 4.422631578947369e-06, "loss": 0.3702, "step": 1600},
    {"epoch": 2.881205673758865, "grad_norm": 9.985100746154785, "learning_rate": 4.409473684210527e-06, "loss": 0.4052, "step": 1625},
    {"epoch": 2.925531914893617, "grad_norm": 9.165911674499512, "learning_rate": 4.396315789473685e-06, "loss": 0.3697, "step": 1650},
    {"epoch": 2.969858156028369, "grad_norm": 10.057464599609375, "learning_rate": 4.383157894736842e-06, "loss": 0.3663, "step": 1675},
    {"epoch": 3.0141843971631204, "grad_norm": 10.039346694946289, "learning_rate": 4.3700000000000005e-06, "loss": 0.3443, "step": 1700},
    {"epoch": 3.0585106382978724, "grad_norm": 9.51621150970459, "learning_rate": 4.356842105263158e-06, "loss": 0.2917, "step": 1725},
    {"epoch": 3.102836879432624, "grad_norm": 10.015137672424316, "learning_rate": 4.343684210526316e-06, "loss": 0.3045, "step": 1750},
    {"epoch": 3.147163120567376, "grad_norm": 10.268891334533691, "learning_rate": 4.330526315789474e-06, "loss": 0.2821, "step": 1775},
    {"epoch": 3.1914893617021276, "grad_norm": 9.120494842529297, "learning_rate": 4.317368421052632e-06, "loss": 0.2956, "step": 1800},
    {"epoch": 3.2358156028368796, "grad_norm": 10.420275688171387, "learning_rate": 4.30421052631579e-06, "loss": 0.3086, "step": 1825},
    {"epoch": 3.280141843971631, "grad_norm": 9.254629135131836, "learning_rate": 4.291052631578947e-06, "loss": 0.3083, "step": 1850},
    {"epoch": 3.324468085106383, "grad_norm": 9.865363121032715, "learning_rate": 4.277894736842106e-06, "loss": 0.3071, "step": 1875},
    {"epoch": 3.368794326241135, "grad_norm": 11.790287017822266, "learning_rate": 4.264736842105264e-06, "loss": 0.3014, "step": 1900},
    {"epoch": 3.4131205673758864, "grad_norm": 10.183505058288574, "learning_rate": 4.251578947368421e-06, "loss": 0.293, "step": 1925},
    {"epoch": 3.4574468085106385, "grad_norm": 9.69072151184082, "learning_rate": 4.23842105263158e-06, "loss": 0.2978, "step": 1950},
    {"epoch": 3.50177304964539, "grad_norm": 10.96455192565918, "learning_rate": 4.225263157894737e-06, "loss": 0.311, "step": 1975},
    {"epoch": 3.546099290780142, "grad_norm": 11.342255592346191, "learning_rate": 4.212105263157895e-06, "loss": 0.3042, "step": 2000},
    {"epoch": 3.546099290780142, "eval_loss": 0.5099755525588989, "eval_runtime": 581.1553, "eval_samples_per_second": 6.704, "eval_steps_per_second": 0.838, "eval_wer": 41.17311870080843, "step": 2000},
    {"epoch": 3.5904255319148937, "grad_norm": 10.30219554901123, "learning_rate": 4.198947368421053e-06, "loss": 0.3039, "step": 2025},
    {"epoch": 3.6347517730496453, "grad_norm": 9.825774192810059, "learning_rate": 4.185789473684211e-06, "loss": 0.292, "step": 2050},
    {"epoch": 3.6790780141843973, "grad_norm": 9.612593650817871, "learning_rate": 4.172631578947369e-06, "loss": 0.3133, "step": 2075},
    {"epoch": 3.723404255319149, "grad_norm": 9.864873886108398, "learning_rate": 4.159473684210526e-06, "loss": 0.2786, "step": 2100},
    {"epoch": 3.7677304964539005, "grad_norm": 9.14278507232666, "learning_rate": 4.1463157894736845e-06, "loss": 0.2992, "step": 2125},
    {"epoch": 3.8120567375886525, "grad_norm": 10.981643676757812, "learning_rate": 4.133157894736842e-06, "loss": 0.2987, "step": 2150},
    {"epoch": 3.8563829787234045, "grad_norm": 10.71380615234375, "learning_rate": 4.12e-06, "loss": 0.3088, "step": 2175},
    {"epoch": 3.900709219858156, "grad_norm": 11.365142822265625, "learning_rate": 4.106842105263158e-06, "loss": 0.302, "step": 2200},
    {"epoch": 3.9450354609929077, "grad_norm": 11.918941497802734, "learning_rate": 4.093684210526316e-06, "loss": 0.31, "step": 2225},
    {"epoch": 3.9893617021276597, "grad_norm": 10.240377426147461, "learning_rate": 4.0805263157894745e-06, "loss": 0.3048, "step": 2250},
    {"epoch": 4.033687943262412, "grad_norm": 7.562131404876709, "learning_rate": 4.067368421052632e-06, "loss": 0.2673, "step": 2275},
    {"epoch": 4.078014184397163, "grad_norm": 7.741388320922852, "learning_rate": 4.05421052631579e-06, "loss": 0.2364, "step": 2300},
    {"epoch": 4.122340425531915, "grad_norm": 8.797900199890137, "learning_rate": 4.041052631578948e-06, "loss": 0.2524, "step": 2325},
    {"epoch": 4.166666666666667, "grad_norm": 9.751541137695312, "learning_rate": 4.027894736842105e-06, "loss": 0.2565, "step": 2350},
    {"epoch": 4.210992907801418, "grad_norm": 7.299990653991699, "learning_rate": 4.014736842105264e-06, "loss": 0.2405, "step": 2375},
    {"epoch": 4.25531914893617, "grad_norm": 8.407694816589355, "learning_rate": 4.001578947368421e-06, "loss": 0.226, "step": 2400},
    {"epoch": 4.299645390070922, "grad_norm": 8.874945640563965, "learning_rate": 3.9884210526315795e-06, "loss": 0.2366, "step": 2425},
    {"epoch": 4.343971631205674, "grad_norm": 8.172481536865234, "learning_rate": 3.975263157894737e-06, "loss": 0.2325, "step": 2450},
    {"epoch": 4.388297872340425, "grad_norm": 8.563679695129395, "learning_rate": 3.962105263157895e-06, "loss": 0.2266, "step": 2475},
    {"epoch": 4.432624113475177, "grad_norm": 7.646442413330078, "learning_rate": 3.948947368421053e-06, "loss": 0.2477, "step": 2500},
    {"epoch": 4.476950354609929, "grad_norm": 8.14061164855957, "learning_rate": 3.93578947368421e-06, "loss": 0.2372, "step": 2525},
    {"epoch": 4.5212765957446805, "grad_norm": 6.697457790374756, "learning_rate": 3.9226315789473694e-06, "loss": 0.2312, "step": 2550},
    {"epoch": 4.565602836879433, "grad_norm": 9.178577423095703, "learning_rate": 3.909473684210527e-06, "loss": 0.239, "step": 2575},
    {"epoch": 4.609929078014185, "grad_norm": 7.986817836761475, "learning_rate": 3.896315789473684e-06, "loss": 0.2266, "step": 2600},
    {"epoch": 4.654255319148936, "grad_norm": 9.992223739624023, "learning_rate": 3.883157894736843e-06, "loss": 0.2422, "step": 2625},
    {"epoch": 4.698581560283688, "grad_norm": 8.259024620056152, "learning_rate": 3.87e-06, "loss": 0.2382, "step": 2650},
    {"epoch": 4.74290780141844, "grad_norm": 8.913894653320312, "learning_rate": 3.8568421052631585e-06, "loss": 0.2404, "step": 2675},
    {"epoch": 4.787234042553192, "grad_norm": 8.490303993225098, "learning_rate": 3.843684210526316e-06, "loss": 0.251, "step": 2700},
    {"epoch": 4.831560283687943, "grad_norm": 8.170136451721191, "learning_rate": 3.830526315789474e-06, "loss": 0.2195, "step": 2725},
    {"epoch": 4.875886524822695, "grad_norm": 7.071116924285889, "learning_rate": 3.817368421052632e-06, "loss": 0.2237, "step": 2750},
    {"epoch": 4.920212765957447, "grad_norm": 7.995920181274414, "learning_rate": 3.8042105263157898e-06, "loss": 0.2377, "step": 2775},
    {"epoch": 4.964539007092198, "grad_norm": 8.777873039245605, "learning_rate": 3.7910526315789477e-06, "loss": 0.2211, "step": 2800},
    {"epoch": 5.00886524822695, "grad_norm": 7.402454376220703, "learning_rate": 3.7778947368421056e-06, "loss": 0.2341, "step": 2825},
    {"epoch": 5.053191489361702, "grad_norm": 7.576868057250977, "learning_rate": 3.764736842105263e-06, "loss": 0.2006, "step": 2850},
    {"epoch": 5.097517730496454, "grad_norm": 6.49124002456665, "learning_rate": 3.751578947368421e-06, "loss": 0.1835, "step": 2875},
    {"epoch": 5.141843971631205, "grad_norm": 5.912723064422607, "learning_rate": 3.7384210526315793e-06, "loss": 0.1962, "step": 2900},
    {"epoch": 5.1861702127659575, "grad_norm": 7.608515739440918, "learning_rate": 3.7252631578947372e-06, "loss": 0.18, "step": 2925},
    {"epoch": 5.2304964539007095, "grad_norm": 8.52009105682373, "learning_rate": 3.712105263157895e-06, "loss": 0.184, "step": 2950},
    {"epoch": 5.274822695035461, "grad_norm": 9.251614570617676, "learning_rate": 3.698947368421053e-06, "loss": 0.2098, "step": 2975},
    {"epoch": 5.319148936170213, "grad_norm": 8.74129581451416, "learning_rate": 3.685789473684211e-06, "loss": 0.1817, "step": 3000},
    {"epoch": 5.319148936170213, "eval_loss": 0.4850045144557953, "eval_runtime": 588.1549, "eval_samples_per_second": 6.624, "eval_steps_per_second": 0.828, "eval_wer": 40.756437195056805, "step": 3000},
    {"epoch": 5.363475177304965, "grad_norm": 9.787571907043457, "learning_rate": 3.672631578947369e-06, "loss": 0.2017, "step": 3025},
    {"epoch": 5.407801418439716, "grad_norm": 9.773175239562988, "learning_rate": 3.6594736842105268e-06, "loss": 0.1818, "step": 3050},
    {"epoch": 5.452127659574468, "grad_norm": 9.025221824645996, "learning_rate": 3.6463157894736847e-06, "loss": 0.1926, "step": 3075},
    {"epoch": 5.49645390070922, "grad_norm": 7.631556987762451, "learning_rate": 3.633157894736842e-06, "loss": 0.2017, "step": 3100},
    {"epoch": 5.540780141843972, "grad_norm": 8.103202819824219, "learning_rate": 3.62e-06, "loss": 0.1808, "step": 3125},
    {"epoch": 5.585106382978723, "grad_norm": 6.022019386291504, "learning_rate": 3.606842105263158e-06, "loss": 0.2017, "step": 3150},
    {"epoch": 5.629432624113475, "grad_norm": 6.922440528869629, "learning_rate": 3.593684210526316e-06, "loss": 0.2009, "step": 3175},
    {"epoch": 5.673758865248227, "grad_norm": 8.611794471740723, "learning_rate": 3.580526315789474e-06, "loss": 0.1731, "step": 3200},
    {"epoch": 5.718085106382979, "grad_norm": 7.4870147705078125, "learning_rate": 3.567368421052632e-06, "loss": 0.1797, "step": 3225},
    {"epoch": 5.76241134751773, "grad_norm": 8.757158279418945, "learning_rate": 3.55421052631579e-06, "loss": 0.1847, "step": 3250},
    {"epoch": 5.806737588652482, "grad_norm": 8.361138343811035, "learning_rate": 3.541052631578948e-06, "loss": 0.1738, "step": 3275},
    {"epoch": 5.851063829787234, "grad_norm": 8.07181453704834, "learning_rate": 3.527894736842106e-06, "loss": 0.2007, "step": 3300},
    {"epoch": 5.8953900709219855, "grad_norm": 7.998460292816162, "learning_rate": 3.5147368421052638e-06, "loss": 0.1848, "step": 3325},
    {"epoch": 5.939716312056738, "grad_norm": 7.463223934173584, "learning_rate": 3.5015789473684213e-06, "loss": 0.1894, "step": 3350},
    {"epoch": 5.98404255319149, "grad_norm": 8.993099212646484, "learning_rate": 3.488421052631579e-06, "loss": 0.1898, "step": 3375},
    {"epoch": 6.028368794326241, "grad_norm": 6.37155294418335, "learning_rate": 3.475263157894737e-06, "loss": 0.1772, "step": 3400},
    {"epoch": 6.072695035460993, "grad_norm": 6.82436990737915, "learning_rate": 3.462105263157895e-06, "loss": 0.1555, "step": 3425},
    {"epoch": 6.117021276595745, "grad_norm": 7.0470428466796875, "learning_rate": 3.448947368421053e-06, "loss": 0.1517, "step": 3450},
    {"epoch": 6.161347517730497, "grad_norm": 6.5624494552612305, "learning_rate": 3.435789473684211e-06, "loss": 0.149, "step": 3475},
    {"epoch": 6.205673758865248, "grad_norm": 7.491029262542725, "learning_rate": 3.4226315789473687e-06, "loss": 0.1599, "step": 3500},
    {"epoch": 6.25, "grad_norm": 6.855647563934326, "learning_rate": 3.409473684210526e-06, "loss": 0.1587, "step": 3525},
    {"epoch": 6.294326241134752, "grad_norm": 8.074361801147461, "learning_rate": 3.396315789473684e-06, "loss": 0.1674, "step": 3550},
    {"epoch": 6.338652482269503, "grad_norm": 5.963619709014893, "learning_rate": 3.3831578947368424e-06, "loss": 0.1499, "step": 3575},
    {"epoch": 6.382978723404255, "grad_norm": 8.021512985229492, "learning_rate": 3.3700000000000003e-06, "loss": 0.1488, "step": 3600},
    {"epoch": 6.427304964539007, "grad_norm": 5.919581413269043, "learning_rate": 3.3568421052631583e-06, "loss": 0.1498, "step": 3625},
    {"epoch": 6.471631205673759, "grad_norm": 6.950247287750244, "learning_rate": 3.343684210526316e-06, "loss": 0.1648, "step": 3650},
    {"epoch": 6.51595744680851, "grad_norm": 6.72702693939209, "learning_rate": 3.330526315789474e-06, "loss": 0.146, "step": 3675},
    {"epoch": 6.560283687943262, "grad_norm": 7.681860446929932, "learning_rate": 3.317368421052632e-06, "loss": 0.1542, "step": 3700},
    {"epoch": 6.6046099290780145, "grad_norm": 7.239710330963135, "learning_rate": 3.30421052631579e-06, "loss": 0.1499, "step": 3725},
    {"epoch": 6.648936170212766, "grad_norm": 7.310706615447998, "learning_rate": 3.291052631578948e-06, "loss": 0.1527, "step": 3750},
    {"epoch": 6.693262411347518, "grad_norm": 7.060523509979248, "learning_rate": 3.2778947368421053e-06, "loss": 0.1537, "step": 3775},
    {"epoch": 6.73758865248227, "grad_norm": 8.571366310119629, "learning_rate": 3.264736842105263e-06, "loss": 0.1598, "step": 3800},
    {"epoch": 6.781914893617021, "grad_norm": 6.046979904174805, "learning_rate": 3.251578947368421e-06, "loss": 0.1416, "step": 3825},
    {"epoch": 6.826241134751773, "grad_norm": 8.620864868164062, "learning_rate": 3.238421052631579e-06, "loss": 0.1454, "step": 3850},
    {"epoch": 6.870567375886525, "grad_norm": 8.330490112304688, "learning_rate": 3.225263157894737e-06, "loss": 0.1542, "step": 3875},
    {"epoch": 6.914893617021277, "grad_norm": 10.611557006835938, "learning_rate": 3.212105263157895e-06, "loss": 0.1676, "step": 3900},
    {"epoch": 6.959219858156028, "grad_norm": 6.815483570098877, "learning_rate": 3.198947368421053e-06, "loss": 0.1405, "step": 3925},
    {"epoch": 7.00354609929078, "grad_norm": 5.8308634757995605, "learning_rate": 3.185789473684211e-06, "loss": 0.1479, "step": 3950},
    {"epoch": 7.047872340425532, "grad_norm": 6.529901027679443, "learning_rate": 3.172631578947369e-06, "loss": 0.12, "step": 3975},
    {"epoch": 7.092198581560283, "grad_norm": 6.525743007659912, "learning_rate": 3.159473684210527e-06, "loss": 0.1214, "step": 4000},
    {"epoch": 7.092198581560283, "eval_loss": 0.48074454069137573, "eval_runtime": 589.8236, "eval_samples_per_second": 6.605, "eval_steps_per_second": 0.826, "eval_wer": 41.810605790804516, "step": 4000},
    {"epoch": 7.136524822695035, "grad_norm": 5.498377799987793, "learning_rate": 3.1463157894736844e-06, "loss": 0.1295, "step": 4025},
    {"epoch": 7.180851063829787, "grad_norm": 7.085293292999268, "learning_rate": 3.1331578947368423e-06, "loss": 0.1224, "step": 4050},
    {"epoch": 7.225177304964539, "grad_norm": 4.779361724853516, "learning_rate": 3.12e-06, "loss": 0.1245, "step": 4075},
    {"epoch": 7.2695035460992905, "grad_norm": 6.457351207733154, "learning_rate": 3.106842105263158e-06, "loss": 0.1192, "step": 4100},
    {"epoch": 7.3138297872340425, "grad_norm": 4.801368236541748, "learning_rate": 3.093684210526316e-06, "loss": 0.1322, "step": 4125},
    {"epoch": 7.358156028368795, "grad_norm": 6.449742794036865, "learning_rate": 3.080526315789474e-06, "loss": 0.1218, "step": 4150},
    {"epoch": 7.402482269503546, "grad_norm": 5.5234456062316895, "learning_rate": 3.067368421052632e-06, "loss": 0.1293, "step": 4175},
    {"epoch": 7.446808510638298, "grad_norm": 8.489788055419922, "learning_rate": 3.0542105263157893e-06, "loss": 0.1181, "step": 4200},
    {"epoch": 7.49113475177305, "grad_norm": 6.528730869293213, "learning_rate": 3.0410526315789472e-06, "loss": 0.1263, "step": 4225},
    {"epoch": 7.535460992907802, "grad_norm": 6.973687648773193, "learning_rate": 3.027894736842106e-06, "loss": 0.1179, "step": 4250},
    {"epoch": 7.579787234042553, "grad_norm": 6.633789539337158, "learning_rate": 3.0147368421052635e-06, "loss": 0.1314, "step": 4275},
    {"epoch": 7.624113475177305, "grad_norm": 7.518368721008301, "learning_rate": 3.0015789473684214e-06, "loss": 0.1305, "step": 4300},
    {"epoch": 7.668439716312057, "grad_norm": 5.739889144897461, "learning_rate": 2.9884210526315793e-06, "loss": 0.1295, "step": 4325},
    {"epoch": 7.712765957446808, "grad_norm": 6.737969398498535, "learning_rate": 2.975263157894737e-06, "loss": 0.1256, "step": 4350},
    {"epoch": 7.75709219858156, "grad_norm": 5.012901782989502, "learning_rate": 2.962105263157895e-06, "loss": 0.1312, "step": 4375},
    {"epoch": 7.801418439716312, "grad_norm": 5.5256853103637695, "learning_rate": 2.948947368421053e-06, "loss": 0.1242, "step": 4400},
    {"epoch": 7.845744680851064, "grad_norm": 6.629995346069336, "learning_rate": 2.935789473684211e-06, "loss": 0.125, "step": 4425},
    {"epoch": 7.890070921985815, "grad_norm": 5.227272987365723, "learning_rate": 2.9226315789473684e-06, "loss": 0.1191, "step": 4450},
    {"epoch": 7.934397163120567, "grad_norm": 5.111964225769043, "learning_rate": 2.9094736842105263e-06, "loss": 0.1145, "step": 4475},
    {"epoch": 7.9787234042553195, "grad_norm": 5.537423610687256, "learning_rate": 2.8963157894736842e-06, "loss": 0.1278, "step": 4500},
    {"epoch": 8.02304964539007, "grad_norm": 4.478297710418701, "learning_rate": 2.883157894736842e-06, "loss": 0.1155, "step": 4525},
    {"epoch": 8.067375886524824, "grad_norm": 5.642357349395752, "learning_rate": 2.87e-06, "loss": 0.1102, "step": 4550},
    {"epoch": 8.111702127659575, "grad_norm": 5.228881359100342, "learning_rate": 2.856842105263158e-06, "loss": 0.1009, "step": 4575},
    {"epoch": 8.156028368794326, "grad_norm": 5.18090295791626, "learning_rate": 2.8436842105263163e-06, "loss": 0.0992, "step": 4600},
    {"epoch": 8.200354609929079, "grad_norm": 6.664114475250244, "learning_rate": 2.830526315789474e-06, "loss": 0.1092, "step": 4625},
    {"epoch": 8.24468085106383, "grad_norm": 5.435600280761719, "learning_rate": 2.817368421052632e-06, "loss": 0.1011, "step": 4650},
    {"epoch": 8.289007092198581, "grad_norm": 5.778509140014648, "learning_rate": 2.80421052631579e-06, "loss": 0.1025, "step": 4675},
    {"epoch": 8.333333333333334, "grad_norm": 5.977304458618164, "learning_rate": 2.7910526315789475e-06, "loss": 0.1021, "step": 4700},
    {"epoch": 8.377659574468085, "grad_norm": 5.274112701416016, "learning_rate": 2.7778947368421054e-06, "loss": 0.0935, "step": 4725},
    {"epoch": 8.421985815602836, "grad_norm": 5.418082237243652, "learning_rate": 2.7647368421052633e-06, "loss": 0.0965, "step": 4750},
    {"epoch": 8.46631205673759, "grad_norm": 5.248587131500244, "learning_rate": 2.7515789473684212e-06, "loss": 0.1014, "step": 4775},
    {"epoch": 8.51063829787234, "grad_norm": 5.329669952392578, "learning_rate": 2.738421052631579e-06, "loss": 0.1052, "step": 4800},
    {"epoch": 8.554964539007091, "grad_norm": 5.469305038452148, "learning_rate": 2.725263157894737e-06, "loss": 0.0988, "step": 4825},
    {"epoch": 8.599290780141844, "grad_norm": 4.839619159698486, "learning_rate": 2.712105263157895e-06, "loss": 0.1038, "step": 4850},
    {"epoch": 8.643617021276595, "grad_norm": 5.6988420486450195, "learning_rate": 2.6989473684210524e-06, "loss": 0.1005, "step": 4875},
    {"epoch": 8.687943262411348, "grad_norm": 6.122032165527344, "learning_rate": 2.6857894736842104e-06, "loss": 0.1071, "step": 4900},
    {"epoch": 8.7322695035461, "grad_norm": 5.00734806060791, "learning_rate": 2.672631578947369e-06, "loss": 0.1009, "step": 4925},
    {"epoch": 8.77659574468085, "grad_norm": 6.199928283691406, "learning_rate": 2.6594736842105266e-06, "loss": 0.1041, "step": 4950},
    {"epoch": 8.820921985815604, "grad_norm": 6.134685516357422, "learning_rate": 2.6463157894736845e-06, "loss": 0.1044, "step": 4975},
    {"epoch": 8.865248226950355, "grad_norm": 5.176562786102295, "learning_rate": 2.6331578947368424e-06, "loss": 0.1066, "step": 5000},
    {"epoch": 8.865248226950355, "eval_loss": 0.4846879839897156, "eval_runtime": 580.4904, "eval_samples_per_second": 6.712, "eval_steps_per_second": 0.839, "eval_wer": 37.96787634887283, "step": 5000},
    {"epoch": 8.909574468085106, "grad_norm": 5.835010051727295, "learning_rate": 2.6200000000000003e-06, "loss": 0.1001, "step": 5025},
    {"epoch": 8.953900709219859, "grad_norm": 6.407568454742432, "learning_rate": 2.6068421052631582e-06, "loss": 0.0965, "step": 5050},
    {"epoch": 8.99822695035461, "grad_norm": 6.197821617126465, "learning_rate": 2.593684210526316e-06, "loss": 0.1048, "step": 5075},
    {"epoch": 9.042553191489361, "grad_norm": 4.808340072631836, "learning_rate": 2.580526315789474e-06, "loss": 0.085, "step": 5100},
    {"epoch": 9.086879432624114, "grad_norm": 4.098535060882568, "learning_rate": 2.5673684210526315e-06, "loss": 0.0815, "step": 5125},
    {"epoch": 9.131205673758865, "grad_norm": 4.34876012802124, "learning_rate": 2.5542105263157894e-06, "loss": 0.0884, "step": 5150},
    {"epoch": 9.175531914893616, "grad_norm": 4.318136692047119, "learning_rate": 2.5410526315789474e-06, "loss": 0.0826, "step": 5175},
    {"epoch": 9.21985815602837, "grad_norm": 5.460968017578125, "learning_rate": 2.5278947368421053e-06, "loss": 0.0799, "step": 5200},
    {"epoch": 9.26418439716312, "grad_norm": 4.200242042541504, "learning_rate": 2.514736842105263e-06, "loss": 0.0835, "step": 5225},
    {"epoch": 9.308510638297872, "grad_norm": 5.984395503997803, "learning_rate": 2.501578947368421e-06, "loss": 0.0881, "step": 5250},
    {"epoch": 9.352836879432624, "grad_norm": 4.829773426055908, "learning_rate": 2.488421052631579e-06, "loss": 0.0786, "step": 5275},
    {"epoch": 9.397163120567376, "grad_norm": 3.996610403060913, "learning_rate": 2.475263157894737e-06, "loss": 0.0865, "step": 5300},
    {"epoch": 9.441489361702128, "grad_norm": 6.336328029632568, "learning_rate": 2.462105263157895e-06, "loss": 0.0802, "step": 5325},
    {"epoch": 9.48581560283688, "grad_norm": 5.054424285888672, "learning_rate": 2.448947368421053e-06, "loss": 0.0928, "step": 5350},
    {"epoch": 9.53014184397163, "grad_norm": 6.595405101776123, "learning_rate": 2.4357894736842106e-06, "loss": 0.0858, "step": 5375},
    {"epoch": 9.574468085106384, "grad_norm": 5.797497272491455, "learning_rate": 2.4226315789473685e-06, "loss": 0.0846, "step": 5400},
    {"epoch": 9.618794326241135, "grad_norm": 5.1372551918029785, "learning_rate": 2.4094736842105265e-06, "loss": 0.0789, "step": 5425},
    {"epoch": 9.663120567375886, "grad_norm": 6.663181304931641, "learning_rate": 2.3963157894736844e-06, "loss": 0.0977, "step": 5450},
    {"epoch": 9.707446808510639, "grad_norm": 4.910397529602051, "learning_rate": 2.3831578947368423e-06, "loss": 0.087, "step": 5475},
    {"epoch": 9.75177304964539, "grad_norm": 5.87327241897583, "learning_rate": 2.37e-06, "loss": 0.076, "step": 5500},
    {"epoch": 9.796099290780141, "grad_norm": 4.994716167449951, "learning_rate": 2.356842105263158e-06, "loss": 0.083, "step": 5525},
    {"epoch": 9.840425531914894, "grad_norm": 5.601754665374756, "learning_rate": 2.343684210526316e-06, "loss": 0.0819, "step": 5550},
    {"epoch": 9.884751773049645, "grad_norm": 5.767611980438232, "learning_rate": 2.330526315789474e-06, "loss": 0.0831, "step": 5575},
    {"epoch": 9.929078014184396, "grad_norm": 6.679659366607666, "learning_rate": 2.317368421052632e-06, "loss": 0.084, "step": 5600},
    {"epoch": 9.97340425531915, "grad_norm": 3.7785329818725586, "learning_rate": 2.3042105263157897e-06, "loss": 0.0894, "step": 5625},
    {"epoch": 10.0177304964539, "grad_norm": 4.858386516571045, "learning_rate": 2.2910526315789476e-06, "loss": 0.0817, "step": 5650},
    {"epoch": 10.062056737588652, "grad_norm": 3.9162485599517822, "learning_rate": 2.277894736842105e-06, "loss": 0.0669, "step": 5675},
    {"epoch": 10.106382978723405, "grad_norm": 5.168406963348389, "learning_rate": 2.2647368421052635e-06, "loss": 0.0677, "step": 5700},
    {"epoch": 10.150709219858156, "grad_norm": 3.850172758102417, "learning_rate": 2.2515789473684214e-06, "loss": 0.0687, "step": 5725},
    {"epoch": 10.195035460992909, "grad_norm": 3.854781150817871, "learning_rate": 2.2384210526315793e-06, "loss": 0.0734, "step": 5750},
    {"epoch": 10.23936170212766, "grad_norm": 3.807837724685669, "learning_rate": 2.225263157894737e-06, "loss": 0.0654, "step": 5775},
    {"epoch": 10.28368794326241, "grad_norm": 6.461479187011719, "learning_rate": 2.2121052631578947e-06, "loss": 0.0692, "step": 5800},
    {"epoch": 10.328014184397164, "grad_norm": 5.051649570465088, "learning_rate": 2.1989473684210526e-06, "loss": 0.0705, "step": 5825},
    {"epoch": 10.372340425531915, "grad_norm": 4.43517541885376, "learning_rate": 2.1857894736842105e-06, "loss": 0.0738, "step": 5850},
    {"epoch": 10.416666666666666, "grad_norm": 3.5894205570220947, "learning_rate": 2.172631578947369e-06, "loss": 0.0699, "step": 5875},
    {"epoch": 10.460992907801419, "grad_norm": 4.5283203125, "learning_rate": 2.1594736842105267e-06, "loss": 0.0688, "step": 5900},
    {"epoch": 10.50531914893617, "grad_norm": 3.9678380489349365, "learning_rate": 2.1463157894736842e-06, "loss": 0.0678, "step": 5925},
    {"epoch": 10.549645390070921, "grad_norm": 6.319568157196045, "learning_rate": 2.133157894736842e-06, "loss": 0.0716, "step": 5950},
    {"epoch": 10.593971631205674, "grad_norm": 4.849029541015625, "learning_rate": 2.12e-06, "loss": 0.0746, "step": 5975},
    {"epoch": 10.638297872340425, "grad_norm": 3.9395298957824707, "learning_rate": 2.106842105263158e-06, "loss": 0.0673, "step": 6000},
    {"epoch": 10.638297872340425, "eval_loss": 0.4972631335258484, "eval_runtime": 583.012, "eval_samples_per_second": 6.683, "eval_steps_per_second": 0.835, "eval_wer": 39.709391360091175, "step": 6000},
    {"epoch": 10.682624113475176, "grad_norm": 4.806758880615234, "learning_rate": 2.0936842105263163e-06, "loss": 0.069, "step": 6025},
    {"epoch": 10.72695035460993, "grad_norm": 5.572425842285156, "learning_rate": 2.0805263157894738e-06, "loss": 0.0699, "step": 6050},
    {"epoch": 10.77127659574468, "grad_norm": 4.8004302978515625, "learning_rate": 2.0673684210526317e-06, "loss": 0.0712, "step": 6075},
    {"epoch": 10.815602836879432, "grad_norm": 4.476444721221924, "learning_rate": 2.0547368421052633e-06, "loss": 0.0746, "step": 6100},
    {"epoch": 10.859929078014185, "grad_norm": 4.727671146392822, "learning_rate": 2.0415789473684213e-06, "loss": 0.073, "step": 6125},
    {"epoch": 10.904255319148936, "grad_norm": 4.828220367431641, "learning_rate": 2.028421052631579e-06, "loss": 0.0698, "step": 6150},
    {"epoch": 10.948581560283689, "grad_norm": 5.050329685211182, "learning_rate": 2.015263157894737e-06, "loss": 0.0705, "step": 6175},
    {"epoch": 10.99290780141844, "grad_norm": 4.282689094543457, "learning_rate": 2.002105263157895e-06, "loss": 0.0718, "step": 6200},
    {"epoch": 11.037234042553191, "grad_norm": 4.424275875091553, "learning_rate": 1.988947368421053e-06, "loss": 0.0636, "step": 6225},
    {"epoch": 11.081560283687944, "grad_norm": 3.237255573272705, "learning_rate": 1.975789473684211e-06, "loss": 0.0544, "step": 6250},
    {"epoch": 11.125886524822695, "grad_norm": 3.363708972930908, "learning_rate": 1.9626315789473683e-06, "loss": 0.0582, "step": 6275},
    {"epoch": 11.170212765957446, "grad_norm": 4.743597030639648, "learning_rate": 1.949473684210526e-06, "loss": 0.0567, "step": 6300},
    {"epoch": 11.214539007092199, "grad_norm": 4.212203502655029, "learning_rate": 1.9363157894736845e-06, "loss": 0.0643, "step": 6325},
    {"epoch": 11.25886524822695, "grad_norm": 3.580488443374634, "learning_rate": 1.9231578947368424e-06, "loss": 0.057, "step": 6350},
    {"epoch": 11.303191489361701, "grad_norm": 3.409921646118164, "learning_rate": 1.9100000000000003e-06, "loss": 0.0567, "step": 6375},
    {"epoch": 11.347517730496454, "grad_norm": 3.3070523738861084, "learning_rate": 1.896842105263158e-06, "loss": 0.0562, "step": 6400},
    {"epoch": 11.391843971631205, "grad_norm": 4.054013252258301, "learning_rate": 1.883684210526316e-06, "loss": 0.0645, "step": 6425},
    {"epoch": 11.436170212765958, "grad_norm": 3.5053253173828125, "learning_rate": 1.8705263157894737e-06, "loss": 0.0632, "step": 6450},
    {"epoch": 11.48049645390071, "grad_norm": 3.654541492462158, "learning_rate": 1.8573684210526316e-06, "loss": 0.0586, "step": 6475},
    {"epoch": 11.52482269503546, "grad_norm": 4.121072769165039, "learning_rate": 1.8442105263157897e-06, "loss": 0.061, "step": 6500},
    {"epoch": 11.569148936170214, "grad_norm": 4.159468173980713, "learning_rate": 1.8310526315789476e-06, "loss": 0.0611, "step": 6525},
    {"epoch": 11.613475177304965, "grad_norm": 4.2946672439575195, "learning_rate": 1.8178947368421055e-06, "loss": 0.0609, "step": 6550},
    {"epoch": 11.657801418439716, "grad_norm": 3.857961416244507, "learning_rate": 1.8047368421052632e-06, "loss": 0.0613, "step": 6575},
    {"epoch": 11.702127659574469, "grad_norm": 3.1902592182159424, "learning_rate": 1.7915789473684211e-06, "loss": 0.0607, "step": 6600},
    {"epoch": 11.74645390070922, "grad_norm": 5.016479015350342, "learning_rate": 1.778421052631579e-06, "loss": 0.0586, "step": 6625},
    {"epoch": 11.790780141843971, "grad_norm": 4.0299601554870605, "learning_rate": 1.7652631578947371e-06, "loss": 0.059, "step": 6650},
    {"epoch": 11.835106382978724, "grad_norm": 4.405561447143555, "learning_rate": 1.752105263157895e-06, "loss": 0.0623, "step": 6675},
    {"epoch": 11.879432624113475, "grad_norm": 3.684788465499878, "learning_rate": 1.7389473684210527e-06, "loss": 0.0614, "step": 6700},
    {"epoch": 11.923758865248226, "grad_norm": 4.335251331329346, "learning_rate": 1.7257894736842107e-06, "loss": 0.0581, "step": 6725},
    {"epoch": 11.96808510638298, "grad_norm": 4.67876672744751, "learning_rate": 1.7126315789473686e-06, "loss": 0.0569, "step": 6750},
    {"epoch": 12.01241134751773, "grad_norm": 5.14631462097168, "learning_rate": 1.6994736842105265e-06, "loss": 0.0548, "step": 6775},
    {"epoch": 12.056737588652481, "grad_norm": 3.651719331741333, "learning_rate": 1.6863157894736842e-06, "loss": 0.0514, "step": 6800},
    {"epoch": 12.101063829787234, "grad_norm": 3.455418109893799, "learning_rate": 1.6731578947368423e-06, "loss": 0.0534, "step": 6825},
    {"epoch": 12.145390070921986, "grad_norm": 3.9486734867095947, "learning_rate": 1.6600000000000002e-06, "loss": 0.0495, "step": 6850},
    {"epoch": 12.189716312056738, "grad_norm": 2.9897313117980957, "learning_rate": 1.6468421052631581e-06, "loss": 0.051, "step": 6875},
    {"epoch": 12.23404255319149, "grad_norm": 4.208747863769531, "learning_rate": 1.633684210526316e-06, "loss": 0.0486, "step": 6900},
    {"epoch": 12.27836879432624, "grad_norm": 3.3527841567993164, "learning_rate": 1.6205263157894737e-06, "loss": 0.0524, "step": 6925},
    {"epoch": 12.322695035460994, "grad_norm": 3.6749916076660156, "learning_rate": 1.6073684210526316e-06, "loss": 0.0577, "step": 6950},
    {"epoch": 12.367021276595745, "grad_norm": 3.266439437866211, "learning_rate": 1.5942105263157895e-06, "loss": 0.0491, "step": 6975},
    {"epoch": 12.411347517730496, "grad_norm": 3.9574387073516846, "learning_rate": 1.5810526315789477e-06, "loss": 0.0537, "step": 7000},
    {"epoch": 12.411347517730496, "eval_loss": 0.5095303654670715, "eval_runtime": 586.2939, "eval_samples_per_second": 6.645, "eval_steps_per_second": 0.831, "eval_wer": 41.29064425371274, "step": 7000},
    {"epoch": 12.455673758865249, "grad_norm": 4.798894882202148, "learning_rate": 1.5678947368421056e-06, "loss": 0.0533, "step": 7025},
    {"epoch": 12.5, "grad_norm": 3.0210700035095215, "learning_rate": 1.5547368421052633e-06, "loss": 0.0483, "step": 7050},
    {"epoch": 12.544326241134751, "grad_norm": 4.632834434509277, "learning_rate": 1.5415789473684212e-06, "loss": 0.0509, "step": 7075},
    {"epoch": 12.588652482269504, "grad_norm": 4.397753715515137, "learning_rate": 1.528421052631579e-06, "loss": 0.0515, "step": 7100},
    {"epoch": 12.632978723404255, "grad_norm": 2.9680283069610596, "learning_rate": 1.5152631578947368e-06, "loss": 0.0475, "step": 7125},
    {"epoch": 12.677304964539006, "grad_norm": 3.9441206455230713, "learning_rate": 1.5021052631578947e-06, "loss": 0.055, "step": 7150},
    {"epoch": 12.72163120567376, "grad_norm": 3.183037519454956, "learning_rate": 1.4889473684210528e-06, "loss": 0.0522, "step": 7175},
    {"epoch": 12.76595744680851, "grad_norm": 3.4659500122070312, "learning_rate": 1.4757894736842107e-06, "loss": 0.0477, "step": 7200},
    {"epoch": 12.810283687943262, "grad_norm": 3.1689703464508057, "learning_rate": 1.4626315789473686e-06, "loss": 0.0512, "step": 7225},
    {"epoch": 12.854609929078014, "grad_norm": 5.009653568267822, "learning_rate": 1.4494736842105263e-06, "loss": 0.0487, "step": 7250},
    {"epoch": 12.898936170212766, "grad_norm": 3.3407084941864014, "learning_rate": 1.4363157894736842e-06, "loss": 0.0489, "step": 7275},
    {"epoch": 12.943262411347519, "grad_norm": 4.140749454498291, "learning_rate": 1.4231578947368421e-06, "loss": 0.047, "step": 7300},
    {"epoch": 12.98758865248227, "grad_norm": 3.0766468048095703, "learning_rate": 1.41e-06, "loss": 0.052, "step": 7325},
    {"epoch": 13.03191489361702, "grad_norm": 3.058790683746338, "learning_rate": 1.3968421052631582e-06, "loss": 0.0445, "step": 7350},
    {"epoch": 13.076241134751774, "grad_norm": 2.4315567016601562, "learning_rate": 1.3836842105263159e-06, "loss": 0.0447, "step": 7375},
    {"epoch": 13.120567375886525, "grad_norm": 2.692753314971924, "learning_rate": 1.3705263157894738e-06, "loss": 0.0398, "step": 7400},
    {"epoch": 13.164893617021276, "grad_norm": 3.2242069244384766, "learning_rate": 1.3573684210526317e-06, "loss": 0.0425, "step": 7425},
    {"epoch": 13.209219858156029, "grad_norm": 3.640981674194336, "learning_rate": 1.3442105263157896e-06, "loss": 0.043, "step": 7450},
    {"epoch": 13.25354609929078, "grad_norm": 2.966660261154175, "learning_rate": 1.3310526315789473e-06, "loss": 0.0422, "step": 7475},
    {"epoch": 13.297872340425531, "grad_norm": 2.7896602153778076, "learning_rate": 1.3178947368421054e-06, "loss": 0.044, "step": 7500},
    {"epoch": 13.342198581560284, "grad_norm": 3.0664894580841064, "learning_rate": 1.3047368421052633e-06, "loss": 0.0432, "step": 7525},
    {"epoch": 13.386524822695035, "grad_norm": 2.7736198902130127, "learning_rate": 1.2915789473684212e-06, "loss": 0.0416, "step": 7550},
    {"epoch": 13.430851063829786, "grad_norm": 3.5016989707946777, "learning_rate": 1.2784210526315791e-06, "loss": 0.0481, "step": 7575},
    {"epoch": 13.47517730496454, "grad_norm": 3.0631349086761475, "learning_rate": 1.2652631578947368e-06, "loss": 0.0441, "step": 7600},
    {"epoch": 13.51950354609929, "grad_norm": 3.7912166118621826, "learning_rate": 1.2521052631578948e-06, "loss": 0.047, "step": 7625},
    {"epoch": 13.563829787234042, "grad_norm": 3.7112090587615967, "learning_rate": 1.2389473684210527e-06, "loss": 0.0442, "step": 7650},
    {"epoch": 13.608156028368795, "grad_norm": 4.523186206817627, "learning_rate": 1.2257894736842106e-06, "loss": 0.0458, "step": 7675},
    {"epoch": 13.652482269503546, "grad_norm": 3.0612213611602783, "learning_rate": 1.2126315789473685e-06, "loss": 0.0461, "step": 7700},
    {"epoch": 13.696808510638299, "grad_norm": 2.902688980102539, "learning_rate": 1.1994736842105264e-06, "loss": 0.0446, "step": 7725},
    {"epoch": 13.74113475177305, "grad_norm": 2.876624822616577, "learning_rate": 1.1863157894736843e-06, "loss": 0.0433, "step": 7750},
    {"epoch": 13.785460992907801, "grad_norm": 3.696685314178467, "learning_rate": 1.1731578947368422e-06, "loss": 0.0464, "step": 7775},
    {"epoch": 13.829787234042554, "grad_norm": 3.2120165824890137, "learning_rate": 1.1600000000000001e-06, "loss": 0.0444, "step": 7800},
    {"epoch": 13.874113475177305, "grad_norm": 3.735292434692383, "learning_rate": 1.146842105263158e-06, "loss": 0.0417, "step": 7825},
    {"epoch": 13.918439716312056, "grad_norm": 3.8104641437530518, "learning_rate": 1.133684210526316e-06, "loss": 0.045, "step": 7850},
    {"epoch": 13.962765957446809, "grad_norm": 3.321183919906616, "learning_rate": 1.1205263157894736e-06, "loss": 0.046, "step": 7875},
    {"epoch": 14.00709219858156, "grad_norm": 3.0110223293304443, "learning_rate": 1.1073684210526318e-06, "loss": 0.0452, "step": 7900},
    {"epoch": 14.051418439716311, "grad_norm": 2.797724485397339, "learning_rate": 1.0942105263157895e-06, "loss": 0.0384, "step": 7925},
    {"epoch": 14.095744680851064, "grad_norm": 2.8559882640838623, "learning_rate": 1.0810526315789474e-06, "loss": 0.039, "step": 7950},
    {"epoch": 14.140070921985815, "grad_norm": 3.0210611820220947, "learning_rate": 1.0678947368421055e-06, "loss": 0.0359, "step": 7975},
    {"epoch": 14.184397163120567, "grad_norm": 3.4683313369750977, "learning_rate": 1.0547368421052632e-06, "loss": 0.0393, "step": 8000},
    {"epoch": 14.184397163120567, "eval_loss": 0.5176098942756653, "eval_runtime": 588.4879, "eval_samples_per_second": 6.62, "eval_steps_per_second": 0.828, "eval_wer": 41.376117383097686, "step": 8000},
    {"epoch": 14.22872340425532, "grad_norm": 3.2651422023773193, "learning_rate": 1.041578947368421e-06, "loss": 0.039, "step": 8025},
    {"epoch": 14.27304964539007, "grad_norm": 3.2940969467163086, "learning_rate": 1.028421052631579e-06, "loss": 0.0393, "step": 8050},
    {"epoch": 14.317375886524824, "grad_norm": 3.090914011001587, "learning_rate": 1.015263157894737e-06, "loss": 0.039, "step": 8075},
    {"epoch": 14.361702127659575, "grad_norm": 3.464435577392578, "learning_rate": 1.0021052631578948e-06, "loss": 0.0395, "step": 8100},
    {"epoch": 14.406028368794326, "grad_norm": 2.341763734817505, "learning_rate": 9.889473684210527e-07, "loss": 0.0392, "step": 8125},
    {"epoch": 14.450354609929079, "grad_norm": 2.7853071689605713, "learning_rate": 9.757894736842106e-07, "loss": 0.0399, "step": 8150},
    {"epoch": 14.49468085106383, "grad_norm": 3.5469071865081787, "learning_rate": 9.626315789473685e-07, "loss": 0.0404, "step": 8175},
    {"epoch": 14.539007092198581, "grad_norm": 3.5632236003875732, "learning_rate": 9.494736842105263e-07, "loss": 0.0409, "step": 8200},
    {"epoch": 14.583333333333334, "grad_norm": 2.9529061317443848, "learning_rate": 9.363157894736844e-07, "loss": 0.0411, "step": 8225},
    {"epoch": 14.627659574468085, "grad_norm": 2.856344223022461, "learning_rate": 9.231578947368422e-07, "loss": 0.0384, "step": 8250},
    {"epoch": 14.671985815602836, "grad_norm": 3.559720039367676, "learning_rate": 9.100000000000001e-07, "loss": 0.039, "step": 8275},
    {"epoch": 14.71631205673759, "grad_norm": 3.8412675857543945, "learning_rate": 8.968421052631579e-07, "loss": 0.0361, "step": 8300},
    {"epoch": 14.76063829787234, "grad_norm": 3.8791191577911377, "learning_rate": 8.836842105263159e-07, "loss": 0.0357, "step": 8325},
    {"epoch": 14.804964539007091, "grad_norm": 4.187379837036133, "learning_rate": 8.705263157894737e-07, "loss": 0.0428, "step": 8350},
    {"epoch": 14.849290780141844, "grad_norm": 4.432793617248535, "learning_rate": 8.573684210526316e-07, "loss": 0.0375, "step": 8375},
    {"epoch": 14.893617021276595, "grad_norm": 3.823516368865967, "learning_rate": 8.442105263157896e-07, "loss": 0.041, "step": 8400},
    {"epoch": 14.937943262411348, "grad_norm": 3.2699050903320312, "learning_rate": 8.310526315789474e-07, "loss": 0.0399, "step": 8425},
    {"epoch": 14.9822695035461, "grad_norm": 2.571930408477783, "learning_rate": 8.178947368421053e-07, "loss": 0.0405, "step": 8450},
    {"epoch": 15.02659574468085, "grad_norm": 2.3387529850006104, "learning_rate": 8.047368421052632e-07, "loss": 0.0367, "step": 8475},
    {"epoch": 15.070921985815604, "grad_norm": 3.301847219467163, "learning_rate": 7.915789473684212e-07, "loss": 0.0347, "step": 8500},
    {"epoch": 15.115248226950355, "grad_norm": 3.649311065673828, "learning_rate": 7.78421052631579e-07, "loss": 0.0368, "step": 8525},
    {"epoch": 15.159574468085106, "grad_norm": 2.7183964252471924, "learning_rate": 7.652631578947369e-07, "loss": 0.0357, "step": 8550},
    {"epoch": 15.203900709219859, "grad_norm": 4.076670169830322, "learning_rate": 7.521052631578949e-07, "loss": 0.0345, "step": 8575},
    {"epoch": 15.24822695035461, "grad_norm": 2.5695323944091797, "learning_rate": 7.389473684210527e-07, "loss": 0.0332, "step": 8600},
    {"epoch": 15.292553191489361, "grad_norm": 3.0496561527252197, "learning_rate": 7.257894736842106e-07, "loss": 0.0352, "step": 8625},
    {"epoch": 15.336879432624114, "grad_norm": 2.6376793384552, "learning_rate": 7.126315789473685e-07, "loss": 0.0381, "step": 8650},
    {"epoch": 15.381205673758865, "grad_norm": 2.617739200592041, "learning_rate": 6.994736842105264e-07, "loss": 0.0382, "step": 8675},
    {"epoch": 15.425531914893616, "grad_norm": 1.8115471601486206, "learning_rate": 6.863157894736842e-07, "loss": 0.0358, "step": 8700},
    {"epoch": 15.46985815602837, "grad_norm": 2.169344902038574, "learning_rate": 6.731578947368421e-07, "loss": 0.0336, "step": 8725},
    {"epoch": 15.51418439716312, "grad_norm": 2.6424083709716797, "learning_rate": 6.6e-07, "loss": 0.0358, "step": 8750},
    {"epoch": 15.558510638297872, "grad_norm": 3.1048264503479004, "learning_rate": 6.468421052631579e-07, "loss": 0.0351, "step": 8775},
    {"epoch": 15.602836879432624, "grad_norm": 2.3110456466674805, "learning_rate": 6.336842105263157e-07, "loss": 0.0338, "step": 8800},
    {"epoch": 15.647163120567376, "grad_norm": 3.729184150695801, "learning_rate": 6.205263157894738e-07, "loss": 0.0355, "step": 8825},
    {"epoch": 15.691489361702128, "grad_norm": 2.4826672077178955, "learning_rate": 6.073684210526317e-07, "loss": 0.0314, "step": 8850},
    {"epoch": 15.73581560283688, "grad_norm": 2.23388934135437, "learning_rate": 5.942105263157895e-07, "loss": 0.0369, "step": 8875},
    {"epoch": 15.78014184397163, "grad_norm": 2.6081252098083496, "learning_rate": 5.810526315789474e-07, "loss": 0.0335, "step": 8900},
    {"epoch": 15.824468085106384, "grad_norm": 2.6870105266571045, "learning_rate": 5.678947368421053e-07, "loss": 0.0355, "step": 8925},
    {"epoch": 15.868794326241135, "grad_norm": 2.663280725479126, "learning_rate": 5.547368421052632e-07, "loss": 0.0386, "step": 8950},
    {"epoch": 15.913120567375886, "grad_norm": 2.3229103088378906, "learning_rate": 5.415789473684211e-07, "loss": 0.0343, "step": 8975},
    {"epoch": 15.957446808510639, "grad_norm": 2.0813558101654053, "learning_rate": 5.284210526315789e-07,
|
"loss": 0.0354, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 15.957446808510639, |
|
"eval_loss": 0.525884747505188, |
|
"eval_runtime": 589.6497, |
|
"eval_samples_per_second": 6.607, |
|
"eval_steps_per_second": 0.826, |
|
"eval_wer": 42.95380889632822, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 16.00177304964539, |
|
"grad_norm": 2.0741195678710938, |
|
"learning_rate": 5.152631578947369e-07, |
|
"loss": 0.0357, |
|
"step": 9025 |
|
}, |
|
{ |
|
"epoch": 16.04609929078014, |
|
"grad_norm": 2.2553582191467285, |
|
"learning_rate": 5.021052631578948e-07, |
|
"loss": 0.0348, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 16.090425531914892, |
|
"grad_norm": 2.8926563262939453, |
|
"learning_rate": 4.889473684210526e-07, |
|
"loss": 0.0329, |
|
"step": 9075 |
|
}, |
|
{ |
|
"epoch": 16.134751773049647, |
|
"grad_norm": 2.0093374252319336, |
|
"learning_rate": 4.757894736842106e-07, |
|
"loss": 0.0316, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 16.179078014184398, |
|
"grad_norm": 2.607196569442749, |
|
"learning_rate": 4.626315789473684e-07, |
|
"loss": 0.0343, |
|
"step": 9125 |
|
}, |
|
{ |
|
"epoch": 16.22340425531915, |
|
"grad_norm": 3.0515787601470947, |
|
"learning_rate": 4.4947368421052637e-07, |
|
"loss": 0.0337, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 16.2677304964539, |
|
"grad_norm": 2.6530652046203613, |
|
"learning_rate": 4.363157894736843e-07, |
|
"loss": 0.0328, |
|
"step": 9175 |
|
}, |
|
{ |
|
"epoch": 16.31205673758865, |
|
"grad_norm": 2.286144733428955, |
|
"learning_rate": 4.2315789473684214e-07, |
|
"loss": 0.0314, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 16.356382978723403, |
|
"grad_norm": 1.9126091003417969, |
|
"learning_rate": 4.1000000000000004e-07, |
|
"loss": 0.0337, |
|
"step": 9225 |
|
}, |
|
{ |
|
"epoch": 16.400709219858157, |
|
"grad_norm": 2.4638893604278564, |
|
"learning_rate": 3.968421052631579e-07, |
|
"loss": 0.0326, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 16.44503546099291, |
|
"grad_norm": 1.9740127325057983, |
|
"learning_rate": 3.836842105263158e-07, |
|
"loss": 0.0316, |
|
"step": 9275 |
|
}, |
|
{ |
|
"epoch": 16.48936170212766, |
|
"grad_norm": 2.466771125793457, |
|
"learning_rate": 3.7052631578947377e-07, |
|
"loss": 0.0323, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 16.53368794326241, |
|
"grad_norm": 3.461355686187744, |
|
"learning_rate": 3.573684210526316e-07, |
|
"loss": 0.0329, |
|
"step": 9325 |
|
}, |
|
{ |
|
"epoch": 16.578014184397162, |
|
"grad_norm": 4.0049662590026855, |
|
"learning_rate": 3.4421052631578954e-07, |
|
"loss": 0.0307, |
|
"step": 9350 |
|
}, |
|
{ |
|
"epoch": 16.622340425531917, |
|
"grad_norm": 3.518848419189453, |
|
"learning_rate": 3.310526315789474e-07, |
|
"loss": 0.0303, |
|
"step": 9375 |
|
}, |
|
{ |
|
"epoch": 16.666666666666668, |
|
"grad_norm": 2.83296799659729, |
|
"learning_rate": 3.178947368421053e-07, |
|
"loss": 0.0357, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 16.71099290780142, |
|
"grad_norm": 2.5147087574005127, |
|
"learning_rate": 3.0473684210526316e-07, |
|
"loss": 0.0328, |
|
"step": 9425 |
|
}, |
|
{ |
|
"epoch": 16.75531914893617, |
|
"grad_norm": 3.1541314125061035, |
|
"learning_rate": 2.9157894736842107e-07, |
|
"loss": 0.0344, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 16.79964539007092, |
|
"grad_norm": 2.6284327507019043, |
|
"learning_rate": 2.78421052631579e-07, |
|
"loss": 0.0317, |
|
"step": 9475 |
|
}, |
|
{ |
|
"epoch": 16.843971631205672, |
|
"grad_norm": 2.6868457794189453, |
|
"learning_rate": 2.6526315789473684e-07, |
|
"loss": 0.0336, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 16.888297872340427, |
|
"grad_norm": 1.9752613306045532, |
|
"learning_rate": 2.5210526315789474e-07, |
|
"loss": 0.0321, |
|
"step": 9525 |
|
}, |
|
{ |
|
"epoch": 16.93262411347518, |
|
"grad_norm": 2.544431447982788, |
|
"learning_rate": 2.3894736842105265e-07, |
|
"loss": 0.0334, |
|
"step": 9550 |
|
}, |
|
{ |
|
"epoch": 16.97695035460993, |
|
"grad_norm": 3.398198366165161, |
|
"learning_rate": 2.2578947368421054e-07, |
|
"loss": 0.0342, |
|
"step": 9575 |
|
}, |
|
{ |
|
"epoch": 17.02127659574468, |
|
"grad_norm": 2.5984408855438232, |
|
"learning_rate": 2.1263157894736842e-07, |
|
"loss": 0.0341, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 17.06560283687943, |
|
"grad_norm": 2.4616684913635254, |
|
"learning_rate": 1.9947368421052633e-07, |
|
"loss": 0.0297, |
|
"step": 9625 |
|
}, |
|
{ |
|
"epoch": 17.109929078014183, |
|
"grad_norm": 1.7951653003692627, |
|
"learning_rate": 1.8631578947368424e-07, |
|
"loss": 0.0315, |
|
"step": 9650 |
|
}, |
|
{ |
|
"epoch": 17.154255319148938, |
|
"grad_norm": 2.757528066635132, |
|
"learning_rate": 1.7315789473684212e-07, |
|
"loss": 0.0315, |
|
"step": 9675 |
|
}, |
|
{ |
|
"epoch": 17.19858156028369, |
|
"grad_norm": 2.636103868484497, |
|
"learning_rate": 1.6e-07, |
|
"loss": 0.034, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 17.24290780141844, |
|
"grad_norm": 2.6068308353424072, |
|
"learning_rate": 1.468421052631579e-07, |
|
"loss": 0.0318, |
|
"step": 9725 |
|
}, |
|
{ |
|
"epoch": 17.28723404255319, |
|
"grad_norm": 2.8780813217163086, |
|
"learning_rate": 1.3368421052631582e-07, |
|
"loss": 0.0291, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 17.331560283687942, |
|
"grad_norm": 2.056938409805298, |
|
"learning_rate": 1.205263157894737e-07, |
|
"loss": 0.0312, |
|
"step": 9775 |
|
}, |
|
{ |
|
"epoch": 17.375886524822697, |
|
"grad_norm": 1.919609785079956, |
|
"learning_rate": 1.0736842105263159e-07, |
|
"loss": 0.0301, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 17.420212765957448, |
|
"grad_norm": 2.8991692066192627, |
|
"learning_rate": 9.421052631578948e-08, |
|
"loss": 0.0325, |
|
"step": 9825 |
|
}, |
|
{ |
|
"epoch": 17.4645390070922, |
|
"grad_norm": 3.554067611694336, |
|
"learning_rate": 8.105263157894738e-08, |
|
"loss": 0.0325, |
|
"step": 9850 |
|
}, |
|
{ |
|
"epoch": 17.50886524822695, |
|
"grad_norm": 2.3162522315979004, |
|
"learning_rate": 6.789473684210528e-08, |
|
"loss": 0.0295, |
|
"step": 9875 |
|
}, |
|
{ |
|
"epoch": 17.5531914893617, |
|
"grad_norm": 1.9810761213302612, |
|
"learning_rate": 5.473684210526316e-08, |
|
"loss": 0.0316, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 17.597517730496453, |
|
"grad_norm": 2.424508571624756, |
|
"learning_rate": 4.1578947368421054e-08, |
|
"loss": 0.0299, |
|
"step": 9925 |
|
}, |
|
{ |
|
"epoch": 17.641843971631207, |
|
"grad_norm": 3.3512706756591797, |
|
"learning_rate": 2.842105263157895e-08, |
|
"loss": 0.0316, |
|
"step": 9950 |
|
}, |
|
{ |
|
"epoch": 17.68617021276596, |
|
"grad_norm": 2.7042930126190186, |
|
"learning_rate": 1.5263157894736843e-08, |
|
"loss": 0.0308, |
|
"step": 9975 |
|
}, |
|
{ |
|
"epoch": 17.73049645390071, |
|
"grad_norm": 2.975961685180664, |
|
"learning_rate": 2.105263157894737e-09, |
|
"loss": 0.0317, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 17.73049645390071, |
|
"eval_loss": 0.5293700695037842, |
|
"eval_runtime": 590.1218, |
|
"eval_samples_per_second": 6.602, |
|
"eval_steps_per_second": 0.825, |
|
"eval_wer": 44.56711421346914, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 17.73049645390071, |
|
"step": 10000, |
|
"total_flos": 3.93818457710592e+18, |
|
"train_loss": 0.1984061565220356, |
|
"train_runtime": 10260.2689, |
|
"train_samples_per_second": 15.594, |
|
"train_steps_per_second": 0.975 |
|
}, |
|
{ |
|
"epoch": 17.73049645390071, |
|
"eval_loss": 0.4848020076751709, |
|
"eval_runtime": 614.3795, |
|
"eval_samples_per_second": 6.341, |
|
"eval_steps_per_second": 0.793, |
|
"eval_wer": 37.98212187043698, |
|
"step": 10000 |
|
} |
|
], |
|
"logging_steps": 25, |
|
"max_steps": 10000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 18, |
|
"save_steps": 1000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3.93818457710592e+18, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|