|
{ |
|
"best_global_step": 10000, |
|
"best_metric": 0.2586507557925852, |
|
"best_model_checkpoint": "/home/cluster-dgxa100/slp01/bagas-fine-tune-whisper/whisper-tiny-javanese-openslr-v3/checkpoint-10000", |
|
"epoch": 1.0807262117036798, |
|
"eval_steps": 500, |
|
"global_step": 10000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01080672178094775, |
|
"grad_norm": 25.346445083618164, |
|
"learning_rate": 1.94e-06, |
|
"loss": 3.5433, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.0216134435618955, |
|
"grad_norm": 21.656307220458984, |
|
"learning_rate": 3.94e-06, |
|
"loss": 2.0264, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.03242016534284325, |
|
"grad_norm": 18.657211303710938, |
|
"learning_rate": 5.94e-06, |
|
"loss": 1.5688, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.043226887123791, |
|
"grad_norm": 16.42237663269043, |
|
"learning_rate": 7.94e-06, |
|
"loss": 1.3214, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.054033608904738746, |
|
"grad_norm": 18.631206512451172, |
|
"learning_rate": 9.940000000000001e-06, |
|
"loss": 1.1788, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.054033608904738746, |
|
"eval_loss": 0.967095136642456, |
|
"eval_runtime": 5770.5819, |
|
"eval_samples_per_second": 3.207, |
|
"eval_steps_per_second": 0.802, |
|
"eval_wer": 0.6590292385770924, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.0648403306856865, |
|
"grad_norm": 15.337555885314941, |
|
"learning_rate": 1.1940000000000001e-05, |
|
"loss": 1.0627, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.07564705246663425, |
|
"grad_norm": 14.623177528381348, |
|
"learning_rate": 1.394e-05, |
|
"loss": 0.9632, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.086453774247582, |
|
"grad_norm": 17.126712799072266, |
|
"learning_rate": 1.5940000000000003e-05, |
|
"loss": 0.906, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.09726049602852975, |
|
"grad_norm": 16.75067710876465, |
|
"learning_rate": 1.794e-05, |
|
"loss": 0.8503, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.10806721780947749, |
|
"grad_norm": 14.265076637268066, |
|
"learning_rate": 1.9940000000000002e-05, |
|
"loss": 0.8015, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.10806721780947749, |
|
"eval_loss": 0.6976613402366638, |
|
"eval_runtime": 5463.0331, |
|
"eval_samples_per_second": 3.387, |
|
"eval_steps_per_second": 0.847, |
|
"eval_wer": 0.5304858499049883, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.11887393959042525, |
|
"grad_norm": 13.737130165100098, |
|
"learning_rate": 1.9784444444444446e-05, |
|
"loss": 0.7589, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.129680661371373, |
|
"grad_norm": 18.01378631591797, |
|
"learning_rate": 1.9562222222222225e-05, |
|
"loss": 0.7589, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.14048738315232073, |
|
"grad_norm": 11.696120262145996, |
|
"learning_rate": 1.934e-05, |
|
"loss": 0.7087, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.1512941049332685, |
|
"grad_norm": 13.419560432434082, |
|
"learning_rate": 1.911777777777778e-05, |
|
"loss": 0.683, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.16210082671421625, |
|
"grad_norm": 12.753211975097656, |
|
"learning_rate": 1.8895555555555557e-05, |
|
"loss": 0.6498, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.16210082671421625, |
|
"eval_loss": 0.5724753737449646, |
|
"eval_runtime": 4564.4621, |
|
"eval_samples_per_second": 4.054, |
|
"eval_steps_per_second": 1.013, |
|
"eval_wer": 0.6670133485560569, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.172907548495164, |
|
"grad_norm": 11.64907455444336, |
|
"learning_rate": 1.8673333333333333e-05, |
|
"loss": 0.6216, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.18371427027611173, |
|
"grad_norm": 13.781865119934082, |
|
"learning_rate": 1.8451111111111113e-05, |
|
"loss": 0.6138, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.1945209920570595, |
|
"grad_norm": 12.58388900756836, |
|
"learning_rate": 1.822888888888889e-05, |
|
"loss": 0.595, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.20532771383800724, |
|
"grad_norm": 14.661055564880371, |
|
"learning_rate": 1.8006666666666668e-05, |
|
"loss": 0.5938, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.21613443561895498, |
|
"grad_norm": 11.948161125183105, |
|
"learning_rate": 1.7784444444444448e-05, |
|
"loss": 0.5828, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.21613443561895498, |
|
"eval_loss": 0.5093731880187988, |
|
"eval_runtime": 5328.6402, |
|
"eval_samples_per_second": 3.473, |
|
"eval_steps_per_second": 0.868, |
|
"eval_wer": 0.4828939857208768, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.22694115739990273, |
|
"grad_norm": 12.322188377380371, |
|
"learning_rate": 1.7562222222222224e-05, |
|
"loss": 0.5752, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.2377478791808505, |
|
"grad_norm": 17.046159744262695, |
|
"learning_rate": 1.734e-05, |
|
"loss": 0.5663, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.24855460096179824, |
|
"grad_norm": 10.154263496398926, |
|
"learning_rate": 1.711777777777778e-05, |
|
"loss": 0.537, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.259361322742746, |
|
"grad_norm": 11.958285331726074, |
|
"learning_rate": 1.6895555555555556e-05, |
|
"loss": 0.5246, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.27016804452369375, |
|
"grad_norm": 10.264266014099121, |
|
"learning_rate": 1.6673333333333335e-05, |
|
"loss": 0.5226, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.27016804452369375, |
|
"eval_loss": 0.46415480971336365, |
|
"eval_runtime": 4645.745, |
|
"eval_samples_per_second": 3.983, |
|
"eval_steps_per_second": 0.996, |
|
"eval_wer": 0.38602898052064843, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.28097476630464147, |
|
"grad_norm": 12.049257278442383, |
|
"learning_rate": 1.6451111111111115e-05, |
|
"loss": 0.493, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.29178148808558924, |
|
"grad_norm": 9.821508407592773, |
|
"learning_rate": 1.622888888888889e-05, |
|
"loss": 0.5153, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.302588209866537, |
|
"grad_norm": 10.481095314025879, |
|
"learning_rate": 1.6006666666666667e-05, |
|
"loss": 0.5, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.3133949316474847, |
|
"grad_norm": 10.193309783935547, |
|
"learning_rate": 1.5784444444444447e-05, |
|
"loss": 0.5248, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.3242016534284325, |
|
"grad_norm": 12.328668594360352, |
|
"learning_rate": 1.5562222222222223e-05, |
|
"loss": 0.4955, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.3242016534284325, |
|
"eval_loss": 0.4340818226337433, |
|
"eval_runtime": 4456.9484, |
|
"eval_samples_per_second": 4.152, |
|
"eval_steps_per_second": 1.038, |
|
"eval_wer": 0.39154200455117727, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.33500837520938026, |
|
"grad_norm": 12.583343505859375, |
|
"learning_rate": 1.5340000000000002e-05, |
|
"loss": 0.5082, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.345815096990328, |
|
"grad_norm": 8.40932846069336, |
|
"learning_rate": 1.511777777777778e-05, |
|
"loss": 0.4905, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.35662181877127574, |
|
"grad_norm": 14.150980949401855, |
|
"learning_rate": 1.4895555555555556e-05, |
|
"loss": 0.466, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.36742854055222346, |
|
"grad_norm": 13.014771461486816, |
|
"learning_rate": 1.4673333333333336e-05, |
|
"loss": 0.4788, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.37823526233317123, |
|
"grad_norm": 11.843710899353027, |
|
"learning_rate": 1.4451111111111112e-05, |
|
"loss": 0.4616, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.37823526233317123, |
|
"eval_loss": 0.4127795398235321, |
|
"eval_runtime": 4528.1925, |
|
"eval_samples_per_second": 4.086, |
|
"eval_steps_per_second": 1.022, |
|
"eval_wer": 0.35399088200564593, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.389041984114119, |
|
"grad_norm": 11.520469665527344, |
|
"learning_rate": 1.422888888888889e-05, |
|
"loss": 0.4695, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.3998487058950667, |
|
"grad_norm": 10.21032428741455, |
|
"learning_rate": 1.400666666666667e-05, |
|
"loss": 0.47, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.4106554276760145, |
|
"grad_norm": 9.393896102905273, |
|
"learning_rate": 1.3784444444444445e-05, |
|
"loss": 0.4656, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.42146214945696225, |
|
"grad_norm": 10.503016471862793, |
|
"learning_rate": 1.3562222222222223e-05, |
|
"loss": 0.4446, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.43226887123790997, |
|
"grad_norm": 10.747596740722656, |
|
"learning_rate": 1.3340000000000001e-05, |
|
"loss": 0.4474, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.43226887123790997, |
|
"eval_loss": 0.3900074064731598, |
|
"eval_runtime": 4858.8536, |
|
"eval_samples_per_second": 3.808, |
|
"eval_steps_per_second": 0.952, |
|
"eval_wer": 0.36136504038974343, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.44307559301885774, |
|
"grad_norm": 13.275285720825195, |
|
"learning_rate": 1.3117777777777779e-05, |
|
"loss": 0.4488, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.45388231479980545, |
|
"grad_norm": 11.318832397460938, |
|
"learning_rate": 1.2897777777777778e-05, |
|
"loss": 0.4292, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.4646890365807532, |
|
"grad_norm": 10.3064546585083, |
|
"learning_rate": 1.2675555555555557e-05, |
|
"loss": 0.4302, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.475495758361701, |
|
"grad_norm": 11.634562492370605, |
|
"learning_rate": 1.2453333333333335e-05, |
|
"loss": 0.426, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.4863024801426487, |
|
"grad_norm": 10.647918701171875, |
|
"learning_rate": 1.2231111111111111e-05, |
|
"loss": 0.4387, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.4863024801426487, |
|
"eval_loss": 0.37359631061553955, |
|
"eval_runtime": 4990.4878, |
|
"eval_samples_per_second": 3.708, |
|
"eval_steps_per_second": 0.927, |
|
"eval_wer": 0.35633684967821144, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.4971092019235965, |
|
"grad_norm": 9.396610260009766, |
|
"learning_rate": 1.200888888888889e-05, |
|
"loss": 0.4195, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.5079159237045442, |
|
"grad_norm": 10.845105171203613, |
|
"learning_rate": 1.1786666666666668e-05, |
|
"loss": 0.4056, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.518722645485492, |
|
"grad_norm": 9.404190063476562, |
|
"learning_rate": 1.1564444444444445e-05, |
|
"loss": 0.4306, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.5295293672664397, |
|
"grad_norm": 9.176289558410645, |
|
"learning_rate": 1.1342222222222224e-05, |
|
"loss": 0.4239, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.5403360890473875, |
|
"grad_norm": 10.088706016540527, |
|
"learning_rate": 1.1120000000000002e-05, |
|
"loss": 0.4154, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.5403360890473875, |
|
"eval_loss": 0.36057594418525696, |
|
"eval_runtime": 5945.658, |
|
"eval_samples_per_second": 3.112, |
|
"eval_steps_per_second": 0.778, |
|
"eval_wer": 0.32743452795220485, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.5511428108283353, |
|
"grad_norm": 9.688194274902344, |
|
"learning_rate": 1.0897777777777778e-05, |
|
"loss": 0.4115, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.5619495326092829, |
|
"grad_norm": 9.752260208129883, |
|
"learning_rate": 1.0675555555555558e-05, |
|
"loss": 0.3854, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.5727562543902307, |
|
"grad_norm": 10.447392463684082, |
|
"learning_rate": 1.0453333333333334e-05, |
|
"loss": 0.4141, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.5835629761711785, |
|
"grad_norm": 11.185776710510254, |
|
"learning_rate": 1.0231111111111112e-05, |
|
"loss": 0.3924, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.5943696979521262, |
|
"grad_norm": 10.3914794921875, |
|
"learning_rate": 1.000888888888889e-05, |
|
"loss": 0.419, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.5943696979521262, |
|
"eval_loss": 0.3494803309440613, |
|
"eval_runtime": 6902.9208, |
|
"eval_samples_per_second": 2.681, |
|
"eval_steps_per_second": 0.67, |
|
"eval_wer": 0.314375307908257, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.605176419733074, |
|
"grad_norm": 11.420536041259766, |
|
"learning_rate": 9.786666666666667e-06, |
|
"loss": 0.4096, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.6159831415140217, |
|
"grad_norm": 9.05328369140625, |
|
"learning_rate": 9.564444444444445e-06, |
|
"loss": 0.3917, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.6267898632949694, |
|
"grad_norm": 10.281911849975586, |
|
"learning_rate": 9.342222222222223e-06, |
|
"loss": 0.3965, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.6375965850759172, |
|
"grad_norm": 10.587265014648438, |
|
"learning_rate": 9.12e-06, |
|
"loss": 0.374, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.648403306856865, |
|
"grad_norm": 7.721372127532959, |
|
"learning_rate": 8.897777777777779e-06, |
|
"loss": 0.3799, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.648403306856865, |
|
"eval_loss": 0.3397567868232727, |
|
"eval_runtime": 7002.8513, |
|
"eval_samples_per_second": 2.642, |
|
"eval_steps_per_second": 0.661, |
|
"eval_wer": 0.2921668139413039, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.6592100286378128, |
|
"grad_norm": 6.785597324371338, |
|
"learning_rate": 8.675555555555556e-06, |
|
"loss": 0.3953, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.6700167504187605, |
|
"grad_norm": 9.53781509399414, |
|
"learning_rate": 8.453333333333334e-06, |
|
"loss": 0.3786, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.6808234721997082, |
|
"grad_norm": 8.857239723205566, |
|
"learning_rate": 8.231111111111112e-06, |
|
"loss": 0.3744, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.691630193980656, |
|
"grad_norm": 9.638261795043945, |
|
"learning_rate": 8.00888888888889e-06, |
|
"loss": 0.3809, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.7024369157616037, |
|
"grad_norm": 8.304004669189453, |
|
"learning_rate": 7.786666666666666e-06, |
|
"loss": 0.3802, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.7024369157616037, |
|
"eval_loss": 0.3289755880832672, |
|
"eval_runtime": 5885.8991, |
|
"eval_samples_per_second": 3.144, |
|
"eval_steps_per_second": 0.786, |
|
"eval_wer": 0.3044049452998538, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.7132436375425515, |
|
"grad_norm": 9.978581428527832, |
|
"learning_rate": 7.564444444444446e-06, |
|
"loss": 0.3537, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.7240503593234993, |
|
"grad_norm": 10.849929809570312, |
|
"learning_rate": 7.342222222222223e-06, |
|
"loss": 0.3762, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 0.7348570811044469, |
|
"grad_norm": 11.856138229370117, |
|
"learning_rate": 7.1200000000000004e-06, |
|
"loss": 0.3477, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.7456638028853947, |
|
"grad_norm": 10.761491775512695, |
|
"learning_rate": 6.897777777777779e-06, |
|
"loss": 0.361, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 0.7564705246663425, |
|
"grad_norm": 9.24421501159668, |
|
"learning_rate": 6.675555555555556e-06, |
|
"loss": 0.3611, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.7564705246663425, |
|
"eval_loss": 0.3224972188472748, |
|
"eval_runtime": 5632.6127, |
|
"eval_samples_per_second": 3.285, |
|
"eval_steps_per_second": 0.821, |
|
"eval_wer": 0.2823372093932546, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.7672772464472902, |
|
"grad_norm": 10.52470874786377, |
|
"learning_rate": 6.453333333333334e-06, |
|
"loss": 0.3638, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 0.778083968228238, |
|
"grad_norm": 9.080463409423828, |
|
"learning_rate": 6.231111111111111e-06, |
|
"loss": 0.3532, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.7888906900091858, |
|
"grad_norm": 8.789374351501465, |
|
"learning_rate": 6.00888888888889e-06, |
|
"loss": 0.3592, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 0.7996974117901334, |
|
"grad_norm": 8.97732162475586, |
|
"learning_rate": 5.7866666666666674e-06, |
|
"loss": 0.3611, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.8105041335710812, |
|
"grad_norm": 10.455592155456543, |
|
"learning_rate": 5.5644444444444444e-06, |
|
"loss": 0.3548, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.8105041335710812, |
|
"eval_loss": 0.31678903102874756, |
|
"eval_runtime": 3060.9871, |
|
"eval_samples_per_second": 6.045, |
|
"eval_steps_per_second": 1.511, |
|
"eval_wer": 0.27332869353060313, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.821310855352029, |
|
"grad_norm": 8.56920051574707, |
|
"learning_rate": 5.342222222222223e-06, |
|
"loss": 0.3628, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.8321175771329767, |
|
"grad_norm": 11.37761402130127, |
|
"learning_rate": 5.12e-06, |
|
"loss": 0.3353, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 0.8429242989139245, |
|
"grad_norm": 9.396086692810059, |
|
"learning_rate": 4.897777777777778e-06, |
|
"loss": 0.3704, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.8537310206948722, |
|
"grad_norm": 10.0977144241333, |
|
"learning_rate": 4.677777777777778e-06, |
|
"loss": 0.364, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 0.8645377424758199, |
|
"grad_norm": 8.653088569641113, |
|
"learning_rate": 4.455555555555555e-06, |
|
"loss": 0.346, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.8645377424758199, |
|
"eval_loss": 0.3104597330093384, |
|
"eval_runtime": 3053.8514, |
|
"eval_samples_per_second": 6.059, |
|
"eval_steps_per_second": 1.515, |
|
"eval_wer": 0.26601709428444076, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.8753444642567677, |
|
"grad_norm": 9.058122634887695, |
|
"learning_rate": 4.233333333333334e-06, |
|
"loss": 0.3382, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 0.8861511860377155, |
|
"grad_norm": 12.135452270507812, |
|
"learning_rate": 4.011111111111111e-06, |
|
"loss": 0.3456, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.8969579078186632, |
|
"grad_norm": 6.601293563842773, |
|
"learning_rate": 3.7888888888888893e-06, |
|
"loss": 0.3404, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 0.9077646295996109, |
|
"grad_norm": 9.51930046081543, |
|
"learning_rate": 3.566666666666667e-06, |
|
"loss": 0.3479, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.9185713513805587, |
|
"grad_norm": 7.031350135803223, |
|
"learning_rate": 3.3444444444444445e-06, |
|
"loss": 0.3547, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.9185713513805587, |
|
"eval_loss": 0.3063461184501648, |
|
"eval_runtime": 3070.7291, |
|
"eval_samples_per_second": 6.026, |
|
"eval_steps_per_second": 1.506, |
|
"eval_wer": 0.27081068822871623, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.9293780731615064, |
|
"grad_norm": 11.10822868347168, |
|
"learning_rate": 3.1222222222222228e-06, |
|
"loss": 0.3454, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 0.9401847949424542, |
|
"grad_norm": 9.607211112976074, |
|
"learning_rate": 2.9e-06, |
|
"loss": 0.3319, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 0.950991516723402, |
|
"grad_norm": 10.614663124084473, |
|
"learning_rate": 2.677777777777778e-06, |
|
"loss": 0.3441, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 0.9617982385043498, |
|
"grad_norm": 8.344138145446777, |
|
"learning_rate": 2.455555555555556e-06, |
|
"loss": 0.3466, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 0.9726049602852974, |
|
"grad_norm": 11.955930709838867, |
|
"learning_rate": 2.2333333333333333e-06, |
|
"loss": 0.3211, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.9726049602852974, |
|
"eval_loss": 0.30189329385757446, |
|
"eval_runtime": 3095.8164, |
|
"eval_samples_per_second": 5.977, |
|
"eval_steps_per_second": 1.494, |
|
"eval_wer": 0.28268910454413937, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.9834116820662452, |
|
"grad_norm": 9.438616752624512, |
|
"learning_rate": 2.011111111111111e-06, |
|
"loss": 0.343, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 0.994218403847193, |
|
"grad_norm": 10.029309272766113, |
|
"learning_rate": 1.788888888888889e-06, |
|
"loss": 0.3582, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 1.0050791592370454, |
|
"grad_norm": 9.47360610961914, |
|
"learning_rate": 1.566666666666667e-06, |
|
"loss": 0.3024, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 1.0158858810179932, |
|
"grad_norm": 9.3403959274292, |
|
"learning_rate": 1.3444444444444446e-06, |
|
"loss": 0.2811, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 1.026692602798941, |
|
"grad_norm": 9.723664283752441, |
|
"learning_rate": 1.1222222222222222e-06, |
|
"loss": 0.2718, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.026692602798941, |
|
"eval_loss": 0.2989746034145355, |
|
"eval_runtime": 3189.5179, |
|
"eval_samples_per_second": 5.802, |
|
"eval_steps_per_second": 1.45, |
|
"eval_wer": 0.2659936346077151, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.0374993245798887, |
|
"grad_norm": 7.739469051361084, |
|
"learning_rate": 9.000000000000001e-07, |
|
"loss": 0.2765, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 1.0483060463608365, |
|
"grad_norm": 8.379693984985352, |
|
"learning_rate": 6.777777777777779e-07, |
|
"loss": 0.2872, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 1.0591127681417842, |
|
"grad_norm": 8.849838256835938, |
|
"learning_rate": 4.5555555555555563e-07, |
|
"loss": 0.2782, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 1.069919489922732, |
|
"grad_norm": 8.006597518920898, |
|
"learning_rate": 2.3333333333333336e-07, |
|
"loss": 0.2673, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 1.0807262117036798, |
|
"grad_norm": 10.859480857849121, |
|
"learning_rate": 1.1111111111111112e-08, |
|
"loss": 0.2859, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.0807262117036798, |
|
"eval_loss": 0.2979792058467865, |
|
"eval_runtime": 3174.7879, |
|
"eval_samples_per_second": 5.828, |
|
"eval_steps_per_second": 1.457, |
|
"eval_wer": 0.2586507557925852, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.0807262117036798, |
|
"step": 10000, |
|
"total_flos": 3.93912009474048e+18, |
|
"train_loss": 0.09938106536865235, |
|
"train_runtime": 25030.863, |
|
"train_samples_per_second": 6.392, |
|
"train_steps_per_second": 0.4 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 10000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3.93912009474048e+18, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|