{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.1874967448481797,
  "eval_steps": 1000,
  "global_step": 10800,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.736080970816479e-05,
      "grad_norm": 10.5625,
      "learning_rate": 2e-06,
      "loss": 1.0,
      "step": 1
    },
    {
      "epoch": 0.001736080970816479,
      "grad_norm": 0.1513671875,
      "learning_rate": 0.0002,
      "loss": 0.3311,
      "step": 100
    },
    {
      "epoch": 0.003472161941632958,
      "grad_norm": 0.16796875,
      "learning_rate": 0.0004,
      "loss": 0.2169,
      "step": 200
    },
    {
      "epoch": 0.005208242912449436,
      "grad_norm": 0.10400390625,
      "learning_rate": 0.0006,
      "loss": 0.2032,
      "step": 300
    },
    {
      "epoch": 0.006944323883265916,
      "grad_norm": 0.11279296875,
      "learning_rate": 0.0008,
      "loss": 0.188,
      "step": 400
    },
    {
      "epoch": 0.008680404854082394,
      "grad_norm": 0.10107421875,
      "learning_rate": 0.001,
      "loss": 0.1758,
      "step": 500
    },
    {
      "epoch": 0.010416485824898873,
      "grad_norm": 0.09521484375,
      "learning_rate": 0.0012,
      "loss": 0.1637,
      "step": 600
    },
    {
      "epoch": 0.012152566795715351,
      "grad_norm": 0.08154296875,
      "learning_rate": 0.0014,
      "loss": 0.1518,
      "step": 700
    },
    {
      "epoch": 0.013888647766531832,
      "grad_norm": 0.08642578125,
      "learning_rate": 0.0016,
      "loss": 0.1485,
      "step": 800
    },
    {
      "epoch": 0.01562472873734831,
      "grad_norm": 0.1044921875,
      "learning_rate": 0.0018000000000000002,
      "loss": 0.1433,
      "step": 900
    },
    {
      "epoch": 0.01736080970816479,
      "grad_norm": 0.05419921875,
      "learning_rate": 0.002,
      "loss": 0.139,
      "step": 1000
    },
    {
      "epoch": 0.01736080970816479,
      "eval_covost2-en-de_loss": 1.896493673324585,
      "eval_covost2-en-de_model_preparation_time": 0.0057,
      "eval_covost2-en-de_runtime": 9.8697,
      "eval_covost2-en-de_samples_per_second": 6.485,
      "eval_covost2-en-de_steps_per_second": 0.811,
      "step": 1000
    },
    {
      "epoch": 0.01736080970816479,
      "eval_covost2-zh-en_loss": 3.1452860832214355,
      "eval_covost2-zh-en_model_preparation_time": 0.0057,
      "eval_covost2-zh-en_runtime": 8.3732,
      "eval_covost2-zh-en_samples_per_second": 7.643,
      "eval_covost2-zh-en_steps_per_second": 0.955,
      "step": 1000
    },
    {
      "epoch": 0.01736080970816479,
      "eval_peoplespeech-clean-transcription_loss": 3.2206106185913086,
      "eval_peoplespeech-clean-transcription_model_preparation_time": 0.0057,
      "eval_peoplespeech-clean-transcription_runtime": 9.6941,
      "eval_peoplespeech-clean-transcription_samples_per_second": 6.602,
      "eval_peoplespeech-clean-transcription_steps_per_second": 0.825,
      "step": 1000
    },
    {
      "epoch": 0.01909689067898127,
      "grad_norm": 0.059814453125,
      "learning_rate": 0.001999725185109816,
      "loss": 0.1334,
      "step": 1100
    },
    {
      "epoch": 0.020832971649797746,
      "grad_norm": 0.07373046875,
      "learning_rate": 0.0019989008914857113,
      "loss": 0.1288,
      "step": 1200
    },
    {
      "epoch": 0.022569052620614226,
      "grad_norm": 0.049560546875,
      "learning_rate": 0.00199752757218401,
      "loss": 0.1262,
      "step": 1300
    },
    {
      "epoch": 0.024305133591430703,
      "grad_norm": 0.0517578125,
      "learning_rate": 0.001995605982021898,
      "loss": 0.1222,
      "step": 1400
    },
    {
      "epoch": 0.026041214562247183,
      "grad_norm": 0.058349609375,
      "learning_rate": 0.0019931371771625545,
      "loss": 0.1193,
      "step": 1500
    },
    {
      "epoch": 0.027777295533063663,
      "grad_norm": 0.0498046875,
      "learning_rate": 0.001990122514534651,
      "loss": 0.1196,
      "step": 1600
    },
    {
      "epoch": 0.02951337650388014,
      "grad_norm": 0.05517578125,
      "learning_rate": 0.0019865636510865464,
      "loss": 0.115,
      "step": 1700
    },
    {
      "epoch": 0.03124945747469662,
      "grad_norm": 0.044677734375,
      "learning_rate": 0.001982462542875576,
      "loss": 0.115,
      "step": 1800
    },
    {
      "epoch": 0.0329855384455131,
      "grad_norm": 0.05419921875,
      "learning_rate": 0.001977821443992945,
      "loss": 0.1125,
      "step": 1900
    },
    {
      "epoch": 0.03472161941632958,
      "grad_norm": 0.047119140625,
      "learning_rate": 0.001972642905324813,
      "loss": 0.1094,
      "step": 2000
    },
    {
      "epoch": 0.03472161941632958,
      "eval_covost2-en-de_loss": 1.6700351238250732,
      "eval_covost2-en-de_model_preparation_time": 0.0057,
      "eval_covost2-en-de_runtime": 8.1279,
      "eval_covost2-en-de_samples_per_second": 7.874,
      "eval_covost2-en-de_steps_per_second": 0.984,
      "step": 2000
    },
    {
      "epoch": 0.03472161941632958,
      "eval_covost2-zh-en_loss": 3.093877077102661,
      "eval_covost2-zh-en_model_preparation_time": 0.0057,
      "eval_covost2-zh-en_runtime": 8.1488,
      "eval_covost2-zh-en_samples_per_second": 7.854,
      "eval_covost2-zh-en_steps_per_second": 0.982,
      "step": 2000
    },
    {
      "epoch": 0.03472161941632958,
      "eval_peoplespeech-clean-transcription_loss": 2.478968620300293,
      "eval_peoplespeech-clean-transcription_model_preparation_time": 0.0057,
      "eval_peoplespeech-clean-transcription_runtime": 9.5507,
      "eval_peoplespeech-clean-transcription_samples_per_second": 6.701,
      "eval_peoplespeech-clean-transcription_steps_per_second": 0.838,
      "step": 2000
    },
    {
      "epoch": 0.036457700387146054,
      "grad_norm": 0.048583984375,
      "learning_rate": 0.0019669297731502505,
      "loss": 0.1077,
      "step": 2100
    },
    {
      "epoch": 0.03819378135796254,
      "grad_norm": 0.054443359375,
      "learning_rate": 0.00196068518757684,
      "loss": 0.1069,
      "step": 2200
    },
    {
      "epoch": 0.039929862328779014,
      "grad_norm": 0.047119140625,
      "learning_rate": 0.001953912580814779,
      "loss": 0.1043,
      "step": 2300
    },
    {
      "epoch": 0.04166594329959549,
      "grad_norm": 0.044921875,
      "learning_rate": 0.0019466156752904343,
      "loss": 0.1035,
      "step": 2400
    },
    {
      "epoch": 0.043402024270411975,
      "grad_norm": 0.050537109375,
      "learning_rate": 0.0019387984816003866,
      "loss": 0.1033,
      "step": 2500
    },
    {
      "epoch": 0.04513810524122845,
      "grad_norm": 0.056396484375,
      "learning_rate": 0.0019304652963070869,
      "loss": 0.102,
      "step": 2600
    },
    {
      "epoch": 0.04687418621204493,
      "grad_norm": 0.046875,
      "learning_rate": 0.0019216206995773372,
      "loss": 0.0998,
      "step": 2700
    },
    {
      "epoch": 0.048610267182861405,
      "grad_norm": 0.042236328125,
      "learning_rate": 0.0019122695526648968,
      "loss": 0.1002,
      "step": 2800
    },
    {
      "epoch": 0.05034634815367789,
      "grad_norm": 0.04638671875,
      "learning_rate": 0.0019024169952385887,
      "loss": 0.0978,
      "step": 2900
    },
    {
      "epoch": 0.052082429124494366,
      "grad_norm": 0.05126953125,
      "learning_rate": 0.0018920684425573864,
      "loss": 0.097,
      "step": 3000
    },
    {
      "epoch": 0.052082429124494366,
      "eval_covost2-en-de_loss": 1.749150276184082,
      "eval_covost2-en-de_model_preparation_time": 0.0057,
      "eval_covost2-en-de_runtime": 8.1948,
      "eval_covost2-en-de_samples_per_second": 7.81,
      "eval_covost2-en-de_steps_per_second": 0.976,
      "step": 3000
    },
    {
      "epoch": 0.052082429124494366,
      "eval_covost2-zh-en_loss": 3.198117971420288,
      "eval_covost2-zh-en_model_preparation_time": 0.0057,
      "eval_covost2-zh-en_runtime": 8.1979,
      "eval_covost2-zh-en_samples_per_second": 7.807,
      "eval_covost2-zh-en_steps_per_second": 0.976,
      "step": 3000
    },
    {
      "epoch": 0.052082429124494366,
      "eval_peoplespeech-clean-transcription_loss": 2.345036506652832,
      "eval_peoplespeech-clean-transcription_model_preparation_time": 0.0057,
      "eval_peoplespeech-clean-transcription_runtime": 11.4402,
      "eval_peoplespeech-clean-transcription_samples_per_second": 5.594,
      "eval_peoplespeech-clean-transcription_steps_per_second": 0.699,
      "step": 3000
    },
    {
      "epoch": 0.05381851009531084,
      "grad_norm": 0.06494140625,
      "learning_rate": 0.0018812295824940284,
      "loss": 0.0955,
      "step": 3100
    },
    {
      "epoch": 0.055554591066127326,
      "grad_norm": 0.044677734375,
      "learning_rate": 0.0018699063724087904,
      "loss": 0.0951,
      "step": 3200
    },
    {
      "epoch": 0.0572906720369438,
      "grad_norm": 0.0390625,
      "learning_rate": 0.0018581050358751443,
      "loss": 0.0947,
      "step": 3300
    },
    {
      "epoch": 0.05902675300776028,
      "grad_norm": 0.056396484375,
      "learning_rate": 0.0018458320592590974,
      "loss": 0.0939,
      "step": 3400
    },
    {
      "epoch": 0.060762833978576763,
      "grad_norm": 0.047119140625,
      "learning_rate": 0.0018330941881540914,
      "loss": 0.0941,
      "step": 3500
    },
    {
      "epoch": 0.06249891494939324,
      "grad_norm": 0.046630859375,
      "learning_rate": 0.0018198984236734246,
      "loss": 0.0927,
      "step": 3600
    },
    {
      "epoch": 0.06423499592020972,
      "grad_norm": 0.055419921875,
      "learning_rate": 0.0018062520186022297,
      "loss": 0.0948,
      "step": 3700
    },
    {
      "epoch": 0.0659710768910262,
      "grad_norm": 0.046142578125,
      "learning_rate": 0.0017921624734111292,
      "loss": 0.09,
      "step": 3800
    },
    {
      "epoch": 0.06770715786184267,
      "grad_norm": 0.04736328125,
      "learning_rate": 0.001777637532133752,
      "loss": 0.0926,
      "step": 3900
    },
    {
      "epoch": 0.06944323883265915,
      "grad_norm": 0.048828125,
      "learning_rate": 0.0017626851781103819,
      "loss": 0.0906,
      "step": 4000
    },
    {
      "epoch": 0.06944323883265915,
      "eval_covost2-en-de_loss": 1.7936017513275146,
      "eval_covost2-en-de_model_preparation_time": 0.0057,
      "eval_covost2-en-de_runtime": 8.0356,
      "eval_covost2-en-de_samples_per_second": 7.965,
      "eval_covost2-en-de_steps_per_second": 0.996,
      "step": 4000
    },
    {
      "epoch": 0.06944323883265915,
      "eval_covost2-zh-en_loss": 3.2699265480041504,
      "eval_covost2-zh-en_model_preparation_time": 0.0057,
      "eval_covost2-zh-en_runtime": 9.5779,
      "eval_covost2-zh-en_samples_per_second": 6.682,
      "eval_covost2-zh-en_steps_per_second": 0.835,
      "step": 4000
    },
    {
      "epoch": 0.06944323883265915,
      "eval_peoplespeech-clean-transcription_loss": 2.3380110263824463,
      "eval_peoplespeech-clean-transcription_model_preparation_time": 0.0057,
      "eval_peoplespeech-clean-transcription_runtime": 9.5943,
      "eval_peoplespeech-clean-transcription_samples_per_second": 6.671,
      "eval_peoplespeech-clean-transcription_steps_per_second": 0.834,
      "step": 4000
    },
    {
      "epoch": 0.07117931980347564,
      "grad_norm": 0.041259765625,
      "learning_rate": 0.001747313629600077,
      "loss": 0.0926,
      "step": 4100
    },
    {
      "epoch": 0.07291540077429211,
      "grad_norm": 0.05322265625,
      "learning_rate": 0.001731531335263669,
      "loss": 0.0907,
      "step": 4200
    },
    {
      "epoch": 0.07465148174510859,
      "grad_norm": 0.05126953125,
      "learning_rate": 0.0017153469695201276,
      "loss": 0.0898,
      "step": 4300
    },
    {
      "epoch": 0.07638756271592508,
      "grad_norm": 0.061767578125,
      "learning_rate": 0.0016987694277788418,
      "loss": 0.0876,
      "step": 4400
    },
    {
      "epoch": 0.07812364368674155,
      "grad_norm": 0.042724609375,
      "learning_rate": 0.001681807821550438,
      "loss": 0.0874,
      "step": 4500
    },
    {
      "epoch": 0.07985972465755803,
      "grad_norm": 0.05126953125,
      "learning_rate": 0.0016644714734388218,
      "loss": 0.0865,
      "step": 4600
    },
    {
      "epoch": 0.08159580562837451,
      "grad_norm": 0.042724609375,
      "learning_rate": 0.0016467699120171987,
      "loss": 0.0866,
      "step": 4700
    },
    {
      "epoch": 0.08333188659919098,
      "grad_norm": 0.0419921875,
      "learning_rate": 0.001628712866590885,
      "loss": 0.0864,
      "step": 4800
    },
    {
      "epoch": 0.08506796757000747,
      "grad_norm": 0.051513671875,
      "learning_rate": 0.0016103102618497923,
      "loss": 0.0862,
      "step": 4900
    },
    {
      "epoch": 0.08680404854082395,
      "grad_norm": 0.052734375,
      "learning_rate": 0.0015915722124135226,
      "loss": 0.0855,
      "step": 5000
    },
    {
      "epoch": 0.08680404854082395,
      "eval_covost2-en-de_loss": 1.7862941026687622,
      "eval_covost2-en-de_model_preparation_time": 0.0057,
      "eval_covost2-en-de_runtime": 8.2861,
      "eval_covost2-en-de_samples_per_second": 7.724,
      "eval_covost2-en-de_steps_per_second": 0.965,
      "step": 5000
    },
    {
      "epoch": 0.08680404854082395,
      "eval_covost2-zh-en_loss": 3.33290433883667,
      "eval_covost2-zh-en_model_preparation_time": 0.0057,
      "eval_covost2-zh-en_runtime": 8.4063,
      "eval_covost2-zh-en_samples_per_second": 7.613,
      "eval_covost2-zh-en_steps_per_second": 0.952,
      "step": 5000
    },
    {
      "epoch": 0.08680404854082395,
      "eval_peoplespeech-clean-transcription_loss": 2.2601113319396973,
      "eval_peoplespeech-clean-transcription_model_preparation_time": 0.0057,
      "eval_peoplespeech-clean-transcription_runtime": 9.4946,
      "eval_peoplespeech-clean-transcription_samples_per_second": 6.741,
      "eval_peoplespeech-clean-transcription_steps_per_second": 0.843,
      "step": 5000
    },
    {
      "epoch": 0.08854012951164042,
      "grad_norm": 0.053466796875,
      "learning_rate": 0.001572509017272072,
      "loss": 0.0872,
      "step": 5100
    },
    {
      "epoch": 0.0902762104824569,
      "grad_norm": 0.044189453125,
      "learning_rate": 0.0015531311541251993,
      "loss": 0.0859,
      "step": 5200
    },
    {
      "epoch": 0.09201229145327339,
      "grad_norm": 0.052978515625,
      "learning_rate": 0.0015334492736235703,
      "loss": 0.085,
      "step": 5300
    },
    {
      "epoch": 0.09374837242408986,
      "grad_norm": 0.04833984375,
      "learning_rate": 0.0015134741935148419,
      "loss": 0.0844,
      "step": 5400
    },
    {
      "epoch": 0.09548445339490634,
      "grad_norm": 0.047119140625,
      "learning_rate": 0.0014932168926979072,
      "loss": 0.0844,
      "step": 5500
    },
    {
      "epoch": 0.09722053436572281,
      "grad_norm": 0.05029296875,
      "learning_rate": 0.0014726885051885652,
      "loss": 0.0856,
      "step": 5600
    },
    {
      "epoch": 0.0989566153365393,
      "grad_norm": 0.049560546875,
      "learning_rate": 0.0014519003139999338,
      "loss": 0.0841,
      "step": 5700
    },
    {
      "epoch": 0.10069269630735578,
      "grad_norm": 0.056884765625,
      "learning_rate": 0.0014308637449409706,
      "loss": 0.0841,
      "step": 5800
    },
    {
      "epoch": 0.10242877727817225,
      "grad_norm": 0.041015625,
      "learning_rate": 0.0014095903603365066,
      "loss": 0.0825,
      "step": 5900
    },
    {
      "epoch": 0.10416485824898873,
      "grad_norm": 0.048583984375,
      "learning_rate": 0.0013880918526722496,
      "loss": 0.0828,
      "step": 6000
    },
    {
      "epoch": 0.10416485824898873,
      "eval_covost2-en-de_loss": 1.8097732067108154,
      "eval_covost2-en-de_model_preparation_time": 0.0057,
      "eval_covost2-en-de_runtime": 8.2052,
      "eval_covost2-en-de_samples_per_second": 7.8,
      "eval_covost2-en-de_steps_per_second": 0.975,
      "step": 6000
    },
    {
      "epoch": 0.10416485824898873,
      "eval_covost2-zh-en_loss": 3.331326961517334,
      "eval_covost2-zh-en_model_preparation_time": 0.0057,
      "eval_covost2-zh-en_runtime": 8.2653,
      "eval_covost2-zh-en_samples_per_second": 7.743,
      "eval_covost2-zh-en_steps_per_second": 0.968,
      "step": 6000
    },
    {
      "epoch": 0.10416485824898873,
      "eval_peoplespeech-clean-transcription_loss": 2.250232219696045,
      "eval_peoplespeech-clean-transcription_model_preparation_time": 0.0057,
      "eval_peoplespeech-clean-transcription_runtime": 9.4708,
      "eval_peoplespeech-clean-transcription_samples_per_second": 6.758,
      "eval_peoplespeech-clean-transcription_steps_per_second": 0.845,
      "step": 6000
    },
    {
      "epoch": 0.10590093921980522,
      "grad_norm": 0.04443359375,
      "learning_rate": 0.0013663800381682463,
      "loss": 0.0819,
      "step": 6100
    },
    {
      "epoch": 0.10763702019062169,
      "grad_norm": 0.05419921875,
      "learning_rate": 0.0013444668502843329,
      "loss": 0.08,
      "step": 6200
    },
    {
      "epoch": 0.10937310116143817,
      "grad_norm": 0.0478515625,
      "learning_rate": 0.0013223643331611537,
      "loss": 0.0805,
      "step": 6300
    },
    {
      "epoch": 0.11110918213225465,
      "grad_norm": 0.051513671875,
      "learning_rate": 0.001300084635000341,
      "loss": 0.0799,
      "step": 6400
    },
    {
      "epoch": 0.11284526310307112,
      "grad_norm": 0.0498046875,
      "learning_rate": 0.0012776400013875004,
      "loss": 0.0807,
      "step": 6500
    },
    {
      "epoch": 0.1145813440738876,
      "grad_norm": 0.050537109375,
      "learning_rate": 0.0012550427685616766,
      "loss": 0.0799,
      "step": 6600
    },
    {
      "epoch": 0.11631742504470409,
      "grad_norm": 0.05029296875,
      "learning_rate": 0.0012323053566349834,
      "loss": 0.0802,
      "step": 6700
    },
    {
      "epoch": 0.11805350601552056,
      "grad_norm": 0.047119140625,
      "learning_rate": 0.0012094402627661448,
      "loss": 0.0796,
      "step": 6800
    },
    {
      "epoch": 0.11978958698633704,
      "grad_norm": 0.044677734375,
      "learning_rate": 0.0011864600542916813,
      "loss": 0.0784,
      "step": 6900
    },
    {
      "epoch": 0.12152566795715353,
      "grad_norm": 0.0478515625,
      "learning_rate": 0.0011633773618185302,
      "loss": 0.0808,
      "step": 7000
    },
    {
      "epoch": 0.12152566795715353,
      "eval_covost2-en-de_loss": 1.7786378860473633,
      "eval_covost2-en-de_model_preparation_time": 0.0057,
      "eval_covost2-en-de_runtime": 8.0291,
      "eval_covost2-en-de_samples_per_second": 7.971,
      "eval_covost2-en-de_steps_per_second": 0.996,
      "step": 7000
    },
    {
      "epoch": 0.12152566795715353,
      "eval_covost2-zh-en_loss": 3.273571252822876,
      "eval_covost2-zh-en_model_preparation_time": 0.0057,
      "eval_covost2-zh-en_runtime": 8.3234,
      "eval_covost2-zh-en_samples_per_second": 7.689,
      "eval_covost2-zh-en_steps_per_second": 0.961,
      "step": 7000
    },
    {
      "epoch": 0.12152566795715353,
      "eval_peoplespeech-clean-transcription_loss": 2.2290830612182617,
      "eval_peoplespeech-clean-transcription_model_preparation_time": 0.0057,
      "eval_peoplespeech-clean-transcription_runtime": 9.7693,
      "eval_peoplespeech-clean-transcription_samples_per_second": 6.551,
      "eval_peoplespeech-clean-transcription_steps_per_second": 0.819,
      "step": 7000
    },
    {
      "epoch": 0.12326174892797,
      "grad_norm": 0.0478515625,
      "learning_rate": 0.0011402048722818862,
      "loss": 0.0786,
      "step": 7100
    },
    {
      "epoch": 0.12499782989878648,
      "grad_norm": 0.049560546875,
      "learning_rate": 0.0011169553219720827,
      "loss": 0.0795,
      "step": 7200
    },
    {
      "epoch": 0.12673391086960295,
      "grad_norm": 0.04736328125,
      "learning_rate": 0.001093641489534351,
      "loss": 0.0787,
      "step": 7300
    },
    {
      "epoch": 0.12846999184041943,
      "grad_norm": 0.054931640625,
      "learning_rate": 0.001070276188945293,
      "loss": 0.0784,
      "step": 7400
    },
    {
      "epoch": 0.13020607281123592,
      "grad_norm": 0.0478515625,
      "learning_rate": 0.00104687226246994,
      "loss": 0.0787,
      "step": 7500
    },
    {
      "epoch": 0.1319421537820524,
      "grad_norm": 0.048828125,
      "learning_rate": 0.0010234425736032607,
      "loss": 0.0788,
      "step": 7600
    },
    {
      "epoch": 0.13367823475286889,
      "grad_norm": 0.058837890625,
      "learning_rate": 0.001,
      "loss": 0.0769,
      "step": 7700
    },
    {
      "epoch": 0.13541431572368534,
      "grad_norm": 0.055908203125,
      "learning_rate": 0.0009765574263967396,
      "loss": 0.077,
      "step": 7800
    },
    {
      "epoch": 0.13715039669450182,
      "grad_norm": 0.05322265625,
      "learning_rate": 0.0009531277375300599,
      "loss": 0.0764,
      "step": 7900
    },
    {
      "epoch": 0.1388864776653183,
      "grad_norm": 0.04833984375,
      "learning_rate": 0.0009297238110547074,
      "loss": 0.0764,
      "step": 8000
    },
    {
      "epoch": 0.1388864776653183,
      "eval_covost2-en-de_loss": 1.7951624393463135,
      "eval_covost2-en-de_model_preparation_time": 0.0057,
      "eval_covost2-en-de_runtime": 8.1477,
      "eval_covost2-en-de_samples_per_second": 7.855,
      "eval_covost2-en-de_steps_per_second": 0.982,
      "step": 8000
    },
    {
      "epoch": 0.1388864776653183,
      "eval_covost2-zh-en_loss": 3.301699161529541,
      "eval_covost2-zh-en_model_preparation_time": 0.0057,
      "eval_covost2-zh-en_runtime": 8.8691,
      "eval_covost2-zh-en_samples_per_second": 7.216,
      "eval_covost2-zh-en_steps_per_second": 0.902,
      "step": 8000
    },
    {
      "epoch": 0.1388864776653183,
      "eval_peoplespeech-clean-transcription_loss": 2.1518499851226807,
      "eval_peoplespeech-clean-transcription_model_preparation_time": 0.0057,
      "eval_peoplespeech-clean-transcription_runtime": 9.5239,
      "eval_peoplespeech-clean-transcription_samples_per_second": 6.72,
      "eval_peoplespeech-clean-transcription_steps_per_second": 0.84,
      "step": 8000
    },
    {
      "epoch": 0.1406225586361348,
      "grad_norm": 0.053955078125,
      "learning_rate": 0.0009063585104656494,
      "loss": 0.0762,
      "step": 8100
    },
    {
      "epoch": 0.14235863960695128,
      "grad_norm": 0.0654296875,
      "learning_rate": 0.0008830446780279176,
      "loss": 0.0769,
      "step": 8200
    },
    {
      "epoch": 0.14409472057776776,
      "grad_norm": 0.046875,
      "learning_rate": 0.0008597951277181142,
      "loss": 0.0751,
      "step": 8300
    },
    {
      "epoch": 0.14583080154858422,
      "grad_norm": 0.054443359375,
      "learning_rate": 0.0008366226381814697,
      "loss": 0.0765,
      "step": 8400
    },
    {
      "epoch": 0.1475668825194007,
      "grad_norm": 0.052490234375,
      "learning_rate": 0.000813539945708319,
      "loss": 0.0763,
      "step": 8500
    },
    {
      "epoch": 0.14930296349021718,
      "grad_norm": 0.068359375,
      "learning_rate": 0.0007905597372338558,
      "loss": 0.0744,
      "step": 8600
    },
    {
      "epoch": 0.15103904446103367,
      "grad_norm": 0.055908203125,
      "learning_rate": 0.0007676946433650169,
      "loss": 0.0737,
      "step": 8700
    },
    {
      "epoch": 0.15277512543185015,
      "grad_norm": 0.055908203125,
      "learning_rate": 0.0007449572314383236,
      "loss": 0.0758,
      "step": 8800
    },
    {
      "epoch": 0.1545112064026666,
      "grad_norm": 0.051513671875,
      "learning_rate": 0.0007223599986124993,
      "loss": 0.0753,
      "step": 8900
    },
    {
      "epoch": 0.1562472873734831,
      "grad_norm": 0.05517578125,
      "learning_rate": 0.0006999153649996595,
      "loss": 0.0736,
      "step": 9000
    },
    {
      "epoch": 0.1562472873734831,
      "eval_covost2-en-de_loss": 1.7736568450927734,
      "eval_covost2-en-de_model_preparation_time": 0.0057,
      "eval_covost2-en-de_runtime": 8.2798,
      "eval_covost2-en-de_samples_per_second": 7.73,
      "eval_covost2-en-de_steps_per_second": 0.966,
      "step": 9000
    },
    {
      "epoch": 0.1562472873734831,
      "eval_covost2-zh-en_loss": 3.2736916542053223,
      "eval_covost2-zh-en_model_preparation_time": 0.0057,
      "eval_covost2-zh-en_runtime": 8.6328,
      "eval_covost2-zh-en_samples_per_second": 7.414,
      "eval_covost2-zh-en_steps_per_second": 0.927,
      "step": 9000
    },
    {
      "epoch": 0.1562472873734831,
      "eval_peoplespeech-clean-transcription_loss": 2.169971227645874,
      "eval_peoplespeech-clean-transcription_model_preparation_time": 0.0057,
      "eval_peoplespeech-clean-transcription_runtime": 10.7684,
      "eval_peoplespeech-clean-transcription_samples_per_second": 5.943,
      "eval_peoplespeech-clean-transcription_steps_per_second": 0.743,
      "step": 9000
    },
    {
      "epoch": 0.15798336834429957,
      "grad_norm": 0.06201171875,
      "learning_rate": 0.0006776356668388464,
      "loss": 0.073,
      "step": 9100
    },
    {
      "epoch": 0.15971944931511606,
      "grad_norm": 0.0654296875,
      "learning_rate": 0.0006555331497156671,
      "loss": 0.0753,
      "step": 9200
    },
    {
      "epoch": 0.16145553028593254,
      "grad_norm": 0.056640625,
      "learning_rate": 0.0006336199618317538,
      "loss": 0.0754,
      "step": 9300
    },
    {
      "epoch": 0.16319161125674903,
      "grad_norm": 0.06005859375,
      "learning_rate": 0.0006119081473277501,
      "loss": 0.0736,
      "step": 9400
    },
    {
      "epoch": 0.16492769222756548,
      "grad_norm": 0.059326171875,
      "learning_rate": 0.0005904096396634935,
      "loss": 0.0721,
      "step": 9500
    },
    {
      "epoch": 0.16666377319838196,
      "grad_norm": 0.06201171875,
      "learning_rate": 0.0005691362550590297,
      "loss": 0.0717,
      "step": 9600
    },
    {
      "epoch": 0.16839985416919845,
      "grad_norm": 0.051025390625,
      "learning_rate": 0.0005480996860000663,
      "loss": 0.0738,
      "step": 9700
    },
    {
      "epoch": 0.17013593514001493,
      "grad_norm": 0.06494140625,
      "learning_rate": 0.0005273114948114346,
      "loss": 0.0737,
      "step": 9800
    },
    {
      "epoch": 0.17187201611083142,
      "grad_norm": 0.058837890625,
      "learning_rate": 0.0005067831073020928,
      "loss": 0.0711,
      "step": 9900
    },
    {
      "epoch": 0.1736080970816479,
      "grad_norm": 0.050537109375,
      "learning_rate": 0.00048652580648515787,
      "loss": 0.0722,
      "step": 10000
    },
    {
      "epoch": 0.1736080970816479,
      "eval_covost2-en-de_loss": 1.768043875694275,
      "eval_covost2-en-de_model_preparation_time": 0.0057,
      "eval_covost2-en-de_runtime": 8.2025,
      "eval_covost2-en-de_samples_per_second": 7.802,
      "eval_covost2-en-de_steps_per_second": 0.975,
      "step": 10000
    },
    {
      "epoch": 0.1736080970816479,
      "eval_covost2-zh-en_loss": 3.288457155227661,
      "eval_covost2-zh-en_model_preparation_time": 0.0057,
      "eval_covost2-zh-en_runtime": 8.5315,
      "eval_covost2-zh-en_samples_per_second": 7.502,
      "eval_covost2-zh-en_steps_per_second": 0.938,
      "step": 10000
    },
    {
      "epoch": 0.1736080970816479,
      "eval_peoplespeech-clean-transcription_loss": 2.099651336669922,
      "eval_peoplespeech-clean-transcription_model_preparation_time": 0.0057,
      "eval_peoplespeech-clean-transcription_runtime": 9.7081,
      "eval_peoplespeech-clean-transcription_samples_per_second": 6.592,
      "eval_peoplespeech-clean-transcription_steps_per_second": 0.824,
      "step": 10000
    },
    {
      "epoch": 0.17534417805246436,
      "grad_norm": 0.059326171875,
      "learning_rate": 0.0004665507263764299,
      "loss": 0.0717,
      "step": 10100
    },
    {
      "epoch": 0.17708025902328084,
      "grad_norm": 0.06103515625,
      "learning_rate": 0.0004468688458748006,
      "loss": 0.0729,
      "step": 10200
    },
    {
      "epoch": 0.17881633999409732,
      "grad_norm": 0.051513671875,
      "learning_rate": 0.0004274909827279283,
      "loss": 0.0711,
      "step": 10300
    },
    {
      "epoch": 0.1805524209649138,
      "grad_norm": 0.059326171875,
      "learning_rate": 0.0004084277875864776,
      "loss": 0.0712,
      "step": 10400
    },
    {
      "epoch": 0.1822885019357303,
      "grad_norm": 0.062255859375,
      "learning_rate": 0.00038968973815020803,
      "loss": 0.0708,
      "step": 10500
    },
    {
      "epoch": 0.18402458290654677,
      "grad_norm": 0.06640625,
      "learning_rate": 0.00037128713340911534,
      "loss": 0.0716,
      "step": 10600
    },
    {
      "epoch": 0.18576066387736323,
      "grad_norm": 0.0517578125,
      "learning_rate": 0.00035323008798280133,
      "loss": 0.0728,
      "step": 10700
    },
    {
      "epoch": 0.1874967448481797,
      "grad_norm": 0.076171875,
      "learning_rate": 0.00033552852656117837,
      "loss": 0.0711,
      "step": 10800
    }
  ],
  "logging_steps": 100,
  "max_steps": 14400,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 3600,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 4.964486494302536e+17,
  "train_batch_size": 24,
  "trial_name": null,
  "trial_params": null
}