|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.06249891494939324, |
|
"eval_steps": 1000, |
|
"global_step": 3600, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.736080970816479e-05, |
|
"grad_norm": 10.5625, |
|
"learning_rate": 2e-06, |
|
"loss": 1.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.001736080970816479, |
|
"grad_norm": 0.1513671875, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3311, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.003472161941632958, |
|
"grad_norm": 0.16796875, |
|
"learning_rate": 0.0004, |
|
"loss": 0.2169, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.005208242912449436, |
|
"grad_norm": 0.10400390625, |
|
"learning_rate": 0.0006, |
|
"loss": 0.2032, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.006944323883265916, |
|
"grad_norm": 0.11279296875, |
|
"learning_rate": 0.0008, |
|
"loss": 0.188, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.008680404854082394, |
|
"grad_norm": 0.10107421875, |
|
"learning_rate": 0.001, |
|
"loss": 0.1758, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.010416485824898873, |
|
"grad_norm": 0.09521484375, |
|
"learning_rate": 0.0012, |
|
"loss": 0.1637, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.012152566795715351, |
|
"grad_norm": 0.08154296875, |
|
"learning_rate": 0.0014, |
|
"loss": 0.1518, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.013888647766531832, |
|
"grad_norm": 0.08642578125, |
|
"learning_rate": 0.0016, |
|
"loss": 0.1485, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.01562472873734831, |
|
"grad_norm": 0.1044921875, |
|
"learning_rate": 0.0018000000000000002, |
|
"loss": 0.1433, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.01736080970816479, |
|
"grad_norm": 0.05419921875, |
|
"learning_rate": 0.002, |
|
"loss": 0.139, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.01736080970816479, |
|
"eval_covost2-en-de_loss": 1.896493673324585, |
|
"eval_covost2-en-de_model_preparation_time": 0.0057, |
|
"eval_covost2-en-de_runtime": 9.8697, |
|
"eval_covost2-en-de_samples_per_second": 6.485, |
|
"eval_covost2-en-de_steps_per_second": 0.811, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.01736080970816479, |
|
"eval_covost2-zh-en_loss": 3.1452860832214355, |
|
"eval_covost2-zh-en_model_preparation_time": 0.0057, |
|
"eval_covost2-zh-en_runtime": 8.3732, |
|
"eval_covost2-zh-en_samples_per_second": 7.643, |
|
"eval_covost2-zh-en_steps_per_second": 0.955, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.01736080970816479, |
|
"eval_peoplespeech-clean-transcription_loss": 3.2206106185913086, |
|
"eval_peoplespeech-clean-transcription_model_preparation_time": 0.0057, |
|
"eval_peoplespeech-clean-transcription_runtime": 9.6941, |
|
"eval_peoplespeech-clean-transcription_samples_per_second": 6.602, |
|
"eval_peoplespeech-clean-transcription_steps_per_second": 0.825, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.01909689067898127, |
|
"grad_norm": 0.059814453125, |
|
"learning_rate": 0.001999725185109816, |
|
"loss": 0.1334, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.020832971649797746, |
|
"grad_norm": 0.07373046875, |
|
"learning_rate": 0.0019989008914857113, |
|
"loss": 0.1288, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.022569052620614226, |
|
"grad_norm": 0.049560546875, |
|
"learning_rate": 0.00199752757218401, |
|
"loss": 0.1262, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.024305133591430703, |
|
"grad_norm": 0.0517578125, |
|
"learning_rate": 0.001995605982021898, |
|
"loss": 0.1222, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.026041214562247183, |
|
"grad_norm": 0.058349609375, |
|
"learning_rate": 0.0019931371771625545, |
|
"loss": 0.1193, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.027777295533063663, |
|
"grad_norm": 0.0498046875, |
|
"learning_rate": 0.001990122514534651, |
|
"loss": 0.1196, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.02951337650388014, |
|
"grad_norm": 0.05517578125, |
|
"learning_rate": 0.0019865636510865464, |
|
"loss": 0.115, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.03124945747469662, |
|
"grad_norm": 0.044677734375, |
|
"learning_rate": 0.001982462542875576, |
|
"loss": 0.115, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.0329855384455131, |
|
"grad_norm": 0.05419921875, |
|
"learning_rate": 0.001977821443992945, |
|
"loss": 0.1125, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.03472161941632958, |
|
"grad_norm": 0.047119140625, |
|
"learning_rate": 0.001972642905324813, |
|
"loss": 0.1094, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.03472161941632958, |
|
"eval_covost2-en-de_loss": 1.6700351238250732, |
|
"eval_covost2-en-de_model_preparation_time": 0.0057, |
|
"eval_covost2-en-de_runtime": 8.1279, |
|
"eval_covost2-en-de_samples_per_second": 7.874, |
|
"eval_covost2-en-de_steps_per_second": 0.984, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.03472161941632958, |
|
"eval_covost2-zh-en_loss": 3.093877077102661, |
|
"eval_covost2-zh-en_model_preparation_time": 0.0057, |
|
"eval_covost2-zh-en_runtime": 8.1488, |
|
"eval_covost2-zh-en_samples_per_second": 7.854, |
|
"eval_covost2-zh-en_steps_per_second": 0.982, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.03472161941632958, |
|
"eval_peoplespeech-clean-transcription_loss": 2.478968620300293, |
|
"eval_peoplespeech-clean-transcription_model_preparation_time": 0.0057, |
|
"eval_peoplespeech-clean-transcription_runtime": 9.5507, |
|
"eval_peoplespeech-clean-transcription_samples_per_second": 6.701, |
|
"eval_peoplespeech-clean-transcription_steps_per_second": 0.838, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.036457700387146054, |
|
"grad_norm": 0.048583984375, |
|
"learning_rate": 0.0019669297731502505, |
|
"loss": 0.1077, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.03819378135796254, |
|
"grad_norm": 0.054443359375, |
|
"learning_rate": 0.00196068518757684, |
|
"loss": 0.1069, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.039929862328779014, |
|
"grad_norm": 0.047119140625, |
|
"learning_rate": 0.001953912580814779, |
|
"loss": 0.1043, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.04166594329959549, |
|
"grad_norm": 0.044921875, |
|
"learning_rate": 0.0019466156752904343, |
|
"loss": 0.1035, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.043402024270411975, |
|
"grad_norm": 0.050537109375, |
|
"learning_rate": 0.0019387984816003866, |
|
"loss": 0.1033, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.04513810524122845, |
|
"grad_norm": 0.056396484375, |
|
"learning_rate": 0.0019304652963070869, |
|
"loss": 0.102, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.04687418621204493, |
|
"grad_norm": 0.046875, |
|
"learning_rate": 0.0019216206995773372, |
|
"loss": 0.0998, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.048610267182861405, |
|
"grad_norm": 0.042236328125, |
|
"learning_rate": 0.0019122695526648968, |
|
"loss": 0.1002, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.05034634815367789, |
|
"grad_norm": 0.04638671875, |
|
"learning_rate": 0.0019024169952385887, |
|
"loss": 0.0978, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.052082429124494366, |
|
"grad_norm": 0.05126953125, |
|
"learning_rate": 0.0018920684425573864, |
|
"loss": 0.097, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.052082429124494366, |
|
"eval_covost2-en-de_loss": 1.749150276184082, |
|
"eval_covost2-en-de_model_preparation_time": 0.0057, |
|
"eval_covost2-en-de_runtime": 8.1948, |
|
"eval_covost2-en-de_samples_per_second": 7.81, |
|
"eval_covost2-en-de_steps_per_second": 0.976, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.052082429124494366, |
|
"eval_covost2-zh-en_loss": 3.198117971420288, |
|
"eval_covost2-zh-en_model_preparation_time": 0.0057, |
|
"eval_covost2-zh-en_runtime": 8.1979, |
|
"eval_covost2-zh-en_samples_per_second": 7.807, |
|
"eval_covost2-zh-en_steps_per_second": 0.976, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.052082429124494366, |
|
"eval_peoplespeech-clean-transcription_loss": 2.345036506652832, |
|
"eval_peoplespeech-clean-transcription_model_preparation_time": 0.0057, |
|
"eval_peoplespeech-clean-transcription_runtime": 11.4402, |
|
"eval_peoplespeech-clean-transcription_samples_per_second": 5.594, |
|
"eval_peoplespeech-clean-transcription_steps_per_second": 0.699, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.05381851009531084, |
|
"grad_norm": 0.06494140625, |
|
"learning_rate": 0.0018812295824940284, |
|
"loss": 0.0955, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.055554591066127326, |
|
"grad_norm": 0.044677734375, |
|
"learning_rate": 0.0018699063724087904, |
|
"loss": 0.0951, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.0572906720369438, |
|
"grad_norm": 0.0390625, |
|
"learning_rate": 0.0018581050358751443, |
|
"loss": 0.0947, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.05902675300776028, |
|
"grad_norm": 0.056396484375, |
|
"learning_rate": 0.0018458320592590974, |
|
"loss": 0.0939, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.060762833978576763, |
|
"grad_norm": 0.047119140625, |
|
"learning_rate": 0.0018330941881540914, |
|
"loss": 0.0941, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.06249891494939324, |
|
"grad_norm": 0.046630859375, |
|
"learning_rate": 0.0018198984236734246, |
|
"loss": 0.0927, |
|
"step": 3600 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 14400, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 3600, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.65445799352533e+17, |
|
"train_batch_size": 24, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|